Commit 70de1d6
Jon Taylor committed
Parent(s): b80985f

pipelines

Files changed:
- Dockerfile +2 -1
- app/bot.py +27 -16
- app/device.py +12 -0
- app/pipeline.py +183 -0
- app/pipelineSDXLTurbo.py +188 -0
- app/pipeline_test.py +9 -0
- app/utils/__init__.py +0 -0
- app/utils/canny_gpu.py +44 -0
- env.example +7 -0
- requirements.txt +13 -1
Dockerfile
CHANGED
@@ -44,4 +44,5 @@ COPY app/ app/
 COPY server.py server.py
 
 #ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
-CMD ["python3", "server.py"]
+#CMD ["python3", "server.py"]
+CMD ["python3", "app/pipeline_test.py"]

app/bot.py
CHANGED
@@ -6,13 +6,14 @@ import logging
 import os
 
 from PIL import Image
-from typing import Any, Mapping
 
 from daily import EventHandler, CallClient, Daily
 from datetime import datetime
 from dotenv import load_dotenv
 
 from auth import get_meeting_token, get_room_name
+from pipeline import Pipeline
+from device import device, torch_dtype
 
 load_dotenv()
 
@@ -22,10 +23,11 @@ class DailyVision(EventHandler):
         room_url,
         room_name,
         expiration,
-
+        idle,
+        bot_name="Daily Bot"
     ):
         self.__client = CallClient(event_handler=self)
-        self.__pipeline =
+        self.__pipeline = Pipeline
         self.__camera = None
         self.__time = time.time()
         self.__queue = queue.Queue()
@@ -34,6 +36,11 @@ class DailyVision(EventHandler):
         self.__room_url = room_url
         self.__room_name = room_name
         self.__expiration = expiration
+        self.__idle = idle
+
+        # Create the pipeline (this might take a moment)
+        self.__pipeline = Pipeline(device, torch_dtype)
+        #print(self.__pipeline.InputParams.schema())
 
         # Configure logger
         FORMAT = f"%(asctime)s {self.__room_url} %(message)s"
@@ -43,18 +50,18 @@ class DailyVision(EventHandler):
 
         self.logger.info(f"Expiration timer set to: {self.__expiration}")
 
-        # Start thread
-        self.__thread = threading.Thread(target = self.process_frames)
-        self.__thread.start()
-
     def run(self, meeting_url, token):
         # Join
         self.logger.info(f"Connecting to room {meeting_url} as {self.__bot_name}")
         self.__client.set_user_name(self.__bot_name)
         self.__client.join(meeting_url, token, completion=self.on_joined)
-
+
         #self.__participant_id = self.client.participants()["local"]["id"]
 
+        # Start thread
+        self.__thread = threading.Thread(target = self.process_frames)
+        self.__thread.start()
+
         # Keep-alive on thread
         self.__thread.join()
 
@@ -79,7 +86,7 @@ class DailyVision(EventHandler):
         self.__camera = Daily.create_camera_device("camera",
                                                    width = video_frame.width,
                                                    height = video_frame.height,
-                                                   color_format="
+                                                   color_format="RGBA")
         self.__client.update_inputs({
             "camera": {
                 "isEnabled": True,
@@ -91,21 +98,25 @@ class DailyVision(EventHandler):
 
     def process_frames(self):
         while not self.__app_quit:
+            # Is anyone watching?
+            if not self.__idle and len(self.__client.participants()) < 2:
+                self.logger.info(f"No partcipants in channel. Exiting...")
+                self.__app_quit = True
+                break
             # Check expiry timer
             if time.time() > self.__expiration:
                 self.logger.info(f"Expiration timer exceeded. Exiting...")
                 self.__app_quit = True
-
+                break
             try:
                 video_frame = self.__queue.get(timeout=5)
 
                 if video_frame:
                     image = Image.frombytes("RGBA", (video_frame.width, video_frame.height), video_frame.buffer)
-                    result = self.__pipeline(image)
-
-                    pil = Image.fromarray(result.render()[0], mode="RGB").tobytes()
+                    #result = self.__pipeline(image)
+                    #pil = Image.fromarray(result.render()[0], mode="RGB").tobytes()
 
-                    self.__camera.write_frame(
+                    self.__camera.write_frame(image.tobytes())
             except queue.Empty:
                 pass
 
@@ -138,6 +149,7 @@ def main():
     parser.add_argument("-t", "--private", type=bool, help="Is this room private?", default=True)
     parser.add_argument("-n", "--bot-name", type=str, help="Name of the bot", default="Daily Bot")
    parser.add_argument("-e", "--expiration", type=int, help="Duration of bot", default=os.getenv("BOT_MAX_DURATION", 300))
+    parser.add_argument("-i", "--idle", type=bool, help="Wait for participants to join", default=False)
     args = parser.parse_args()
 
     Daily.init()
@@ -150,14 +162,13 @@ def main():
     if args.private:
         token = get_meeting_token(room_name, args.api_key, expiration)
 
-    app = DailyVision(args.url, room_name, expiration, args.bot_name)
+    app = DailyVision(args.url, room_name, expiration, args.idle, args.bot_name)
 
     try :
        app.run(args.url, token)
     except KeyboardInterrupt:
         print("Ctrl-C detected. Exiting!")
     finally:
-        print("Bot loop completed. Exiting")
         app.leave()
 
     # Let leave finish
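
Note: the updated main() threads the new idle flag through to DailyVision. A minimal sketch of the same wiring, assuming it runs from the app/ directory; the room URL, API key value, and expiration arithmetic below are placeholders for what main() actually derives from argparse and the environment:

    # Hedged sketch only: room URL, API key, and expiration are illustrative.
    import time

    from daily import Daily
    from auth import get_meeting_token, get_room_name
    from bot import DailyVision

    Daily.init()

    room_url = "https://example.daily.co/my-room"   # hypothetical room
    room_name = get_room_name(room_url)             # assumed helper usage
    expiration = time.time() + 300                  # absolute expiry, as process_frames expects
    token = get_meeting_token(room_name, "MY_API_KEY", expiration)

    app = DailyVision(room_url, room_name, expiration, idle=False, bot_name="Daily Bot")
    try:
        app.run(room_url, token)                    # joins, starts process_frames, blocks on the thread
    except KeyboardInterrupt:
        print("Ctrl-C detected. Exiting!")
    finally:
        app.leave()
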
app/device.py
ADDED
@@ -0,0 +1,12 @@
+import torch
+
+# check if MPS is available OSX only M1/M2/M3 chips
+mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
+xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()
+device = torch.device(
+    "cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu"
+)
+torch_dtype = torch.float16
+if mps_available:
+    device = torch.device("mps")
+    torch_dtype = torch.float32
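
The new device helper centralizes device selection: CUDA first, then Intel XPU, otherwise CPU, with an MPS override that also switches the dtype to float32. A minimal usage sketch, assuming it is imported from the app/ directory the way bot.py and pipeline_test.py do:

    # Minimal sketch: consume the shared device/dtype the same way the pipelines do.
    import torch

    from device import device, torch_dtype

    print(f"Selected device: {device}, dtype: {torch_dtype}")

    # Any tensor or model the app builds is placed accordingly:
    frame = torch.zeros(1, 3, 512, 512, dtype=torch_dtype, device=device)
    print(frame.shape, frame.device, frame.dtype)
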
app/pipeline.py
ADDED
@@ -0,0 +1,183 @@
+from diffusers import (
+    StableDiffusionControlNetImg2ImgPipeline,
+    ControlNetModel,
+    LCMScheduler,
+    AutoencoderTiny,
+)
+from compel import Compel
+import torch
+from utils.canny_gpu import SobelOperator
+
+try:
+    import intel_extension_for_pytorch as ipex  # type: ignore
+except:
+    pass
+
+import psutil
+from pydantic import BaseModel, Field
+from PIL import Image
+import math
+import time
+import os
+
+taesd_model = "madebyollin/taesd"
+controlnet_model = "thibaud/controlnet-sd21-canny-diffusers"
+base_model = "stabilityai/sd-turbo"
+
+default_prompt = "Portrait of The Joker halloween costume, face painting, with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
+default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
+
+class Pipeline:
+    class Info(BaseModel):
+        name: str = "controlnet+sd15Turbo"
+        title: str = "SDv1.5 Turbo + Controlnet"
+        description: str = "Generates an image from a text prompt"
+        input_mode: str = "image"
+
+    class InputParams(BaseModel):
+        prompt: str = Field(
+            default_prompt,
+            title="Prompt",
+            field="textarea",
+            id="prompt",
+        )
+        seed: int = Field(
+            4402026899276587, min=0, title="Seed", field="seed", hide=True, id="seed"
+        )
+        steps: int = Field(
+            1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
+        )
+        width: int = Field(
+            512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
+        )
+        height: int = Field(
+            512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
+        )
+        guidance_scale: float = Field(
+            1.0,
+            min=0,
+            max=10,
+            step=0.001,
+            title="Guidance Scale",
+            field="range",
+            hide=True,
+            id="guidance_scale",
+        )
+        strength: float = Field(
+            0.8,
+            min=0.10,
+            max=1.0,
+            step=0.001,
+            title="Strength",
+            field="range",
+            hide=True,
+            id="strength",
+        )
+        controlnet_scale: float = Field(
+            0.2,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Controlnet Scale",
+            field="range",
+            hide=True,
+            id="controlnet_scale",
+        )
+        controlnet_start: float = Field(
+            0.0,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Controlnet Start",
+            field="range",
+            hide=True,
+            id="controlnet_start",
+        )
+        controlnet_end: float = Field(
+            1.0,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Controlnet End",
+            field="range",
+            hide=True,
+            id="controlnet_end",
+        )
+        canny_low_threshold: float = Field(
+            0.31,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Canny Low Threshold",
+            field="range",
+            hide=True,
+            id="canny_low_threshold",
+        )
+        canny_high_threshold: float = Field(
+            0.125,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Canny High Threshold",
+            field="range",
+            hide=True,
+            id="canny_high_threshold",
+        )
+        debug_canny: bool = Field(
+            False,
+            title="Debug Canny",
+            field="checkbox",
+            hide=True,
+            id="debug_canny",
+        )
+
+    def __init__(self, device: torch.device, torch_dtype: torch.dtype):
+        controlnet_canny = ControlNetModel.from_pretrained(
+            controlnet_model, torch_dtype=torch_dtype
+        ).to(device)
+
+        self.pipes = {}
+
+        self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
+            base_model,
+            controlnet=controlnet_canny,
+        )
+
+        self.pipe.vae = AutoencoderTiny.from_pretrained(
+            taesd_model, torch_dtype=torch_dtype, use_safetensors=True
+        ).to(device)
+        self.canny_torch = SobelOperator(device=device)
+
+        self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
+        self.pipe.set_progress_bar_config(disable=False)
+        self.pipe.to(device=device, dtype=torch_dtype).to(device)
+
+        if device.type != "mps":
+            self.pipe.unet.to(memory_format=torch.channels_last)
+
+        if psutil.virtual_memory().total < 64 * 1024**3:
+            self.pipe.enable_attention_slicing()
+
+        self.pipe.compel_proc = Compel(
+            tokenizer=self.pipe.tokenizer,
+            text_encoder=self.pipe.text_encoder,
+            truncate_long_prompts=True,
+        )
+
+        self.pipe.vae = AutoencoderTiny.from_pretrained(
+            taesd_model, torch_dtype=torch_dtype, use_safetensors=True
+        ).to(device)
+
+        if os.getenv("TORCH_COMPILE", False):
+            self.pipe.unet = torch.compile(
+                self.pipe.unet, mode="reduce-overhead", fullgraph=True
+            )
+            self.pipe.vae = torch.compile(
+                self.pipe.vae, mode="reduce-overhead", fullgraph=True
+            )
+
+        self.pipe(
+            prompt="warmup",
+            image=[Image.new("RGB", (768, 768))],
+            control_image=[Image.new("RGB", (768, 768))],
+        )
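
This Pipeline wraps diffusers' StableDiffusionControlNetImg2ImgPipeline (sd-turbo base, SD2.1 canny ControlNet, TAESD decoder, LCM scheduler) and does all the heavy lifting in __init__, including a warm-up generation. InputParams is a pydantic model describing the tunable parameters. A short sketch of constructing it, along the lines of app/pipeline_test.py; model weights are downloaded on first use, so the first run is slow:

    # Sketch assuming it is run from app/ so "pipeline" and "device" are importable.
    from device import device, torch_dtype
    from pipeline import Pipeline

    params = Pipeline.InputParams()          # defaults declared above
    print(params.steps, params.strength, params.controlnet_scale)

    pipe = Pipeline(device, torch_dtype)     # loads the models and runs the warmup pass
    print(pipe.InputParams.schema()["title"])
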
app/pipelineSDXLTurbo.py
ADDED
@@ -0,0 +1,188 @@
+from diffusers import (
+    StableDiffusionXLControlNetImg2ImgPipeline,
+    ControlNetModel,
+    AutoencoderKL,
+    AutoencoderTiny,
+)
+from compel import Compel, ReturnedEmbeddingsType
+from pydantic import BaseModel, Field
+from utils.canny_gpu import SobelOperator
+import torch
+
+try:
+    import intel_extension_for_pytorch as ipex  # type: ignore
+except:
+    pass
+
+import psutil
+from PIL import Image
+import math
+import time
+
+
+controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
+model_id = "stabilityai/sdxl-turbo"
+taesd_model = "madebyollin/taesdxl"
+
+default_prompt = "Portrait of The Joker halloween costume, face painting, with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
+default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
+
+class Pipeline:
+    class Info(BaseModel):
+        name: str = "controlnet+SDXL+Turbo"
+        title: str = "SDXL Turbo + Controlnet"
+        description: str = "Generates an image from a text prompt"
+        input_mode: str = "image"
+
+    class InputParams(BaseModel):
+        prompt: str = Field(
+            default_prompt,
+            title="Prompt",
+            field="textarea",
+            id="prompt",
+        )
+        negative_prompt: str = Field(
+            default_negative_prompt,
+            title="Negative Prompt",
+            field="textarea",
+            id="negative_prompt",
+            hide=True,
+        )
+        seed: int = Field(
+            2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
+        )
+        steps: int = Field(
+            2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
+        )
+        width: int = Field(
+            512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
+        )
+        height: int = Field(
+            512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
+        )
+        guidance_scale: float = Field(
+            1.0,
+            min=0,
+            max=10,
+            step=0.001,
+            title="Guidance Scale",
+            field="range",
+            hide=True,
+            id="guidance_scale",
+        )
+        strength: float = Field(
+            0.5,
+            min=0.25,
+            max=1.0,
+            step=0.001,
+            title="Strength",
+            field="range",
+            hide=True,
+            id="strength",
+        )
+        controlnet_scale: float = Field(
+            0.5,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Controlnet Scale",
+            field="range",
+            hide=True,
+            id="controlnet_scale",
+        )
+        controlnet_start: float = Field(
+            0.0,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Controlnet Start",
+            field="range",
+            hide=True,
+            id="controlnet_start",
+        )
+        controlnet_end: float = Field(
+            1.0,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Controlnet End",
+            field="range",
+            hide=True,
+            id="controlnet_end",
+        )
+        canny_low_threshold: float = Field(
+            0.31,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Canny Low Threshold",
+            field="range",
+            hide=True,
+            id="canny_low_threshold",
+        )
+        canny_high_threshold: float = Field(
+            0.125,
+            min=0,
+            max=1.0,
+            step=0.001,
+            title="Canny High Threshold",
+            field="range",
+            hide=True,
+            id="canny_high_threshold",
+        )
+        debug_canny: bool = Field(
+            False,
+            title="Debug Canny",
+            field="checkbox",
+            hide=True,
+            id="debug_canny",
+        )
+
+    def __init__(self, device: torch.device, torch_dtype: torch.dtype):
+        controlnet_canny = ControlNetModel.from_pretrained(
+            controlnet_model, torch_dtype=torch_dtype
+        ).to(device)
+
+        vae = AutoencoderKL.from_pretrained(
+            "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
+        )
+
+        self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
+            model_id,
+            controlnet=controlnet_canny,
+            vae=vae,
+        )
+
+        self.canny_torch = SobelOperator(device=device)
+
+        self.pipe.set_progress_bar_config(disable=True)
+        self.pipe.to(device=device, dtype=torch_dtype).to(device)
+        if device.type != "mps":
+            self.pipe.unet.to(memory_format=torch.channels_last)
+
+        if psutil.virtual_memory().total < 64 * 1024**3:
+            self.pipe.enable_attention_slicing()
+
+        self.pipe.compel_proc = Compel(
+            tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
+            text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
+            returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+            requires_pooled=[False, True],
+        )
+        #if args.use_taesd:
+        self.pipe.vae = AutoencoderTiny.from_pretrained(
+            taesd_model, torch_dtype=torch_dtype, use_safetensors=True
+        ).to(device)
+
+        #if args.torch_compile:
+        self.pipe.unet = torch.compile(
+            self.pipe.unet, mode="reduce-overhead", fullgraph=True
+        )
+        self.pipe.vae = torch.compile(
+            self.pipe.vae, mode="reduce-overhead", fullgraph=True
+        )
+        self.pipe(
+            prompt="warmup",
+            image=[Image.new("RGB", (512, 512))],
+            control_image=[Image.new("RGB", (512, 512))],
+        )
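
pipelineSDXLTurbo.py mirrors pipeline.py but targets sdxl-turbo with the SDXL canny ControlNet, dual text encoders in Compel, and taesdxl; note that here the torch.compile calls run unconditionally (the args.torch_compile check is commented out). Since both modules expose a Pipeline class with the same constructor, switching variants should just be an import change, roughly:

    # Sketch: swap the SD-Turbo pipeline for the SDXL-Turbo one (assumes app/ on the path).
    from device import device, torch_dtype

    # from pipeline import Pipeline           # SD-Turbo + SD2.1 canny ControlNet
    from pipelineSDXLTurbo import Pipeline     # SDXL-Turbo + SDXL canny ControlNet

    pipe = Pipeline(device, torch_dtype)       # heavier download and warmup than the SD-Turbo variant
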
app/pipeline_test.py
ADDED
@@ -0,0 +1,9 @@
+from pipeline import Pipeline
+from device import device, torch_dtype
+
+def main():
+    p = Pipeline(device, torch_dtype)
+    print(p.InputParams.schema())
+
+if __name__ == "__main__":
+    main()

app/utils/__init__.py
ADDED
File without changes
app/utils/canny_gpu.py
ADDED
@@ -0,0 +1,44 @@
+import torch
+import torch.nn as nn
+from torchvision.transforms import ToTensor, ToPILImage
+from PIL import Image
+
+class SobelOperator(nn.Module):
+    def __init__(self, device="cuda"):
+        super(SobelOperator, self).__init__()
+        self.device = device
+        self.edge_conv_x = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
+            self.device
+        )
+        self.edge_conv_y = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
+            self.device
+        )
+
+        sobel_kernel_x = torch.tensor(
+            [[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]], device=self.device
+        )
+        sobel_kernel_y = torch.tensor(
+            [[-1.0, -2.0, -1.0], [0.0, 0.0, 0.0], [1.0, 2.0, 1.0]], device=self.device
+        )
+
+        self.edge_conv_x.weight = nn.Parameter(sobel_kernel_x.view((1, 1, 3, 3)))
+        self.edge_conv_y.weight = nn.Parameter(sobel_kernel_y.view((1, 1, 3, 3)))
+
+    @torch.no_grad()
+    def forward(self, image: Image.Image, low_threshold: float, high_threshold: float):
+        # Convert PIL image to PyTorch tensor
+        image_gray = image.convert("L")
+        image_tensor = ToTensor()(image_gray).unsqueeze(0).to(self.device)
+
+        # Compute gradients
+        edge_x = self.edge_conv_x(image_tensor)
+        edge_y = self.edge_conv_y(image_tensor)
+        edge = torch.sqrt(edge_x**2 + edge_y**2)
+
+        # Apply thresholding
+        edge = edge / edge.max()  # Normalize to 0-1
+        edge[edge >= high_threshold] = 1.0
+        edge[edge <= low_threshold] = 0.0
+
+        # Convert the result back to a PIL image
+        return ToPILImage()(edge.squeeze(0).cpu())
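
SobelOperator produces the edge map the ControlNet branches condition on, computed on the same torch device as the rest of the pipeline rather than via OpenCV's Canny. A standalone sketch, using the default thresholds declared in the InputParams models above and a hypothetical local image:

    # Standalone sketch; "frame.png" is a hypothetical input image.
    from PIL import Image

    from utils.canny_gpu import SobelOperator

    sobel = SobelOperator(device="cpu")              # pass "cuda" on a GPU machine
    frame = Image.open("frame.png").convert("RGB")
    edges = sobel(frame, low_threshold=0.31, high_threshold=0.125)
    edges.save("frame_edges.png")                    # single-channel edge image for the ControlNet
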
env.example
ADDED
@@ -0,0 +1,7 @@
+DAILY_API_PATH=https://api.daily.co/v1
+DAILY_API_KEY=
+DAILY_ROOM_URL=
+BOT_MAX_DURATION=300
+SAFETY_CHECKER="True"
+TORCH_COMPILE="True"
+USE_TAESD="True"
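
env.example lists the settings the app reads through python-dotenv: bot.py loads .env and uses BOT_MAX_DURATION as the argparse default, while pipeline.py checks TORCH_COMPILE. A small consumption sketch; note that os.getenv returns strings, so any non-empty value (even "False") is truthy in that check:

    # Sketch only: copy env.example to .env first; the int() conversion is illustrative.
    import os

    from dotenv import load_dotenv

    load_dotenv()

    max_duration = int(os.getenv("BOT_MAX_DURATION", 300))   # bot lifetime in seconds
    api_key = os.getenv("DAILY_API_KEY")                      # used when minting meeting tokens

    # pipeline.py gates compilation with `if os.getenv("TORCH_COMPILE", False):`,
    # which treats any non-empty string as enabled.
    print(max_duration, bool(api_key), os.getenv("TORCH_COMPILE"))
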
requirements.txt
CHANGED
@@ -6,4 +6,16 @@ requests
 fastapi
 uvicorn[standard]
 requests
-pillow
+pillow
+pydantic
+utils
+psutil
+
+transformers==4.35.2
+torch==2.1.1
+diffusers[torch]
+accelerate==0.24.0
+compel==2.0.2
+controlnet-aux==0.0.7
+peft==0.6.0
+