Jon Taylor committed on
Commit
70de1d6
1 Parent(s): b80985f
Dockerfile CHANGED
@@ -44,4 +44,5 @@ COPY app/ app/
44
  COPY server.py server.py
45
 
46
  #ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
47
- CMD ["python3", "server.py"]
 
 
44
  COPY server.py server.py
45
 
46
  #ENV LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libtcmalloc.so.4
47
+ #CMD ["python3", "server.py"]
48
+ CMD ["python3", "app/pipeline_test.py"]
app/bot.py CHANGED
@@ -6,13 +6,14 @@ import logging
6
  import os
7
 
8
  from PIL import Image
9
- from typing import Any, Mapping
10
 
11
  from daily import EventHandler, CallClient, Daily
12
  from datetime import datetime
13
  from dotenv import load_dotenv
14
 
15
  from auth import get_meeting_token, get_room_name
 
 
16
 
17
  load_dotenv()
18
 
@@ -22,10 +23,11 @@ class DailyVision(EventHandler):
22
  room_url,
23
  room_name,
24
  expiration,
25
- bot_name="Daily Bot",
 
26
  ):
27
  self.__client = CallClient(event_handler=self)
28
- self.__pipeline = None
29
  self.__camera = None
30
  self.__time = time.time()
31
  self.__queue = queue.Queue()
@@ -34,6 +36,11 @@ class DailyVision(EventHandler):
34
  self.__room_url = room_url
35
  self.__room_name = room_name
36
  self.__expiration = expiration
 
 
 
 
 
37
 
38
  # Configure logger
39
  FORMAT = f"%(asctime)s {self.__room_url} %(message)s"
@@ -43,18 +50,18 @@ class DailyVision(EventHandler):
43
 
44
  self.logger.info(f"Expiration timer set to: {self.__expiration}")
45
 
46
- # Start thread
47
- self.__thread = threading.Thread(target = self.process_frames)
48
- self.__thread.start()
49
-
50
  def run(self, meeting_url, token):
51
  # Join
52
  self.logger.info(f"Connecting to room {meeting_url} as {self.__bot_name}")
53
  self.__client.set_user_name(self.__bot_name)
54
  self.__client.join(meeting_url, token, completion=self.on_joined)
55
-
56
  #self.__participant_id = self.client.participants()["local"]["id"]
57
 
 
 
 
 
58
  # Keep-alive on thread
59
  self.__thread.join()
60
 
@@ -79,7 +86,7 @@ class DailyVision(EventHandler):
79
  self.__camera = Daily.create_camera_device("camera",
80
  width = video_frame.width,
81
  height = video_frame.height,
82
- color_format="RGB")
83
  self.__client.update_inputs({
84
  "camera": {
85
  "isEnabled": True,
@@ -91,21 +98,25 @@ class DailyVision(EventHandler):
91
 
92
  def process_frames(self):
93
  while not self.__app_quit:
 
 
 
 
 
94
  # Check expiry timer
95
  if time.time() > self.__expiration:
96
  self.logger.info(f"Expiration timer exceeded. Exiting...")
97
  self.__app_quit = True
98
- return
99
  try:
100
  video_frame = self.__queue.get(timeout=5)
101
 
102
  if video_frame:
103
  image = Image.frombytes("RGBA", (video_frame.width, video_frame.height), video_frame.buffer)
104
- result = self.__pipeline(image)
105
-
106
- pil = Image.fromarray(result.render()[0], mode="RGB").tobytes()
107
 
108
- self.__camera.write_frame(pil)
109
  except queue.Empty:
110
  pass
111
 
@@ -138,6 +149,7 @@ def main():
138
  parser.add_argument("-t", "--private", type=bool, help="Is this room private?", default=True)
139
  parser.add_argument("-n", "--bot-name", type=str, help="Name of the bot", default="Daily Bot")
140
  parser.add_argument("-e", "--expiration", type=int, help="Duration of bot", default=os.getenv("BOT_MAX_DURATION", 300))
 
141
  args = parser.parse_args()
142
 
143
  Daily.init()
@@ -150,14 +162,13 @@ def main():
150
  if args.private:
151
  token = get_meeting_token(room_name, args.api_key, expiration)
152
 
153
- app = DailyVision(args.url, room_name, expiration, args.bot_name)
154
 
155
  try :
156
  app.run(args.url, token)
157
  except KeyboardInterrupt:
158
  print("Ctrl-C detected. Exiting!")
159
  finally:
160
- print("Bot loop completed. Exiting")
161
  app.leave()
162
 
163
  # Let leave finish
 
6
  import os
7
 
8
  from PIL import Image
 
9
 
10
  from daily import EventHandler, CallClient, Daily
11
  from datetime import datetime
12
  from dotenv import load_dotenv
13
 
14
  from auth import get_meeting_token, get_room_name
15
+ from pipeline import Pipeline
16
+ from device import device, torch_dtype
17
 
18
  load_dotenv()
19
 
 
23
  room_url,
24
  room_name,
25
  expiration,
26
+ idle,
27
+ bot_name="Daily Bot"
28
  ):
29
  self.__client = CallClient(event_handler=self)
30
+ self.__pipeline = Pipeline
31
  self.__camera = None
32
  self.__time = time.time()
33
  self.__queue = queue.Queue()
 
36
  self.__room_url = room_url
37
  self.__room_name = room_name
38
  self.__expiration = expiration
39
+ self.__idle = idle
40
+
41
+ # Create the pipeline (this might take a moment)
42
+ self.__pipeline = Pipeline(device, torch_dtype)
43
+ #print(self.__pipeline.InputParams.schema())
44
 
45
  # Configure logger
46
  FORMAT = f"%(asctime)s {self.__room_url} %(message)s"
 
50
 
51
  self.logger.info(f"Expiration timer set to: {self.__expiration}")
52
 
 
 
 
 
53
  def run(self, meeting_url, token):
54
  # Join
55
  self.logger.info(f"Connecting to room {meeting_url} as {self.__bot_name}")
56
  self.__client.set_user_name(self.__bot_name)
57
  self.__client.join(meeting_url, token, completion=self.on_joined)
58
+
59
  #self.__participant_id = self.client.participants()["local"]["id"]
60
 
61
+ # Start thread
62
+ self.__thread = threading.Thread(target = self.process_frames)
63
+ self.__thread.start()
64
+
65
  # Keep-alive on thread
66
  self.__thread.join()
67
 
 
86
  self.__camera = Daily.create_camera_device("camera",
87
  width = video_frame.width,
88
  height = video_frame.height,
89
+ color_format="RGBA")
90
  self.__client.update_inputs({
91
  "camera": {
92
  "isEnabled": True,
 
98
 
99
  def process_frames(self):
100
  while not self.__app_quit:
101
+ # Is anyone watching?
102
+ if not self.__idle and len(self.__client.participants()) < 2:
103
+ self.logger.info(f"No partcipants in channel. Exiting...")
104
+ self.__app_quit = True
105
+ break
106
  # Check expiry timer
107
  if time.time() > self.__expiration:
108
  self.logger.info(f"Expiration timer exceeded. Exiting...")
109
  self.__app_quit = True
110
+ break
111
  try:
112
  video_frame = self.__queue.get(timeout=5)
113
 
114
  if video_frame:
115
  image = Image.frombytes("RGBA", (video_frame.width, video_frame.height), video_frame.buffer)
116
+ #result = self.__pipeline(image)
117
+ #pil = Image.fromarray(result.render()[0], mode="RGB").tobytes()
 
118
 
119
+ self.__camera.write_frame(image.tobytes())
120
  except queue.Empty:
121
  pass
122
 
 
149
  parser.add_argument("-t", "--private", type=bool, help="Is this room private?", default=True)
150
  parser.add_argument("-n", "--bot-name", type=str, help="Name of the bot", default="Daily Bot")
151
  parser.add_argument("-e", "--expiration", type=int, help="Duration of bot", default=os.getenv("BOT_MAX_DURATION", 300))
152
+ parser.add_argument("-i", "--idle", type=bool, help="Wait for participants to join", default=False)
153
  args = parser.parse_args()
154
 
155
  Daily.init()
 
162
  if args.private:
163
  token = get_meeting_token(room_name, args.api_key, expiration)
164
 
165
+ app = DailyVision(args.url, room_name, expiration, args.idle, args.bot_name)
166
 
167
  try :
168
  app.run(args.url, token)
169
  except KeyboardInterrupt:
170
  print("Ctrl-C detected. Exiting!")
171
  finally:
 
172
  app.leave()
173
 
174
  # Let leave finish
app/device.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+
3
# Select the compute device and the matching tensor dtype for the pipelines.
#
# Precedence: Apple MPS (macOS on M1/M2/M3 chips) > CUDA > Intel XPU > CPU.
# MPS is checked first because the original code let it override every other
# choice whenever it was available.
mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
xpu_available = hasattr(torch, "xpu") and torch.xpu.is_available()

if mps_available:
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
elif xpu_available:
    device = torch.device("xpu")
else:
    device = torch.device("cpu")

# Half precision is only used on CUDA/XPU.  MPS already ran in float32; the
# CPU fallback now does too, because many CPU kernels are not implemented for
# float16 and loading the models in half precision there fails at inference.
torch_dtype = torch.float16 if device.type in ("cuda", "xpu") else torch.float32
app/pipeline.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from diffusers import (
2
+ StableDiffusionControlNetImg2ImgPipeline,
3
+ ControlNetModel,
4
+ LCMScheduler,
5
+ AutoencoderTiny,
6
+ )
7
+ from compel import Compel
8
+ import torch
9
+ from utils.canny_gpu import SobelOperator
10
+
11
+ try:
12
+ import intel_extension_for_pytorch as ipex # type: ignore
13
+ except:
14
+ pass
15
+
16
+ import psutil
17
+ from pydantic import BaseModel, Field
18
+ from PIL import Image
19
+ import math
20
+ import time
21
+ import os
22
+
23
# Hugging Face model identifiers used to assemble the pipeline.
taesd_model = "madebyollin/taesd"
controlnet_model = "thibaud/controlnet-sd21-canny-diffusers"
base_model = "stabilityai/sd-turbo"

default_prompt = "Portrait of The Joker halloween costume, face painting, with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"


def _env_flag(name: str, default: bool = False) -> bool:
    """Return environment variable *name* interpreted as a boolean.

    An unset variable yields *default*.  The values "", "0", "false", "no"
    and "off" (case-insensitive) are False; any other value is True, so every
    string that previously enabled a feature still does.
    """
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() not in ("", "0", "false", "no", "off")


class Pipeline:
    """ControlNet (canny) img2img pipeline built on `base_model` with a TAESD VAE.

    Construction loads all models, moves them to the requested device and
    runs one warm-up generation.  No inference method is defined here; callers
    currently have access to ``self.pipe`` and ``self.canny_torch`` only.
    """

    class Info(BaseModel):
        # NOTE(review): the labels mention "SD 1.5 Turbo" while `base_model`
        # is "stabilityai/sd-turbo" -- confirm which name is intended.
        name: str = "controlnet+sd15Turbo"
        title: str = "SDv1.5 Turbo + Controlnet"
        description: str = "Generates an image from a text prompt"
        input_mode: str = "image"

    class InputParams(BaseModel):
        # NOTE(review): `min`, `max`, `step`, `field`, `hide`, `disabled` and
        # `id` are passed to Field as extra keyword arguments; they look like
        # UI hints rather than validation constraints (width/height default
        # to 512 although max=15) -- confirm against the consumer.
        prompt: str = Field(
            default_prompt,
            title="Prompt",
            field="textarea",
            id="prompt",
        )
        seed: int = Field(
            4402026899276587, min=0, title="Seed", field="seed", hide=True, id="seed"
        )
        steps: int = Field(
            1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
        )
        width: int = Field(
            512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
        )
        height: int = Field(
            512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
        )
        guidance_scale: float = Field(
            1.0,
            min=0,
            max=10,
            step=0.001,
            title="Guidance Scale",
            field="range",
            hide=True,
            id="guidance_scale",
        )
        strength: float = Field(
            0.8,
            min=0.10,
            max=1.0,
            step=0.001,
            title="Strength",
            field="range",
            hide=True,
            id="strength",
        )
        controlnet_scale: float = Field(
            0.2,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet Scale",
            field="range",
            hide=True,
            id="controlnet_scale",
        )
        controlnet_start: float = Field(
            0.0,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet Start",
            field="range",
            hide=True,
            id="controlnet_start",
        )
        controlnet_end: float = Field(
            1.0,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet End",
            field="range",
            hide=True,
            id="controlnet_end",
        )
        # NOTE(review): the default low threshold (0.31) is above the default
        # high threshold (0.125) -- confirm this is intentional.
        canny_low_threshold: float = Field(
            0.31,
            min=0,
            max=1.0,
            step=0.001,
            title="Canny Low Threshold",
            field="range",
            hide=True,
            id="canny_low_threshold",
        )
        canny_high_threshold: float = Field(
            0.125,
            min=0,
            max=1.0,
            step=0.001,
            title="Canny High Threshold",
            field="range",
            hide=True,
            id="canny_high_threshold",
        )
        debug_canny: bool = Field(
            False,
            title="Debug Canny",
            field="checkbox",
            hide=True,
            id="debug_canny",
        )

    def __init__(self, device: torch.device, torch_dtype: torch.dtype) -> None:
        """Load every model, move it to *device* and run one warm-up pass.

        Args:
            device: Target device for the ControlNet, the pipeline and the
                Sobel edge operator.
            torch_dtype: dtype used when loading the ControlNet and the tiny
                VAE and when casting the assembled pipeline.
        """
        controlnet_canny = ControlNetModel.from_pretrained(
            controlnet_model, torch_dtype=torch_dtype
        ).to(device)

        # Kept for backward compatibility; nothing in this class fills it.
        self.pipes = {}

        self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
            base_model,
            controlnet=controlnet_canny,
        )

        # Swap in the tiny autoencoder once, before the pipeline is moved to
        # the device.  The original loaded the very same weights a second time
        # further down, which only repeated the load.
        self.pipe.vae = AutoencoderTiny.from_pretrained(
            taesd_model, torch_dtype=torch_dtype, use_safetensors=True
        ).to(device)
        self.canny_torch = SobelOperator(device=device)

        self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
        self.pipe.set_progress_bar_config(disable=False)
        # A single .to() handles both device and dtype.
        self.pipe.to(device=device, dtype=torch_dtype)

        if device.type != "mps":
            self.pipe.unet.to(memory_format=torch.channels_last)

        # Trade speed for memory on hosts with less than 64 GiB of RAM.
        if psutil.virtual_memory().total < 64 * 1024**3:
            self.pipe.enable_attention_slicing()

        self.pipe.compel_proc = Compel(
            tokenizer=self.pipe.tokenizer,
            text_encoder=self.pipe.text_encoder,
            truncate_long_prompts=True,
        )

        # os.getenv() returns a string, so TORCH_COMPILE="False" used to
        # enable compilation; parse the value as a real boolean instead.
        if _env_flag("TORCH_COMPILE", default=False):
            self.pipe.unet = torch.compile(
                self.pipe.unet, mode="reduce-overhead", fullgraph=True
            )
            self.pipe.vae = torch.compile(
                self.pipe.vae, mode="reduce-overhead", fullgraph=True
            )

        # Warm-up generation with blank images so the first real request does
        # not pay the one-off initialisation cost.
        self.pipe(
            prompt="warmup",
            image=[Image.new("RGB", (768, 768))],
            control_image=[Image.new("RGB", (768, 768))],
        )
app/pipelineSDXLTurbo.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from diffusers import (
2
+ StableDiffusionXLControlNetImg2ImgPipeline,
3
+ ControlNetModel,
4
+ AutoencoderKL,
5
+ AutoencoderTiny,
6
+ )
7
+ from compel import Compel, ReturnedEmbeddingsType
8
+ from pydantic import BaseModel, Field
9
+ from utils.canny_gpu import SobelOperator
10
+ import torch
11
+
12
+ try:
13
+ import intel_extension_for_pytorch as ipex # type: ignore
14
+ except:
15
+ pass
16
+
17
+ import psutil
18
+ from PIL import Image
19
+ import math
20
+ import time
21
+
22
+
23
# Hugging Face model identifiers used to assemble the pipeline.
controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
model_id = "stabilityai/sdxl-turbo"
taesd_model = "madebyollin/taesdxl"

default_prompt = "Portrait of The Joker halloween costume, face painting, with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"


def _env_flag(name: str, default: bool) -> bool:
    """Return environment variable *name* interpreted as a boolean.

    An unset variable yields *default*.  The values "", "0", "false", "no"
    and "off" (case-insensitive) are False; any other value is True.
    """
    import os  # local import: this module has no file-level `import os`

    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() not in ("", "0", "false", "no", "off")


class Pipeline:
    """SDXL Turbo ControlNet (canny) img2img pipeline.

    Construction loads all models, moves them to the requested device and
    runs one warm-up generation.  No inference method is defined here; callers
    currently have access to ``self.pipe`` and ``self.canny_torch`` only.

    Environment variables (both default to enabled, matching the previous
    unconditional behaviour):
        USE_TAESD: replace the VAE with the tiny autoencoder `taesd_model`.
        TORCH_COMPILE: wrap the UNet and VAE with ``torch.compile``.
    """

    class Info(BaseModel):
        name: str = "controlnet+SDXL+Turbo"
        title: str = "SDXL Turbo + Controlnet"
        description: str = "Generates an image from a text prompt"
        input_mode: str = "image"

    class InputParams(BaseModel):
        # NOTE(review): `min`, `max`, `step`, `field`, `hide`, `disabled` and
        # `id` are passed to Field as extra keyword arguments; they look like
        # UI hints rather than validation constraints (width/height default
        # to 512 although max=15) -- confirm against the consumer.
        prompt: str = Field(
            default_prompt,
            title="Prompt",
            field="textarea",
            id="prompt",
        )
        negative_prompt: str = Field(
            default_negative_prompt,
            title="Negative Prompt",
            field="textarea",
            id="negative_prompt",
            hide=True,
        )
        seed: int = Field(
            2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
        )
        steps: int = Field(
            2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
        )
        width: int = Field(
            512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
        )
        height: int = Field(
            512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
        )
        guidance_scale: float = Field(
            1.0,
            min=0,
            max=10,
            step=0.001,
            title="Guidance Scale",
            field="range",
            hide=True,
            id="guidance_scale",
        )
        strength: float = Field(
            0.5,
            min=0.25,
            max=1.0,
            step=0.001,
            title="Strength",
            field="range",
            hide=True,
            id="strength",
        )
        controlnet_scale: float = Field(
            0.5,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet Scale",
            field="range",
            hide=True,
            id="controlnet_scale",
        )
        controlnet_start: float = Field(
            0.0,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet Start",
            field="range",
            hide=True,
            id="controlnet_start",
        )
        controlnet_end: float = Field(
            1.0,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet End",
            field="range",
            hide=True,
            id="controlnet_end",
        )
        # NOTE(review): the default low threshold (0.31) is above the default
        # high threshold (0.125) -- confirm this is intentional.
        canny_low_threshold: float = Field(
            0.31,
            min=0,
            max=1.0,
            step=0.001,
            title="Canny Low Threshold",
            field="range",
            hide=True,
            id="canny_low_threshold",
        )
        canny_high_threshold: float = Field(
            0.125,
            min=0,
            max=1.0,
            step=0.001,
            title="Canny High Threshold",
            field="range",
            hide=True,
            id="canny_high_threshold",
        )
        debug_canny: bool = Field(
            False,
            title="Debug Canny",
            field="checkbox",
            hide=True,
            id="debug_canny",
        )

    def __init__(self, device: torch.device, torch_dtype: torch.dtype) -> None:
        """Load every model, move it to *device* and run one warm-up pass.

        Args:
            device: Target device for the ControlNet, the pipeline and the
                Sobel edge operator.
            torch_dtype: dtype used when loading the ControlNet and the VAEs
                and when casting the assembled pipeline.
        """
        controlnet_canny = ControlNetModel.from_pretrained(
            controlnet_model, torch_dtype=torch_dtype
        ).to(device)

        # This VAE is what the pipeline decodes with when USE_TAESD is off;
        # with USE_TAESD on (the default) it is replaced further down.
        vae = AutoencoderKL.from_pretrained(
            "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
        )

        self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
            model_id,
            controlnet=controlnet_canny,
            vae=vae,
        )

        self.canny_torch = SobelOperator(device=device)

        self.pipe.set_progress_bar_config(disable=True)
        # A single .to() handles both device and dtype.
        self.pipe.to(device=device, dtype=torch_dtype)
        if device.type != "mps":
            self.pipe.unet.to(memory_format=torch.channels_last)

        # Trade speed for memory on hosts with less than 64 GiB of RAM.
        if psutil.virtual_memory().total < 64 * 1024**3:
            self.pipe.enable_attention_slicing()

        self.pipe.compel_proc = Compel(
            tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
            text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
            returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
            requires_pooled=[False, True],
        )

        # The two steps below used to sit under commented-out `args.*` guards
        # and therefore always ran.  They are switchable again through the
        # environment, with "enabled" as the default so behaviour is unchanged
        # unless a variable is explicitly set to a false value.
        if _env_flag("USE_TAESD", default=True):
            self.pipe.vae = AutoencoderTiny.from_pretrained(
                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
            ).to(device)

        if _env_flag("TORCH_COMPILE", default=True):
            self.pipe.unet = torch.compile(
                self.pipe.unet, mode="reduce-overhead", fullgraph=True
            )
            self.pipe.vae = torch.compile(
                self.pipe.vae, mode="reduce-overhead", fullgraph=True
            )

        # Warm-up generation with blank images so the first real request does
        # not pay the one-off initialisation cost.
        self.pipe(
            prompt="warmup",
            image=[Image.new("RGB", (512, 512))],
            control_image=[Image.new("RGB", (512, 512))],
        )
app/pipeline_test.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from pipeline import Pipeline
2
+ from device import device, torch_dtype
3
+
4
def main():
    """Build the pipeline once and print the schema of its input parameters.

    Constructing ``Pipeline`` loads the models and runs its warm-up pass, so
    this doubles as a smoke test for the model setup.
    """
    pipeline = Pipeline(device, torch_dtype)
    params_model = pipeline.InputParams

    # `pydantic` is not pinned in requirements.txt.  pydantic v2 exposes the
    # schema as `model_json_schema()` and deprecates `schema()`, while v1 only
    # has `schema()`; prefer the new name and fall back to the old one.
    schema_fn = getattr(params_model, "model_json_schema", None) or params_model.schema
    print(schema_fn())


if __name__ == "__main__":
    main()
app/utils/__init__.py ADDED
File without changes
app/utils/canny_gpu.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torchvision.transforms import ToTensor, ToPILImage
4
+ from PIL import Image
5
+
6
class SobelOperator(nn.Module):
    """Sobel edge detector that turns a PIL image into a thresholded edge map.

    Two fixed 3x3 convolutions compute the horizontal and vertical gradients
    of the grayscale image; their magnitude is normalised by its maximum and
    then thresholded.
    """

    def __init__(self, device="cuda"):
        """Create the two gradient convolutions on *device*.

        Args:
            device: Device the convolutions and their kernels are placed on;
                inputs are moved to the same device in ``forward``.
        """
        super().__init__()
        self.device = device
        self.edge_conv_x = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
            self.device
        )
        self.edge_conv_y = nn.Conv2d(1, 1, kernel_size=3, padding=1, bias=False).to(
            self.device
        )

        sobel_kernel_x = torch.tensor(
            [[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0], [-1.0, 0.0, 1.0]], device=self.device
        )
        sobel_kernel_y = torch.tensor(
            [[-1.0, -2.0, -1.0], [0.0, 0.0, 0.0], [1.0, 2.0, 1.0]], device=self.device
        )

        # The kernels are constants, not learnable weights, so they are
        # registered with requires_grad=False.
        self.edge_conv_x.weight = nn.Parameter(
            sobel_kernel_x.view((1, 1, 3, 3)), requires_grad=False
        )
        self.edge_conv_y.weight = nn.Parameter(
            sobel_kernel_y.view((1, 1, 3, 3)), requires_grad=False
        )

    @torch.no_grad()
    def forward(self, image: "Image.Image", low_threshold: float, high_threshold: float):
        """Return the thresholded Sobel edge map of *image* as a PIL image.

        Args:
            image: Input PIL image; it is converted to grayscale ("L") first.
            low_threshold: Normalised magnitudes at or below this become 0.0.
            high_threshold: Normalised magnitudes at or above this become 1.0.

        Returns:
            A PIL image built from the edge tensor after it is moved to CPU.
        """
        # Convert PIL image to a batched tensor on the operator's device.
        image_gray = image.convert("L")
        image_tensor = ToTensor()(image_gray).unsqueeze(0).to(self.device)

        # Gradient magnitude from the horizontal and vertical responses.
        edge_x = self.edge_conv_x(image_tensor)
        edge_y = self.edge_conv_y(image_tensor)
        edge = torch.sqrt(edge_x**2 + edge_y**2)

        # Normalise to 0-1.  A perfectly flat image has a maximum of zero;
        # clamping the divisor keeps the result at 0 instead of NaN.
        peak = edge.max().clamp_min(torch.finfo(edge.dtype).eps)
        edge = edge / peak

        # The high threshold is applied first, then the low one; values
        # strictly between the two keep their normalised magnitude.
        edge[edge >= high_threshold] = 1.0
        edge[edge <= low_threshold] = 0.0

        # Convert the result back to a PIL image.
        return ToPILImage()(edge.squeeze(0).cpu())
env.example ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ DAILY_API_PATH=https://api.daily.co/v1
2
+ DAILY_API_KEY=
3
+ DAILY_ROOM_URL=
4
+ BOT_MAX_DURATION=300
5
+ SAFETY_CHECKER="True"
6
+ TORCH_COMPILE="True"
7
+ USE_TAESD="True"
requirements.txt CHANGED
@@ -6,4 +6,16 @@ requests
6
  fastapi
7
  uvicorn[standard]
8
  requests
9
- pillow
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  fastapi
7
  uvicorn[standard]
8
  requests
9
+ pillow
10
+ pydantic
11
+ utils
12
+ psutil
13
+
14
+ transformers==4.35.2
15
+ torch==2.1.1
16
+ diffusers[torch]
17
+ accelerate==0.24.0
18
+ compel==2.0.2
19
+ controlnet-aux==0.0.7
20
+ peft==0.6.0
21
+