Spaces:
Runtime error
Runtime error
unknown
committed on
Commit
•
93eb0ff
1
Parent(s):
be5b973
cuda
Browse files
app.py
CHANGED
@@ -136,7 +136,12 @@ class FoleyController:
|
|
136 |
cfg_scale_slider,
|
137 |
seed_textbox,
|
138 |
):
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
140 |
vision_transform_list = [
|
141 |
torchvision.transforms.Resize((128, 128)),
|
142 |
torchvision.transforms.CenterCrop((112, 112)),
|
@@ -153,7 +158,7 @@ class FoleyController:
|
|
153 |
frames, duration = read_frames_with_moviepy(input_video, max_frame_nums=max_frame_nums)
|
154 |
if duration >= 10:
|
155 |
duration = 10
|
156 |
-
time_frames = torch.FloatTensor(frames).permute(0, 3, 1, 2).to(
|
157 |
time_frames = video_transform(time_frames)
|
158 |
time_frames = {'frames': time_frames.unsqueeze(0).permute(0, 2, 1, 3, 4)}
|
159 |
preds = self.time_detector(time_frames)
|
@@ -165,7 +170,7 @@ class FoleyController:
|
|
165 |
# w -> b c h w
|
166 |
time_condition = torch.FloatTensor(time_condition).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(1, 1, 256, 1)
|
167 |
|
168 |
-
images = self.image_processor(images=frames, return_tensors="pt").to(
|
169 |
image_embeddings = self.image_encoder(**images).image_embeds
|
170 |
image_embeddings = torch.mean(image_embeddings, dim=0, keepdim=True).unsqueeze(0).unsqueeze(0)
|
171 |
neg_image_embeddings = torch.zeros_like(image_embeddings)
|
@@ -208,12 +213,6 @@ class FoleyController:
|
|
208 |
controller = FoleyController()
|
209 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
210 |
|
211 |
-
# move to gpu
|
212 |
-
controller.time_detector = controller.time_detector.to(device)
|
213 |
-
controller.pipeline = controller.pipeline.to(device)
|
214 |
-
controller.vocoder = controller.vocoder.to(device)
|
215 |
-
controller.image_encoder = controller.image_encoder.to(device)
|
216 |
-
|
217 |
with gr.Blocks(css=css) as demo:
|
218 |
gr.HTML(
|
219 |
'<h1 style="height: 136px; display: flex; align-items: center; justify-content: space-around;"><span style="height: 100%; width:136px;"><img src="file/foleycrafter.png" alt="logo" style="height: 100%; width:auto; object-fit: contain; margin: 0px 0px; padding: 0px 0px;"></span><strong style="font-size: 40px;">FoleyCrafter: Bring Silent Videos to Life with Lifelike and Synchronized Sounds</strong></h1>'
|
|
|
136 |
cfg_scale_slider,
|
137 |
seed_textbox,
|
138 |
):
|
139 |
+
device = 'cuda'
|
140 |
+
# move to gpu
|
141 |
+
controller.time_detector = controller.time_detector.to(device)
|
142 |
+
controller.pipeline = controller.pipeline.to(device)
|
143 |
+
controller.vocoder = controller.vocoder.to(device)
|
144 |
+
controller.image_encoder = controller.image_encoder.to(device)
|
145 |
vision_transform_list = [
|
146 |
torchvision.transforms.Resize((128, 128)),
|
147 |
torchvision.transforms.CenterCrop((112, 112)),
|
|
|
158 |
frames, duration = read_frames_with_moviepy(input_video, max_frame_nums=max_frame_nums)
|
159 |
if duration >= 10:
|
160 |
duration = 10
|
161 |
+
time_frames = torch.FloatTensor(frames).permute(0, 3, 1, 2).to(device)
|
162 |
time_frames = video_transform(time_frames)
|
163 |
time_frames = {'frames': time_frames.unsqueeze(0).permute(0, 2, 1, 3, 4)}
|
164 |
preds = self.time_detector(time_frames)
|
|
|
170 |
# w -> b c h w
|
171 |
time_condition = torch.FloatTensor(time_condition).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(1, 1, 256, 1)
|
172 |
|
173 |
+
images = self.image_processor(images=frames, return_tensors="pt").to(device)
|
174 |
image_embeddings = self.image_encoder(**images).image_embeds
|
175 |
image_embeddings = torch.mean(image_embeddings, dim=0, keepdim=True).unsqueeze(0).unsqueeze(0)
|
176 |
neg_image_embeddings = torch.zeros_like(image_embeddings)
|
|
|
213 |
controller = FoleyController()
|
214 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
with gr.Blocks(css=css) as demo:
|
217 |
gr.HTML(
|
218 |
'<h1 style="height: 136px; display: flex; align-items: center; justify-content: space-around;"><span style="height: 100%; width:136px;"><img src="file/foleycrafter.png" alt="logo" style="height: 100%; width:auto; object-fit: contain; margin: 0px 0px; padding: 0px 0px;"></span><strong style="font-size: 40px;">FoleyCrafter: Bring Silent Videos to Life with Lifelike and Synchronized Sounds</strong></h1>'
|