Update app.py
app.py (CHANGED)

--- a/app.py
+++ b/app.py
@@ -162,11 +162,16 @@ preset_options = [
     {"label": "1056x640, 57 frames", "width": 1056, "height": 640, "num_frames": 57},
     {"label": "448x448, 100 frames", "width": 448, "height": 448, "num_frames": 100},
     {"label": "448x448, 200 frames", "width": 448, "height": 448, "num_frames": 200},
+    {"label": "448x448, 300 frames", "width": 448, "height": 448, "num_frames": 300},
     {"label": "640x640, 80 frames", "width": 640, "height": 640, "num_frames": 80},
+    {"label": "640x640, 120 frames", "width": 640, "height": 640, "num_frames": 120},
     {"label": "768x768, 64 frames", "width": 768, "height": 768, "num_frames": 64},
+    {"label": "768x768, 90 frames", "width": 768, "height": 768, "num_frames": 90},
     {"label": "720x720, 64 frames", "width": 768, "height": 768, "num_frames": 64},
+    {"label": "720x720, 100 frames", "width": 768, "height": 768, "num_frames": 100},
     {"label": "768x512, 97 frames", "width": 768, "height": 512, "num_frames": 97},
     {"label": "512x512, 160 frames", "width": 512, "height": 512, "num_frames": 160},
+    {"label": "512x512, 200 frames", "width": 512, "height": 512, "num_frames": 200},
     {"label": "736x480, 113 frames", "width": 736, "height": 480, "num_frames": 113},
     {"label": "704x480, 121 frames", "width": 704, "height": 480, "num_frames": 121},
     {"label": "704x448, 129 frames", "width": 704, "height": 448, "num_frames": 129},
@@ -223,22 +228,10 @@ pipeline = XoraVideoPipeline(
     tokenizer=tokenizer,
     scheduler=scheduler,
     vae=vae,
-).to(
-
-
-GPU_DURATION_OPTIONS = {
-    "Short (60s)": 60,
-    "Medium (80s)": 80,
-    "Medium (100s)": 100,
-    "Long (120s)": 120,
-    "Long (140s)": 140,
-}
-
-def set_gpu_duration(duration_choice):
-    os.environ["GPU_DURATION"] = str(GPU_DURATION_OPTIONS[duration_choice])
-
-@spaces.GPU(duration=int(os.getenv("GPU_DURATION", "80")))  # Dynamic duration
-def generate_video_from_text(
+).to(device)
+
+@spaces.GPU(duration=90)  # Dynamic duration
+def generate_video_from_text_90(
     prompt="",
     enhance_prompt_toggle=False,
     txt2vid_analytics_toggle=True,
@@ -274,7 +267,7 @@ def generate_video_from_text(
         progress((step + 1) / num_inference_steps)
 
     try:
-
+        # with torch.no_grad():
         images = pipeline(
             num_inference_steps=num_inference_steps,
             num_images_per_prompt=1,
@@ -316,8 +309,8 @@ def generate_video_from_text(
     torch.cuda.empty_cache()
     return output_path
 
-@spaces.GPU(duration=int(os.getenv("GPU_DURATION", "80")))  # Dynamic duration
-def generate_video_from_image(
+@spaces.GPU(duration=90)  # Dynamic duration
+def generate_video_from_image_90(
     image_path,
     prompt="",
     enhance_prompt_toggle=False,
@@ -414,14 +407,6 @@ def create_advanced_options():
     inference_steps = gr.Slider(label="4.2 Inference Steps", minimum=5, maximum=150, step=5, value=40)
     guidance_scale = gr.Slider(label="4.3 Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=3.2)
 
-    gpu_duration = gr.Dropdown(
-        label="GPU Duration",
-        choices=list(GPU_DURATION_OPTIONS.keys()),
-        value="Medium (80s)"  # Default value
-    )
-
-    gpu_duration.change(fn=set_gpu_duration, inputs=gpu_duration, outputs=[])
-
     height_slider = gr.Slider(
         label="4.4 Height",
         minimum=256,
@@ -679,7 +664,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
     txt2vid_preset.change(fn=preset_changed, inputs=[txt2vid_preset], outputs=txt2vid_advanced[3:])
 
     txt2vid_generate.click(
-        fn=generate_video_from_text,
+        fn=generate_video_from_text_90,
         inputs=[
             txt2vid_prompt,
             txt2vid_enhance_toggle,
@@ -697,7 +682,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
     img2vid_preset.change(fn=preset_changed, inputs=[img2vid_preset], outputs=img2vid_advanced[3:])
 
     img2vid_generate.click(
-        fn=generate_video_from_image,
+        fn=generate_video_from_image_90,
         inputs=[
             img2vid_image,
             img2vid_prompt,