ford442 committed
Commit af2936b
Parent(s): 005df4d

Update app.py

Files changed (1): app.py (+14, -29)

app.py CHANGED
@@ -162,11 +162,16 @@ preset_options = [
     {"label": "1056x640, 57 frames", "width": 1056, "height": 640, "num_frames": 57},
     {"label": "448x448, 100 frames", "width": 448, "height": 448, "num_frames": 100},
     {"label": "448x448, 200 frames", "width": 448, "height": 448, "num_frames": 200},
+    {"label": "448x448, 300 frames", "width": 448, "height": 448, "num_frames": 300},
     {"label": "640x640, 80 frames", "width": 640, "height": 640, "num_frames": 80},
+    {"label": "640x640, 120 frames", "width": 640, "height": 640, "num_frames": 120},
     {"label": "768x768, 64 frames", "width": 768, "height": 768, "num_frames": 64},
+    {"label": "768x768, 90 frames", "width": 768, "height": 768, "num_frames": 90},
     {"label": "720x720, 64 frames", "width": 768, "height": 768, "num_frames": 64},
+    {"label": "720x720, 100 frames", "width": 768, "height": 768, "num_frames": 100},
     {"label": "768x512, 97 frames", "width": 768, "height": 512, "num_frames": 97},
     {"label": "512x512, 160 frames", "width": 512, "height": 512, "num_frames": 160},
+    {"label": "512x512, 200 frames", "width": 512, "height": 512, "num_frames": 200},
     {"label": "736x480, 113 frames", "width": 736, "height": 480, "num_frames": 113},
     {"label": "704x480, 121 frames", "width": 704, "height": 480, "num_frames": 121},
     {"label": "704x448, 129 frames", "width": 704, "height": 448, "num_frames": 129},
@@ -223,22 +228,10 @@ pipeline = XoraVideoPipeline(
     tokenizer=tokenizer,
     scheduler=scheduler,
     vae=vae,
-).to(torch.bfloat16).to(device)
-
-GPU_DURATION_OPTIONS = {
-    "Short (45s)": 45,
-    "Short (60s)": 60,
-    "Medium (80s)": 80,
-    "Medium (100s)": 100,
-    "Long (120s)": 120,
-    "Long (140s)": 140,
-}
-
-def set_gpu_duration(duration_choice):
-    os.environ["GPU_DURATION"] = str(GPU_DURATION_OPTIONS[duration_choice])
-
-@spaces.GPU(duration=int(os.getenv("GPU_DURATION", "80")))  # Dynamic duration
-def generate_video_from_text(
+).to(device)
+
+@spaces.GPU(duration=90)  # fixed 90 s duration
+def generate_video_from_text_90(
     prompt="",
     enhance_prompt_toggle=False,
     txt2vid_analytics_toggle=True,
@@ -274,7 +267,7 @@ def generate_video_from_text(
         progress((step + 1) / num_inference_steps)
 
     try:
-        with torch.no_grad():
+        # with torch.no_grad():
             images = pipeline(
                 num_inference_steps=num_inference_steps,
                 num_images_per_prompt=1,
@@ -316,8 +309,8 @@ def generate_video_from_text(
     torch.cuda.empty_cache()
     return output_path
 
-@spaces.GPU(duration=int(os.getenv("GPU_DURATION", "80")))  # Dynamic duration
-def generate_video_from_image(
+@spaces.GPU(duration=90)  # fixed 90 s duration
+def generate_video_from_image_90(
     image_path,
     prompt="",
     enhance_prompt_toggle=False,
@@ -414,14 +407,6 @@ def create_advanced_options():
     inference_steps = gr.Slider(label="4.2 Inference Steps", minimum=5, maximum=150, step=5, value=40)
     guidance_scale = gr.Slider(label="4.3 Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=3.2)
 
-    gpu_duration = gr.Dropdown(
-        label="GPU Duration",
-        choices=list(GPU_DURATION_OPTIONS.keys()),
-        value="Medium (80s)"  # Default value
-    )
-
-    gpu_duration.change(fn=set_gpu_duration, inputs=gpu_duration, outputs=[])
-
     height_slider = gr.Slider(
         label="4.4 Height",
         minimum=256,
@@ -679,7 +664,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
     txt2vid_preset.change(fn=preset_changed, inputs=[txt2vid_preset], outputs=txt2vid_advanced[3:])
 
     txt2vid_generate.click(
-        fn=generate_video_from_text,
+        fn=generate_video_from_text_90,
        inputs=[
            txt2vid_prompt,
            txt2vid_enhance_toggle,
@@ -697,7 +682,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
     img2vid_preset.change(fn=preset_changed, inputs=[img2vid_preset], outputs=img2vid_advanced[3:])
 
     img2vid_generate.click(
-        fn=generate_video_from_image,
+        fn=generate_video_from_image_90,
        inputs=[
            img2vid_image,
            img2vid_prompt,
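
Why the dropdown mechanism removed above never worked: the decorator argument `int(os.getenv("GPU_DURATION", "80"))` is evaluated exactly once, when the module is imported, so `set_gpu_duration` writing to `os.environ` from the UI could not change an already-applied reservation; pinning `duration=90` simply drops the dead code. If per-request durations are wanted, ZeroGPU also accepts a callable as `duration`, invoked on each call with the decorated function's arguments. A minimal sketch of that pattern, not part of this commit; `estimate_duration` and its timing heuristic are illustrative assumptions:

import spaces

# Illustrative heuristic (an assumption, not from the commit): reserve
# more GPU time when more inference steps are requested.
def estimate_duration(prompt="", enhance_prompt_toggle=False,
                      txt2vid_analytics_toggle=True, *args, **kwargs):
    steps = kwargs.get("num_inference_steps", 40)
    return min(30 + 2 * steps, 120)  # seconds

# A callable duration is evaluated per request, with the same arguments
# the decorated function receives -- unlike an int, which is computed
# once at import time.
@spaces.GPU(duration=estimate_duration)
def generate_video_from_text_dynamic(prompt="", enhance_prompt_toggle=False,
                                     txt2vid_analytics_toggle=True, **kwargs):
    ...

Hard-coding 90 s, as this commit does, is the simpler fix; the callable form only pays off when presets differ enough in runtime to matter.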
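
One side effect of commenting out `torch.no_grad()`: autograd now records the pipeline's forward pass, which raises peak GPU memory during sampling with no benefit at inference time. If the original context manager was somehow the problem, the same effect can be restored by wrapping just the call site; a minimal sketch under that assumption, with `pipeline` passed in to stand for the pipeline object in the diff:

import torch

def run_pipeline_without_autograd(pipeline, **pipeline_kwargs):
    # inference_mode() disables gradient tracking like no_grad(), and
    # additionally marks the tensors it creates as inference tensors,
    # skipping autograd's version-counter bookkeeping.
    with torch.inference_mode():
        return pipeline(**pipeline_kwargs)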