ford442 committed
Commit cc55fb3
1 parent: f8a231f

Update app.py

Files changed (1)
  1. app.py +13 -13
app.py CHANGED
@@ -13,7 +13,6 @@ from xora.schedulers.rf import RectifiedFlowScheduler
 from xora.pipelines.pipeline_xora_video import XoraVideoPipeline
 from transformers import T5EncoderModel, T5Tokenizer
 from xora.utils.conditioning_method import ConditioningMethod
-
 from pathlib import Path
 import safetensors.torch
 import json
@@ -26,6 +25,7 @@ import gc
 import csv
 from datetime import datetime
 from openai import OpenAI
+#from gradio import themes
 
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
@@ -55,8 +55,11 @@ with open(system_prompt_i2v_path, "r") as f:
 
 # Set model download directory within Hugging Face Spaces
 model_path = "asset"
+
+commit_hash='c7c8ad4c2ddba847b94e8bfaefbd30bd8669fafc'
+
 if not os.path.exists(model_path):
-    snapshot_download("Lightricks/LTX-Video", local_dir=model_path, repo_type="model", token=hf_token)
+    snapshot_download("Lightricks/LTX-Video", revision=commit_hash, local_dir=model_path, repo_type="model", token=hf_token)
 
 # Global variables to load components
 vae_dir = Path(model_path) / "vae"
@@ -67,10 +70,9 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 request_log = []
 
-clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path).to(device)
+clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path).to(torch.device("cuda:0"))
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
 
-
 def compute_clip_embedding(text=None, image=None):
     """
     Compute CLIP embedding for a given text or image.
@@ -219,7 +221,7 @@ vae = load_vae(vae_dir)
 unet = load_unet(unet_dir)
 scheduler = load_scheduler(scheduler_dir)
 patchifier = SymmetricPatchifier(patch_size=1)
-text_encoder = T5EncoderModel.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder").to(device)
+text_encoder = T5EncoderModel.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder").to(torch.device("cuda:0"))
 tokenizer = T5Tokenizer.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer")
 
 pipeline = XoraVideoPipeline(
@@ -229,7 +231,7 @@ pipeline = XoraVideoPipeline(
     tokenizer=tokenizer,
     scheduler=scheduler,
     vae=vae,
-).to(device)
+).to(torch.device("cuda:0"))
 
 @spaces.GPU(duration=90) # Dynamic duration
 def generate_video_from_text_90(
@@ -320,7 +322,7 @@ def generate_video_from_image_90(
     frame_rate=20,
     seed=random.randint(0, MAX_SEED),
     num_inference_steps=35,
-    guidance_scale=3.2,
+    guidance_scale=4.2,
     height=768,
     width=768,
     num_frames=60,
@@ -357,7 +359,7 @@ def generate_video_from_image_90(
         "media_items": media_items,
     }
 
-    generator = torch.Generator(device="cpu").manual_seed(seed)
+    generator = torch.Generator(device="cuda").manual_seed(seed)
 
     def gradio_progress_callback(self, step, timestep, kwargs):
         progress((step + 1) / num_inference_steps)
@@ -395,18 +397,16 @@ def generate_video_from_image_90(
             f"An error occurred while generating the video. Please try again. Error: {e}",
             duration=5,
         )
-
     finally:
         torch.cuda.empty_cache()
        gc.collect()
-
    return output_path
 
 def create_advanced_options():
     with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
         seed = gr.Slider(label="4.1 Seed", minimum=0, maximum=1000000, step=1, value=646373)
         inference_steps = gr.Slider(label="4.2 Inference Steps", minimum=5, maximum=150, step=5, value=40)
-        guidance_scale = gr.Slider(label="4.3 Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=3.2)
+        guidance_scale = gr.Slider(label="4.3 Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=4.2)
 
         height_slider = gr.Slider(
             label="4.4 Height",
@@ -443,7 +443,7 @@ def create_advanced_options():
         ]
 
 # Define the Gradio interface with tabs
-with gr.Blocks(theme=gr.themes.Soft()) as iface:
+with gr.Blocks(theme=gr.themes.Origin()) as iface:
     with gr.Row(elem_id="title-row"):
         gr.Markdown(
             """
@@ -700,4 +700,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
         )
 
 if __name__ == "__main__":
-    iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(share=True, show_api=False)
+    iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(share=True, show_api=False)
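Note on the download change: the commit pins snapshot_download to a fixed revision of Lightricks/LTX-Video, so later pushes to that repo cannot silently change the weights the Space loads. A minimal standalone sketch of the same pattern; the HF_TOKEN environment variable is an assumed stand-in for the hf_token the app reads elsewhere:

import os
from huggingface_hub import snapshot_download

model_path = "asset"
commit_hash = "c7c8ad4c2ddba847b94e8bfaefbd30bd8669fafc"  # revision pinned by this commit

if not os.path.exists(model_path):
    snapshot_download(
        "Lightricks/LTX-Video",
        revision=commit_hash,               # accepts a branch name, tag, or commit SHA
        local_dir=model_path,
        repo_type="model",
        token=os.environ.get("HF_TOKEN"),   # assumption: stands in for the app's hf_token
    )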
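Note on the seeding change: the generator moves from torch.Generator(device="cpu") to torch.Generator(device="cuda"). Runs stay reproducible for a given seed, but the noise now comes from the GPU RNG, so the same seed will not reproduce samples generated earlier with the CPU generator. A small sketch of the difference, assuming a CUDA device is available:

import torch

seed = 646373  # default seed exposed in the advanced options

# CPU generator: same seed -> same values across runs, no GPU required.
cpu_gen = torch.Generator(device="cpu").manual_seed(seed)
cpu_noise = torch.randn(2, 3, generator=cpu_gen)

# CUDA generator (what the commit switches to): same seed -> same values
# across runs on the GPU, but a different stream than the CPU generator.
if torch.cuda.is_available():
    cuda_gen = torch.Generator(device="cuda").manual_seed(seed)
    cuda_noise = torch.randn(2, 3, device="cuda", generator=cuda_gen)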