Spaces:

ford442
/

LTX-Video

Running on Zero

App Files Community

ford442 committed 8 days ago

Commit

cc55fb3

•

1 Parent(s): f8a231f

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -13

app.py CHANGED Viewed

@@ -13,7 +13,6 @@ from xora.schedulers.rf import RectifiedFlowScheduler
 from xora.pipelines.pipeline_xora_video import XoraVideoPipeline
 from transformers import T5EncoderModel, T5Tokenizer
 from xora.utils.conditioning_method import ConditioningMethod
 from pathlib import Path
 import safetensors.torch
 import json
@@ -26,6 +25,7 @@ import gc
 import csv
 from datetime import datetime
 from openai import OpenAI
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
@@ -55,8 +55,11 @@ with open(system_prompt_i2v_path, "r") as f:
 # Set model download directory within Hugging Face Spaces
 model_path = "asset"
 if not os.path.exists(model_path):
-    snapshot_download("Lightricks/LTX-Video", local_dir=model_path, repo_type="model", token=hf_token)
 # Global variables to load components
 vae_dir = Path(model_path) / "vae"
@@ -67,10 +70,9 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 request_log = []
-clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path).to(device)
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
 def compute_clip_embedding(text=None, image=None):
     """
     Compute CLIP embedding for a given text or image.
@@ -219,7 +221,7 @@ vae = load_vae(vae_dir)
 unet = load_unet(unet_dir)
 scheduler = load_scheduler(scheduler_dir)
 patchifier = SymmetricPatchifier(patch_size=1)
-text_encoder = T5EncoderModel.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder").to(device)
 tokenizer = T5Tokenizer.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer")
 pipeline = XoraVideoPipeline(
@@ -229,7 +231,7 @@ pipeline = XoraVideoPipeline(
     tokenizer=tokenizer,
     scheduler=scheduler,
     vae=vae,
-).to(device)
 @spaces.GPU(duration=90)  # Dynamic duration
 def generate_video_from_text_90(
@@ -320,7 +322,7 @@ def generate_video_from_image_90(
     frame_rate=20,
     seed=random.randint(0, MAX_SEED),
     num_inference_steps=35,
-    guidance_scale=3.2,
     height=768,
     width=768,
     num_frames=60,
@@ -357,7 +359,7 @@ def generate_video_from_image_90(
         "media_items": media_items,
     }
-    generator = torch.Generator(device="cpu").manual_seed(seed)
     def gradio_progress_callback(self, step, timestep, kwargs):
         progress((step + 1) / num_inference_steps)
@@ -395,18 +397,16 @@ def generate_video_from_image_90(
             f"An error occurred while generating the video. Please try again. Error: {e}",
             duration=5,
         )
     finally:
         torch.cuda.empty_cache()
         gc.collect()
     return output_path
 def create_advanced_options():
     with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
         seed = gr.Slider(label="4.1 Seed", minimum=0, maximum=1000000, step=1, value=646373)
         inference_steps = gr.Slider(label="4.2 Inference Steps", minimum=5, maximum=150, step=5, value=40)
-        guidance_scale = gr.Slider(label="4.3 Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=3.2)
         height_slider = gr.Slider(
             label="4.4 Height",
@@ -443,7 +443,7 @@ def create_advanced_options():
         ]
 # Define the Gradio interface with tabs
-with gr.Blocks(theme=gr.themes.Soft()) as iface:
     with gr.Row(elem_id="title-row"):
         gr.Markdown(
             """
@@ -700,4 +700,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
     )
 if __name__ == "__main__":
-    iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(share=True, show_api=False)

 from xora.pipelines.pipeline_xora_video import XoraVideoPipeline
 from transformers import T5EncoderModel, T5Tokenizer
 from xora.utils.conditioning_method import ConditioningMethod
 from pathlib import Path
 import safetensors.torch
 import json
 import csv
 from datetime import datetime
 from openai import OpenAI
+#from gradio import themes
 torch.backends.cuda.matmul.allow_tf32 = False
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
 # Set model download directory within Hugging Face Spaces
 model_path = "asset"
+commit_hash='c7c8ad4c2ddba847b94e8bfaefbd30bd8669fafc'
 if not os.path.exists(model_path):
+    snapshot_download("Lightricks/LTX-Video", revision=commit_hash, local_dir=model_path, repo_type="model", token=hf_token)
 # Global variables to load components
 vae_dir = Path(model_path) / "vae"
 request_log = []
+clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path).to(torch.device("cuda:0"))
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
 def compute_clip_embedding(text=None, image=None):
     """
     Compute CLIP embedding for a given text or image.
 unet = load_unet(unet_dir)
 scheduler = load_scheduler(scheduler_dir)
 patchifier = SymmetricPatchifier(patch_size=1)
+text_encoder = T5EncoderModel.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder").to(torch.device("cuda:0"))
 tokenizer = T5Tokenizer.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer")
 pipeline = XoraVideoPipeline(
     tokenizer=tokenizer,
     scheduler=scheduler,
     vae=vae,
+).to(torch.device("cuda:0"))
 @spaces.GPU(duration=90)  # Dynamic duration
 def generate_video_from_text_90(
     frame_rate=20,
     seed=random.randint(0, MAX_SEED),
     num_inference_steps=35,
+    guidance_scale=4.2,
     height=768,
     width=768,
     num_frames=60,
         "media_items": media_items,
     }
+    generator = torch.Generator(device="cuda").manual_seed(seed)
     def gradio_progress_callback(self, step, timestep, kwargs):
         progress((step + 1) / num_inference_steps)
             f"An error occurred while generating the video. Please try again. Error: {e}",
             duration=5,
         )
     finally:
         torch.cuda.empty_cache()
         gc.collect()
     return output_path
 def create_advanced_options():
     with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
         seed = gr.Slider(label="4.1 Seed", minimum=0, maximum=1000000, step=1, value=646373)
         inference_steps = gr.Slider(label="4.2 Inference Steps", minimum=5, maximum=150, step=5, value=40)
+        guidance_scale = gr.Slider(label="4.3 Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=4.2)
         height_slider = gr.Slider(
             label="4.4 Height",
         ]
 # Define the Gradio interface with tabs
+with gr.Blocks(theme=gr.themes.Origin()) as iface:
     with gr.Row(elem_id="title-row"):
         gr.Markdown(
             """
     )
 if __name__ == "__main__":
+    iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(share=True, show_api=False)