Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,6 @@ from xora.schedulers.rf import RectifiedFlowScheduler
|
|
13 |
from xora.pipelines.pipeline_xora_video import XoraVideoPipeline
|
14 |
from transformers import T5EncoderModel, T5Tokenizer
|
15 |
from xora.utils.conditioning_method import ConditioningMethod
|
16 |
-
|
17 |
from pathlib import Path
|
18 |
import safetensors.torch
|
19 |
import json
|
@@ -26,6 +25,7 @@ import gc
|
|
26 |
import csv
|
27 |
from datetime import datetime
|
28 |
from openai import OpenAI
|
|
|
29 |
|
30 |
torch.backends.cuda.matmul.allow_tf32 = False
|
31 |
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
|
@@ -55,8 +55,11 @@ with open(system_prompt_i2v_path, "r") as f:
|
|
55 |
|
56 |
# Set model download directory within Hugging Face Spaces
|
57 |
model_path = "asset"
|
|
|
|
|
|
|
58 |
if not os.path.exists(model_path):
|
59 |
-
snapshot_download("Lightricks/LTX-Video", local_dir=model_path, repo_type="model", token=hf_token)
|
60 |
|
61 |
# Global variables to load components
|
62 |
vae_dir = Path(model_path) / "vae"
|
@@ -67,10 +70,9 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
67 |
|
68 |
request_log = []
|
69 |
|
70 |
-
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path).to(device)
|
71 |
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
|
72 |
|
73 |
-
|
74 |
def compute_clip_embedding(text=None, image=None):
|
75 |
"""
|
76 |
Compute CLIP embedding for a given text or image.
|
@@ -219,7 +221,7 @@ vae = load_vae(vae_dir)
|
|
219 |
unet = load_unet(unet_dir)
|
220 |
scheduler = load_scheduler(scheduler_dir)
|
221 |
patchifier = SymmetricPatchifier(patch_size=1)
|
222 |
-
text_encoder = T5EncoderModel.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder").to(device)
|
223 |
tokenizer = T5Tokenizer.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer")
|
224 |
|
225 |
pipeline = XoraVideoPipeline(
|
@@ -229,7 +231,7 @@ pipeline = XoraVideoPipeline(
|
|
229 |
tokenizer=tokenizer,
|
230 |
scheduler=scheduler,
|
231 |
vae=vae,
|
232 |
-
).to(device)
|
233 |
|
234 |
@spaces.GPU(duration=90) # Dynamic duration
|
235 |
def generate_video_from_text_90(
|
@@ -320,7 +322,7 @@ def generate_video_from_image_90(
|
|
320 |
frame_rate=20,
|
321 |
seed=random.randint(0, MAX_SEED),
|
322 |
num_inference_steps=35,
|
323 |
-
guidance_scale=
|
324 |
height=768,
|
325 |
width=768,
|
326 |
num_frames=60,
|
@@ -357,7 +359,7 @@ def generate_video_from_image_90(
|
|
357 |
"media_items": media_items,
|
358 |
}
|
359 |
|
360 |
-
generator = torch.Generator(device="
|
361 |
|
362 |
def gradio_progress_callback(self, step, timestep, kwargs):
|
363 |
progress((step + 1) / num_inference_steps)
|
@@ -395,18 +397,16 @@ def generate_video_from_image_90(
|
|
395 |
f"An error occurred while generating the video. Please try again. Error: {e}",
|
396 |
duration=5,
|
397 |
)
|
398 |
-
|
399 |
finally:
|
400 |
torch.cuda.empty_cache()
|
401 |
gc.collect()
|
402 |
-
|
403 |
return output_path
|
404 |
|
405 |
def create_advanced_options():
|
406 |
with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
|
407 |
seed = gr.Slider(label="4.1 Seed", minimum=0, maximum=1000000, step=1, value=646373)
|
408 |
inference_steps = gr.Slider(label="4.2 Inference Steps", minimum=5, maximum=150, step=5, value=40)
|
409 |
-
guidance_scale = gr.Slider(label="4.3 Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=
|
410 |
|
411 |
height_slider = gr.Slider(
|
412 |
label="4.4 Height",
|
@@ -443,7 +443,7 @@ def create_advanced_options():
|
|
443 |
]
|
444 |
|
445 |
# Define the Gradio interface with tabs
|
446 |
-
with gr.Blocks(theme=gr.themes.
|
447 |
with gr.Row(elem_id="title-row"):
|
448 |
gr.Markdown(
|
449 |
"""
|
@@ -700,4 +700,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as iface:
|
|
700 |
)
|
701 |
|
702 |
if __name__ == "__main__":
|
703 |
-
iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(share=True, show_api=False)
|
|
|
13 |
from xora.pipelines.pipeline_xora_video import XoraVideoPipeline
|
14 |
from transformers import T5EncoderModel, T5Tokenizer
|
15 |
from xora.utils.conditioning_method import ConditioningMethod
|
|
|
16 |
from pathlib import Path
|
17 |
import safetensors.torch
|
18 |
import json
|
|
|
25 |
import csv
|
26 |
from datetime import datetime
|
27 |
from openai import OpenAI
|
28 |
+
#from gradio import themes
|
29 |
|
30 |
torch.backends.cuda.matmul.allow_tf32 = False
|
31 |
torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
|
|
|
55 |
|
56 |
# Set model download directory within Hugging Face Spaces
|
57 |
model_path = "asset"
|
58 |
+
|
59 |
+
commit_hash='c7c8ad4c2ddba847b94e8bfaefbd30bd8669fafc'
|
60 |
+
|
61 |
if not os.path.exists(model_path):
|
62 |
+
snapshot_download("Lightricks/LTX-Video", revision=commit_hash, local_dir=model_path, repo_type="model", token=hf_token)
|
63 |
|
64 |
# Global variables to load components
|
65 |
vae_dir = Path(model_path) / "vae"
|
|
|
70 |
|
71 |
request_log = []
|
72 |
|
73 |
+
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path).to(torch.device("cuda:0"))
|
74 |
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32", cache_dir=model_path)
|
75 |
|
|
|
76 |
def compute_clip_embedding(text=None, image=None):
|
77 |
"""
|
78 |
Compute CLIP embedding for a given text or image.
|
|
|
221 |
unet = load_unet(unet_dir)
|
222 |
scheduler = load_scheduler(scheduler_dir)
|
223 |
patchifier = SymmetricPatchifier(patch_size=1)
|
224 |
+
text_encoder = T5EncoderModel.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="text_encoder").to(torch.device("cuda:0"))
|
225 |
tokenizer = T5Tokenizer.from_pretrained("PixArt-alpha/PixArt-XL-2-1024-MS", subfolder="tokenizer")
|
226 |
|
227 |
pipeline = XoraVideoPipeline(
|
|
|
231 |
tokenizer=tokenizer,
|
232 |
scheduler=scheduler,
|
233 |
vae=vae,
|
234 |
+
).to(torch.device("cuda:0"))
|
235 |
|
236 |
@spaces.GPU(duration=90) # Dynamic duration
|
237 |
def generate_video_from_text_90(
|
|
|
322 |
frame_rate=20,
|
323 |
seed=random.randint(0, MAX_SEED),
|
324 |
num_inference_steps=35,
|
325 |
+
guidance_scale=4.2,
|
326 |
height=768,
|
327 |
width=768,
|
328 |
num_frames=60,
|
|
|
359 |
"media_items": media_items,
|
360 |
}
|
361 |
|
362 |
+
generator = torch.Generator(device="cuda").manual_seed(seed)
|
363 |
|
364 |
def gradio_progress_callback(self, step, timestep, kwargs):
|
365 |
progress((step + 1) / num_inference_steps)
|
|
|
397 |
f"An error occurred while generating the video. Please try again. Error: {e}",
|
398 |
duration=5,
|
399 |
)
|
|
|
400 |
finally:
|
401 |
torch.cuda.empty_cache()
|
402 |
gc.collect()
|
|
|
403 |
return output_path
|
404 |
|
405 |
def create_advanced_options():
|
406 |
with gr.Accordion("Step 4: Advanced Options (Optional)", open=False):
|
407 |
seed = gr.Slider(label="4.1 Seed", minimum=0, maximum=1000000, step=1, value=646373)
|
408 |
inference_steps = gr.Slider(label="4.2 Inference Steps", minimum=5, maximum=150, step=5, value=40)
|
409 |
+
guidance_scale = gr.Slider(label="4.3 Guidance Scale", minimum=1.0, maximum=10.0, step=0.1, value=4.2)
|
410 |
|
411 |
height_slider = gr.Slider(
|
412 |
label="4.4 Height",
|
|
|
443 |
]
|
444 |
|
445 |
# Define the Gradio interface with tabs
|
446 |
+
with gr.Blocks(theme=gr.themes.Origin()) as iface:
|
447 |
with gr.Row(elem_id="title-row"):
|
448 |
gr.Markdown(
|
449 |
"""
|
|
|
700 |
)
|
701 |
|
702 |
if __name__ == "__main__":
|
703 |
+
iface.queue(max_size=64, default_concurrency_limit=1, api_open=False).launch(share=True, show_api=False)
|