tsqn committed on
Commit
a45ef62
1 Parent(s): 8fecf84

feature: deploying space with quantized Latte-1 model for low-VRAM cards.

Browse files
Files changed (3) hide show
  1. app.py +149 -4
  2. package.txt +1 -0
  3. requirements.txt +39 -0
app.py CHANGED
@@ -1,7 +1,152 @@
 
 
 
1
  import gradio as gr
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gc
2
+ import os
3
+ import torch
4
  import gradio as gr
5
+ from diffusers import LattePipeline
6
+ from transformers import T5EncoderModel, BitsAndBytesConfig
7
+ import imageio
8
+ from torchvision.utils import save_image
9
 
10
def flush():
    """Release memory that is no longer referenced.

    Runs Python's garbage collector first so that objects dropped with
    ``del`` are actually destroyed, then asks PyTorch to empty its CUDA
    cache.
    """
    gc.collect()
    torch.cuda.empty_cache()
13
 
14
def bytes_to_giga_bytes(bytes):
    """Convert a byte count to gibibytes.

    Args:
        bytes: Number of bytes. The parameter name shadows the builtin
            ``bytes`` type inside this function; it is kept unchanged so
            existing keyword callers keep working.

    Returns:
        The value divided by 1024 three times (i.e. by ``1024 ** 3``).
    """
    return bytes / 1024 / 1024 / 1024
16
+
17
def initialize_pipeline(model_id: str = "maxin-cn/Latte-1"):
    """Build the pipeline that is used only to encode prompts.

    The T5 text encoder is loaded from the ``text_encoder`` subfolder with a
    4-bit bitsandbytes quantization config (float16 compute dtype). The
    pipeline is then created around that encoder with ``transformer=None``,
    so no transformer is attached to this pipeline.

    Args:
        model_id: Model identifier passed to both ``from_pretrained`` calls.
            Defaults to the Latte-1 checkpoint used by this app.

    Returns:
        A ``(pipe, text_encoder)`` tuple. The encoder is returned separately
        so the caller can delete both references when it is done with them.
    """
    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    )
    text_encoder = T5EncoderModel.from_pretrained(
        model_id,
        subfolder="text_encoder",
        quantization_config=quantization,
        device_map="auto",
    )

    pipe = LattePipeline.from_pretrained(
        model_id,
        text_encoder=text_encoder,
        transformer=None,
        device_map="balanced",
    )
    return pipe, text_encoder
34
+
35
def generate_video(
    prompt: str,
    negative_prompt: str = "",
    video_length: int = 16,
    num_inference_steps: int = 50,
    progress=gr.Progress()
):
    """Generate a video from a text prompt and return the saved file's path.

    The work is done in two stages so that the text encoder and the
    generation pipeline are not held at the same time:

    1. The pipeline from ``initialize_pipeline()`` encodes the prompt and the
       negative prompt; it is then deleted and ``flush()`` is called.
    2. A second pipeline is loaded without a text encoder (float16, moved to
       ``"cuda"``) and run from the precomputed embeddings.

    Args:
        prompt: Text describing the video to generate.
        negative_prompt: Text describing what should be avoided.
        video_length: Number of frames requested from the pipeline.
        num_inference_steps: Number of inference steps requested.
        progress: Gradio progress tracker used to report each stage.

    Returns:
        Path of the MP4 file that was written. Each call writes to its own
        temporary file so simultaneous requests do not overwrite each other.
    """
    # Local import keeps this block self-contained; the module header is
    # left untouched.
    import tempfile

    # Set random seed for reproducibility
    torch.manual_seed(0)

    # Stage 1: encode the prompt with the quantized text encoder.
    progress(0, desc="Initializing pipeline...")
    pipe, text_encoder = initialize_pipeline()
    try:
        progress(0.2, desc="Encoding prompt...")
        with torch.no_grad():
            prompt_embeds, negative_prompt_embeds = pipe.encode_prompt(
                prompt,
                negative_prompt=negative_prompt,
            )
        progress(0.3, desc="Cleaning up...")
    finally:
        # Release the encoder pipeline even when encoding fails, so a failed
        # request does not keep it alive until the next one.
        del text_encoder
        del pipe
        flush()

    # Stage 2: load the generation pipeline without a text encoder.
    progress(0.4, desc="Initializing generation pipeline...")
    pipe = LattePipeline.from_pretrained(
        "maxin-cn/Latte-1",
        text_encoder=None,
        torch_dtype=torch.float16,
    ).to("cuda")
    try:
        progress(0.5, desc="Generating video...")
        videos = pipe(
            # UI sliders may deliver numeric values as floats; the pipeline
            # expects integer counts.
            video_length=int(video_length),
            num_inference_steps=int(num_inference_steps),
            negative_prompt=None,
            prompt_embeds=prompt_embeds,
            negative_prompt_embeds=negative_prompt_embeds,
            output_type="pt",
        ).frames.cpu()

        progress(0.8, desc="Post-processing...")
        # Scale values from [0, 1] to [0, 255] and store them as bytes.
        videos = (videos.clamp(0, 1) * 255).to(dtype=torch.uint8)

        # Reserve a unique output path; a single fixed filename would be
        # shared (and overwritten) by every request.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as handle:
            temp_output = handle.name

        # Only the first video of the batch is written.
        # NOTE(review): permute presumably turns (frames, channels, height,
        # width) into channels-last frames for the writer - confirm.
        imageio.mimwrite(
            temp_output,
            videos[0].permute(0, 2, 3, 1),
            fps=8,
            quality=5,
        )

        progress(0.9, desc="Cleaning up...")
    finally:
        # Release the generation pipeline on success and on failure alike.
        del pipe
        flush()

    progress(1.0, desc="Done!")
    return temp_output
102
+
103
def create_demo():
    """Build the Gradio Blocks interface for video generation.

    The layout is a single row with two columns: the first holds the prompt
    and negative-prompt text boxes, the frame-count and inference-step
    sliders and the generate button; the second holds the output video.
    Clicking the button calls ``generate_video`` with the four input values
    and shows its return value in the video component.

    Returns:
        The constructed ``gr.Blocks`` object (not yet launched).
    """
    with gr.Blocks() as demo:
        gr.Markdown("""
        # Latte Video Generation
        Generate short videos using the Latte-1 model.
        """)

        with gr.Row():
            with gr.Column():
                prompt = gr.Textbox(
                    label="Prompt",
                    value="a cat wearing sunglasses and working as a lifeguard at pool.",
                    info="Describe what you want to generate"
                )
                negative_prompt = gr.Textbox(
                    label="Negative Prompt",
                    value="",
                    info="What you don't want to see in the generation"
                )
                video_length = gr.Slider(
                    minimum=8,
                    maximum=32,
                    step=8,
                    value=16,
                    label="Video Length (frames)"
                )
                steps = gr.Slider(
                    minimum=20,
                    maximum=100,
                    step=10,
                    value=50,
                    label="Number of Inference Steps"
                )
                generate_btn = gr.Button("Generate Video")

            with gr.Column():
                output_video = gr.Video(label="Generated Video")

        # Input order must match generate_video's positional parameters.
        generate_btn.click(
            fn=generate_video,
            inputs=[prompt, negative_prompt, video_length, steps],
            outputs=output_video
        )

    return demo
148
+
149
if __name__ == "__main__":
    # Build the UI, enable Gradio's queue, then start the server without
    # requesting a public share link.
    demo = create_demo()
    demo.queue()
    demo.launch(share=False)
package.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -U --extra-index-url https://download.pytorch.org/whl/cu118
2
+ torch
3
+ torchvision
4
+ torchaudio
5
+ timm
6
+ pytorch-cuda>=11.8
7
+ diffusers[torch]
8
+ cmake
9
+ ninja
10
+ accelerate
11
+ tensorboard
12
+ pillow
13
+ einops
14
+ transformers
15
+ av
16
+ scikit-image
17
+ decord
18
+ pandas
19
+ imageio
20
+ imageio-ffmpeg
21
+ sentencepiece
22
+ beautifulsoup4
23
+ ftfy
24
+ omegaconf
25
+ gradio
26
+ imageio
27
+ imageio-ffmpeg
28
+ bitsandbytes
29
+ xformers
30
+ setuptools
31
+ pip
32
+ wheel
33
+ triton
34
+ spaces
35
+ huggingface-hub
36
+ numpy
37
+ matplotlib
38
+ lit
39
+ pybind11