AmitIsraeli committed
Commit c2ee1c9 · 1 Parent(s): 29b74cb
remove video
app.py CHANGED
@@ -4,14 +4,9 @@ import torch.nn as nn
 import torch.nn.functional as F
 from transformers import AutoTokenizer, SiglipTextModel
 from peft import LoraConfig, get_peft_model
-import random
 from torchvision.transforms import ToPILImage
-import numpy as np
-from moviepy.editor import ImageSequenceClip
 import random
 import gradio as gr
-import tempfile
-import os

 class SimpleAdapter(nn.Module):
     def __init__(self, input_dim=512, hidden_dim=1024, out_dim=1024):
@@ -117,35 +112,6 @@ class InrenceTextVAR(nn.Module):
         image = ToPILImage()(generated_images[0].cpu())
         return image

-    @torch.no_grad()
-    def generate_video(self, text, start_beta, target_beta, fps, length, top_k=0, top_p=0.9, seed=None,
-                       more_smooth=False,
-                       output_filename='output_video.mp4'):
-
-        if seed is None:
-            seed = random.randint(0, 2 ** 32 - 1)
-
-        num_frames = int(fps * length)
-        images = []
-
-        # Define an easing function for smoother interpolation
-        def ease_in_out(t):
-            return t * t * (3 - 2 * t)
-
-        # Generate t values between 0 and 1
-        t_values = np.linspace(0, 1, num_frames)
-        # Apply the easing function
-        eased_t_values = ease_in_out(t_values)
-        # Interpolate beta values using the eased t values
-        beta_values = start_beta + (target_beta - start_beta) * eased_t_values
-
-        for beta in beta_values:
-            image = self.generate_image(text, beta=beta, seed=seed, more_smooth=more_smooth, top_k=top_k, top_p=top_p)
-            images.append(np.array(image))
-
-        # Create a video from images
-        clip = ImageSequenceClip(images, fps=fps)
-        clip.write_videofile(output_filename, codec='libx264')

 if __name__ == '__main__':

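The removed generate_video did not sweep beta linearly; it eased it with a smoothstep curve so the morph accelerates out of start_beta and decelerates into target_beta. A minimal standalone sketch of that schedule, assuming only numpy (the function name here is illustrative, not part of the app):

import numpy as np

def smoothstep_betas(start_beta, target_beta, num_frames):
    # t runs 0..1 across the clip; t*t*(3 - 2*t) is the classic
    # smoothstep easing, flat at both ends so frames don't jump.
    t = np.linspace(0.0, 1.0, num_frames)
    eased = t * t * (3.0 - 2.0 * t)
    return start_beta + (target_beta - start_beta) * eased

# e.g. 10 fps * 5 s -> 50 beta values easing from 0.0 to 1.0
betas = smoothstep_betas(0.0, 1.0, 50)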
@@ -167,37 +133,6 @@ if __name__ == '__main__':
         image = model.generate_image(text, beta=beta, seed=seed, more_smooth=more_smooth, top_k=int(top_k), top_p=top_p)
         return image

-    def generate_video_gradio(text, start_beta=1.0, target_beta=1.0, fps=10, length=5.0, top_k=0, top_p=0.9, seed=None, more_smooth=False, progress=gr.Progress()):
-        print(f"Generating video for text: {text}\n"
-              f"start_beta: {start_beta}\n"
-              f"target_beta: {target_beta}\n"
-              f"seed: {seed}\n"
-              f"more_smooth: {more_smooth}\n"
-              f"top_k: {top_k}\n"
-              f"top_p: {top_p}"
-              f"fps: {fps}\n"
-              f"length: {length}\n")
-        with tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) as tmpfile:
-            output_filename = tmpfile.name
-        num_frames = int(fps * length)
-        beta_values = np.linspace(start_beta, target_beta, num_frames)
-        images = []
-
-        for i, beta in enumerate(beta_values):
-            image = model.generate_image(text, beta=beta, seed=seed, more_smooth=more_smooth, top_k=top_k, top_p=top_p)
-            images.append(np.array(image))
-            # Update progress
-            progress((i + 1) / num_frames)
-            # Yield the frame image to update the GUI
-            yield image, gr.update()
-
-        # After generating all frames, create the video
-        clip = ImageSequenceClip(images, fps=fps)
-        clip.write_videofile(output_filename, codec='libx264')
-
-        # Yield the final video output
-        yield gr.update(), output_filename
-
     with gr.Blocks() as demo:
         gr.Markdown("# Text to Image/Video Generator")
         with gr.Tab("Generate Image"):
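The removed generate_video_gradio relied on Gradio's generator handlers: a click callback that yields a tuple per frame to stream the preview and report progress, then a final yield carrying the finished video. A stripped-down sketch of that pattern (component names and the dummy frames are illustrative, not the app's code):

import numpy as np
import gradio as gr

def stream_frames(n_frames, progress=gr.Progress()):
    n = int(n_frames)
    for i in range(n):
        # Stand-in for model.generate_image(...): a brightening gray frame.
        frame = np.full((64, 64, 3), 255 * (i + 1) // n, dtype=np.uint8)
        progress((i + 1) / n)
        yield frame, gr.update()   # refresh the preview, leave the video untouched
    yield gr.update(), None        # final yield would carry the video file path

with gr.Blocks() as demo:
    n_in = gr.Number(label="Frames", value=10)
    btn = gr.Button("Run")
    preview = gr.Image(label="Current Frame")
    video = gr.Video(label="Result")
    btn.click(stream_frames, inputs=n_in, outputs=[preview, video])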
@@ -215,25 +150,4 @@ if __name__ == '__main__':
             outputs=image_output
         )

-        with gr.Tab("Generate Video"):
-            text_input_video = gr.Textbox(label="Input Text")
-            start_beta_input = gr.Slider(label="Start Beta", minimum=0.0, maximum=2.5, step=0.05, value=0)
-            target_beta_input = gr.Slider(label="Target Beta",minimum=0.0, maximum=2.5, step=0.05, value=1.0)
-            fps_input = gr.Number(label="FPS", value=10)
-            length_input = gr.Number(label="Length (seconds)", value=5.0)
-            seed_input_video = gr.Number(label="Seed", value=None)
-            more_smooth_input_video = gr.Checkbox(label="More Smooth", value=False)
-            top_k_input_video = gr.Number(label="Top K", value=0)
-            top_p_input_video = gr.Slider(label="Top P", minimum=0.0, maximum=1.0, step=0.01, value=0.9)
-            generate_video_button = gr.Button("Generate Video")
-            frame_output = gr.Image(label="Current Frame")
-            video_output = gr.Video(label="Generated Video")
-
-            generate_video_button.click(
-                generate_video_gradio,
-                inputs=[text_input_video, start_beta_input, target_beta_input, fps_input, length_input, top_k_input_video, top_p_input_video, seed_input_video, more_smooth_input_video],
-                outputs=[frame_output, video_output],
-                queue=True # Enable queuing to allow for progress updates
-            )
-
     demo.launch()
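Both removed code paths finished by packing the accumulated frames into an MP4 via moviepy's ImageSequenceClip. That step in isolation, assuming moviepy 1.x (where the class is importable from moviepy.editor) and an ffmpeg build with libx264:

import numpy as np
from moviepy.editor import ImageSequenceClip

# A dummy one-second clip: 10 RGB frames of increasing brightness.
frames = [np.full((64, 64, 3), 25 * i, dtype=np.uint8) for i in range(10)]
clip = ImageSequenceClip(frames, fps=10)
clip.write_videofile("output_video.mp4", codec="libx264")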