# Text2Video-Zero / app_depth.py
import os

import gradio as gr

from model import Model

# True when running in the official PAIR Hugging Face Space; used to hide
# memory-sensitive controls there.
on_huggingspace = os.environ.get("SPACE_AUTHOR_NAME") == "PAIR"
def create_demo(model: Model):
    # Each example row is [input video path, text prompt]; these feed gr.Examples below.
    examples = [
        ["__assets__/depth_videos_depth/girl_dancing.mp4",
         "A stormtrooper, masterpiece, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/girl_dancing.mp4",
         "Oil painting of a catwoman, masterpiece, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/girl_dancing.mp4",
         "Oil painting of a girl dancing with closed eyes, masterpiece, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/woman.mp4",
         "A robot is dancing in the Sahara desert, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/woman.mp4",
         "Wonder Woman is dancing, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/woman.mp4",
         "Oil painting of a girl dancing close-up, masterpiece, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/man.mp4",
         "An astronaut is dancing in space, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/man.mp4",
         "Iron Man is dancing, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/man.mp4",
         "Spider-Man is dancing, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/halloween.mp4",
         "Beautiful blonde girl, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/halloween.mp4",
         "Beautiful brunette girl, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/halloween.mp4",
         "Beautiful red-haired girl, a high-quality, detailed, and professional photo"],
    ]
    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Text and Depth Conditional Video Generation')
        with gr.Row():
            gr.HTML(
                """
                <div style="text-align: left;">
                <h3 style="font-weight: 450; font-size: 1rem; margin: 0rem">
                    Description: For performance reasons, this preview release accepts input videos of any length,
                    but the output is capped at 80 frames and the input video is downscaled before processing.
                </h3>
                </div>
                """)
        with gr.Row():
            with gr.Column():
                input_video = gr.Video(
                    label="Input Video", source='upload', format="mp4", visible=True).style(height="auto")
            with gr.Column():
                prompt = gr.Textbox(label='Prompt')
                run_button = gr.Button('Run')
                with gr.Accordion('Advanced options', open=False):
                    watermark = gr.Radio(["Picsart AI Research", "Text2Video-Zero", "None"],
                                         label="Watermark", value='Picsart AI Research')
                    # Memory-related controls are hidden on the hosted Space.
                    chunk_size = gr.Slider(
                        label="Chunk size", minimum=2, maximum=16, value=2, step=1, visible=not on_huggingspace,
                        info="Number of frames processed at once. Reduce for lower memory usage.")
                    merging_ratio = gr.Slider(
                        label="Merging ratio", minimum=0.0, maximum=0.9, step=0.1, value=0.0, visible=not on_huggingspace,
                        info="Ratio of tokens that are merged. Higher values mean more compression (less memory, faster inference).")
            with gr.Column():
                result = gr.Video(label="Generated Video").style(height="auto")
        # Gradio passes these positionally, so the order must match the
        # signature of model.process_controlnet_depth.
        inputs = [
            input_video,
            prompt,
            chunk_size,
            watermark,
            merging_ratio,
        ]

        gr.Examples(examples=examples,
                    inputs=inputs,
                    outputs=result,
                    fn=model.process_controlnet_depth,
                    # cache_examples=on_huggingspace,
                    cache_examples=False,
                    run_on_click=False)

        run_button.click(fn=model.process_controlnet_depth,
                         inputs=inputs,
                         outputs=result)
    return demo
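

# Standalone entry point: a minimal sketch of how this demo could be launched
# directly (the hosted Space wires create_demo into a combined app.py instead).
# The Model constructor arguments below are assumptions about model.py's API,
# not taken from this file; adjust device/dtype to match the actual signature.
if __name__ == "__main__":
    import torch

    model = Model(device="cuda" if torch.cuda.is_available() else "cpu",
                  dtype=torch.float16)
    demo = create_demo(model)
    demo.queue()   # queue long-running video-generation jobs
    demo.launch()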