import os

import gradio as gr

from model import Model

# True when running as the official PAIR Hugging Face Space; some advanced
# options are hidden there.
on_huggingspace = os.environ.get("SPACE_AUTHOR_NAME") == "PAIR"


def create_demo(model: Model):
    """Build the Gradio Blocks UI for text- and depth-conditional video generation."""

    # Each example is [input video, text prompt].
    examples = [
        ["__assets__/depth_videos_depth/girl_dancing.mp4",
            "A stormtrooper, masterpiece, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/girl_dancing.mp4",
            "Oil painting of a catwoman, masterpiece, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/girl_dancing.mp4",
            "Oil painting of a girl dancing closed eyes, masterpiece, a high-quality, detailed, and professional photo"],

        ["__assets__/depth_videos_depth/woman.mp4",
            "A robot is dancing in the Sahara desert, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/woman.mp4",
            "Wonder woman is dancing, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/woman.mp4",
            "Oil painting of a girl dancing close-up, masterpiece, a high-quality, detailed, and professional photo"],

        ["__assets__/depth_videos_depth/man.mp4",
            "An astronaut is Dancing in space, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/man.mp4",
            "Iron Man is dancing, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/man.mp4",
            "Spiderman is Dancing, a high-quality, detailed, and professional photo"],

        ["__assets__/depth_videos_depth/halloween.mp4",
            "Beautiful blonde girl, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/halloween.mp4",
            "Beautiful brunette girl, a high-quality, detailed, and professional photo"],
        ["__assets__/depth_videos_depth/halloween.mp4",
            "Beautiful red-haired girl, a high-quality, detailed, and professional photo"],
    ]

    with gr.Blocks() as demo:
        with gr.Row():
            gr.Markdown('## Text and Depth Conditional Video Generation')
        with gr.Row():
            gr.HTML(
                """
                <div style="text-align: left;">
                <h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
                    Description: For performance reasons, this preview release accepts any input video, but the output is capped at 80 frames and input videos are scaled down before processing.
                </h2>
                </div>
                """)

        with gr.Row():
            with gr.Column():
                input_video = gr.Video(
                    label="Input Video", source='upload', format="mp4", visible=True).style(height="auto")
            with gr.Column():
                prompt = gr.Textbox(label='Prompt')
                run_button = gr.Button('Run')
                with gr.Accordion('Advanced options', open=False):
                    watermark = gr.Radio(["Picsart AI Research", "Text2Video-Zero",
                                         "None"], label="Watermark", value='Picsart AI Research')
                    chunk_size = gr.Slider(
                        label="Chunk size", minimum=2, maximum=16, value=2, step=1, visible=not on_huggingspace,
                        info="Number of frames processed at once. Reduce for lower memory usage.")
                    merging_ratio = gr.Slider(
                        label="Merging ratio", minimum=0.0, maximum=0.9, step=0.1, value=0.0, visible=not on_huggingspace,
                        info="Fraction of tokens that are merged; higher values give more compression (lower memory use and faster inference).")
            with gr.Column():
                result = gr.Video(label="Generated Video").style(height="auto")

        # Argument order must match the signature of model.process_controlnet_depth.
        inputs = [
            input_video,
            prompt,
            chunk_size,
            watermark,
            merging_ratio,
        ]

        gr.Examples(examples=examples,
                    inputs=inputs,
                    outputs=result,
                    fn=model.process_controlnet_depth,
                    # cache_examples=on_huggingspace,
                    cache_examples=False,
                    run_on_click=False,
                    )

        run_button.click(fn=model.process_controlnet_depth,
                         inputs=inputs,
                         outputs=result,)
    return demo
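

# Minimal standalone entry point (a sketch for local testing; the Model
# constructor arguments and launch settings below are assumptions, not taken
# from this file, so adjust them to match the actual Model implementation).
if __name__ == "__main__":
    import torch

    # Assumed constructor signature; see the repository's main app for the
    # exact arguments used there.
    model = Model(device="cuda", dtype=torch.float16)
    demo = create_demo(model)
    demo.queue()
    demo.launch()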