nruto committed
Commit 9874f2a · verified · 1 Parent(s): 5501de1

Upload 4 files
demos/api_example.py ADDED
@@ -0,0 +1,52 @@
+ #! /usr/bin/env python
+ from textwrap import dedent
+ from genmo.mochi_preview.pipelines import (
+     DecoderModelFactory,
+     DitModelFactory,
+     MochiSingleGPUPipeline,
+     T5ModelFactory,
+     linear_quadratic_schedule,
+ )
+ from genmo.lib.utils import save_video
+ from genmo.lib.progress import progress_bar
+ from pathlib import Path
+ import sys
+
+ MOCHI_DIR = sys.argv[1]
+ assert Path(MOCHI_DIR).exists(), f"Model directory {MOCHI_DIR} does not exist."
+ pipeline = MochiSingleGPUPipeline(
+     text_encoder_factory=T5ModelFactory(),
+     dit_factory=DitModelFactory(model_path=f"{MOCHI_DIR}/dit.safetensors", model_dtype="bf16"),
+     decoder_factory=DecoderModelFactory(
+         model_path=f"{MOCHI_DIR}/vae.safetensors",
+         model_stats_path=f"{MOCHI_DIR}/vae_stats.json",
+     ),
+     cpu_offload=True,
+     decode_type="tiled_full",
+ )
+
+ PROMPT = dedent("""
+ A hand with delicate fingers picks up a bright yellow lemon from a wooden bowl
+ filled with lemons and sprigs of mint against a peach-colored background.
+ The hand gently tosses the lemon up and catches it, showcasing its smooth texture.
+ A beige string bag sits beside the bowl, adding a rustic touch to the scene.
+ Additional lemons, one halved, are scattered around the base of the bowl.
+ The even lighting enhances the vibrant colors and creates a fresh,
+ inviting atmosphere.
+ """)
+
+ video = pipeline(
+     height=480,
+     width=848,
+     num_frames=31,
+     num_inference_steps=64,
+     sigma_schedule=linear_quadratic_schedule(64, 0.025),
+     cfg_schedule=[4.5] * 64,
+     batch_cfg=False,
+     prompt=PROMPT,
+     negative_prompt="",
+     seed=12345,
+ )
+
+ with progress_bar(type="tqdm"):
+     save_video(video[0], "video.mp4")
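A usage note on api_example.py: the script takes the weight directory as its single
positional argument, so a typical run (the path is a placeholder) is

    python demos/api_example.py <path_to_weights>

where the directory must contain dit.safetensors, vae.safetensors, and vae_stats.json,
matching the paths constructed above. The pipeline returns a batch of clips, and
video[0] selects the first. A minimal sketch of inspecting the result follows; the
type and dtype checks mirror the assertions demos/cli.py (below) applies to the same
pipeline output, while the printed array layout is an assumption, not something this
commit asserts:

    import numpy as np

    frames = video[0]                      # first clip in the returned batch
    assert isinstance(frames, np.ndarray)  # same checks as demos/cli.py
    assert frames.dtype == np.float32
    print(frames.shape)                    # layout assumed, e.g. (frames, height, width, 3)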
demos/cli.py ADDED
@@ -0,0 +1,152 @@
+ #! /usr/bin/env python
+ import json
+ import os
+ import time
+ from textwrap import dedent
+
+ import click
+ import numpy as np
+ import torch
+
+ from genmo.mochi_preview.pipelines import (
+     DecoderModelFactory,
+     DitModelFactory,
+     MochiMultiGPUPipeline,
+     MochiSingleGPUPipeline,
+     T5ModelFactory,
+     linear_quadratic_schedule,
+ )
+ from genmo.lib.progress import progress_bar
+ from genmo.lib.utils import save_video
+
+ pipeline = None
+ model_dir_path = None
+ num_gpus = torch.cuda.device_count()
+ cpu_offload = False
+
+
+ def configure_model(model_dir_path_, cpu_offload_):
+     global model_dir_path, cpu_offload
+     model_dir_path = model_dir_path_
+     cpu_offload = cpu_offload_
+
+
+ def load_model():
+     global num_gpus, pipeline, model_dir_path
+     if pipeline is None:
+         MOCHI_DIR = model_dir_path
+         print(f"Launching with {num_gpus} GPUs. If you want to force single GPU mode use CUDA_VISIBLE_DEVICES=0.")
+         klass = MochiSingleGPUPipeline if num_gpus == 1 else MochiMultiGPUPipeline
+         kwargs = dict(
+             text_encoder_factory=T5ModelFactory(),
+             dit_factory=DitModelFactory(model_path=f"{MOCHI_DIR}/dit.safetensors", model_dtype="bf16"),
+             decoder_factory=DecoderModelFactory(
+                 model_path=f"{MOCHI_DIR}/vae.safetensors",
+                 model_stats_path=f"{MOCHI_DIR}/vae_stats.json",
+             ),
+         )
+         if num_gpus > 1:
+             assert not cpu_offload, "CPU offload not supported in multi-GPU mode"
+             kwargs["world_size"] = num_gpus
+         else:
+             kwargs["cpu_offload"] = cpu_offload
+             kwargs["tiled_decode"] = True
+         pipeline = klass(**kwargs)
+
+
+ def generate_video(
+     prompt,
+     negative_prompt,
+     width,
+     height,
+     num_frames,
+     seed,
+     cfg_scale,
+     num_inference_steps,
+ ):
+     load_model()
+
+     # sigma_schedule should be a list of floats of length (num_inference_steps + 1),
+     # with sigma_schedule[0] == 1.0, sigma_schedule[-1] == 0.0, and monotonically decreasing values.
+     sigma_schedule = linear_quadratic_schedule(num_inference_steps, 0.025)
+
+     # cfg_schedule should be a list of floats of length num_inference_steps.
+     # For simplicity, we just use the same cfg scale at all timesteps,
+     # but more optimal schedules may use varying cfg, e.g.:
+     # [5.0] * (num_inference_steps // 2) + [4.5] * (num_inference_steps // 2)
+     cfg_schedule = [cfg_scale] * num_inference_steps
+
+     args = {
+         "height": height,
+         "width": width,
+         "num_frames": num_frames,
+         "sigma_schedule": sigma_schedule,
+         "cfg_schedule": cfg_schedule,
+         "num_inference_steps": num_inference_steps,
+         # We *need* flash attention to batch cfg,
+         # and it's only worth doing in a high-memory regime (assume multiple GPUs).
+         "batch_cfg": False,
+         "prompt": prompt,
+         "negative_prompt": negative_prompt,
+         "seed": seed,
+     }
+
+     with progress_bar(type="tqdm"):
+         final_frames = pipeline(**args)
+
+     final_frames = final_frames[0]
+
+     assert isinstance(final_frames, np.ndarray)
+     assert final_frames.dtype == np.float32
+
+     os.makedirs("outputs", exist_ok=True)
+     output_path = os.path.join("outputs", f"output_{int(time.time())}.mp4")
+
+
+     save_video(final_frames, output_path)
+     json_path = os.path.splitext(output_path)[0] + ".json"
+     with open(json_path, "w") as f:
+         json.dump(args, f, indent=4)
+
+     return output_path
+
+
+ DEFAULT_PROMPT = dedent("""
+ A hand with delicate fingers picks up a bright yellow lemon from a wooden bowl
+ filled with lemons and sprigs of mint against a peach-colored background.
+ The hand gently tosses the lemon up and catches it, showcasing its smooth texture.
+ A beige string bag sits beside the bowl, adding a rustic touch to the scene.
+ Additional lemons, one halved, are scattered around the base of the bowl.
+ The even lighting enhances the vibrant colors and creates a fresh,
+ inviting atmosphere.
+ """)
+
+ @click.command()
+ @click.option("--prompt", default=DEFAULT_PROMPT, help="Prompt for video generation.")
+ @click.option("--negative_prompt", default="", help="Negative prompt for video generation.")
+ @click.option("--width", default=848, type=int, help="Width of the video.")
+ @click.option("--height", default=480, type=int, help="Height of the video.")
+ @click.option("--num_frames", default=163, type=int, help="Number of frames.")
+ @click.option("--seed", default=12345, type=int, help="Random seed.")
+ @click.option("--cfg_scale", default=4.5, type=float, help="CFG Scale.")
+ @click.option("--num_steps", default=64, type=int, help="Number of inference steps.")
+ @click.option("--model_dir", required=True, help="Path to the model directory.")
+ @click.option("--cpu_offload", is_flag=True, help="Whether to offload model to CPU")
+ def generate_cli(
+     prompt, negative_prompt, width, height, num_frames, seed, cfg_scale, num_steps, model_dir, cpu_offload
+ ):
+     configure_model(model_dir, cpu_offload)
+     output = generate_video(
+         prompt,
+         negative_prompt,
+         width,
+         height,
+         num_frames,
+         seed,
+         cfg_scale,
+         num_steps,
+     )
+     click.echo(f"Video generated at: {output}")
+
+
+ if __name__ == "__main__":
+     generate_cli()
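The schedule comments in generate_video pin down the two sampler inputs, and a short
sketch can make them concrete. This assumes only what those comments state: the
stepped cfg schedule is the exact variant the comment itself suggests, and the asserts
restate the sigma_schedule contract (length, endpoints, monotonicity):

    from genmo.mochi_preview.pipelines import linear_quadratic_schedule

    num_inference_steps = 64

    # Varying-cfg schedule, as suggested in the comment above.
    cfg_schedule = [5.0] * (num_inference_steps // 2) + [4.5] * (num_inference_steps // 2)

    # sigma_schedule contract restated as checks: length num_inference_steps + 1,
    # starting at 1.0, ending at 0.0, monotonically decreasing in between.
    sigma_schedule = linear_quadratic_schedule(num_inference_steps, 0.025)
    assert len(sigma_schedule) == num_inference_steps + 1
    assert sigma_schedule[0] == 1.0 and sigma_schedule[-1] == 0.0
    assert all(a > b for a, b in zip(sigma_schedule, sigma_schedule[1:]))

The script itself runs as python demos/cli.py --model_dir <path_to_weights>; every
other option has a default, and each render writes an .mp4 plus a .json sidecar of
the generation arguments into outputs/.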
demos/comfyui_nodes.py ADDED
File without changes
demos/gradio_ui.py ADDED
@@ -0,0 +1,55 @@
+ #! /usr/bin/env python
+
+
+ import click
+ import gradio as gr
+
+ import sys
+ sys.path.append("..")
+ from cli import generate_video, configure_model
+
+ with gr.Blocks() as demo:
+     gr.Markdown("Video Generator")
+     with gr.Row():
+         prompt = gr.Textbox(
+             label="Prompt",
+             value="A hand with delicate fingers picks up a bright yellow lemon from a wooden bowl filled with lemons and sprigs of mint against a peach-colored background. The hand gently tosses the lemon up and catches it, showcasing its smooth texture. A beige string bag sits beside the bowl, adding a rustic touch to the scene. Additional lemons, one halved, are scattered around the base of the bowl. The even lighting enhances the vibrant colors and creates a fresh, inviting atmosphere.",
+         )
+         negative_prompt = gr.Textbox(label="Negative Prompt", value="")
+         seed = gr.Number(label="Seed", value=1710977262, precision=0)
+     with gr.Row():
+         width = gr.Number(label="Width", value=848, precision=0)
+         height = gr.Number(label="Height", value=480, precision=0)
+         num_frames = gr.Number(label="Number of Frames", value=163, precision=0)
+     with gr.Row():
+         cfg_scale = gr.Number(label="CFG Scale", value=4.5)
+         num_inference_steps = gr.Number(label="Number of Inference Steps", value=200, precision=0)
+     btn = gr.Button("Generate Video")
+     output = gr.Video()
+
+     btn.click(
+         generate_video,
+         inputs=[
+             prompt,
+             negative_prompt,
+             width,
+             height,
+             num_frames,
+             seed,
+             cfg_scale,
+             num_inference_steps,
+         ],
+         outputs=output,
+     )
+
+
+ @click.command()
+ @click.option("--model_dir", required=True, help="Path to the model directory.")
+ @click.option("--cpu_offload", is_flag=True, help="Whether to offload model to CPU")
+ def launch(model_dir, cpu_offload):
+     configure_model(model_dir, cpu_offload)
+     demo.launch()
+
+
+ if __name__ == "__main__":
+     launch()
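To launch the UI, run the script from inside the demos/ directory, since both the
"from cli import ..." line and the relative sys.path.append("..") assume that working
directory; the path below is a placeholder:

    python gradio_ui.py --model_dir <path_to_weights> [--cpu_offload]

Note that --cpu_offload is only valid in single-GPU mode: load_model in cli.py above
rejects offload when more than one GPU is visible.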