import torch
from diffusers import DiffusionPipeline

from .invert import Inverter
from .generate import Generator
from .utils import init_model, seed_everything, get_frame_ids

class VidToMePipeline(DiffusionPipeline):
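    """VidToMe video editing wrapped as a single diffusers pipeline.

    Runs the two-stage VidToMe workflow: an inversion pass that encodes the
    input video into diffusion latents, followed by a generation pass that
    re-synthesizes the video under an edit prompt, using local and global
    token merging across frames for temporal consistency.
    """
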
    def __init__(self, device="cuda", sd_version="1.5", float_precision="fp16", height=512, width=512):
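        """Build the pipeline around a Stable Diffusion backbone.

        Args:
            device: Torch device the model is loaded on (e.g. "cuda").
            sd_version: Stable Diffusion version string passed to init_model.
            float_precision: Weight precision, e.g. "fp16"; forwarded to init_model.
            height, width: Output resolution in pixels.
        """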
        super().__init__()
        # Register configuration parameters so they are serialized with the pipeline config.
        self.register_to_config(device=device, sd_version=sd_version, float_precision=float_precision, height=height, width=width)
        self.sd_version = sd_version
        self.float_precision = float_precision
        self.height = height
        self.width = width
        # DiffusionPipeline exposes a read-only `device` property, so keep our own copy.
        self._device = device
        # Initialize the core pipeline components (model, scheduler).
        pipe, scheduler, model_key = init_model(device, sd_version, None, "none", float_precision)
        self.pipe = pipe
        self.scheduler = scheduler
        self.model_key = model_key

    def __call__(self, video_path=None, video_prompt=None, edit_prompt=None, 
                 control_type="none", n_timesteps=50, guidance_scale=7.5, 
                 negative_prompt="ugly, blurry, low res", frame_range=None, 
                 use_lora=False, seed=123, local_merge_ratio=0.9, global_merge_ratio=0.8):
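        """Edit a video: invert it to latents, then regenerate it under edit_prompt.

        Args:
            video_path: Path to the input video.
            video_prompt: Prompt describing the source video, used for inversion.
            edit_prompt: Prompt describing the desired edit, used for generation.
            control_type: ControlNet conditioning type, or "none" to disable.
            n_timesteps: Number of denoising steps in the generation stage.
            guidance_scale: Classifier-free guidance scale.
            negative_prompt: Negative prompt applied during generation.
            frame_range: [start, end] frame range to edit; defaults to [0, 32].
            use_lora: Whether to apply LoRA weights during generation.
            seed: Random seed for reproducibility.
            local_merge_ratio, global_merge_ratio: VidToMe token-merging ratios
                for local (per-chunk) and global (cross-frame) merging.
        """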
        
        # Build the run configuration from the call arguments.
        config = self._build_config(video_path, video_prompt, edit_prompt, control_type, 
                                    n_timesteps, guidance_scale, negative_prompt, 
                                    frame_range, use_lora, seed, local_merge_ratio, global_merge_ratio)
        
        # Seed all RNGs for reproducibility.
        seed_everything(config['seed'])

        # inversion stage
        print("Start inversion!")
        inversion = Inverter(self.pipe, self.scheduler, config)
        inversion(config['input_path'], config['inversion']['save_path'])

        # generation stage
        print("Start generation!")
        generator = Generator(self.pipe, self.scheduler, config)
        frame_ids = get_frame_ids(config['generation']['frame_range'], None)
        generator(config['input_path'], config['generation']['latents_path'], 
                  config['generation']['output_path'], frame_ids=frame_ids)
        print(f"Output generated at: {config['generation']['output_path']}")

    def _build_config(self, video_path, video_prompt, edit_prompt, control_type, 
                      n_timesteps, guidance_scale, negative_prompt, frame_range, 
                      use_lora, seed, local_merge_ratio, global_merge_ratio):
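        """Assemble the nested config dict consumed by Inverter and Generator.

        Output locations are fixed defaults under "outputs/": inversion latents
        are saved to outputs/latents and read back by the generation stage,
        which writes the final video to outputs/final.
        """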
        # Construct the config dictionary from the user inputs.
        config = {
            'sd_version': self.sd_version,
            'input_path': video_path,
            'work_dir': "outputs/",
            'height': self.height,
            'width': self.width,
            'inversion': {
                'prompt': video_prompt or "Default video prompt.",
                'save_path': "outputs/latents",
                'steps': 50,
                'save_intermediate': False
            },
            'generation': {
                'control': control_type,
                'guidance_scale': guidance_scale,
                'n_timesteps': n_timesteps,
                'negative_prompt': negative_prompt,
                'prompt': edit_prompt or "Default edit prompt.",
                'latents_path': "outputs/latents",
                'output_path': "outputs/final",
                'frame_range': frame_range or [0, 32],
                'use_lora': use_lora,
                'local_merge_ratio': local_merge_ratio,
                'global_merge_ratio': global_merge_ratio
            },
            'seed': seed,
            'device': "cuda",
            'float_precision': self.float_precision
        }
        return config

# Sample usage:
# pipeline = VidToMePipeline(device="cuda", sd_version="2.1", float_precision="fp16")
# pipeline(video_path="path/to/video.mp4", video_prompt="A beautiful scene of a sunset", 
#          edit_prompt="Make the sunset look more vibrant", control_type="depth", n_timesteps=50)
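# Note: this module uses relative imports (.invert, .generate, .utils), so import
# VidToMePipeline from the package that contains it rather than executing this
# file directly as a script.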