import torch
from diffusers import DiffusionPipeline
from omegaconf import OmegaConf

from .invert import Inverter
from .generate import Generator
from .utils import init_model, seed_everything, get_frame_ids


class VidToMePipeline(DiffusionPipeline):
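    """Diffusers-style wrapper around the VidToMe video editing workflow.

    The constructor loads the underlying Stable Diffusion model via `init_model`,
    and calling the pipeline runs the two VidToMe stages: inverting the input
    video into latents, then generating the edited video from those latents.
    """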
    def __init__(self, device="cuda", sd_version="1.5", float_precision="fp16", height=512, width=512):
        super().__init__()
        # Register configuration parameters with the diffusers config system.
        self.register_to_config(device=device, sd_version=sd_version,
                                float_precision=float_precision, height=height, width=width)
        # `DiffusionPipeline.device` is a read-only property, so keep the requested
        # device under a separate attribute for use in `_build_config`.
        self.device_name = device
        self.sd_version = sd_version
        self.float_precision = float_precision
        self.height = height
        self.width = width
        # Initialize the core pipeline components.
        pipe, scheduler, model_key = init_model(device, sd_version, None, "none", float_precision)
        self.pipe = pipe
        self.scheduler = scheduler
        self.model_key = model_key

    def __call__(self, video_path=None, video_prompt=None, edit_prompt=None,
                 control_type="none", n_timesteps=50, guidance_scale=7.5,
                 negative_prompt="ugly, blurry, low res", frame_range=None,
                 use_lora=False, seed=123, local_merge_ratio=0.9, global_merge_ratio=0.8):
        """Invert `video_path` to latents, then generate the edited video guided by `edit_prompt`."""
        # Build the run configuration dynamically from the user inputs.
        config = self._build_config(video_path, video_prompt, edit_prompt, control_type,
                                    n_timesteps, guidance_scale, negative_prompt,
                                    frame_range, use_lora, seed, local_merge_ratio, global_merge_ratio)
        # Seed everything for reproducibility.
        seed_everything(config['seed'])

        # Inversion stage: encode the input video frames into latents.
        print("Start inversion!")
        inversion = Inverter(self.pipe, self.scheduler, config)
        inversion(config['input_path'], config['inversion']['save_path'])

        # Generation stage: produce the edited video from the inverted latents.
        print("Start generation!")
        generator = Generator(self.pipe, self.scheduler, config)
        frame_ids = get_frame_ids(config['generation']['frame_range'], None)
        generator(config['input_path'], config['generation']['latents_path'],
                  config['generation']['output_path'], frame_ids=frame_ids)
        print(f"Output generated at: {config['generation']['output_path']}")

    def _build_config(self, video_path, video_prompt, edit_prompt, control_type,
                      n_timesteps, guidance_scale, negative_prompt, frame_range,
                      use_lora, seed, local_merge_ratio, global_merge_ratio):
        """Assemble the run configuration from user inputs and pipeline defaults."""
        # Build the config with OmegaConf so that "${work_dir}" references are
        # resolved through interpolation when the values are accessed.
        config = OmegaConf.create({
            'sd_version': self.sd_version,          # Stable Diffusion version chosen at init
            'model_key': self.model_key,            # Resolved model key (may be None)
            'input_path': video_path,               # Path to the input video
            'work_dir': "workdir",                  # Default working directory
            'height': self.height,
            'width': self.width,
            'inversion': {
                'save_path': "${work_dir}/latents", # Where latents are saved during inversion
                'prompt': video_prompt or "Default video prompt.",
                'n_frames': None,                   # None inverts all frames
                'steps': 50,                        # Default number of inversion steps
                'save_intermediate': False,         # Save intermediate latents
                'save_steps': 50,                   # Interval for saving intermediates
                'use_blip': False,                  # Use BLIP to caption the video automatically
                'recon': False,                     # Reconstruct the input video from latents
                'control': control_type or "none",  # e.g. 'none', 'tile', 'softedge'
                'control_scale': 1.0,               # ControlNet conditioning scale
                'batch_size': 8,                    # Batch size for inversion
                'force': False,                     # Force inversion even if latents already exist
            },
            'generation': {
                'control': "pnp",                   # Default to Plug-and-Play generation control
                'pnp_attn_t': 0.5,                  # PnP attention injection threshold
                'pnp_f_t': 0.8,                     # PnP feature injection threshold
                'control_scale': 1.0,               # Scale for ControlNet-like controls
                'guidance_scale': guidance_scale,   # Classifier-free guidance scale
                'n_timesteps': n_timesteps,         # Number of diffusion timesteps
                'negative_prompt': negative_prompt or "ugly, blurry, low res",
                'prompt': edit_prompt or None,      # Edit prompt applied during generation
                'latents_path': "${work_dir}/latents",  # Latents produced by the inversion stage
                'output_path': "${work_dir}",       # Output directory for the edited video
                'chunk_size': 4,                    # Frames processed per chunk
                'chunk_ord': "mix-4",               # Processing order for video chunks
                'local_merge_ratio': local_merge_ratio,    # Local token-merging ratio
                'merge_global': True,               # Enable global merging
                'global_merge_ratio': global_merge_ratio,  # Global token-merging ratio
                'global_rand': 0.5,                 # Randomness in global merging
                'align_batch': True,                # Align batch processing across frames
                'frame_range': frame_range or [0, 32, 1],  # [start, end, step]
                'frame_ids': None,                  # Explicit frame IDs to edit (overrides range)
                'save_frame': True,                 # Save individual edited frames
                'use_lora': use_lora,               # Enable LoRA weights if provided
                'lora': {                           # Optional LoRA configuration
                    'pretrained_model_name_or_path_or_dict': None,
                    'lora_weight_name': None,
                    'lora_adapter': None,
                    'lora_weight': 1.0,
                },
            },
            'seed': seed,                                   # Seed for reproducibility
            'device': self.device_name,                     # Device requested at init
            'float_precision': self.float_precision,        # Precision requested at init
            'enable_xformers_memory_efficient_attention': True,
        })
        return config
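

# Helper sketch (added for illustration; not part of the original VidToMe API):
# shows how the OmegaConf config assembled by `_build_config` can be inspected
# and selectively overridden before a run, using only standard OmegaConf calls.
# The positional defaults below mirror those of `VidToMePipeline.__call__`.
def preview_config(pipeline, video_path="path/to/video.mp4", **overrides):
    """Build the default config for `video_path`, apply `overrides`, and print it as YAML."""
    config = pipeline._build_config(video_path, None, None, "none",
                                    50, 7.5, "ugly, blurry, low res", None,
                                    False, 123, 0.9, 0.8)
    # Merge user overrides (e.g. work_dir="runs/sunset") on top of the defaults.
    config = OmegaConf.merge(config, OmegaConf.create(dict(overrides)))
    # resolve=True expands "${work_dir}" interpolations in the printed YAML.
    print(OmegaConf.to_yaml(config, resolve=True))
    return config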


# Sample usage, guarded so it only runs when the module is executed directly
# (e.g. `python -m <package>.<module>`); the video path below is a placeholder.
if __name__ == "__main__":
    pipeline = VidToMePipeline(device="cuda", sd_version="2.1", float_precision="fp16")
    pipeline(video_path="path/to/video.mp4", video_prompt="A beautiful scene of a sunset",
             edit_prompt="Make the sunset look more vibrant", control_type="depth", n_timesteps=50)