from PIL import Image
import os
import cv2
import numpy as np
from moviepy.editor import VideoFileClip, ImageSequenceClip
import gradio as gr
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
import torch
import gc

# TF32 matmuls speed up fp16/fp32 mixed workloads on Ampere and newer GPUs
torch.backends.cuda.matmul.allow_tf32 = True

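# Load the brightness ControlNet and attach it to a Stable Diffusion 1.5
# pipeline in fp16, then trade speed for memory with xformers attention,
# model CPU offload, and attention slicing (assumes a CUDA GPU with
# xformers installed).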
controlnet = ControlNetModel.from_pretrained(
    "ioclab/control_v1p_sd15_brightness",
    torch_dtype=torch.float16,
    use_safetensors=True,
)

pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
    safety_checker=None,
)

pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

pipe.enable_xformers_memory_efficient_attention()
pipe.enable_model_cpu_offload()
pipe.enable_attention_slicing()

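# Split the input video into individual JPEG frames on disk, resizing to
# 512px height and capping the frame rate at 30 fps.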
def get_frames(video_in):
    frames = []
    clip = VideoFileClip(video_in)

    if clip.fps > 30:
        print("video rate is over 30, resetting to 30")
        clip_resized = clip.resize(height=512)
        clip_resized.write_videofile("video_resized.mp4", fps=30)
    else:
        print("video rate is OK")
        clip_resized = clip.resize(height=512)
        clip_resized.write_videofile("video_resized.mp4", fps=clip.fps)

    print("video resized to 512 height")

    cap = cv2.VideoCapture("video_resized.mp4")
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("video fps: " + str(fps))

    i = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        cv2.imwrite('kang' + str(i) + '.jpg', frame)
        frames.append('kang' + str(i) + '.jpg')
        i += 1

    cap.release()
    cv2.destroyAllWindows()
    print("broke the video into frames")

    return frames, fps

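# Reassemble processed frames into an MP4 at the original frame rate.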
def create_video(frames, fps):
    print("building video result")
    clip = ImageSequenceClip(frames, fps=fps)
    clip.write_videofile("_result.mp4", fps=fps)

    return "_result.mp4"

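# Run a single frame through the pipeline. The brightness ControlNet is
# conditioned on a grayscale image, so the frame is converted to 'L' mode;
# seed == -1 draws a fresh random seed per call.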
def process_brightness(
    prompt,
    negative_prompt,
    conditioning_image,
    num_inference_steps=30,
    size=768,
    guidance_scale=7.0,
    seed=1234,
):
    conditioning_image_raw = Image.fromarray(conditioning_image)
    conditioning_image = conditioning_image_raw.convert('L')

    g_cpu = torch.Generator()
    if seed == -1:
        generator = g_cpu.manual_seed(g_cpu.seed())
    else:
        generator = g_cpu.manual_seed(seed)

    output_image = pipe(
        prompt,
        conditioning_image,
        height=size,
        width=size,
        num_inference_steps=num_inference_steps,
        generator=generator,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        controlnet_conditioning_scale=1.0,
    ).images[0]

    del conditioning_image, conditioning_image_raw
    gc.collect()

    return output_image

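# Full pipeline: extract frames, diffuse each one with the brightness
# ControlNet, then stitch the results back into a video.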
def infer(video_in, trim_value, prompt,
          negative_prompt,
          num_inference_steps=30,
          size=768,
          guidance_scale=7.0,
          seed=1234
          ):
    break_vid = get_frames(video_in)
    frames_list = break_vid[0]
    fps = break_vid[1]
    n_frame = int(trim_value * fps)

    if n_frame >= len(frames_list):
        print("video is shorter than the cut value")
        n_frame = len(frames_list)

    result_frames = []
    print("set stop frames to: " + str(n_frame))

    for i, image in enumerate(frames_list[0:n_frame]):
        # Read the frame back and hand it to the pipeline as a NumPy array
        conditioning_image = Image.open(image).convert("RGB")
        conditioning_image = np.array(conditioning_image)
        output_frame = process_brightness(
            prompt,
            negative_prompt,
            conditioning_image,
            num_inference_steps=num_inference_steps,
            size=size,
            guidance_scale=guidance_scale,
            seed=seed
        )

        # process_brightness returns a PIL image, so save it directly
        output_frame.save("_frame_" + str(i) + ".jpeg")
        result_frames.append("_frame_" + str(i) + ".jpeg")
        print("frame " + str(i) + "/" + str(n_frame) + ": done;")

    final_vid = create_video(result_frames, fps)

    return final_vid

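# Gradio UI: prompts and video upload on the left, advanced sampling
# options in an accordion, result player on the right.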
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # ControlNet on Brightness • Video

        A demo of the brightness ControlNet applied frame by frame to a video.
        """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
            )
            video_in = gr.Video(
                label="Conditioning Video",
                source="upload",
                type="filepath"
            )
            trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
            with gr.Accordion('Advanced options', open=False):
                with gr.Row():
                    num_inference_steps = gr.Slider(
                        10, 40, 20,
                        step=1,
                        label="Steps",
                    )
                    size = gr.Slider(
                        256, 768, 512,
                        step=128,
                        label="Size",
                    )
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label='Guidance Scale',
                        minimum=0.1,
                        maximum=30.0,
                        value=7.0,
                        step=0.1
                    )
                    seed = gr.Slider(
                        label='Seed',
                        value=-1,
                        minimum=-1,
                        maximum=2147483647,
                        step=1,
                    )
            submit_btn = gr.Button(
                value="Submit",
                variant="primary"
            )
        with gr.Column(min_width=300):
            # infer() returns an MP4 filepath, so the result must be a Video component
            output = gr.Video(
                label="Result",
            )

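    # Wire the Submit button to the inference function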
    submit_btn.click(
        fn=infer,
        inputs=[
            video_in, trim_in, prompt, negative_prompt, num_inference_steps, size, guidance_scale, seed
        ],
        outputs=output
    )

    gr.Markdown(
        """
        * [Dataset](https://huggingface.co/datasets/ioclab/grayscale_image_aesthetic_3M)
        * [Diffusers model](https://huggingface.co/ioclab/control_v1p_sd15_brightness), [Web UI model](https://huggingface.co/ioclab/ioc-controlnet)
        * [Training Report](https://api.wandb.ai/links/ciaochaos/oot5cui2), [Doc (Chinese)](https://aigc.ioclab.com/sd-showcase/brightness-controlnet.html)
        """)

demo.launch()