# fffiloni's picture
# Update app.py
# 4250afc
from PIL import Image
import os
import cv2
import numpy as np
from PIL import Image
from moviepy.editor import *
import gradio as gr
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
import torch
# Permit TF32 for CUDA matrix multiplications (set once, before any model is loaded below).
torch.backends.cuda.matmul.allow_tf32 = True
import gc
# Module-level model setup: runs once at import time and leaves `pipe` as the
# shared pipeline used by process_brightness().

# Brightness-conditioned ControlNet, loaded in float16 from safetensors weights.
controlnet = ControlNetModel.from_pretrained("ioclab/control_v1p_sd15_brightness", torch_dtype=torch.float16, use_safetensors=True)
# Stable Diffusion 1.5 pipeline wired to that ControlNet; the safety checker is disabled.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
    safety_checker=None,
)
# Replace the default scheduler with UniPC, reusing the existing scheduler's config.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)
# Memory-related switches on the pipeline.
# NOTE(review): the xformers call presumably requires the xformers package to be
# installed — confirm against the deployment environment.
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_model_cpu_offload()
pipe.enable_attention_slicing()
def get_frames(video_in):
    """Resize a video to 512 px height and dump each frame to a JPEG file.

    The input is re-encoded to ``video_resized.mp4`` (capped at 30 fps; slower
    clips keep their own rate), then read back with OpenCV and every frame is
    written to ``kang<index>.jpg`` in the current working directory.

    Args:
        video_in: Path of the source video, as accepted by ``VideoFileClip``.

    Returns:
        A ``(frames, fps)`` tuple: the list of written frame file names in
        playback order, and the frame rate OpenCV reports for the resized
        video.
    """
    clip = VideoFileClip(video_in)
    try:
        # check fps: anything faster than 30 fps is re-encoded at 30
        if clip.fps > 30:
            print("vide rate is over 30, resetting to 30")
            target_fps = 30
        else:
            print("video rate is OK")
            target_fps = clip.fps
        # resize the video
        clip_resized = clip.resize(height=512)
        clip_resized.write_videofile("video_resized.mp4", fps=target_fps)
    finally:
        # Release the underlying reader even if resizing/encoding fails.
        clip.close()
    print("video resized to 512 height")

    # Opens the Video file with CV2
    frames = []
    cap = cv2.VideoCapture("video_resized.mp4")
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        print("video fps: " + str(fps))
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_path = 'kang' + str(len(frames)) + '.jpg'
            cv2.imwrite(frame_path, frame)
            frames.append(frame_path)
    finally:
        cap.release()
    print("broke the video into frames")
    return frames, fps
def create_video(frames, fps):
    """Assemble an image sequence into ``_result.mp4`` and return that path.

    Args:
        frames: Sequence of frames passed straight to ``ImageSequenceClip``.
        fps: Frame rate used both for the clip and for the written file.

    Returns:
        The output file name, ``"_result.mp4"``.
    """
    output_path = "_result.mp4"
    print("building video result")
    sequence = ImageSequenceClip(frames, fps=fps)
    sequence.write_videofile(output_path, fps=fps)
    return output_path
def process_brightness(
    prompt,
    negative_prompt,
    conditioning_image,
    num_inference_steps=30,
    size=512,
    guidance_scale=7.0,
    seed=1234,
):
    """Generate one image with the module-level ``pipe`` from a brightness map.

    Args:
        prompt: Text prompt forwarded to the pipeline.
        negative_prompt: Negative prompt forwarded to the pipeline.
        conditioning_image: Array accepted by ``Image.fromarray``; it is
            converted to single-channel ``'L'`` mode before being used as the
            ControlNet conditioning image.
        num_inference_steps: Number of denoising steps.
        size: Used for both ``height`` and ``width`` of the output.
        guidance_scale: Guidance scale forwarded to the pipeline.
        seed: Generator seed; ``-1`` asks the generator for its own seed.

    Returns:
        The first image of the pipeline output.
    """
    source_image = Image.fromarray(conditioning_image)
    grayscale = source_image.convert('L')

    # manual_seed() returns the generator itself, so it can be passed on directly.
    rng = torch.Generator()
    chosen_seed = rng.seed() if seed == -1 else seed
    rng = rng.manual_seed(chosen_seed)

    pipeline_options = {
        "height": size,
        "width": size,
        "num_inference_steps": num_inference_steps,
        "generator": rng,
        "negative_prompt": negative_prompt,
        "guidance_scale": guidance_scale,
        "controlnet_conditioning_scale": 1.0,
    }
    output_image = pipe(prompt, grayscale, **pipeline_options).images[0]

    # Drop the intermediate images and force a collection before returning.
    del grayscale, source_image
    gc.collect()
    return output_image
def infer(video_in, trim_value, prompt,
          negative_prompt,
          num_inference_steps=30,
          size=512,
          guidance_scale=7.0,
          seed=1234
          ):
    """Re-render the first ``trim_value`` seconds of a video, frame by frame.

    The video is split into frames with ``get_frames``, each kept frame is
    run through ``process_brightness`` and saved as ``_frame_<index>.jpeg``,
    and the results are stitched back together with ``create_video``.

    Args:
        video_in: Path of the uploaded video.
        trim_value: Number of seconds (from the start) to process.
        prompt: Text prompt applied to every frame.
        negative_prompt: Negative prompt applied to every frame.
        num_inference_steps: Denoising steps per frame.
        size: Output height and width per frame.
        guidance_scale: Guidance scale per frame.
        seed: Generator seed shared by all frames; ``-1`` draws one random
            seed for the whole clip.

    Returns:
        Path of the generated video file.

    Raises:
        ValueError: If there is no frame to process (unreadable video or a
            cut value that selects zero frames).
    """
    # 1. break video into frames and get FPS
    frames_list, fps = get_frames(video_in)

    n_frame = int(trim_value * fps)
    if n_frame >= len(frames_list):
        print("video is shorter than the cut value")
        n_frame = len(frames_list)
    if n_frame <= 0:
        # Fail early with a clear message instead of handing an empty
        # sequence to create_video().
        raise ValueError("no frames to process: check the video and the cut value")

    # UI sliders may deliver floats; these three are consumed as integers.
    num_inference_steps = int(num_inference_steps)
    size = int(size)
    seed = int(seed)
    if seed == -1:
        # Resolve "random" once so every frame is generated from the same
        # seed, rather than re-drawing a new one per frame.
        seed = torch.Generator().seed()

    # 2. prepare frames result arrays
    result_frames = []
    print("set stop frames to: " + str(n_frame))

    for i, image in enumerate(frames_list[:n_frame]):
        with Image.open(image) as frame_file:
            conditioning_image = np.array(frame_file.convert("RGB"))

        output_frame = process_brightness(
            prompt,
            negative_prompt,
            conditioning_image,
            num_inference_steps=num_inference_steps,
            size=size,
            guidance_scale=guidance_scale,
            seed=seed,
        )

        frame_path = "_frame_" + str(i) + ".jpeg"
        output_frame.save(frame_path)
        result_frames.append(frame_path)
        print("frame " + str(i) + "/" + str(n_frame) + ": done;")

    return create_video(result_frames, fps)
# Gradio UI: inputs on the left, resulting video on the right; the Submit
# button runs infer() with the widgets below as arguments.
with gr.Blocks() as demo:
    gr.Markdown(
        """
# ControlNet on Brightness • Video
This is a demo on ControlNet based on brightness for video.
""")

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
            )
            video_in = gr.Video(
                label="Conditioning Video",
                source="upload",
                type="filepath"
            )
            # `minimum` was previously misspelled, so the lower bound of 1 s
            # was never applied to the slider.
            trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)

            with gr.Accordion('Advanced options', open=False):
                with gr.Row():
                    num_inference_steps = gr.Slider(
                        10, 40, 20,
                        step=1,
                        label="Steps",
                    )
                    size = gr.Slider(
                        256, 768, 512,
                        step=128,
                        label="Size",
                    )
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label='Guidance Scale',
                        minimum=0.1,
                        maximum=30.0,
                        value=7.0,
                        step=0.1
                    )
                    # -1 is the "random seed" sentinel handled downstream.
                    seed = gr.Slider(
                        label='Seed',
                        value=-1,
                        minimum=-1,
                        maximum=2147483647,
                        step=1,
                        # randomize=True
                    )

            submit_btn = gr.Button(
                value="Submit",
                variant="primary"
            )

        with gr.Column(min_width=300):
            output = gr.Video(
                label="Result",
            )

    # Input order must match the positional parameters of infer().
    submit_btn.click(
        fn=infer,
        inputs=[
            video_in, trim_in, prompt, negative_prompt, num_inference_steps, size, guidance_scale, seed
        ],
        outputs=output
    )

    gr.Markdown(
        """
* [Dataset](https://huggingface.co/datasets/ioclab/grayscale_image_aesthetic_3M)
* [Diffusers model](https://huggingface.co/ioclab/control_v1p_sd15_brightness), [Web UI model](https://huggingface.co/ioclab/ioc-controlnet)
* [Training Report](https://api.wandb.ai/links/ciaochaos/oot5cui2), [Doc(Chinese)](https://aigc.ioclab.com/sd-showcase/brightness-controlnet.html)
""")

demo.launch()