brightness-controlnet_video

Paused

File size: 7,105 Bytes

from PIL import Image
import os
import cv2
import numpy as np
from PIL import Image
from moviepy.editor import *
import gradio as gr
from diffusers import StableDiffusionControlNetPipeline, ControlNetModel, UniPCMultistepScheduler
import torch
torch.backends.cuda.matmul.allow_tf32 = True
import gc

# Load the brightness ControlNet checkpoint in half precision from safetensors weights.
controlnet = ControlNetModel.from_pretrained("ioclab/control_v1p_sd15_brightness", torch_dtype=torch.float16, use_safetensors=True)

# Build the Stable Diffusion v1.5 pipeline around that ControlNet, also in
# half precision. Passing safety_checker=None disables the safety checker.
pipe = StableDiffusionControlNetPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    controlnet=controlnet,
    torch_dtype=torch.float16,
    safety_checker=None,
)

# Swap in the UniPC multistep scheduler, reusing the config of the scheduler
# the pipeline was loaded with.
pipe.scheduler = UniPCMultistepScheduler.from_config(pipe.scheduler.config)

# Memory-related pipeline options.
# NOTE(review): the xformers call presumably requires the optional `xformers`
# package to be installed — confirm for the deployment environment.
pipe.enable_xformers_memory_efficient_attention()
pipe.enable_model_cpu_offload()
pipe.enable_attention_slicing()

def get_frames(video_in):
    """Resize a video to 512 px height and split it into JPEG frames on disk.

    The clip is re-encoded to ``video_resized.mp4`` with its frame rate capped
    at 30, then every frame of that file is written to ``kang<i>.jpg`` in the
    current working directory.

    Args:
        video_in: Path of the input video file.

    Returns:
        A ``(frames, fps)`` tuple: the list of frame file names in playback
        order, and the frame rate OpenCV reports for the resized video.
    """
    resized_path = "video_resized.mp4"

    clip = VideoFileClip(video_in)
    try:
        # Cap the output frame rate at 30; slower clips keep their own rate.
        if clip.fps > 30:
            print("vide rate is over 30, resetting to 30")
            target_fps = 30
        else:
            print("video rate is OK")
            target_fps = clip.fps
        clip.resize(height=512).write_videofile(resized_path, fps=target_fps)
    finally:
        # Release the clip's resources even when resizing/encoding fails.
        clip.close()

    print("video resized to 512 height")

    frames = []
    cap = cv2.VideoCapture(resized_path)
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        print(f"video fps: {fps}")
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            frame_path = f"kang{len(frames)}.jpg"
            cv2.imwrite(frame_path, frame)
            frames.append(frame_path)
    finally:
        # Always release the capture handle, including on a failed write.
        cap.release()

    # No GUI windows are ever opened here, so there is nothing to destroy.
    print("broke the video into frames")

    return frames, fps

def create_video(frames, fps):
    """Assemble image files into ``_result.mp4`` and return that path.

    Args:
        frames: Sequence of image file paths, in playback order.
        fps: Frame rate used both for the clip and for the written file.

    Returns:
        The output file name, ``"_result.mp4"``.
    """
    output_path = "_result.mp4"
    print("building video result")
    ImageSequenceClip(frames, fps=fps).write_videofile(output_path, fps=fps)
    return output_path

def process_brightness(
        prompt,
        negative_prompt,
        conditioning_image,
        num_inference_steps=30,
        size=512,
        guidance_scale=7.0,
        seed=1234,
):
    """Generate one image from a prompt, conditioned on a frame's brightness.

    Args:
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative text prompt passed to the pipeline.
        conditioning_image: Array accepted by ``Image.fromarray``; it is
            converted to single-channel ('L') before being used as the
            ControlNet conditioning image.
        num_inference_steps: Number of inference steps for the pipeline.
        size: Used as both output height and width.
        guidance_scale: Guidance scale for the pipeline.
        seed: Seed for the torch generator; ``-1`` draws a fresh
            non-deterministic seed instead.

    Returns:
        The first image in the pipeline's ``images`` output.
    """
    source_image = Image.fromarray(conditioning_image)
    grayscale = source_image.convert('L')

    rng = torch.Generator()
    # -1 asks for a freshly drawn seed; any other value is used verbatim.
    chosen_seed = rng.seed() if seed == -1 else seed
    generator = rng.manual_seed(chosen_seed)

    result = pipe(
        prompt,
        grayscale,
        height=size,
        width=size,
        num_inference_steps=num_inference_steps,
        generator=generator,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        controlnet_conditioning_scale=1.0,
    )
    output_image = result.images[0]

    # Drop the intermediate images and force a collection pass before returning.
    del grayscale, source_image
    gc.collect()

    return output_image

def infer(video_in, trim_value, prompt,
                    negative_prompt,
                    num_inference_steps=30,
                    size=512,
                    guidance_scale=7.0,
                    seed=1234
         ):
    """Run the brightness ControlNet over the first seconds of a video.

    The video is split into frames, the first ``trim_value`` seconds of frames
    are each passed through ``process_brightness``, the generated images are
    saved as ``_frame_<i>.jpeg``, and the results are assembled into a video.

    Args:
        video_in: Path of the input video file.
        trim_value: Number of seconds of video to process, from the start.
        prompt: Text prompt applied to every frame.
        negative_prompt: Negative text prompt applied to every frame.
        num_inference_steps: Inference steps per frame.
        size: Output height and width per frame.
        guidance_scale: Guidance scale per frame.
        seed: Generator seed shared by all frames; ``-1`` picks a random seed
            once for the whole video.

    Returns:
        Path of the generated video file.

    Raises:
        ValueError: If the trim value and frame rate select zero frames.
    """
    # 1. break video into frames and get FPS
    frames_list, fps = get_frames(video_in)
    n_frame = int(trim_value * fps)

    if n_frame >= len(frames_list):
        print("video is shorter than the cut value")
        n_frame = len(frames_list)

    if n_frame <= 0:
        # Fail early with a clear message instead of crashing later while
        # building a video out of an empty frame list.
        raise ValueError("no frames selected: check the video and the cut value")

    # Resolve a "random" seed once so every frame shares it; re-drawing a seed
    # per frame would make consecutive frames needlessly inconsistent.
    if seed == -1:
        seed = torch.Generator().seed()

    # 2. prepare frames result arrays
    result_frames = []
    print(f"set stop frames to: {n_frame}")

    for i, image in enumerate(frames_list[:n_frame]):
        conditioning_image = np.array(Image.open(image).convert("RGB"))
        # Forward the caller's settings (these used to be hard-coded to
        # 30 / 512 / 7.0 / 1234, silently ignoring the UI controls).
        # NOTE: integer-valued settings are coerced defensively in case the
        # UI delivers them as floats.
        output_frame = process_brightness(
            prompt,
            negative_prompt,
            conditioning_image,
            num_inference_steps=int(num_inference_steps),
            size=int(size),
            guidance_scale=guidance_scale,
            seed=int(seed),
        )
        print(output_frame)

        frame_path = f"_frame_{i}.jpeg"
        output_frame.save(frame_path)
        result_frames.append(frame_path)
        print(f"frame {i}/{n_frame}: done;")

    # 3. stitch the generated frames back into a video at the source FPS
    final_vid = create_video(result_frames, fps)

    return final_vid

# Gradio UI: inputs on the left (prompts, video, trim length, advanced
# sampling options), resulting video on the right.
with gr.Blocks() as demo:
    gr.Markdown(
        """
    # ControlNet on Brightness • Video

    This is a demo on ControlNet based on brightness for video.
    """)

    with gr.Row():
        with gr.Column():
            prompt = gr.Textbox(
                label="Prompt",
            )
            negative_prompt = gr.Textbox(
                label="Negative Prompt",
            )
            video_in = gr.Video(
                label="Conditioning Video",
                source="upload",
                type="filepath"
            )
            # `minimum` was misspelled `minimun`, so the intended 1-second
            # lower bound was never applied to the slider.
            trim_in = gr.Slider(label="Cut video at (s)", minimum=1, maximum=5, step=1, value=1)
            with gr.Accordion('Advanced options', open=False):
                with gr.Row():
                    num_inference_steps = gr.Slider(
                        10, 40, 20,
                        step=1,
                        label="Steps",
                    )
                    size = gr.Slider(
                        256, 768, 512,
                        step=128,
                        label="Size",
                    )
                with gr.Row():
                    guidance_scale = gr.Slider(
                        label='Guidance Scale',
                        minimum=0.1,
                        maximum=30.0,
                        value=7.0,
                        step=0.1
                    )
                    # A seed of -1 means "pick a random seed".
                    seed = gr.Slider(
                        label='Seed',
                        value=-1,
                        minimum=-1,
                        maximum=2147483647,
                        step=1,
                        # randomize=True
                    )
            submit_btn = gr.Button(
                value="Submit",
                variant="primary"
            )
        with gr.Column(min_width=300):
            output = gr.Video(
                label="Result",
            )

    # The input order must match the positional parameters of `infer`.
    submit_btn.click(
        fn=infer,
        inputs=[
            video_in, trim_in, prompt, negative_prompt, num_inference_steps, size, guidance_scale, seed
        ],
        outputs=output
    )

    gr.Markdown(
        """
    * [Dataset](https://huggingface.co/datasets/ioclab/grayscale_image_aesthetic_3M)
    * [Diffusers model](https://huggingface.co/ioclab/control_v1p_sd15_brightness), [Web UI model](https://huggingface.co/ioclab/ioc-controlnet)
    * [Training Report](https://api.wandb.ai/links/ciaochaos/oot5cui2), [Doc(Chinese)](https://aigc.ioclab.com/sd-showcase/brightness-controlnet.html)
    """)

demo.launch()