import subprocess

# Pin numpy before the heavy imports (runtime workaround for the Space image).
subprocess.run('pip install numpy==1.26.4', shell=True)

import os
import random
from glob import glob
from pathlib import Path
from typing import Optional

import gradio as gr
import numpy as np
import requests
import spaces
import torch
from PIL import Image

from diffsynth import save_video, ModelManager, SVDVideoPipeline
from diffsynth import SDVideoPipeline, ControlNetConfigUnit, VideoData, save_frames
from diffsynth.extensions.RIFE import RIFESmoother


def download_model(url, file_path):
    # Make sure the target directory exists before writing the checkpoint.
    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    model_file = requests.get(url, allow_redirects=True)
    with open(file_path, "wb") as f:
        f.write(model_file.content)


download_model("https://civitai.com/api/download/models/229575",
               "models/stable_diffusion/aingdiffusion_v12.safetensors")
download_model("https://civitai.com/api/download/models/266360?type=Model&format=SafeTensor&size=pruned&fp=fp16",
               "models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors")
download_model("https://huggingface.co/guoyww/animatediff/resolve/main/mm_sd_v15_v2.ckpt",
               "models/AnimateDiff/mm_sd_v15_v2.ckpt")
download_model("https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11p_sd15_lineart.pth",
               "models/ControlNet/control_v11p_sd15_lineart.pth")
download_model("https://huggingface.co/lllyasviel/ControlNet-v1-1/resolve/main/control_v11f1e_sd15_tile.pth",
               "models/ControlNet/control_v11f1e_sd15_tile.pth")
download_model("https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model.pth",
               "models/Annotators/sk_model.pth")
download_model("https://huggingface.co/lllyasviel/Annotators/resolve/main/sk_model2.pth",
               "models/Annotators/sk_model2.pth")
download_model("https://civitai.com/api/download/models/25820?type=Model&format=PickleTensor&size=full&fp=fp16",
               "models/textual_inversion/verybadimagenegative_v1.3.pt")
# Note: this is a Google Drive share page, not a direct file link; requests will
# receive an HTML page here unless a direct download URL is substituted.
download_model("https://drive.google.com/file/d/1APIzVeI-4ZZCEuIRE1m6WYfSCaOsi_7_/view?usp=sharing",
               "models/RIFE/flownet.pkl")

HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Constants
MAX_SEED = np.iinfo(np.int32).max

CSS = """
footer {
    visibility: hidden;
}
"""

JS = """function () {
  gradioURL = window.location.href
  if (!gradioURL.endsWith('?__theme=dark')) {
    window.location.replace(gradioURL + '?__theme=dark');
  }
}"""

# Ensure models and pipelines are initialized only in a GPU-enabled environment.
if torch.cuda.is_available():
    # ExVideo: Stable Video Diffusion img2vid extended to 128 frames.
    model_manager = ModelManager(
        torch_dtype=torch.float16,
        device="cuda",
        model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"],
        downloading_priority=["HuggingFace"])
    pipe = SVDVideoPipeline.from_model_manager(model_manager)

    # Diffutoon: SD 1.5 + AnimateDiff + ControlNet (lineart, tile) + RIFE smoothing.
    model_manager2 = ModelManager(torch_dtype=torch.float16, device="cuda")
    model_manager2.load_textual_inversions("models/textual_inversion")
    model_manager2.load_models([
        "models/stable_diffusion/flat2DAnimerge_v45Sharp.safetensors",
        "models/AnimateDiff/mm_sd_v15_v2.ckpt",
        "models/ControlNet/control_v11p_sd15_lineart.pth",
        "models/ControlNet/control_v11f1e_sd15_tile.pth",
        "models/RIFE/flownet.pkl"
    ])
    pipe2 = SDVideoPipeline.from_model_manager(
        model_manager2,
        [
            ControlNetConfigUnit(
                processor_id="lineart",
                model_path="models/ControlNet/control_v11p_sd15_lineart.pth",
                scale=0.5
            ),
            ControlNetConfigUnit(
                processor_id="tile",
                model_path="models/ControlNet/control_v11f1e_sd15_tile.pth",
                scale=0.5
            )
        ]
    )
    smoother = RIFESmoother.from_model_manager(model_manager2)


def video_to_image(selected):
    # Swap the upload component to match the selected app.
    if selected == "ExVideo":
        return gr.Image(label='Upload Image', height=600, scale=2, image_mode="RGB", type="filepath")
    # Diffutoon takes a video, so restore the video upload component.
    return gr.Video(label='Upload Video', height=600, scale=2)


@spaces.GPU(duration=120)
def generate(
        media,
        selected,
        seed: Optional[int] = -1,
        num_inference_steps: int = 10,
        animatediff_batch_size: int = 32,
        animatediff_stride: int = 16,
        motion_bucket_id: int = 127,
        fps_id: int = 25,
        num_frames: int = 50,
        prompt: str = "best quality",
        output_folder: str = "outputs",
        progress=gr.Progress(track_tqdm=True)):
    print(media)
    if seed == -1:
        seed = random.randint(0, MAX_SEED)
    torch.manual_seed(seed)

    os.makedirs(output_folder, exist_ok=True)
    base_count = len(glob(os.path.join(output_folder, "*.mp4")))
    video_path = os.path.join(output_folder, f"{base_count:06d}.mp4")

    if selected == "ExVideo":
        # Image-to-video generation with ExVideo (extended Stable Video Diffusion).
        image = Image.open(media)
        video = pipe(
            input_image=image.resize((512, 512)),
            num_frames=num_frames,
            fps=fps_id,
            height=512,
            width=512,
            motion_bucket_id=motion_bucket_id,
            num_inference_steps=num_inference_steps,
            min_cfg_scale=2,
            max_cfg_scale=2,
            contrast_enhance_scale=1.2
        )
        # Release GPU memory after generation.
        model_manager.to("cpu")
    else:
        # Video-to-video toon shading with Diffutoon.
        up_video = VideoData(video_file=media, height=1024, width=1024)
        # Render a fixed 60-frame window (frames 2400-2459) of the input video.
        input_video = [up_video[i] for i in range(40*60, 41*60)]
        video = pipe2(
            prompt=prompt,
            negative_prompt="verybadimagenegative_v1.3",
            cfg_scale=3,
            clip_skip=2,
            controlnet_frames=input_video,
            num_frames=len(input_video),
            num_inference_steps=num_inference_steps,
            height=1024,
            width=1024,
            animatediff_batch_size=animatediff_batch_size,
            animatediff_stride=animatediff_stride,
            vram_limit_level=0,
        )
        # Interpolate and smooth frames with RIFE.
        video = smoother(video)

    save_video(video, video_path, fps=fps_id)
    return video_path, seed


examples = [
    "./train.jpg",
    "./girl.webp",
    "./robo.jpg",
    "./working.mp4",
]

# Gradio Interface
with gr.Blocks(css=CSS, js=JS, theme="soft") as demo:
    gr.HTML("""
        <h1><center>Exvideo📽️Diffutoon</center></h1>
    """)
    gr.HTML("""
        <p><center>
        Exvideo and Diffutoon video generation<br>
        Update: first version<br>
        Note: ZeroGPU time is limited, so set the parameters appropriately.
        </center></p>
    """)
    with gr.Row():
        media = gr.Video(label='Upload Video', height=600, scale=2)
        video = gr.Video(label="Generated Video", height=600, scale=2)
        with gr.Column(scale=1):
            selected = gr.Radio(
                label="Selected App",
                choices=["ExVideo", "Diffutoon"],
                value="Diffutoon"
            )
            seed = gr.Slider(
                label="Seed (-1 Random)",
                minimum=-1,
                maximum=MAX_SEED,
                step=1,
                value=-1,
            )
            num_inference_steps = gr.Slider(
                label="Inference steps",
                info="Number of denoising steps",
                step=1,
                value=10,
                minimum=1,
                maximum=50
            )
            with gr.Accordion("Diffutoon Options", open=False):
                animatediff_batch_size = gr.Slider(
                    label="Animatediff batch size",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=32,
                )
                animatediff_stride = gr.Slider(
                    label="Animatediff stride",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=16,
                )
            with gr.Accordion("ExVideo Options", open=False):
                motion_bucket_id = gr.Slider(
                    label="Motion bucket id",
                    info="Controls how much motion to add/remove from the image",
                    value=127,
                    step=1,
                    minimum=1,
                    maximum=255
                )
                fps_id = gr.Slider(
                    label="Frames per second",
                    info="Video length in seconds is num_frames / fps",
                    value=6,
                    step=1,
                    minimum=5,
                    maximum=30
                )
                num_frames = gr.Slider(
                    label="Frames num",
                    info="Number of frames to generate",
                    step=1,
                    value=50,
                    minimum=1,
                    maximum=128
                )
            prompt = gr.Textbox(label="Prompt")
            with gr.Row():
                submit_btn = gr.Button(value="Generate")
                #stop_btn = gr.Button(value="Stop", variant="stop")
                clear_btn = gr.ClearButton([media, seed, video])
    gr.Examples(
        examples=examples,
        inputs=[media],  # examples populate the upload component
        examples_per_page=4,
    )

    selected.change(fn=video_to_image, inputs=[selected], outputs=[media])

    submit_event = submit_btn.click(
        fn=generate,
        inputs=[media, selected, seed, num_inference_steps, animatediff_batch_size,
                animatediff_stride, motion_bucket_id, fps_id, num_frames, prompt],
        outputs=[video, seed],
        api_name="video")
    #stop_btn.click(fn=None, inputs=None, outputs=None, cancels=[submit_event])

demo.queue().launch()