# diffusers / app.py
# Author: seawolf2357 -- commit 18e9814 (verified): "Update app.py"
import ast
import functools

import gradio as gr
import numpy as np
import torch
from diffusers import (
    AnimateDiffPipeline,
    AnimateDiffSparseControlNetPipeline,
    AutoencoderKL,
    DPMSolverMultistepScheduler,
    MotionAdapter,
    SparseControlNetModel,
)
from diffusers.utils import export_to_gif, load_image
from PIL import Image
from transformers import pipeline
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Korean -> English translation pipeline, created once at import time and
# shared by every request.
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
def translate_korean_to_english(text):
    """Return *text* translated to English if it contains Hangul, else unchanged.

    A character counts as Hangul when it lies in U+3131..U+3163 or in
    U+AC00..U+D7A3. As soon as one such character is found, the whole string
    is sent to the module-level ``translator`` and the translated text is
    returned.
    """

    def _is_hangul(ch):
        return '\u3131' <= ch <= '\u3163' or '\uac00' <= ch <= '\ud7a3'

    for ch in text:
        if _is_hangul(ch):
            result = translator(text)
            return result[0]['translation_text']
    return text
# Scribble images from the diffusers documentation, used as the sparse
# conditioning frames for every request.
_CONDITIONING_FRAME_URLS = (
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-1.png",
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-2.png",
    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-3.png",
)


@functools.lru_cache(maxsize=1)
def _load_pipeline():
    """Build the sparse-control AnimateDiff pipeline once and reuse it afterwards."""
    # Half precision is only used on CUDA; on CPU fall back to float32.
    dtype = torch.float16 if device.type == "cuda" else torch.float32

    motion_adapter = MotionAdapter.from_pretrained(
        "guoyww/animatediff-motion-adapter-v1-5-3", torch_dtype=dtype
    ).to(device)
    controlnet = SparseControlNetModel.from_pretrained(
        "guoyww/animatediff-sparsectrl-scribble", torch_dtype=dtype
    ).to(device)
    vae = AutoencoderKL.from_pretrained(
        "stabilityai/sd-vae-ft-mse", torch_dtype=dtype
    ).to(device)

    # The sparse-control pipeline is required here: it is the variant that
    # takes `controlnet`, `conditioning_frames` and `controlnet_frame_indices`.
    pipe = AnimateDiffSparseControlNetPipeline.from_pretrained(
        "SG161222/Realistic_Vision_V6.0_B1_noVAE",
        motion_adapter=motion_adapter,
        controlnet=controlnet,
        vae=vae,
        torch_dtype=dtype,
    ).to(device)
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config,
        beta_schedule="linear",
        algorithm_type="dpmsolver++",
        use_karras_sigmas=True,
    )
    return pipe


@functools.lru_cache(maxsize=1)
def _load_conditioning_frames():
    """Download the scribble conditioning images once and keep them in memory."""
    return tuple(load_image(url) for url in _CONDITIONING_FRAME_URLS)


def _parse_frame_indices(raw, num_frames):
    """Parse a list literal such as "[0, 8, 15]" into a validated list of ints."""
    # NOTE(security): `raw` comes straight from a public textbox, so it must
    # never be passed to eval(). literal_eval only accepts Python literals.
    try:
        parsed = ast.literal_eval(raw) if isinstance(raw, str) else raw
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as err:
        raise ValueError(
            f"Conditioning frame indices must be a list of integers such as [0, 8, 15], got {raw!r}"
        ) from err

    is_int_sequence = isinstance(parsed, (list, tuple)) and all(
        isinstance(index, int) and not isinstance(index, bool) for index in parsed
    )
    if not is_int_sequence:
        raise ValueError(
            f"Conditioning frame indices must be a list of integers such as [0, 8, 15], got {raw!r}"
        )

    out_of_range = [index for index in parsed if not -num_frames <= index < num_frames]
    if out_of_range:
        raise ValueError(
            f"Conditioning frame indices {out_of_range} are outside the {num_frames} generated frames"
        )
    return list(parsed)


def _interpolate_frames(frames):
    """Insert a 50/50 blend between every pair of consecutive frames."""
    if not frames:
        return []
    smoothed = []
    for current, following in zip(frames, frames[1:]):
        smoothed.append(current)
        smoothed.append(Image.blend(current, following, 0.5))
    smoothed.append(frames[-1])
    return smoothed


def generate_video(prompt, negative_prompt, num_inference_steps, conditioning_frame_indices, controlnet_conditioning_scale, width, height, num_frames):
    """Generate an animated GIF from a text prompt with sparse scribble conditioning.

    Prompts containing Korean are translated to English first. The generated
    frames are post-processed by inserting a blended frame between each pair
    of consecutive frames for smoother transitions, then written to
    ``output.gif`` in the current working directory. The random seed is fixed
    (1337), so identical inputs use identical initial noise.

    Args:
        prompt: Text prompt, Korean or English.
        negative_prompt: Negative text prompt, Korean or English.
        num_inference_steps: Number of denoising steps.
        conditioning_frame_indices: String holding a list literal of frame
            indices (e.g. ``"[0, 8, 15]"``) at which the conditioning images
            are applied.
        controlnet_conditioning_scale: ControlNet conditioning strength.
        width: Output width in pixels.
        height: Output height in pixels.
        num_frames: Number of frames to generate (before interpolation).

    Returns:
        The path of the written GIF, ``"output.gif"``.

    Raises:
        ValueError: If ``conditioning_frame_indices`` is not a list of
            integers or contains an index outside the generated frames.
    """
    prompt = translate_korean_to_english(prompt)
    negative_prompt = translate_korean_to_english(negative_prompt)

    # UI sliders may deliver floats; the pipeline expects integer sizes/counts.
    num_frames = int(num_frames)
    # Validate the cheap user input before doing any expensive model work.
    frame_indices = _parse_frame_indices(conditioning_frame_indices, num_frames)

    pipe = _load_pipeline()
    # Hand the pipeline copies so the cached originals are never modified.
    conditioning_frames = [frame.copy() for frame in _load_conditioning_frames()]

    video = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        num_inference_steps=int(num_inference_steps),
        conditioning_frames=conditioning_frames,
        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
        controlnet_frame_indices=frame_indices,
        width=int(width),
        height=int(height),
        num_frames=num_frames,
        generator=torch.Generator().manual_seed(1337),
    ).frames[0]

    # Post-processing: smooth transitions via inter-frame interpolation.
    export_to_gif(_interpolate_frames(video), "output.gif")
    return "output.gif"
demo = gr.Interface(
fn=generate_video,
inputs=[
gr.Textbox(label="Prompt (ํ•œ๊ธ€ ๋˜๋Š” ์˜์–ด)", value="๊ท€์—ฌ์šด ๊ฐ•์•„์ง€๊ฐ€ ์กฐ์šฉํžˆ ์ง–๊ณ ์žˆ, ๊ฑธ์ž‘, ๊ณ ํ’ˆ์งˆ"),
gr.Textbox(label="Negative Prompt (ํ•œ๊ธ€ ๋˜๋Š” ์˜์–ด)", value="์ €ํ’ˆ์งˆ, ์ตœ์•…์˜ ํ’ˆ์งˆ, ๋ ˆํ„ฐ๋ฐ•์Šค"),
gr.Slider(label="Number of Inference Steps", minimum=1, maximum=200, step=1, value=150),
gr.Textbox(label="Conditioning Frame Indices", value="[0, 8, 15]"),
gr.Slider(label="ControlNet Conditioning Scale", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
gr.Slider(label="Width", minimum=256, maximum=1024, step=64, value=512),
gr.Slider(label="Height", minimum=256, maximum=1024, step=64, value=512),
gr.Slider(label="Number of Frames", minimum=16, maximum=128, step=16, value=64)
],
outputs=gr.Image(label="Generated Video"),
title="AnimateDiffSparseControlNetPipeline์„ ์‚ฌ์šฉํ•œ ๊ณ ํ’ˆ์งˆ ๋น„๋””์˜ค ์ƒ์„ฑ",
description="AnimateDiffSparseControlNetPipeline์„ ์‚ฌ์šฉํ•˜์—ฌ ๊ณ ํ’ˆ์งˆ ๋น„๋””์˜ค๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค. ํ•œ๊ธ€ ๋˜๋Š” ์˜์–ด๋กœ ํ”„๋กฌํ”„ํŠธ๋ฅผ ์ž…๋ ฅํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."
)
demo.launch()