Spaces:

seawolf2357
/

diffusers

Paused

App Files Files Community

diffusers / app.py

seawolf2357

Update app.py

18e9814 verified 4 months ago

raw

history blame contribute delete

4.36 kB

	import ast
	import functools

	import gradio as gr
	import numpy as np
	import torch
	from diffusers import (
	    AnimateDiffPipeline,
	    AnimateDiffSparseControlNetPipeline,
	    AutoencoderKL,
	    DPMSolverMultistepScheduler,
	    MotionAdapter,
	    SparseControlNetModel,
	)
	from diffusers.utils import export_to_gif, load_image
	from PIL import Image
	from transformers import pipeline

	# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
	if torch.cuda.is_available():
	    device = torch.device("cuda")
	else:
	    device = torch.device("cpu")

	# Korean -> English translation pipeline, created once at import time and
	# called by translate_korean_to_english for every prompt containing Hangul.
	translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")

	def translate_korean_to_english(text):
	    """Translate ``text`` to English if it contains any Hangul character.

	    A character counts as Hangul when it lies in U+3131..U+3163
	    (compatibility jamo) or U+AC00..U+D7A3 (precomposed syllables).

	    Args:
	        text: The prompt string to inspect.

	    Returns:
	        The ``translation_text`` of the first result produced by the
	        module-level ``translator`` when Hangul is present; otherwise
	        ``text`` itself, unchanged.
	    """
	    for ch in text:
	        is_jamo = '\u3131' <= ch <= '\u3163'
	        is_syllable = '\uac00' <= ch <= '\ud7a3'
	        if is_jamo or is_syllable:
	            # One Hangul character is enough: translate the whole string.
	            return translator(text)[0]['translation_text']
	    return text

	# Scribble images used as the sparse ControlNet conditioning frames.
	_CONDITIONING_IMAGE_URLS = (
	    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-1.png",
	    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-2.png",
	    "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/diffusers/animatediff-scribble-3.png",
	)


	@functools.lru_cache(maxsize=1)
	def _load_pipeline():
	    """Build the sparse-control AnimateDiff pipeline once and cache it.

	    None of the model loading depends on the request, so the assembled
	    pipeline is kept and reused instead of being rebuilt on every call.
	    """
	    motion_adapter = MotionAdapter.from_pretrained(
	        "guoyww/animatediff-motion-adapter-v1-5-3", torch_dtype=torch.float16
	    ).to(device)
	    controlnet = SparseControlNetModel.from_pretrained(
	        "guoyww/animatediff-sparsectrl-scribble", torch_dtype=torch.float16
	    ).to(device)
	    vae = AutoencoderKL.from_pretrained(
	        "stabilityai/sd-vae-ft-mse", torch_dtype=torch.float16
	    ).to(device)

	    # The sparse-control pipeline class is the one whose __call__ accepts
	    # conditioning_frames / controlnet_frame_indices; the plain
	    # AnimateDiffPipeline does not take those arguments.
	    pipe = AnimateDiffSparseControlNetPipeline.from_pretrained(
	        "SG161222/Realistic_Vision_V6.0_B1_noVAE",
	        motion_adapter=motion_adapter,
	        controlnet=controlnet,
	        vae=vae,
	        torch_dtype=torch.float16,
	    ).to(device)

	    pipe.scheduler = DPMSolverMultistepScheduler.from_config(
	        pipe.scheduler.config,
	        beta_schedule="linear",
	        algorithm_type="dpmsolver++",
	        use_karras_sigmas=True,
	    )
	    return pipe


	def _parse_frame_indices(raw):
	    """Turn the frame-index input (e.g. ``"[0, 8, 15]"``) into a list of ints."""
	    if isinstance(raw, str):
	        try:
	            # SECURITY: this text comes straight from a UI textbox. The
	            # previous eval() would execute arbitrary code; literal_eval
	            # only accepts plain Python literals.
	            raw = ast.literal_eval(raw.strip())
	        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as err:
	            raise ValueError(
	                "Conditioning frame indices must be a list of integers, e.g. [0, 8, 15]."
	            ) from err

	    if not isinstance(raw, (list, tuple)):
	        raise ValueError(
	            "Conditioning frame indices must be a list of integers, e.g. [0, 8, 15]."
	        )

	    indices = list(raw)
	    # bool is a subclass of int, so reject it explicitly.
	    if any(isinstance(i, bool) or not isinstance(i, int) for i in indices):
	        raise ValueError(
	            "Conditioning frame indices must be a list of integers, e.g. [0, 8, 15]."
	        )
	    return indices


	def _interpolate_frames(frames):
	    """Insert a 50/50 blend between every pair of consecutive frames."""
	    frames = list(frames)
	    if not frames:
	        return []

	    smoothed = []
	    for current, following in zip(frames, frames[1:]):
	        smoothed.append(current)
	        smoothed.append(Image.blend(current, following, 0.5))
	    smoothed.append(frames[-1])
	    return smoothed


	def generate_video(prompt, negative_prompt, num_inference_steps, conditioning_frame_indices, controlnet_conditioning_scale, width, height, num_frames):
	    """Generate an animated GIF from a text prompt with sparse scribble control.

	    Args:
	        prompt: Positive prompt; translated to English first if it contains Hangul.
	        negative_prompt: Negative prompt; translated the same way.
	        num_inference_steps: Number of denoising steps (cast to ``int``).
	        conditioning_frame_indices: Frame positions for the conditioning
	            images, as a literal such as ``"[0, 8, 15]"`` or an actual
	            list/tuple of ints.
	        controlnet_conditioning_scale: ControlNet strength (cast to ``float``).
	        width: Output width in pixels (cast to ``int``).
	        height: Output height in pixels (cast to ``int``).
	        num_frames: Number of frames to generate (cast to ``int``).

	    Returns:
	        The path ``"output.gif"`` of the written animation. The file name is
	        fixed, so each call overwrites the previous result.

	    Raises:
	        gr.Error: If ``conditioning_frame_indices`` is not a list of integers.
	    """
	    prompt = translate_korean_to_english(prompt)
	    negative_prompt = translate_korean_to_english(negative_prompt)

	    # Validate the user input before any expensive model work starts.
	    try:
	        frame_indices = _parse_frame_indices(conditioning_frame_indices)
	    except ValueError as err:
	        raise gr.Error(str(err)) from err

	    pipe = _load_pipeline()
	    conditioning_frames = [load_image(url) for url in _CONDITIONING_IMAGE_URLS]

	    video = pipe(
	        prompt=prompt,
	        negative_prompt=negative_prompt,
	        # UI sliders may deliver floats; the pipeline expects integer counts/sizes.
	        num_inference_steps=int(num_inference_steps),
	        conditioning_frames=conditioning_frames,
	        controlnet_conditioning_scale=float(controlnet_conditioning_scale),
	        controlnet_frame_indices=frame_indices,
	        width=int(width),
	        height=int(height),
	        num_frames=int(num_frames),
	        # Fixed seed, so the same inputs use the same initial noise.
	        generator=torch.Generator().manual_seed(1337),
	    ).frames[0]

	    # Post-processing: smooth the transitions by interpolating between frames.
	    export_to_gif(_interpolate_frames(video), "output.gif")
	    return "output.gif"

	# Input widgets, in the same order as generate_video's parameters.
	_prompt_inputs = [
	    gr.Textbox(label="Prompt (한글 또는 영어)", value="귀여운 강아지가 조용히 짖고있, 걸작, 고품질"),
	    gr.Textbox(label="Negative Prompt (한글 또는 영어)", value="저품질, 최악의 품질, 레터박스"),
	]
	_control_inputs = [
	    gr.Slider(label="Number of Inference Steps", minimum=1, maximum=200, step=1, value=150),
	    gr.Textbox(label="Conditioning Frame Indices", value="[0, 8, 15]"),
	    gr.Slider(label="ControlNet Conditioning Scale", minimum=0.1, maximum=2.0, step=0.1, value=1.0),
	]
	_size_inputs = [
	    gr.Slider(label="Width", minimum=256, maximum=1024, step=64, value=512),
	    gr.Slider(label="Height", minimum=256, maximum=1024, step=64, value=512),
	    gr.Slider(label="Number of Frames", minimum=16, maximum=128, step=16, value=64),
	]

	# The generated GIF path returned by generate_video is shown in an image widget.
	demo = gr.Interface(
	    fn=generate_video,
	    inputs=_prompt_inputs + _control_inputs + _size_inputs,
	    outputs=gr.Image(label="Generated Video"),
	    title="AnimateDiffSparseControlNetPipeline을 사용한 고품질 비디오 생성",
	    description="AnimateDiffSparseControlNetPipeline을 사용하여 고품질 비디오를 생성합니다. 한글 또는 영어로 프롬프트를 입력할 수 있습니다.",
	)

	# Start the Gradio server when the module is executed.
	demo.launch()