Spaces:

xinglilu
/

Vadhwid

Build error

Vadhwid / app.py

QinOwen

change-weights-path

869596d 7 months ago

10.3 kB

	import gradio as gr
	import os
	import spaces
	import sys
	from copy import deepcopy
	sys.path.append('./VADER-VideoCrafter/scripts/main')
	sys.path.append('./VADER-VideoCrafter/scripts')
	sys.path.append('./VADER-VideoCrafter')


	from train_t2v_lora import main_fn, setup_model

	# Rows offered through gr.Examples. Column order follows the `inputs` list
	# given to gr.Examples: prompt, LoRA model, LoRA rank, seed, height, width,
	# guidance scale, DDIM steps, DDIM eta, frames, save FPS.
	# Only the first four columns differ between rows; the trailing generation
	# settings are identical, so they are written once here.
	examples = [
	    [prompt_text, reward_model, rank, seed_value, 384, 512, 12.0, 25, 1.0, 24, 10]
	    for prompt_text, reward_model, rank, seed_value in (
	        ("Fairy and Magical Flowers: A fairy tends to enchanted, glowing flowers.",
	         "huggingface-hps-aesthetic", 8, 901),
	        ("A cat playing an electric guitar in a loft with industrial-style decor and soft, multicolored lights.",
	         "huggingface-hps-aesthetic", 8, 208),
	        ("A raccoon playing a guitar under a blossoming cherry tree.",
	         "huggingface-hps-aesthetic", 8, 180),
	        ("A raccoon playing an electric bass in a garage band setting.",
	         "huggingface-hps-aesthetic", 8, 400),
	        ("A talking bird with shimmering feathers and a melodious voice finds a legendary treasure, guiding through enchanted forests, ancient ruins, and mystical challenges.",
	         "huggingface-pickscore", 16, 200),
	        ("A snow princess stands on the balcony of her ice castle, her hair adorned with delicate snowflakes, overlooking her serene realm.",
	         "huggingface-pickscore", 16, 400),
	        ("A mermaid with flowing hair and a shimmering tail discovers a hidden underwater kingdom adorned with coral palaces, glowing pearls, and schools of colorful fish, encountering both wonders and dangers along the way.",
	         "huggingface-pickscore", 16, 800),
	    )
	]

	# Build the model once, at import time, with setup_model() from train_t2v_lora.
	# gradio_main_fn hands a deepcopy of this shared instance to main_fn on every
	# request, so this module-level object itself is never passed to main_fn.
	model = setup_model()

	@spaces.GPU(duration=180)
	def gradio_main_fn(prompt, lora_model, lora_rank, seed, height, width, unconditional_guidance_scale, ddim_steps, ddim_eta,
	                   frames, savefps):
	    """Generate a video for ``prompt`` and return whatever ``main_fn`` returns.

	    This is the handler behind the "Run Inference" button and gr.Examples; its
	    return value is routed to the ``gr.Video`` output component.

	    Args:
	        prompt: Text prompt, forwarded unchanged.
	        lora_model: Name of the LoRA model selected in the dropdown, forwarded
	            unchanged.
	        lora_rank, seed, height, width, ddim_steps, frames, savefps: Widget
	            values, coerced with ``int()`` before being forwarded (sliders may
	            deliver floats).
	        unconditional_guidance_scale, ddim_eta: Widget values, coerced with
	            ``float()`` before being forwarded.

	    Returns:
	        The value returned by ``main_fn`` (bound to ``video_path`` here).

	    Raises:
	        gr.Error: If the module-level ``model`` is ``None``.
	    """
	    # `model` is only read here, so no `global` declaration is needed.
	    if model is None:
	        # The output component is a gr.Video; returning the message as a plain
	        # string would be handed to the video component as if it were a result.
	        # Raising gr.Error surfaces the message to the user instead.
	        raise gr.Error("Model is not loaded. Please load the model first.")

	    video_path = main_fn(
	        prompt=prompt,
	        lora_model=lora_model,
	        lora_rank=int(lora_rank),
	        seed=int(seed),
	        height=int(height),
	        width=int(width),
	        unconditional_guidance_scale=float(unconditional_guidance_scale),
	        ddim_steps=int(ddim_steps),
	        ddim_eta=float(ddim_eta),
	        frames=int(frames),
	        savefps=int(savefps),
	        # Each call works on its own copy of the shared model.
	        # NOTE(review): presumably because main_fn modifies the model it is
	        # given (e.g. when applying LoRA weights) -- confirm in train_t2v_lora.
	        model=deepcopy(model),
	    )

	    return video_path

	def reset_fn():
	    """Return the default value for every control the "Reset" button restores.

	    The tuple is positional: its order must match the ``outputs`` list passed
	    to ``reset_btn.click`` (prompt, seed, height, width, guidance scale,
	    DDIM steps, DDIM eta, frames, LoRA rank, save FPS, LoRA model).

	    Returns:
	        An 11-tuple of default widget values in the order listed above.
	    """
	    return (
	        "A brown dog eagerly eats from a bowl in a kitchen.",  # prompt
	        200,    # seed
	        384,    # height
	        512,    # width
	        12.0,   # guidance scale
	        25,     # DDIM steps
	        1.0,    # DDIM eta
	        24,     # frames
	        16,     # LoRA rank
	        10,     # save FPS
	        "huggingface-pickscore",  # LoRA model
	    )

	def update_lora_rank(lora_model):
	    """Return a Gradio update that sets the LoRA-rank slider for ``lora_model``.

	    Args:
	        lora_model: The value currently selected in the model dropdown.

	    Returns:
	        ``gr.update(value=16)`` for "huggingface-pickscore"; ``gr.update(value=8)``
	        for "huggingface-hps-aesthetic" and for any other value.
	    """
	    rank_by_model = {
	        "huggingface-pickscore": 16,
	        "huggingface-hps-aesthetic": 8,
	    }
	    # Any unlisted selection (the original code labelled this case
	    # "Base Model") falls back to rank 8, same as the hps-aesthetic model.
	    return gr.update(value=rank_by_model.get(lora_model, 8))

	def update_dropdown(lora_rank):
	    """Return a Gradio update that selects the model matching ``lora_rank``.

	    Args:
	        lora_rank: The current value of the LoRA-rank slider.

	    Returns:
	        ``gr.update`` selecting "huggingface-pickscore" for rank 16,
	        "huggingface-hps-aesthetic" for rank 8, and "Base Model" otherwise.
	    """
	    if lora_rank == 16:
	        return gr.update(value="huggingface-pickscore")
	    if lora_rank == 8:
	        return gr.update(value="huggingface-hps-aesthetic")
	    # Fallback the original code annotated as rank 0.
	    # NOTE(review): "Base Model" is not one of the `choices` given to the
	    # model dropdown in this file, and the rank slider only spans 8..16 in
	    # steps of 8, so this branch looks unreachable from the UI -- confirm
	    # before relying on it.
	    return gr.update(value="Base Model")

	# Stylesheet handed to gr.Blocks(css=custom_css).
	# The id selectors correspond to elem_id values used by the layout:
	#   #centered      -> the two gr.Row(elem_id="centered") rows
	#   #params        -> the gr.Column(elem_id="params") holding the main controls
	#   #image-upload  -> the gr.Video(elem_id="image-upload") output
	# The .column-centered class is not referenced by any component visible in this file.
	custom_css = """
	#centered {
	display: flex;
	justify-content: center;
	width: 60%;
	margin: 0 auto;
	}
	.column-centered {
	display: flex;
	flex-direction: column;
	align-items: center;
	width: 60%;
	}
	#image-upload {
	flex-grow: 1;
	}
	#params .tabs {
	display: flex;
	flex-direction: column;
	flex-grow: 1;
	}
	#params .tabitem[style="display: block;"] {
	flex-grow: 1;
	display: flex !important;
	}
	#params .gap {
	flex-grow: 1;
	}
	#params .form {
	flex-grow: 1 !important;
	}
	#params .form > :last-child{
	flex-grow: 1;
	}
	"""

	# Gradio UI: a header (title, authors, link buttons), a row with the main
	# controls next to the video output, a row of generation settings, and the
	# event wiring plus example prompts.
	# NOTE(review): the nesting of the `with` blocks below was reconstructed from a
	# listing that had lost its indentation -- confirm against the intended layout.
	# The bodies of the triple-quoted HTML strings are kept exactly as listed.
	with gr.Blocks(css=custom_css) as demo:
	    # ---- Header ---------------------------------------------------------
	    with gr.Row():
	        with gr.Column():
	            gr.HTML(
	                """
	<h1 style='text-align: center; font-size: 3.2em; margin-bottom: 0.5em; font-family: Arial, sans-serif; margin: 20px;'>
	Video Diffusion Alignment via Reward Gradient
	</h1>
	"""
	            )
	            # Author list. The mailto: URI no longer contains a space after
	            # the scheme, which is not valid in a mailto address.
	            gr.HTML(
	                """
	<style>
	body {
	font-family: Arial, sans-serif;
	text-align: center;
	margin: 50px;
	}
	a {
	text-decoration: none !important;
	color: black !important;
	}
	</style>
	<body>
	<div style="font-size: 1.4em; margin-bottom: 0.5em; ">
	<a href="https://mihirp1998.github.io">Mihir Prabhudesai</a><sup>*</sup>
	<a href="https://russellmendonca.github.io/">Russell Mendonca</a><sup>*</sup>
	<a href="mailto:zheyangqin.qzy@gmail.com">Zheyang Qin</a><sup>*</sup>
	<a href="https://www.cs.cmu.edu/~katef/">Katerina Fragkiadaki</a><sup></sup>
	<a href="https://www.cs.cmu.edu/~dpathak/">Deepak Pathak</a><sup></sup>


	</div>
	<div style="font-size: 1.3em; font-style: italic;">
	Carnegie Mellon University
	</div>
	</body>
	"""
	            )
	            # Paper / Website / Code buttons. The "Website" anchor was missing
	            # its closing </a>, which left the following "Code" anchor nested
	            # inside it; the closing tag is added here.
	            gr.HTML(
	                """
	<head>
	<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">

	<style>
	.button-container {
	display: flex;
	justify-content: center;
	gap: 10px;
	margin-top: 10px;
	}

	.button-container a {
	display: inline-flex;
	align-items: center;
	padding: 10px 20px;
	border-radius: 30px;
	border: 1px solid #ccc;
	text-decoration: none;
	color: #333 !important;
	font-size: 16px;
	text-decoration: none !important;
	}

	.button-container a i {
	margin-right: 8px;
	}
	</style>
	</head>

	<div class="button-container">
	<a href="https://arxiv.org/abs/2407.08737" class="btn btn-outline-primary">
	<i class="fa-solid fa-file-pdf"></i> Paper
	</a>
	<a href="https://vader-vid.github.io/" class="btn btn-outline-danger">
	<i class="fa-solid fa-video"></i> Website
	</a>
	<a href="https://github.com/mihirp1998/VADER" class="btn btn-outline-secondary">
	<i class="fa-brands fa-github"></i> Code
	</a>
	</div>
	"""
	            )

	    # ---- Main controls and video output ----------------------------------
	    with gr.Row(elem_id="centered"):
	        with gr.Column(elem_id="params"):
	            lora_model = gr.Dropdown(
	                label="VADER Model",
	                choices=["huggingface-pickscore", "huggingface-hps-aesthetic"],
	                value="huggingface-pickscore"
	            )
	            lora_rank = gr.Slider(minimum=8, maximum=16, label="LoRA Rank", step=8, value=16)
	            prompt = gr.Textbox(placeholder="Enter prompt text here", lines=4, label="Text Prompt",
	                                value="A brown dog eagerly eats from a bowl in a kitchen.")
	            run_btn = gr.Button("Run Inference")

	        with gr.Column():
	            output_video = gr.Video(elem_id="image-upload")

	    # ---- Generation settings ---------------------------------------------
	    with gr.Row(elem_id="centered"):
	        with gr.Column():

	            seed = gr.Slider(minimum=0, maximum=65536, label="Seed", step=1, value=200)

	            with gr.Row():
	                height = gr.Slider(minimum=0, maximum=512, label="Height", step=16, value=384)
	                width = gr.Slider(minimum=0, maximum=512, label="Width", step=16, value=512)

	            with gr.Row():
	                frames = gr.Slider(minimum=0, maximum=50, label="Frames", step=1, value=24)
	                savefps = gr.Slider(minimum=0, maximum=30, label="Save FPS", step=1, value=10)

	            with gr.Row():
	                DDIM_Steps = gr.Slider(minimum=0, maximum=50, label="DDIM Steps", step=1, value=25)
	                unconditional_guidance_scale = gr.Slider(minimum=0, maximum=50, label="Guidance Scale", step=0.1, value=12.0)
	                DDIM_Eta = gr.Slider(minimum=0, maximum=1, label="DDIM Eta", step=0.01, value=1.0)

	            # reset button
	            reset_btn = gr.Button("Reset")

	            # The outputs order must match the tuple returned by reset_fn.
	            reset_btn.click(fn=reset_fn, outputs=[prompt, seed, height, width, unconditional_guidance_scale, DDIM_Steps, DDIM_Eta, frames, lora_rank, savefps, lora_model])

	    # ---- Event wiring ----------------------------------------------------
	    # The inputs order must match gradio_main_fn's positional parameters.
	    run_btn.click(fn=gradio_main_fn,
	                  inputs=[prompt, lora_model, lora_rank,
	                          seed, height, width, unconditional_guidance_scale,
	                          DDIM_Steps, DDIM_Eta, frames, savefps],
	                  outputs=output_video
	                  )

	    # Keep the model dropdown and the LoRA-rank slider in sync in both directions.
	    lora_model.change(fn=update_lora_rank, inputs=lora_model, outputs=lora_rank)
	    lora_rank.change(fn=update_dropdown, inputs=lora_rank, outputs=lora_model)

	    # Each example row supplies values for the inputs in the order listed here.
	    gr.Examples(examples=examples,
	                inputs=[prompt, lora_model, lora_rank, seed,
	                        height, width, unconditional_guidance_scale,
	                        DDIM_Steps, DDIM_Eta, frames, savefps],
	                outputs=output_video,
	                fn=gradio_main_fn,
	                run_on_click=False,
	                cache_examples="lazy",
	                )

	# Start serving the Blocks app defined above; share=True asks Gradio to also
	# create a public share link.
	demo.launch(share=True)