Spaces:

VAST-AI
/

MV-Adapter-I2MV-SDXL

Running on Zero

App Files Files Community

MV-Adapter-I2MV-SDXL / app.py

huanngzh

cache examples

f38509e 2 days ago

raw

history blame contribute delete

6.6 kB

	import random

	import gradio as gr
	import numpy as np

	import spaces
	import torch
	from torchvision import transforms
	from transformers import AutoModelForImageSegmentation

	from inference_i2mv_sdxl import prepare_pipeline, remove_bg, run_pipeline

	# Device and dtype
	dtype = torch.bfloat16
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Hyperparameters
	NUM_VIEWS = 6
	HEIGHT = 768
	WIDTH = 768
	MAX_SEED = np.iinfo(np.int32).max

	pipe = prepare_pipeline(
	base_model="stabilityai/stable-diffusion-xl-base-1.0",
	vae_model="madebyollin/sdxl-vae-fp16-fix",
	unet_model=None,
	lora_model=None,
	adapter_path="huanngzh/mv-adapter",
	scheduler=None,
	num_views=NUM_VIEWS,
	device=device,
	dtype=dtype,
	)

	# remove bg
	birefnet = AutoModelForImageSegmentation.from_pretrained(
	"ZhengPeng7/BiRefNet", trust_remote_code=True
	)
	birefnet.to(device)
	transform_image = transforms.Compose(
	[
	transforms.Resize((1024, 1024)),
	transforms.ToTensor(),
	transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
	]
	)


	@spaces.GPU()
	def infer(
	prompt,
	image,
	do_rembg=True,
	seed=42,
	randomize_seed=False,
	guidance_scale=3.0,
	num_inference_steps=30,
	reference_conditioning_scale=1.0,
	negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
	progress=gr.Progress(track_tqdm=True),
	):
	if do_rembg:
	remove_bg_fn = lambda x: remove_bg(x, birefnet, transform_image, device)
	else:
	remove_bg_fn = None
	if randomize_seed:
	seed = random.randint(0, MAX_SEED)
	if isinstance(seed, str):
	try:
	seed = int(seed.strip())
	except ValueError:
	seed = 42

	images, preprocessed_image = run_pipeline(
	pipe,
	num_views=NUM_VIEWS,
	text=prompt,
	image=image,
	height=HEIGHT,
	width=WIDTH,
	num_inference_steps=num_inference_steps,
	guidance_scale=guidance_scale,
	seed=seed,
	remove_bg_fn=remove_bg_fn,
	reference_conditioning_scale=reference_conditioning_scale,
	negative_prompt=negative_prompt,
	device=device,
	)
	return images, preprocessed_image, seed


	examples = [
	[
	"A decorative figurine of a young anime-style girl",
	"assets/demo/i2mv/A_decorative_figurine_of_a_young_anime-style_girl.png",
	True,
	21,
	],
	[
	"A juvenile emperor penguin chick",
	"assets/demo/i2mv/A_juvenile_emperor_penguin_chick.png",
	True,
	0,
	],
	[
	"A striped tabby cat with white fur sitting upright",
	"assets/demo/i2mv/A_striped_tabby_cat_with_white_fur_sitting_upright.png",
	True,
	0,
	],
	]


	with gr.Blocks() as demo:
	with gr.Row():
	gr.Markdown(
	f"""# MV-Adapter [Image-to-Multi-View]
	Generate 768x768 multi-view images from a single image using SDXL <br>
	Check our [project page](https://huanngzh.github.io/MV-Adapter-Page/) and [github repo](https://github.com/huanngzh/MV-Adapter) for details <br>
	Also try our other demos: [Text-to-Multiview (General)](https://huggingface.co/spaces/VAST-AI/MV-Adapter-T2MV-SDXL) \| [Text-to-Multiview (Anime)](https://huggingface.co/spaces/huanngzh/MV-Adapter-T2MV-Anime) \| [Image-to-Multiview](https://huggingface.co/spaces/VAST-AI/MV-Adapter-I2MV-SDXL) <br>
	Tips: if error occurs, wait for a few seconds and try again
	"""
	)

	with gr.Row():
	with gr.Column():
	with gr.Row():
	input_image = gr.Image(
	label="Input Image",
	sources=["upload", "webcam", "clipboard"],
	type="pil",
	)
	preprocessed_image = gr.Image(label="Preprocessed Image", type="pil")

	prompt = gr.Textbox(
	label="Prompt", placeholder="Enter your prompt", value="high quality"
	)
	do_rembg = gr.Checkbox(label="Remove background", value=True)
	run_button = gr.Button("Run")

	with gr.Accordion("Advanced Settings", open=False):
	seed = gr.Slider(
	label="Seed",
	minimum=0,
	maximum=MAX_SEED,
	step=1,
	value=0,
	)
	randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

	with gr.Row():
	num_inference_steps = gr.Slider(
	label="Number of inference steps",
	minimum=1,
	maximum=50,
	step=1,
	value=30,
	)

	with gr.Row():
	guidance_scale = gr.Slider(
	label="CFG scale",
	minimum=0.0,
	maximum=10.0,
	step=0.1,
	value=3.0,
	)

	with gr.Row():
	reference_conditioning_scale = gr.Slider(
	label="Image conditioning scale",
	minimum=0.0,
	maximum=2.0,
	step=0.1,
	value=1.0,
	)

	with gr.Row():
	negative_prompt = gr.Textbox(
	label="Negative prompt",
	placeholder="Enter your negative prompt",
	value="watermark, ugly, deformed, noisy, blurry, low contrast",
	)

	with gr.Column():
	result = gr.Gallery(
	label="Result",
	show_label=False,
	columns=[3],
	rows=[2],
	object_fit="contain",
	height="auto",
	)

	with gr.Row():
	gr.Examples(
	examples=examples,
	fn=infer,
	inputs=[prompt, input_image, do_rembg, seed],
	outputs=[result, preprocessed_image, seed],
	cache_examples=True,
	)

	gr.on(
	triggers=[run_button.click, prompt.submit],
	fn=infer,
	inputs=[
	prompt,
	input_image,
	do_rembg,
	seed,
	randomize_seed,
	guidance_scale,
	num_inference_steps,
	reference_conditioning_scale,
	negative_prompt,
	],
	outputs=[result, preprocessed_image, seed],
	)

	demo.launch()