Ashoka74 committed on
Commit
26e0ccb
·
verified ·
1 Parent(s): e401952

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +214 -0
app.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import random
3
+
4
+ import gradio as gr
5
+ import numpy as np
6
+
7
+ # import spaces
8
+ import torch
9
+ from torchvision import transforms
10
+ from transformers import AutoModelForImageSegmentation
11
+
12
+ from .inference_i2mv_sdxl import prepare_pipeline, remove_bg, run_pipeline
13
+
14
# Runtime placement: use the GPU when torch can see one, otherwise the CPU.
# Weights are handled in bfloat16 in both cases.
device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = torch.bfloat16

# Generation hyperparameters shared by the pipeline setup and the UI.
NUM_VIEWS = 6
HEIGHT = WIDTH = 768  # square output, in pixels
# Upper bound for the seed slider and for randomly drawn seeds.
MAX_SEED = np.iinfo(np.int32).max
23
+
24
# Multi-view generation pipeline, built once at import time and reused by
# every request. Options passed as None are left to prepare_pipeline's own
# handling (not visible in this file).
pipe = prepare_pipeline(
    base_model="stabilityai/stable-diffusion-xl-base-1.0",
    vae_model="madebyollin/sdxl-vae-fp16-fix",
    unet_model=None,
    lora_model=None,
    adapter_path="huanngzh/mv-adapter",
    scheduler=None,
    num_views=NUM_VIEWS,
    device=device,
    dtype=dtype,
)

# Background removal: preprocessing applied to the input image before it is
# handed to the BiRefNet segmentation model.
# NOTE(review): the normalisation values look like the usual ImageNet
# mean/std -- confirm against BiRefNet's expected preprocessing.
transform_image = transforms.Compose(
    [
        transforms.Resize(size=(1024, 1024)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

# trust_remote_code=True executes model code shipped with the checkpoint
# repository; keep the repository id pinned to a source you trust.
birefnet = AutoModelForImageSegmentation.from_pretrained(
    "ZhengPeng7/BiRefNet",
    trust_remote_code=True,
)
birefnet.to(device)
48
+
49
+
50
# @spaces.GPU()
def infer(
    prompt,
    image,
    do_rembg=True,
    seed=42,
    randomize_seed=False,
    guidance_scale=3.0,
    num_inference_steps=50,
    reference_conditioning_scale=1.0,
    negative_prompt="watermark, ugly, deformed, noisy, blurry, low contrast",
    progress=gr.Progress(track_tqdm=True),
):
    """Generate multi-view images from a single reference image.

    Args:
        prompt: Text prompt forwarded to the pipeline as ``text``.
        image: Reference image coming from the "Input Image" component.
        do_rembg: When true, a background-removal callable built on the
            module-level BiRefNet model is handed to the pipeline.
        seed: Seed forwarded to the pipeline; replaced when
            ``randomize_seed`` is true.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` instead of
            using ``seed``.
        guidance_scale: Forwarded unchanged to ``run_pipeline``.
        num_inference_steps: Forwarded unchanged to ``run_pipeline``.
        reference_conditioning_scale: Forwarded unchanged to ``run_pipeline``.
        negative_prompt: Forwarded unchanged to ``run_pipeline``.
        progress: Not referenced in the body; declared so Gradio can surface
            tqdm progress from the pipeline in the UI.

    Returns:
        A ``(images, preprocessed_image, seed)`` tuple: the two values
        returned by ``run_pipeline`` plus the seed that was actually used.

    Raises:
        gr.Error: If no input image was provided.
    """
    # Both the Run button and prompt submission can fire before anything was
    # uploaded; fail with a readable message instead of an error from deep
    # inside the pipeline.
    if image is None:
        raise gr.Error("Please provide an input image before running.")

    def _strip_background(x):
        # Bind the module-level segmentation model and its preprocessing.
        return remove_bg(x, birefnet, transform_image, device)

    remove_bg_fn = _strip_background if do_rembg else None

    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    images, preprocessed_image = run_pipeline(
        pipe,
        num_views=NUM_VIEWS,
        text=prompt,
        image=image,
        height=HEIGHT,
        width=WIDTH,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        seed=seed,
        remove_bg_fn=remove_bg_fn,
        reference_conditioning_scale=reference_conditioning_scale,
        negative_prompt=negative_prompt,
        device=device,
    )
    # The seed is returned so the UI slider shows the value that was used.
    return images, preprocessed_image, seed
85
+
86
+
87
# Example rows for gr.Examples, in the order of its inputs:
# [prompt, input image path, remove background, seed].
# Every bundled example removes the background, so that flag is filled in
# once here rather than repeated per row.
examples = [
    [example_prompt, example_image, True, example_seed]
    for example_prompt, example_image, example_seed in (
        (
            "A decorative figurine of a young anime-style girl",
            "assets/demo/i2mv/A_decorative_figurine_of_a_young_anime-style_girl.png",
            21,
        ),
        (
            "A juvenile emperor penguin chick",
            "assets/demo/i2mv/A_juvenile_emperor_penguin_chick.png",
            0,
        ),
        (
            "A striped tabby cat with white fur sitting upright",
            "assets/demo/i2mv/A_striped_tabby_cat_with_white_fur_sitting_upright.png",
            0,
        ),
    )
]
107
+
108
+
109
# Gradio UI: inputs and advanced settings on the left, result gallery on the
# right, bundled examples underneath.
with gr.Blocks() as demo:
    with gr.Row():
        # The resolution is interpolated from WIDTH/HEIGHT so the header
        # cannot drift from the values actually passed to the pipeline.
        gr.Markdown(
            f"""# MV-Adapter [Image-to-Multi-View]
Generate {WIDTH}x{HEIGHT} multi-view images from a single image using SDXL <br>
[[page](https://huanngzh.github.io/MV-Adapter-Page/)] [[repo](https://github.com/huanngzh/MV-Adapter)]
"""
        )

    with gr.Row():
        with gr.Column():
            with gr.Row():
                input_image = gr.Image(
                    label="Input Image",
                    sources=["upload", "webcam", "clipboard"],
                    type="pil",
                )
                # Filled from infer's second return value.
                preprocessed_image = gr.Image(label="Preprocessed Image", type="pil")

            prompt = gr.Textbox(
                label="Prompt", placeholder="Enter your prompt", value="high quality"
            )
            do_rembg = gr.Checkbox(label="Remove background", value=True)
            run_button = gr.Button("Run")

            with gr.Accordion("Advanced Settings", open=False):
                seed = gr.Slider(
                    label="Seed",
                    minimum=0,
                    maximum=MAX_SEED,
                    step=1,
                    value=0,
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

                with gr.Row():
                    num_inference_steps = gr.Slider(
                        label="Number of inference steps",
                        minimum=1,
                        maximum=50,
                        step=1,
                        value=50,
                    )

                with gr.Row():
                    guidance_scale = gr.Slider(
                        label="CFG scale",
                        minimum=0.0,
                        maximum=10.0,
                        step=0.1,
                        value=3.0,
                    )

                with gr.Row():
                    reference_conditioning_scale = gr.Slider(
                        label="Image conditioning scale",
                        minimum=0.0,
                        maximum=2.0,
                        step=0.1,
                        value=1.0,
                    )

                with gr.Row():
                    negative_prompt = gr.Textbox(
                        label="Negative prompt",
                        placeholder="Enter your negative prompt",
                        value="watermark, ugly, deformed, noisy, blurry, low contrast",
                    )

        with gr.Column():
            result = gr.Gallery(
                label="Result",
                show_label=False,
                columns=[3],
                rows=[2],
                object_fit="contain",
                height="auto",
            )

    with gr.Row():
        # Only four inputs are wired here, so infer's defaults apply to the
        # rest (notably randomize_seed=False, which keeps each example's
        # seed fixed).
        gr.Examples(
            examples=examples,
            fn=infer,
            inputs=[prompt, input_image, do_rembg, seed],
            outputs=[result, preprocessed_image, seed],
            cache_examples=True,
        )

    # Clicking Run and pressing Enter in the prompt box trigger the same
    # call; the input order matches infer's positional parameters.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            input_image,
            do_rembg,
            seed,
            randomize_seed,
            guidance_scale,
            num_inference_steps,
            reference_conditioning_scale,
            negative_prompt,
        ],
        # The seed slider is also an output so it shows the seed actually used.
        outputs=[result, preprocessed_image, seed],
    )

demo.launch()