Spaces:
Sleeping
Sleeping
Shanshan Wang
committed on
Commit
•
1757eeb
1
Parent(s):
cbfb2ad
added a few more options
Browse files
app.py
CHANGED
@@ -26,7 +26,6 @@ def build_transform(input_size):
|
|
26 |
])
|
27 |
return transform
|
28 |
|
29 |
-
|
30 |
def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
|
31 |
best_ratio_diff = float('inf')
|
32 |
best_ratio = (1, 1)
|
@@ -41,6 +40,7 @@ def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_
|
|
41 |
if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
|
42 |
best_ratio = ratio
|
43 |
return best_ratio
|
|
|
44 |
def dynamic_preprocess(image, min_num=1, max_num=6, image_size=448, use_thumbnail=False):
|
45 |
orig_width, orig_height = image.size
|
46 |
aspect_ratio = orig_width / orig_height
|
@@ -79,7 +79,6 @@ def dynamic_preprocess(image, min_num=1, max_num=6, image_size=448, use_thumbnai
|
|
79 |
processed_images.append(thumbnail_img)
|
80 |
return processed_images, target_aspect_ratio
|
81 |
|
82 |
-
|
83 |
def dynamic_preprocess2(image, min_num=1, max_num=6, image_size=448, use_thumbnail=False, prior_aspect_ratio=None):
|
84 |
orig_width, orig_height = image.size
|
85 |
aspect_ratio = orig_width / orig_height
|
@@ -175,7 +174,7 @@ tokenizer.eos_token = "<|end|>"
|
|
175 |
model.generation_config.pad_token_id = tokenizer.pad_token_id
|
176 |
|
177 |
|
178 |
-
def inference(image, prompt):
|
179 |
# Check if both image and prompt are provided
|
180 |
if image is None or prompt.strip() == "":
|
181 |
return "Please provide both an image and a prompt."
|
@@ -188,6 +187,8 @@ def inference(image, prompt):
|
|
188 |
num_beams=1,
|
189 |
max_new_tokens=2048,
|
190 |
do_sample=False,
|
|
|
|
|
191 |
)
|
192 |
|
193 |
# Generate the response
|
@@ -200,6 +201,7 @@ def inference(image, prompt):
|
|
200 |
|
201 |
return response
|
202 |
|
|
|
203 |
# Build the Gradio interface
|
204 |
with gr.Blocks() as demo:
|
205 |
gr.Markdown("H2O-Mississippi")
|
@@ -207,17 +209,29 @@ with gr.Blocks() as demo:
|
|
207 |
with gr.Row():
|
208 |
image_input = gr.Image(type="pil", label="Upload an Image")
|
209 |
prompt_input = gr.Textbox(label="Enter your prompt here")
|
|
|
|
|
|
|
|
|
210 |
|
211 |
response_output = gr.Textbox(label="Model Response")
|
212 |
|
213 |
with gr.Row():
|
214 |
submit_button = gr.Button("Submit")
|
|
|
215 |
clear_button = gr.Button("Clear")
|
|
|
216 |
|
217 |
# When the submit button is clicked, call the inference function
|
218 |
submit_button.click(
|
219 |
fn=inference,
|
220 |
-
inputs=[image_input, prompt_input],
|
|
|
|
|
|
|
|
|
|
|
|
|
221 |
outputs=response_output
|
222 |
)
|
223 |
|
|
|
26 |
])
|
27 |
return transform
|
28 |
|
|
|
29 |
def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):
|
30 |
best_ratio_diff = float('inf')
|
31 |
best_ratio = (1, 1)
|
|
|
40 |
if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:
|
41 |
best_ratio = ratio
|
42 |
return best_ratio
|
43 |
+
|
44 |
def dynamic_preprocess(image, min_num=1, max_num=6, image_size=448, use_thumbnail=False):
|
45 |
orig_width, orig_height = image.size
|
46 |
aspect_ratio = orig_width / orig_height
|
|
|
79 |
processed_images.append(thumbnail_img)
|
80 |
return processed_images, target_aspect_ratio
|
81 |
|
|
|
82 |
def dynamic_preprocess2(image, min_num=1, max_num=6, image_size=448, use_thumbnail=False, prior_aspect_ratio=None):
|
83 |
orig_width, orig_height = image.size
|
84 |
aspect_ratio = orig_width / orig_height
|
|
|
174 |
model.generation_config.pad_token_id = tokenizer.pad_token_id
|
175 |
|
176 |
|
177 |
+
def inference(image, prompt, temperature, top_p):
|
178 |
# Check if both image and prompt are provided
|
179 |
if image is None or prompt.strip() == "":
|
180 |
return "Please provide both an image and a prompt."
|
|
|
187 |
num_beams=1,
|
188 |
max_new_tokens=2048,
|
189 |
do_sample=False,
|
190 |
+
temperature=temperature,
|
191 |
+
top_p=top_p,
|
192 |
)
|
193 |
|
194 |
# Generate the response
|
|
|
201 |
|
202 |
return response
|
203 |
|
204 |
+
|
205 |
# Build the Gradio interface
|
206 |
with gr.Blocks() as demo:
|
207 |
gr.Markdown("H2O-Mississippi")
|
|
|
209 |
with gr.Row():
|
210 |
image_input = gr.Image(type="pil", label="Upload an Image")
|
211 |
prompt_input = gr.Textbox(label="Enter your prompt here")
|
212 |
+
|
213 |
+
with gr.Accordion('Parameters', open=False):
|
214 |
+
temperature_input = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, default=1.0, label="Temperature")
|
215 |
+
top_p_input = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, default=0.9, label="Top-p")
|
216 |
|
217 |
response_output = gr.Textbox(label="Model Response")
|
218 |
|
219 |
with gr.Row():
|
220 |
submit_button = gr.Button("Submit")
|
221 |
+
regenerate_button = gr.Button("Regenerate")
|
222 |
clear_button = gr.Button("Clear")
|
223 |
+
|
224 |
|
225 |
# When the submit button is clicked, call the inference function
|
226 |
submit_button.click(
|
227 |
fn=inference,
|
228 |
+
inputs=[image_input, prompt_input, temperature_input, top_p_input],
|
229 |
+
outputs=response_output
|
230 |
+
)
|
231 |
+
# When the regenerate button is clicked, re-run the last inference
|
232 |
+
regenerate_button.click(
|
233 |
+
fn=inference,
|
234 |
+
inputs=[image_input, prompt_input, temperature_input, top_p_input],
|
235 |
outputs=response_output
|
236 |
)
|
237 |
|