Spaces (Build error)
Update app_dialogue.py
app_dialogue.py CHANGED: +85 -131
@@ -70,13 +70,20 @@ import tempfile
 
 
 def convert_to_rgb_pil(image):
-
-
-
-
+    """
+    Convert a PIL Image object to RGB mode and save it locally.
+
+    The function ensures that images with transparency (alpha channel)
+    are overlaid on a white background before saving.
+
+    Parameters:
+    - image (PIL.Image.Image): The input image to be processed.
+
+    Returns:
+    - str: The path to the saved RGB image.
+
+    """
     # Save the converted image to a temporary file
-    #temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
-    #temp_file_path = temp_file.name
     filename = f"{uuid.uuid4()}.jpg"
     local_path = f"{filename}"
 
@@ -89,16 +96,30 @@ def convert_to_rgb_pil(image):
     else:
         image.save(local_path)
 
-    #temp_file.close()
-    print(f"# Return the path to the saved image as - {local_path}")
     return local_path  # Return the path to the saved image
 
 
 def convert_to_rgb(filepath_or_pilimg):
+    """
+    Convert an image to RGB mode, handling transparency for non-RGB images.
+
+    This function can accept either a file path to an image or a PIL Image object.
+    For transparent images, the function overlays the image onto a white background
+    to handle the transparency before converting it to RGB mode.
+
+    Parameters:
+    - filepath_or_pilimg (str or PIL.Image.Image): The file path to an image or a PIL
+      Image object to be processed.
+
+    Returns:
+    - str: If the input was a file path, the return will be the path to the original
+      image (if it's already in RGB) or the path to the saved RGB image.
+      If the input was a PIL Image object, the return will be the path to the saved
+      RGB image.
+
+    """
     # `image.convert("RGB")` would only work for .jpg images, as it creates a wrong background
     # for transparent images. The call to `alpha_composite` handles this case
-    print(f"***** convert_to_rgb ******")
-    print(f"params: image is - {filepath_or_pilimg}")
 
     if isinstance(filepath_or_pilimg, PIL.Image.Image):
         return convert_to_rgb_pil(filepath_or_pilimg)
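The docstrings above describe the same transparency handling in both helpers: composite the image over a white background before dropping the alpha channel, then save a JPEG under a UUID filename. A minimal standalone sketch of that approach, assuming Pillow; the function name is illustrative rather than the app's exact helper:

```python
import uuid

from PIL import Image


def flatten_to_rgb(image: Image.Image) -> str:
    """Overlay a possibly transparent image on white, save as JPEG, return the path."""
    if image.mode == "RGB":
        converted = image
    else:
        # Compositing over white keeps transparent regions white, instead of the
        # black background that a bare image.convert("RGB") would produce.
        rgba = image.convert("RGBA")
        background = Image.new("RGBA", rgba.size, (255, 255, 255, 255))
        converted = Image.alpha_composite(background, rgba).convert("RGB")
    local_path = f"{uuid.uuid4()}.jpg"
    converted.save(local_path)
    return local_path
```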
@@ -123,26 +144,15 @@ def convert_to_rgb(filepath_or_pilimg):
     # Save the converted image to a temporary file
     filename = f"{uuid.uuid4()}.jpg"
     local_path = f"{filename}"
-    #temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
-    #temp_file_path = temp_file.name
     alpha_composite.save(local_path)
-    #temp_file.close()
 
-    print(f"# Return the path to the saved image as - {local_path}")
     return local_path  # Return the path to the saved image
 
+# XXXX REMOVE
 def pil_to_markdown_im(image):
     """
     Convert a PIL image into markdown filled with the base64 string representation.
     """
-    print(f"***** pil_to_markdown_im ******")
-    print(f"params: image is - {image}")
-    #if isinstance(image, PIL.Image.Image):
-    #img_b64_str = pil_to_base64(image)
-    #img_str = f'<img src="data:image/png;base64,{img_b64_str}" />'
-    #if path_or_url.startswith(("http://", "https://")):
-    #response = requests.get(image)
-    #image = Image.open(BytesIO(response.content))
     # Generate a unique filename using UUID
     filename = f"{uuid.uuid4()}.jpg"
     local_path = f"{filename}"
@@ -156,7 +166,7 @@ def base64_to_pil(encoded_image):
     pil_image = Image.open(BytesIO(decoded_image))
     return pil_image
 
-
+# XXXXX REMOVE
 def im_markdown_to_pil(im_markdown_str):
     pattern = r'<img src="data:image/png;base64,([^"]+)" />'
     match = re.search(pattern, im_markdown_str)
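Both `pil_to_markdown_im` and `im_markdown_to_pil` are now flagged for removal; together they round-trip a PIL image through a base64 `<img>` tag. A self-contained sketch of that round-trip, assuming Pillow; these names mirror the app's helpers but are written from scratch here:

```python
import base64
import re
from io import BytesIO

from PIL import Image


def pil_to_im_markdown(image: Image.Image) -> str:
    # Encode the image as PNG bytes, then as a base64 data URI inside an <img> tag.
    buffer = BytesIO()
    image.save(buffer, format="PNG")
    b64 = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f'<img src="data:image/png;base64,{b64}" />'


def im_markdown_to_pil_image(im_markdown_str: str) -> Image.Image:
    # Invert the step above: pull the base64 payload out of the tag and decode it.
    match = re.search(r'<img src="data:image/png;base64,([^"]+)" />', im_markdown_str)
    return Image.open(BytesIO(base64.b64decode(match.group(1))))


markdown = pil_to_im_markdown(Image.new("RGBA", (8, 8), (255, 0, 0, 128)))
assert im_markdown_to_pil_image(markdown).size == (8, 8)
```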
@@ -218,33 +228,20 @@ def isolate_images_urls(prompt_list):
     ]
     ```
     """
-    print(f"******* isolate_images_urls *******")
-    print(f"params: prompt_list is - {prompt_list}")
 
     linearized_list = []
     for prompt in prompt_list:
-        print(f"inside FOR loop: prompt in prompt_list is - {prompt}")
         # Prompt can be either a string, or a PIL image
         if isinstance(prompt, PIL.Image.Image):
-            print(f"inside first IF in FOR loop: prompt is of type PIL.Image.Image")
             linearized_list.append(prompt)
-            print(f"linearized_list after append is - {linearized_list}")
         elif isinstance(prompt, str) and "/tmp/gradio/" in prompt: #isinstance(prompt, PIL.Image.Image):
-            print(f"inside IF in FOR loop: prompt is a string and is a path for temporary file")
             linearized_list.append(prompt)
-            print(f"linearized_list after append is - {linearized_list}")
         elif isinstance(prompt, str) and "/tmp/gradio/" not in prompt:
-            print(f"inside ELIF in FOR loop: prompt is a string and is NOT a path for temporary file")
             if "<fake_token_around_image>" not in prompt:
-                print(f"inside IF inside ELIF in FOR loop: '<fake_token_around_image>' is NOT in prompt")
                 linearized_list.append(prompt)
-                print(f"linearized_list after append is - {linearized_list}")
             else:
-                print(f"inside ELSE inside ELIF in FOR loop: '<fake_token_around_image>' IS IN prompt")
                 prompt_splitted = prompt.split("<fake_token_around_image>")
-                print(f"prompt_splitted is - {prompt_splitted}")
                 for ps in prompt_splitted:
-                    print(f"Inside FOR loop inside FOR loop: ps in prompt_split is {ps}")
                     if ps == "":
                         continue
                     if ps.startswith("<image:"):
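The branches above differ only in the removed prints; the surviving logic is: pass PIL images and Gradio temp paths through unchanged, and split any other string on the `<fake_token_around_image>` sentinel. A condensed, hedged sketch of that linearization with illustrative names (the real function additionally parses `<image:URL>` chunks, which this hunk truncates):

```python
from typing import List, Union

import PIL.Image


def linearize(prompt_list: List[Union[str, PIL.Image.Image]]) -> list:
    linearized = []
    for prompt in prompt_list:
        if isinstance(prompt, PIL.Image.Image):
            linearized.append(prompt)          # PIL images pass through unchanged
        elif isinstance(prompt, str) and "/tmp/gradio/" in prompt:
            linearized.append(prompt)          # so do Gradio temp-file paths
        elif isinstance(prompt, str):
            # Split on the sentinel; empty chunks come from leading/trailing tokens.
            for chunk in prompt.split("<fake_token_around_image>"):
                if chunk:
                    linearized.append(chunk)   # "<image:URL>" chunks kept as-is here
        else:
            raise ValueError(f"Unrecognized type for `prompt`: {type(prompt)}")
    return linearized


print(linearize(["Hi<fake_token_around_image><image:https://example.com/dog.jpg><fake_token_around_image>what breed?"]))
# ['Hi', '<image:https://example.com/dog.jpg>', 'what breed?']
```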
@@ -256,7 +253,6 @@ def isolate_images_urls(prompt_list):
                 f"Unrecognized type for `prompt`. Got {type(type(prompt))}. Was expecting something in [`str`,"
                 " `PIL.Image.Image`]"
             )
-    print(f"linearized_list to be returned is - {linearized_list}")
     return linearized_list
 
 
@@ -285,31 +281,20 @@ def user_prompt_list_to_markdown(user_prompt_list: List[Union[str, PIL.Image.Ima
     Convert a user prompt in the list format (i.e. elements are either a PIL image or a string) into
     the markdown format that is used for the chatbot history and rendering.
     """
-    print("********** user_prompt_list_to_markdown *********")
-    print(f" param : user_prompt_list is - {user_prompt_list}")
     resulting_string = ""
     for elem in user_prompt_list:
-        print(f"inside user_prompt_list_to_markdown, for loop on user_prompt_list")
-        print(f"elem is - {elem} ")
         if isinstance(elem, str):
             if "/tmp/gradio/" not in elem:
                 resulting_string += elem
-                print(f"inside IF - when elem is string and is not temp image filepath. resulting_string is - {resulting_string}")
             elif "/tmp/gradio/" in elem:
                 resulting_string += f"![](/file={convert_to_rgb(elem)})"
-                print(f"inside IF - when elem is string and is a temp image filepath. resulting_string is - {resulting_string}")
-        #elif isinstance(elem, str) and "/tmp/gradio/" in elem:
-        #    resulting_string += f"![](/file={convert_to_rgb(elem)})" #f"![](/file={image})"
-        #    print(f"inside first ELIF - when elem is string and is the temp image filepath. resulting_string is - {resulting_string}")
         elif isinstance(elem, PIL.Image.Image): #or "/tmp/gradio/" in elem: #and "/tmp/gradio/" in elem:
             resulting_string += f"![](/file={convert_to_rgb(elem)})" #pil_to_markdown_im(convert_to_rgb(elem)) <---------------
-            print(f"inside the ELIF - when elem is an instance of PIL.Image.Image. The resulting_string after convert_to_rgb() operation is - {resulting_string}")
         else:
             raise ValueError(
                 "Unknown type for `user_prompt_list`. Expected an element of type `str` or `PIL.Image.Image` and got"
                 f" `{type(elem)}`"
             )
-    print(f" final resulting_string that will be returned is - {resulting_string}")
     return resulting_string
 
 
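What remains of `user_prompt_list_to_markdown` after the cleanup: plain strings are concatenated, while temp-file paths and PIL images become `![](/file=...)` links pointing at the RGB copy. A runnable sketch with `convert_to_rgb` stubbed out (the stub's fixed filename is invented for illustration):

```python
from typing import List, Union

from PIL import Image


def convert_to_rgb_stub(elem) -> str:
    # Stands in for the app's convert_to_rgb, which saves an RGB copy
    # under a UUID filename and returns that path.
    return "0000-example.jpg"


def prompt_list_to_markdown(user_prompt_list: List[Union[str, Image.Image]]) -> str:
    resulting_string = ""
    for elem in user_prompt_list:
        if isinstance(elem, str) and "/tmp/gradio/" not in elem:
            resulting_string += elem
        elif isinstance(elem, (str, Image.Image)):
            # Temp-file paths and PIL images render as Gradio file links.
            resulting_string += f"![](/file={convert_to_rgb_stub(elem)})"
        else:
            raise ValueError(f"Unknown type: {type(elem)}")
    return resulting_string


print(prompt_list_to_markdown(["Describe this image: ", Image.new("RGB", (8, 8))]))
# Describe this image: ![](/file=0000-example.jpg)
```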
@@ -348,8 +333,6 @@ def load_processor_tokenizer_model(model_name):
         max_memory=max_memory_map,
     )
     model.eval()
-    print("Current device map:", model.hf_device_map)
-    print("Model default generation config:", model.generation_config)
     # TODO: the device_map looks very inefficien right now. that could be improved
     return processor, tokenizer, model
 
@@ -361,60 +344,41 @@ def format_user_prompt_with_im_history_and_system_conditioning(
     Produces the resulting list that needs to go inside the processor.
     It handles the potential image box input, the history and the system conditionning.
     """
-    print(f"*********format_user_prompt_with_im_history_and_system_conditioning*********")
-    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_user_prompt_str is - {current_user_prompt_str} ")
-    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_image is - {current_image} ")
-    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param history is - {history} ")
 
     resulting_list = copy.deepcopy(SYSTEM_PROMPT)
 
     # Format history
     for turn in history:
-        print(f"inside for loop, turn is - {turn}")
         user_utterance, assistant_utterance = turn
-        print("calling split_str_on_im_markdown from inside for loop inside format_user_prompt_with_im_history_and_system_conditioning")
         splitted_user_utterance = split_str_on_im_markdown(user_utterance)
-        splitted_user_utterance = [
-            im_markdown_to_pil(s) if s.startswith('<img src="data:image/png;base64,') else s
-            for s in splitted_user_utterance
-            if s != ""
-        ]
-        print(f"splitted_user_utterance after im_markdown_to_pil() is - {splitted_user_utterance} ")
+        #splitted_user_utterance = [
+        #    im_markdown_to_pil(s) if s.startswith('<img src="data:image/png;base64,') else s
+        #    for s in splitted_user_utterance
+        #    if s != ""
+        #]
 
         if isinstance(splitted_user_utterance[0], str):
             resulting_list.append("\nUser: ")
         else:
             resulting_list.append("\nUser:")
-        print(f"resulting_list after if..else block is - {resulting_list}")
         resulting_list.extend(splitted_user_utterance)
-        print(f"resulting_list after extend is - {resulting_list}")
         resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
-        print(f"resulting_list after append is - {resulting_list}")
 
 
     # Format current input
     current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
-    print(f"current_user_prompt_str is - {current_user_prompt_str}")
 
     if current_image is None:
-        print("inside IF : current_image is NONE")
         if "<img src=data:image/png;base64" in current_user_prompt_str:
             raise ValueError("The UI does not support inputing via the text box an image in base64.")
         current_user_prompt_list = handle_manual_images_in_user_prompt(current_user_prompt_str)
-        print(f"current_user_prompt_list (or [user_prompt]/resulting_user_prompt((most likely this one)) from handle_manual_images_in_user_prompt ) is - {current_user_prompt_list}")
         resulting_list.append("\nUser: ")
-        print(f"resulting_list with append user - {resulting_list}")
         resulting_list.extend(current_user_prompt_list)
-        print(f"resulting_list after extend with current_user_prompt_list is - {resulting_list}")
         resulting_list.append("<end_of_utterance>\nAssistant:")
-        print(f"resulting_list after append with end_of_utteranceAssistant is - {resulting_list}")
         return resulting_list, current_user_prompt_list
     else:
-
-        # Choosing to put the image first when the image is inputted through the UI, but this is an arbiratrary choice.
+        # Choosing to put the image first when the image is inputted through the UI, but this is an arbitrary choice.
         resulting_list.extend(["\nUser:", Image.open(current_image), f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"]) #current_image
-        print(f"final resulting_list passed on to calling function is - {resulting_list}")
         return resulting_list, [current_user_prompt_str]
 
 
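For reference, the shape of the list this function returns, assembled from the system prompt, past turns, and the current turn; the utterance text below is invented for illustration, and the image-first ordering for box uploads matches the comment in the diff:

```python
from PIL import Image

current_image = Image.new("RGB", (64, 64))  # stand-in for the Image Box upload

resulting_list = [
    # ...deep-copied SYSTEM_PROMPT entries come first...
    "\nUser: ",
    "What is in this picture?",                    # a past history turn
    "<end_of_utterance>\nAssistant: A red panda.",
    "\nUser:",
    current_image,                                 # current turn: image placed first
    "What about this one?<end_of_utterance>\nAssistant:",
]
```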
@@ -836,11 +800,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
     penalty_alpha,
 ):
     # global processor, model, tokenizer
-    print("***********Model_inference*************")
-    print(f"Inside Model_inference, user_prompt_str is - {user_prompt_str} ")
-    print(f"Inside Model_inference, chat_history is - {chat_history} ")
-    print(f"Inside Model_inference, image type is - {type(image)} ")
-    print(f"Inside Model_inference, image is - {image} ")
 
     force_words = ""
     hide_special_tokens = False
@@ -851,9 +810,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
         history=chat_history,
     )
 
-    print(f"formated_prompt_list (or resulting_list) is {formated_prompt_list}")
-    print(f"user_prompt_list (or [current_user_prompt_str]) is {user_prompt_list}")
-
     generated_text = model_generation(
         prompt_list=formated_prompt_list,
         processor=processor,
@@ -881,7 +837,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
         chat_history.append(
             (user_prompt_list_to_markdown(user_prompt_list), generated_text.strip("<end_of_utterance>"))
         )
-        print(f"chat_history (IF image is None or is with fake token) is -{chat_history}")
     else:
         # Case where the image is passed through the Image Box.
         # Convert the image into base64 for both passing it through the chat history and
@@ -892,7 +847,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
                 generated_text.strip("<end_of_utterance>"),
             )
         )
-        print(f"chat_history (ELSE IF image is available) is -{chat_history}")
     return "", None, chat_history
 
 
@@ -1045,52 +999,52 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
         examples=[
             ["What are the armed baguettes guarding?", f"{examples_path}/example_images/baguettes_guarding_paris.png"],
             [
-                "Can you tell me a very short story based on this image?",
-                f"{examples_path}/example_images/chicken_on_money.png",
-            ],
-            ["Can you describe the image?", f"{examples_path}/example_images/bear_costume.png"],
-            ["What is this animal and why is it unusual?", f"{examples_path}/example_images/blue_dog.png"],
-            [
-                "What is this object and do you think it is horrifying?",
-                f"{examples_path}/example_images/can_horror.png",
-            ],
-            ["What is this sketch for? How would you make an argument to prove this sketch was made by Picasso himself?", f"{examples_path}/example_images/cat_sketch.png"],
-            ["Which celebrity does this claymation figure look like?", f"{examples_path}/example_images/kanye.jpg"],
-            [
-                "Which famous person does the person in the image look like? Could you craft an engaging narrative featuring this character from the image as the main protagonist?",
-                f"{examples_path}/example_images/obama-harry-potter.jpg",
-            ],
-            [
-                "Is there a celebrity look-alike in this image? What is happening to the person?",
-                f"{examples_path}/example_images/ryan-reynolds-borg.jpg",
-            ],
-            ["Can you describe this image in details please?", f"{examples_path}/example_images/dragons_playing.png"],
-            ["What can you tell me about the cap in this image?", f"{examples_path}/example_images/ironman_cap.png"],
-            [
-                "Can you write an advertisement for Coca-Cola based on this image?",
-                f"{examples_path}/example_images/polar_bear_coke.png",
-            ],
-            [
-                "What is the rabbit doing in this image? Do you think this image is real?",
-                f"{examples_path}/example_images/rabbit_force.png",
-            ],
-            ["What is happening in this image and why is it unusual?", f"{examples_path}/example_images/ramen.png"],
-            [
-                "What I should look most forward to when I visit this place?",
-                f"{examples_path}/example_images/tree_fortress.jpg",
-            ],
-            ["Who is the person in the image and what is he doing?", f"{examples_path}/example_images/tom-cruise-astronaut-pegasus.jpg"],
-            [
-                "What is happening in this image? Which famous personality does this person in center looks like?",
-                f"{examples_path}/example_images/gandhi_selfie.jpg",
-            ],
-            [
-                (
-                    "<fake_token_around_image><image:https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/stable-diffusion-xl-coreml/a_high_quality_photo_of_a_surfing_dog.7667.final_float16_original.jpg><fake_token_around_image>What"
-                    " do you think the dog is doing and is it unusual?"
-                ),
-                None,
-            ],
+                "Can you tell me a very short story based on this image?",
+                f"{examples_path}/example_images/chicken_on_money.png",
+            ],
+            # ["Can you describe the image?", f"{examples_path}/example_images/bear_costume.png"],
+            # ["What is this animal and why is it unusual?", f"{examples_path}/example_images/blue_dog.png"],
+            # [
+            #     "What is this object and do you think it is horrifying?",
+            #     f"{examples_path}/example_images/can_horror.png",
+            # ],
+            # ["What is this sketch for? How would you make an argument to prove this sketch was made by Picasso himself?", f"{examples_path}/example_images/cat_sketch.png"],
+            # ["Which celebrity does this claymation figure look like?", f"{examples_path}/example_images/kanye.jpg"],
+            # [
+            #     "Which famous person does the person in the image look like? Could you craft an engaging narrative featuring this character from the image as the main protagonist?",
+            #     f"{examples_path}/example_images/obama-harry-potter.jpg",
+            # ],
+            # [
+            #     "Is there a celebrity look-alike in this image? What is happening to the person?",
+            #     f"{examples_path}/example_images/ryan-reynolds-borg.jpg",
+            # ],
+            # ["Can you describe this image in details please?", f"{examples_path}/example_images/dragons_playing.png"],
+            # ["What can you tell me about the cap in this image?", f"{examples_path}/example_images/ironman_cap.png"],
+            # [
+            #     "Can you write an advertisement for Coca-Cola based on this image?",
+            #     f"{examples_path}/example_images/polar_bear_coke.png",
+            # ],
+            # [
+            #     "What is the rabbit doing in this image? Do you think this image is real?",
+            #     f"{examples_path}/example_images/rabbit_force.png",
+            # ],
+            # ["What is happening in this image and why is it unusual?", f"{examples_path}/example_images/ramen.png"],
+            # [
+            #     "What I should look most forward to when I visit this place?",
+            #     f"{examples_path}/example_images/tree_fortress.jpg",
+            # ],
+            # ["Who is the person in the image and what is he doing?", f"{examples_path}/example_images/tom-cruise-astronaut-pegasus.jpg"],
+            # [
+            #     "What is happening in this image? Which famous personality does this person in center looks like?",
+            #     f"{examples_path}/example_images/gandhi_selfie.jpg",
+            # ],
+            # [
+            #     (
+            #         "<fake_token_around_image><image:https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/stable-diffusion-xl-coreml/a_high_quality_photo_of_a_surfing_dog.7667.final_float16_original.jpg><fake_token_around_image>What"
+            #         " do you think the dog is doing and is it unusual?"
+            #     ),
+            #     None,
+            # ],
         ],
         inputs=[textbox, imagebox],
         outputs=[textbox, imagebox, chatbot],