ysharma (HF staff) committed
Commit 650bccb
Parent(s): ddc7b1b

Update app_dialogue.py

Files changed (1): app_dialogue.py (+85, -131)
app_dialogue.py CHANGED

@@ -70,13 +70,20 @@ import tempfile
 
 
 def convert_to_rgb_pil(image):
-    print(f"***** convert_to_rgb_pil ******")
-    print(f"params: image is - {image}")
-    #if image.mode == "RGB":
-    #    return image
+    """
+    Convert a PIL Image object to RGB mode and save it locally.
+
+    The function ensures that images with transparency (alpha channel)
+    are overlaid on a white background before saving.
+
+    Parameters:
+    - image (PIL.Image.Image): The input image to be processed.
+
+    Returns:
+    - str: The path to the saved RGB image.
+
+    """
     # Save the converted image to a temporary file
-    #temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
-    #temp_file_path = temp_file.name
     filename = f"{uuid.uuid4()}.jpg"
     local_path = f"{filename}"
 
@@ -89,16 +96,30 @@ def convert_to_rgb_pil(image):
     else:
         image.save(local_path)
 
-    #temp_file.close()
-    print(f"# Return the path to the saved image as - {local_path}")
     return local_path  # Return the path to the saved image
 
 
 def convert_to_rgb(filepath_or_pilimg):
+    """
+    Convert an image to RGB mode, handling transparency for non-RGB images.
+
+    This function can accept either a file path to an image or a PIL Image object.
+    For transparent images, the function overlays the image onto a white background
+    to handle the transparency before converting it to RGB mode.
+
+    Parameters:
+    - filepath_or_pilimg (str or PIL.Image.Image): The file path to an image or a PIL
+      Image object to be processed.
+
+    Returns:
+    - str: If the input was a file path, the return will be the path to the original
+      image (if it's already in RGB) or the path to the saved RGB image.
+      If the input was a PIL Image object, the return will be the path to the saved
+      RGB image.
+
+    """
     # `image.convert("RGB")` would only work for .jpg images, as it creates a wrong background
     # for transparent images. The call to `alpha_composite` handles this case
-    print(f"***** convert_to_rgb ******")
-    print(f"params: image is - {filepath_or_pilimg}")
 
     if isinstance(filepath_or_pilimg, PIL.Image.Image):
         return convert_to_rgb_pil(filepath_or_pilimg)
@@ -123,26 +144,15 @@ def convert_to_rgb(filepath_or_pilimg):
     # Save the converted image to a temporary file
     filename = f"{uuid.uuid4()}.jpg"
     local_path = f"{filename}"
-    #temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".jpg")
-    #temp_file_path = temp_file.name
     alpha_composite.save(local_path)
-    #temp_file.close()
 
-    print(f"# Return the path to the saved image as - {local_path}")
     return local_path  # Return the path to the saved image
 
+# XXXX REMOVE
 def pil_to_markdown_im(image):
     """
     Convert a PIL image into markdown filled with the base64 string representation.
     """
-    print(f"***** pil_to_markdown_im ******")
-    print(f"params: image is - {image}")
-    #if isinstance(image, PIL.Image.Image):
-    #img_b64_str = pil_to_base64(image)
-    #img_str = f'<img src="data:image/png;base64,{img_b64_str}" />'
-    #if path_or_url.startswith(("http://", "https://")):
-    #response = requests.get(image)
-    #image = Image.open(BytesIO(response.content))
     # Generate a unique filename using UUID
     filename = f"{uuid.uuid4()}.jpg"
     local_path = f"{filename}"
@@ -156,7 +166,7 @@ def base64_to_pil(encoded_image):
     pil_image = Image.open(BytesIO(decoded_image))
     return pil_image
 
-
+# XXXXX REMOVE
 def im_markdown_to_pil(im_markdown_str):
     pattern = r'<img src="data:image/png;base64,([^"]+)" />'
     match = re.search(pattern, im_markdown_str)
@@ -218,33 +228,20 @@ def isolate_images_urls(prompt_list):
     ]
     ```
     """
-    print(f"******* isolate_images_urls *******")
-    print(f"params: prompt_list is - {prompt_list}")
 
     linearized_list = []
     for prompt in prompt_list:
-        print(f"inside FOR loop: prompt in prompt_list is - {prompt}")
         # Prompt can be either a string, or a PIL image
         if isinstance(prompt, PIL.Image.Image):
-            print(f"inside first IF in FOR loop: prompt is of type PIL.Image.Image")
             linearized_list.append(prompt)
-            print(f"linearized_list after append is - {linearized_list}")
         elif isinstance(prompt, str) and "/tmp/gradio/" in prompt: #isinstance(prompt, PIL.Image.Image):
-            print(f"inside IF in FOR loop: prompt is a string and is a path for temporary file")
             linearized_list.append(prompt)
-            print(f"linearized_list after append is - {linearized_list}")
         elif isinstance(prompt, str) and "/tmp/gradio/" not in prompt:
-            print(f"inside ELIF in FOR loop: prompt is a string and is NOT a path for temporary file")
             if "<fake_token_around_image>" not in prompt:
-                print(f"inside IF inside ELIF in FOR loop: '<fake_token_around_image>' is NOT in prompt")
                 linearized_list.append(prompt)
-                print(f"linearized_list after append is - {linearized_list}")
             else:
-                print(f"inside ELSE inside ELIF in FOR loop: '<fake_token_around_image>' IS IN prompt")
                 prompt_splitted = prompt.split("<fake_token_around_image>")
-                print(f"prompt_splitted is - {prompt_splitted}")
                 for ps in prompt_splitted:
-                    print(f"Inside FOR loop inside FOR loop: ps in prompt_split is {ps}")
                     if ps == "":
                         continue
                     if ps.startswith("<image:"):
@@ -256,7 +253,6 @@ def isolate_images_urls(prompt_list):
                 f"Unrecognized type for `prompt`. Got {type(type(prompt))}. Was expecting something in [`str`,"
                 " `PIL.Image.Image`]"
             )
-    print(f"linearized_list to be returned is - {linearized_list}")
     return linearized_list
 
 
@@ -285,31 +281,20 @@ def user_prompt_list_to_markdown(user_prompt_list: List[Union[str, PIL.Image.Ima
     Convert a user prompt in the list format (i.e. elements are either a PIL image or a string) into
     the markdown format that is used for the chatbot history and rendering.
     """
-    print("********** user_prompt_list_to_markdown *********")
-    print(f" param : user_prompt_list is - {user_prompt_list}")
     resulting_string = ""
     for elem in user_prompt_list:
-        print(f"inside user_prompt_list_to_markdown, for loop on user_prompt_list")
-        print(f"elem is - {elem} ")
         if isinstance(elem, str):
             if "/tmp/gradio/" not in elem:
                 resulting_string += elem
-                print(f"inside IF - when elem is string and is not temp image filepath. resulting_string is - {resulting_string}")
             elif "/tmp/gradio/" in elem:
                 resulting_string += f"![](/file={convert_to_rgb(elem)})"
-                print(f"inside IF - when elem is string and is a temp image filepath. resulting_string is - {resulting_string}")
-        #elif isinstance(elem, str) and "/tmp/gradio/" in elem:
-        #    resulting_string += f"![](/file={convert_to_rgb(elem)})" #f"![](/file={image})"
-        #    print(f"inside first ELIF - when elem is string and is the temp image filepath. resulting_string is - {resulting_string}")
         elif isinstance(elem, PIL.Image.Image): #or "/tmp/gradio/" in elem: #and "/tmp/gradio/" in elem:
             resulting_string += f"![](/file={convert_to_rgb(elem)})" #pil_to_markdown_im(convert_to_rgb(elem)) <---------------
-            print(f"inside the ELIF - when elem is an instance of PIL.Image.Image. The resulting_string after convert_to_rgb() operation is - {resulting_string}")
         else:
             raise ValueError(
                 "Unknown type for `user_prompt_list`. Expected an element of type `str` or `PIL.Image.Image` and got"
                 f" `{type(elem)}`"
            )
-    print(f" final resulting_string that will be returned is - {resulting_string}")
     return resulting_string
 
 
@@ -348,8 +333,6 @@ def load_processor_tokenizer_model(model_name):
         max_memory=max_memory_map,
     )
     model.eval()
-    print("Current device map:", model.hf_device_map)
-    print("Model default generation config:", model.generation_config)
     # TODO: the device_map looks very inefficient right now. that could be improved
     return processor, tokenizer, model
 
@@ -361,60 +344,41 @@ def format_user_prompt_with_im_history_and_system_conditioning(
     Produces the resulting list that needs to go inside the processor.
     It handles the potential image box input, the history and the system conditioning.
     """
-    print(f"*********format_user_prompt_with_im_history_and_system_conditioning*********")
-    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_user_prompt_str is - {current_user_prompt_str} ")
-    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_image is - {current_image} ")
-    print(f"format_user_prompt_with_im_history_and_system_conditioning -- param history is - {history} ")
 
     resulting_list = copy.deepcopy(SYSTEM_PROMPT)
 
     # Format history
     for turn in history:
-        print(f"inside for loop, turn is - {turn}")
         user_utterance, assistant_utterance = turn
-        print("calling split_str_on_im_markdown from inside for loop inside format_user_prompt_with_im_history_and_system_conditioning")
         splitted_user_utterance = split_str_on_im_markdown(user_utterance)
-        print(f"splitted_user_utterance from split_str_on_im_markdown is - {splitted_user_utterance} ")
-        splitted_user_utterance = [
-            im_markdown_to_pil(s) if s.startswith('<img src="data:image/png;base64,') else s
-            for s in splitted_user_utterance
-            if s != ""
-        ]
-        print(f"splitted_user_utterance after im_markdown_to_pil() is - {splitted_user_utterance} ")
+        #splitted_user_utterance = [
+        #    im_markdown_to_pil(s) if s.startswith('<img src="data:image/png;base64,') else s
+        #    for s in splitted_user_utterance
+        #    if s != ""
+        #]
 
         if isinstance(splitted_user_utterance[0], str):
             resulting_list.append("\nUser: ")
         else:
             resulting_list.append("\nUser:")
-        print(f"resulting_list after if..else block is - {resulting_list}")
         resulting_list.extend(splitted_user_utterance)
-        print(f"resulting_list after extend is - {resulting_list}")
         resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
-        print(f"resulting_list after append is - {resulting_list}")
 
 
     # Format current input
     current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
-    print(f"current_user_prompt_str is - {current_user_prompt_str}")
 
     if current_image is None:
-        print("inside IF : current_image is NONE")
         if "<img src=data:image/png;base64" in current_user_prompt_str:
             raise ValueError("The UI does not support inputing via the text box an image in base64.")
         current_user_prompt_list = handle_manual_images_in_user_prompt(current_user_prompt_str)
-        print(f"current_user_prompt_list (or [user_prompt]/resulting_user_prompt((most likely this one)) from handle_manual_images_in_user_prompt ) is - {current_user_prompt_list}")
         resulting_list.append("\nUser: ")
-        print(f"resulting_list with append user - {resulting_list}")
         resulting_list.extend(current_user_prompt_list)
-        print(f"resulting_list after extend with current_user_prompt_list is - {resulting_list}")
         resulting_list.append("<end_of_utterance>\nAssistant:")
-        print(f"resulting_list after append with end_of_utteranceAssistant is - {resulting_list}")
         return resulting_list, current_user_prompt_list
     else:
-        print("inside ELSE : current_image is not NONE")
-        # Choosing to put the image first when the image is inputted through the UI, but this is an arbiratrary choice.
+        # Choosing to put the image first when the image is inputted through the UI, but this is an arbitrary choice.
         resulting_list.extend(["\nUser:", Image.open(current_image), f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"]) #current_image
-        print(f"final resulting_list passed on to calling function is - {resulting_list}")
         return resulting_list, [current_user_prompt_str]
 
 
@@ -836,11 +800,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
         penalty_alpha,
     ):
         # global processor, model, tokenizer
-        print("***********Model_inference*************")
-        print(f"Inside Model_inference, user_prompt_str is - {user_prompt_str} ")
-        print(f"Inside Model_inference, chat_history is - {chat_history} ")
-        print(f"Inside Model_inference, image type is - {type(image)} ")
-        print(f"Inside Model_inference, image is - {image} ")
 
         force_words = ""
         hide_special_tokens = False
@@ -851,9 +810,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
             history=chat_history,
         )
 
-        print(f"formated_prompt_list (or resulting_list) is {formated_prompt_list}")
-        print(f"user_prompt_list (or [current_user_prompt_str]) is {user_prompt_list}")
-
         generated_text = model_generation(
             prompt_list=formated_prompt_list,
             processor=processor,
@@ -881,7 +837,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
             chat_history.append(
                 (user_prompt_list_to_markdown(user_prompt_list), generated_text.strip("<end_of_utterance>"))
             )
-            print(f"chat_history (IF image is None or is with fake token) is -{chat_history}")
         else:
             # Case where the image is passed through the Image Box.
             # Convert the image into base64 for both passing it through the chat history and
@@ -892,7 +847,6 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
                     generated_text.strip("<end_of_utterance>"),
                 )
             )
-            print(f"chat_history (ELSE IF image is available) is -{chat_history}")
         return "", None, chat_history
 
 
@@ -1045,52 +999,52 @@ And so, the story of Mulan and Shrek's romance came to an end, leaving a lasting
     examples=[
        ["What are the armed baguettes guarding?", f"{examples_path}/example_images/baguettes_guarding_paris.png"],
        [
-            "Can you tell me a very short story based on this image?",
-            f"{examples_path}/example_images/chicken_on_money.png",
-        ],
-        ["Can you describe the image?", f"{examples_path}/example_images/bear_costume.png"],
-        ["What is this animal and why is it unusual?", f"{examples_path}/example_images/blue_dog.png"],
-        [
-            "What is this object and do you think it is horrifying?",
-            f"{examples_path}/example_images/can_horror.png",
-        ],
-        ["What is this sketch for? How would you make an argument to prove this sketch was made by Picasso himself?", f"{examples_path}/example_images/cat_sketch.png"],
-        ["Which celebrity does this claymation figure look like?", f"{examples_path}/example_images/kanye.jpg"],
-        [
-            "Which famous person does the person in the image look like? Could you craft an engaging narrative featuring this character from the image as the main protagonist?",
-            f"{examples_path}/example_images/obama-harry-potter.jpg",
-        ],
-        [
-            "Is there a celebrity look-alike in this image? What is happening to the person?",
-            f"{examples_path}/example_images/ryan-reynolds-borg.jpg",
-        ],
-        ["Can you describe this image in details please?", f"{examples_path}/example_images/dragons_playing.png"],
-        ["What can you tell me about the cap in this image?", f"{examples_path}/example_images/ironman_cap.png"],
-        [
-            "Can you write an advertisement for Coca-Cola based on this image?",
-            f"{examples_path}/example_images/polar_bear_coke.png",
-        ],
-        [
-            "What is the rabbit doing in this image? Do you think this image is real?",
-            f"{examples_path}/example_images/rabbit_force.png",
-        ],
-        ["What is happening in this image and why is it unusual?", f"{examples_path}/example_images/ramen.png"],
-        [
-            "What I should look most forward to when I visit this place?",
-            f"{examples_path}/example_images/tree_fortress.jpg",
-        ],
-        ["Who is the person in the image and what is he doing?", f"{examples_path}/example_images/tom-cruise-astronaut-pegasus.jpg"],
-        [
-            "What is happening in this image? Which famous personality does this person in center looks like?",
-            f"{examples_path}/example_images/gandhi_selfie.jpg",
-        ],
-        [
-            (
-                "<fake_token_around_image><image:https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/stable-diffusion-xl-coreml/a_high_quality_photo_of_a_surfing_dog.7667.final_float16_original.jpg><fake_token_around_image>What"
-                " do you think the dog is doing and is it unusual?"
-            ),
-            None,
-        ],
+            "Can you tell me a very short story based on this image?",
+            f"{examples_path}/example_images/chicken_on_money.png",
+        ],
+        # ["Can you describe the image?", f"{examples_path}/example_images/bear_costume.png"],
+        # ["What is this animal and why is it unusual?", f"{examples_path}/example_images/blue_dog.png"],
+        # [
+        #     "What is this object and do you think it is horrifying?",
+        #     f"{examples_path}/example_images/can_horror.png",
+        # ],
+        # ["What is this sketch for? How would you make an argument to prove this sketch was made by Picasso himself?", f"{examples_path}/example_images/cat_sketch.png"],
+        # ["Which celebrity does this claymation figure look like?", f"{examples_path}/example_images/kanye.jpg"],
+        # [
+        #     "Which famous person does the person in the image look like? Could you craft an engaging narrative featuring this character from the image as the main protagonist?",
+        #     f"{examples_path}/example_images/obama-harry-potter.jpg",
+        # ],
+        # [
+        #     "Is there a celebrity look-alike in this image? What is happening to the person?",
+        #     f"{examples_path}/example_images/ryan-reynolds-borg.jpg",
+        # ],
+        # ["Can you describe this image in details please?", f"{examples_path}/example_images/dragons_playing.png"],
+        # ["What can you tell me about the cap in this image?", f"{examples_path}/example_images/ironman_cap.png"],
+        # [
+        #     "Can you write an advertisement for Coca-Cola based on this image?",
+        #     f"{examples_path}/example_images/polar_bear_coke.png",
+        # ],
+        # [
+        #     "What is the rabbit doing in this image? Do you think this image is real?",
+        #     f"{examples_path}/example_images/rabbit_force.png",
+        # ],
+        # ["What is happening in this image and why is it unusual?", f"{examples_path}/example_images/ramen.png"],
+        # [
+        #     "What I should look most forward to when I visit this place?",
+        #     f"{examples_path}/example_images/tree_fortress.jpg",
+        # ],
+        # ["Who is the person in the image and what is he doing?", f"{examples_path}/example_images/tom-cruise-astronaut-pegasus.jpg"],
+        # [
+        #     "What is happening in this image? Which famous personality does this person in center looks like?",
+        #     f"{examples_path}/example_images/gandhi_selfie.jpg",
+        # ],
+        # [
+        #     (
+        #         "<fake_token_around_image><image:https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/blog/stable-diffusion-xl-coreml/a_high_quality_photo_of_a_surfing_dog.7667.final_float16_original.jpg><fake_token_around_image>What"
+        #         " do you think the dog is doing and is it unusual?"
+        #     ),
+        #     None,
+        # ],
     ],
    inputs=[textbox, imagebox],
    outputs=[textbox, imagebox, chatbot],
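
Note: the docstrings added in this commit describe compositing transparent images onto a white
background before saving them as JPEG (see `convert_to_rgb` / `convert_to_rgb_pil` above). A
minimal, self-contained sketch of that technique follows, assuming only Pillow; the helper name
`to_rgb_on_white` is illustrative and not part of the app.

    import uuid

    from PIL import Image

    def to_rgb_on_white(image: Image.Image) -> str:
        # A bare image.convert("RGB") gives transparent PNGs a wrong background,
        # so first overlay the image onto an opaque white canvas (alpha_composite),
        # then convert to RGB and save under a unique uuid-based filename.
        if image.mode != "RGB":
            image_rgba = image.convert("RGBA")
            background = Image.new("RGBA", image_rgba.size, (255, 255, 255, 255))
            image = Image.alpha_composite(background, image_rgba).convert("RGB")
        local_path = f"{uuid.uuid4()}.jpg"
        image.save(local_path)
        return local_path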
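
The saved path is then embedded for the chatbot exactly as the diff does with
f"![](/file={convert_to_rgb(elem)})": a markdown image reference that Gradio serves through its
/file= route. A hypothetical usage, with a placeholder filename standing in for the uuid-named
file written by the sketch above:

    local_path = "3f2a9c.jpg"  # placeholder for the uuid-named JPEG
    markdown_ref = f"![](/file={local_path})"
    print(markdown_ref)  # -> ![](/file=3f2a9c.jpg)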