VictorSanh committed • Commit 58777cc • 1 Parent(s): 7036801

Update visualization

Browse files:
- app_dialogue.py +104 -36
- elon_musk.md +1 -1
app_dialogue.py
CHANGED
@@ -1,10 +1,12 @@
 import os
+
 import gradio as gr
 import requests
 
 
 models = [
     "HuggingFaceM4/tr_209_ift_mixture_opt_step-14000"
+    # "HuggingFaceM4/tr_210_ift_mixture_opt_step-2500",
 ]
 
 SYSTEM_PROMPT = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will do its best to answer User’s questions. Assistant was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. When prompted with an image, it does not make up facts.
@@ -28,21 +30,23 @@ Assistant: There is no dogs in this image. The picture shows a tennis player jum
 BAN_TOKENS = "<image>;<fake_token_around_image>"
 EOS_TOKENS = "</s>;User"
 
+import logging
+import re
+from io import BytesIO
+
+import torch
 from accelerate.utils import get_max_memory
+from PIL import Image
 from transformers import AutoTokenizer
+
 from m4.models.vllama.configuration_vllama import VLlamaConfig
 from m4.models.vllama.modeling_vllama import VLlamaForCausalLM
-
-import logging
-from PIL import Image
-from io import BytesIO
 from m4.training.packing import image_attention_mask_for_packed_input_ids, incremental_to_binary_attention_mask
 from m4.training.utils import build_image_transform
-
-import re
+
 
 TOKENIZER_FAST = True
-MAX_SEQ_LEN =
+MAX_SEQ_LEN = 2048
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger()
@@ -52,9 +56,10 @@ def load_tokenizer_model(model_name):
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
         use_fast=TOKENIZER_FAST,
-        use_auth_token=os.getenv("HF_AUTH_TOKEN", True),
+        use_auth_token=os.getenv("HF_AUTH_TOKEN", True),
+        truncation_side="left",
     )
-    tokenizer.padding_side = "left"
+    # tokenizer.padding_side = "left" -> we don't need that, do we?
 
     config = VLlamaConfig.from_pretrained(model_name, use_auth_token=os.getenv("HF_AUTH_TOKEN", True))
     max_memory_map = get_max_memory()
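Note on the new `truncation_side="left"`: in a chat prompt the newest turns sit at the end of the string, so truncating from the left drops the oldest context first instead of cutting off the latest user message. A minimal sketch of the behavior ("gpt2" is only an illustrative stand-in for the model above):

    from transformers import AutoTokenizer

    # With truncation_side="left", over-long inputs keep their END.
    tok = AutoTokenizer.from_pretrained("gpt2", truncation_side="left")
    ids = tok("turn1 turn2 turn3 turn4 turn5", truncation=True, max_length=3)["input_ids"]
    print(tok.decode(ids))  # prints the last few tokens, not the first ones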
@@ -82,7 +87,12 @@
 
 
 def fetch_images(url_images):
-    headers={
+    headers = {
+        "User-Agent": (
+            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0"
+            " Safari/537.36"
+        )
+    }
     images = []
     for url in url_images:
         if isinstance(url, str):
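The User-Agent header added above makes the image downloads look like a regular browser, which can avoid 403s from hosts (e.g. Wikimedia) that reject the default requests user agent. The hunk cuts off right after the loop head; a hedged sketch of what the rest of fetch_images presumably does, inferred from the requests/PIL/BytesIO imports in this file (the loop body below is an assumption, not part of the visible diff):

    import requests
    from io import BytesIO
    from PIL import Image

    headers = {"User-Agent": "Mozilla/5.0 ..."}  # the dict defined in the hunk above

    def fetch_images(url_images):
        images = []
        for url in url_images:
            if isinstance(url, str):
                # Assumed: download the bytes with the spoofed UA and decode with PIL.
                response = requests.get(url, stream=True, headers=headers)
                response.raise_for_status()
                images.append(Image.open(BytesIO(response.content)))
            else:
                # Assumed: already a PIL image (e.g. from an image widget).
                images.append(url)
        return images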
@@ -117,7 +127,7 @@ def model_generation(
     tokens = tokenizer(
         [prompt],
         truncation=True,
-        max_length=MAX_SEQ_LEN,
+        max_length=MAX_SEQ_LEN - 512,  # TODO: replace the 512 value with `max_new_tokens`
         padding=True,
         add_special_tokens=False,
     )
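The new `MAX_SEQ_LEN - 512` reserves headroom so that prompt plus generation always fits in the context window; the hard-coded 512 mirrors `max_new_tokens = 512` set in `model_inference` further down, hence the TODO to wire the two together. The arithmetic:

    MAX_SEQ_LEN = 2048    # defined at the top of this file
    max_new_tokens = 512  # defined in model_inference below

    prompt_budget = MAX_SEQ_LEN - 512        # 1536 tokens left for the (left-truncated) prompt
    assert prompt_budget + max_new_tokens <= MAX_SEQ_LEN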
@@ -240,9 +250,11 @@ def model_generation(
         f"----Tokens ids - prompt + generation: `{generated_tokens[0].tolist()}`\n"
         f"----Tokens converted - prompt + generation: `{tokens}`\n"
         f"----String decoded with skipped special tokens - prompt + generation: `{decoded_skip_special_tokens}`\n"
-        f"----
-        f"----
+        f"----Total length - prompt + generation `{len(generated_tokens[0].tolist())}`\n"
+        f"----Token ids - generation: `{actual_generated_tokens[0].tolist()}`\n"
+        f"----Tokens converted - generation: `{tokenizer.convert_ids_to_tokens(actual_generated_tokens[0])}`\n"
         f"----String decoded with skipped special tokens - generation: `{generated_text}`\n"
+        f"----Total length - generation: `{len(actual_generated_tokens[0].tolist())}`\n"
         f"----Generation mode: `{decoding_strategy}`\n"
         f"----Generation parameters: `{generation_args}`\n"
     )
@@ -252,13 +264,18 @@
 
 textbox = gr.Textbox(
     show_label=False,
-    value=
-
+    value=(
+        "<fake_token_around_image><image:https://m.media-amazon.com/images/M/MV5BMjE4MTcwMTM1Nl5BMl5BanBnXkFtZTcwMTIwMzMzMw@@._V1_.jpg><fake_token_around_image>Describe"
+        " all of the parts of this image."
+    ),
+    placeholder=(
+        "To input images, use the following syntax:"
+        " `<fake_token_around_image><image:URL_IMAGE><fake_token_around_image>textexttext`"
+    ),
     visible=True,
-    container=False
+    container=False,
 )
 with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
-    # with gr.Blocks(title="IDEFICS") as demo:
     # state = gr.State()
 
     with gr.Row():
@@ -269,15 +286,46 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
                 value=models[0] if len(models) > 0 else "",
                 interactive=True,
                 show_label=False,
-                container=False
+                container=False,
+            )
             tokenizer, model = load_tokenizer_model(model_selector.value)
 
-            imagebox = gr.Image(
+            imagebox = gr.Image(
+                type="pil",
+                label=(
+                    "Image input - This image box is not supported yet! To include images, do through the text by"
+                    " adding `<fake_token_around_image><image:IMAGE_URL><fake_token_around_image>`. The backend takes"
+                    " care of parsing that <image:URL> and download the correponding image. That way, you can"
+                    " technically interleave as many images and texts as you want. No need to add space before and"
+                    " after `<fake_token_around_image>`"
+                ),
+            )
 
             with gr.Accordion("Parameters", open=False, visible=True) as parameter_row:
-                temperature = gr.Slider(
-
-
+                temperature = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.2,
+                    step=0.1,
+                    interactive=True,
+                    label="Temperature",
+                )
+                top_p = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.7,
+                    step=0.1,
+                    interactive=True,
+                    label="Top P",
+                )
+                max_output_tokens = gr.Slider(
+                    minimum=0,
+                    maximum=1024,
+                    value=512,
+                    step=64,
+                    interactive=True,
+                    label="Max output tokens",
+                )
 
         with gr.Column(scale=6):
             chatbot = gr.Chatbot(
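The imagebox label above explains the mechanism: images are not taken from the widget but parsed out of the text via the `<fake_token_around_image><image:URL><fake_token_around_image>` syntax, downloaded, and interleaved with the text. The parsing helper itself is outside this diff; a hedged sketch of what it plausibly looks like, given the `import re` added at the top (names here are illustrative):

    import re

    IMAGE_PATTERN = re.compile(r"<image:([^>]+)>")

    def split_images_from_prompt(prompt):
        """Extract <image:URL> targets and replace them with bare <image> tokens."""
        urls = IMAGE_PATTERN.findall(prompt)
        text = IMAGE_PATTERN.sub("<image>", prompt)
        return text, urls

    text, urls = split_images_from_prompt(
        "<fake_token_around_image><image:https://example.com/cat.jpg><fake_token_around_image>Describe this."
    )
    # text -> "<fake_token_around_image><image><fake_token_around_image>Describe this."
    # urls -> ["https://example.com/cat.jpg"]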
@@ -287,15 +335,36 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
                 height=550,
                 value=[
                     [
-
-
+                        (
+                            "Where can we typically find animals like this"
+                            " one?<fake_token_around_image><image:https://upload.wikimedia.org/wikipedia/commons/thumb/d/db/Alpaca_%2831562329701%29.jpg/640px-Alpaca_%2831562329701%29.jpg><fake_token_around_image>"
+                        ),
+                        (
+                            "Animals like the one in the image, which is a llama, can typically be found in rural"
+                            " areas, such as farms or ranches. Llamas are often used as pack animals in mountainous"
+                            " regions, as they are well-adapted to the challenging terrain and can carry heavy loads."
+                            " They are also valued for their wool, which is used to make clothing and other textiles."
+                            " In addition, llamas are sometimes kept as pets or for their therapeutic benefits, as"
+                            " they are known to be calm and gentle animals."
+                        ),
                     ],
                     [
-
-
-
-
-
+                        (
+                            "How many of these animals can we fit into an engine like"
+                            " that<fake_token_around_image><image:https://upload.wikimedia.org/wikipedia/commons/thumb/4/4e/Nassau_County_Police_Bell_407.jpg/1200px-Nassau_County_Police_Bell_407.jpg><fake_token_around_image>?"
+                        ),
+                        (
+                            "The image shows a helicopter with a large engine, but it is not possible to determine the"
+                            " exact number of animals that can fit into it based on the image alone. The size and"
+                            " capacity of the helicopter's engine would depend on various factors, such as the size of"
+                            " the animals, the weight of the animals, and the size of the helicopter itself. However,"
+                            " it is safe to assume that the helicopter is designed to carry a limited number of"
+                            " animals, and it is not intended to be used as a means of transporting large groups of"
+                            " animals."
+                        ),
+                    ],
+                ],
+            )
             with gr.Row():
                 with gr.Column(scale=8):
                     textbox.render()
@@ -321,8 +390,8 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
         resulting_text = SYSTEM_PROMPT
         for turn in history:
             user_utterance, assistant_utterance = turn
-            resulting_text += f"\nUser:{user_utterance}</s>\nAssistant:{assistant_utterance}"
-        resulting_text += f"\nUser:{current_user_prompt}</s>\nAssistant:"
+            resulting_text += f"\nUser: {user_utterance}</s>\nAssistant: {assistant_utterance}</s>"
+        resulting_text += f"\nUser: {current_user_prompt}</s>\nAssistant:"
         return resulting_text
 
     def model_inference(
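The reformatted turn template now closes the assistant utterance with `</s>` too and puts a space after each role tag, so EOS cleanly separates every turn. For a one-turn history the builder yields (sketch reusing the exact f-strings from the hunk; the sample utterances are illustrative):

    SYSTEM_PROMPT = "..."  # the long system prompt defined at the top of the file

    history = [("Hi there", "Hello! How can I help?")]
    current_user_prompt = "What is a llama?"

    resulting_text = SYSTEM_PROMPT
    for user_utterance, assistant_utterance in history:
        resulting_text += f"\nUser: {user_utterance}</s>\nAssistant: {assistant_utterance}</s>"
    resulting_text += f"\nUser: {current_user_prompt}</s>\nAssistant:"
    # ...system prompt...
    # User: Hi there</s>
    # Assistant: Hello! How can I help?</s>
    # User: What is a llama?</s>
    # Assistant: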
@@ -333,15 +402,15 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
 
         temperature = 1.0
         no_repeat_ngram_size = 0
-        max_new_tokens =
+        max_new_tokens = 512
         min_length = 16
        force_words = ""
         repetition_penalty = 1.0
         hide_special_tokens = False
         decoding_strategy = "greedy"
         num_beams = 3
-        length_penalty = 1.
-        top_k = 50
+        length_penalty = 1.0
+        top_k = 50
         top_p = 0.95
         penalty_alpha = 0.95
 
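Two small things worth noting in this hunk: `length_penalty = 1.` and `1.0` are the same float, so that edit is purely cosmetic, while pinning `max_new_tokens = 512` is exactly the value the `MAX_SEQ_LEN - 512` prompt budget above assumes:

    assert 1. == 1.0                    # the length_penalty change is cosmetic
    assert (2048 - 512) + 512 == 2048   # prompt budget + max_new_tokens == MAX_SEQ_LEN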
@@ -378,10 +447,9 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
             penalty_alpha=penalty_alpha,
         )
 
-        chat_history.append((user_prompt, generated_text.strip()))
+        chat_history.append((user_prompt, generated_text.strip("</s>")))
         return "", chat_history
 
-
     textbox.submit(
         fn=model_inference,
         inputs=[textbox, chatbot],
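One caveat on the new `strip("</s>")`: str.strip takes a set of characters, not a suffix, so it also trims any legitimate trailing `s`, `<`, `/` or `>` from the reply. A suffix-safe alternative is sketched below (the helper name is illustrative, not part of this commit):

    "Llamas</s>".strip("</s>")  # -> "Llama": the trailing "s" of "Llamas" is eaten too

    def strip_eos(text, eos="</s>"):
        # Remove only the literal EOS suffix.
        return text[: -len(eos)] if text.endswith(eos) else text

    strip_eos("Llamas</s>")  # -> "Llamas"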
elon_musk.md
CHANGED
@@ -20,4 +20,4 @@ Bot: Yes, it is possible. If we continue to pollute the environment and destroy
 User: I think this is a lost cause. <fake_token_around_image> <image> <fake_token_around_image> Like Elon Musk, I think we should instead all move to another planet now.
 Bot: El ón Mus ́k is the CEO of SpaceX and Tesla Motors. Space X is developing a reusable rocket system that could be used to transport people and cargo to Mars. Mus k believes that colonizing Mars is essential for the survival of humanity, because it would provide a backup plan in case something goes wrong on earth. However, many people are skeptical about the feasibility of such a project. Some argue that it will take too long and cost too much money to establish a colony on Mars, while others say that the risks are too great. Nevertheless, Mus K remains optimistic about his vision for Mars colonization. "I think there is good reason to believe that we can build a self-sustaining city on mars," he said in an interview with Time magazine. “It would be quite an adventure.”
 ----
-The project is still in development. I'm currently working on improving the accuracy of image recognition and adding more features to it.</s>
+The project is still in development. I'm currently working on improving the accuracy of image recognition and adding more features to it.</s>