VictorSanh commited on
Commit
58777cc
1 Parent(s): 7036801

Update visualization

Browse files
Files changed (2) hide show
  1. app_dialogue.py +104 -36
  2. elon_musk.md +1 -1
app_dialogue.py CHANGED
@@ -1,10 +1,12 @@
1
  import os
 
2
  import gradio as gr
3
  import requests
4
 
5
 
6
  models = [
7
  "HuggingFaceM4/tr_209_ift_mixture_opt_step-14000"
 
8
  ]
9
 
10
  SYSTEM_PROMPT = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will do its best to answer User’s questions. Assistant was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. When prompted with an image, it does not make up facts.
@@ -28,21 +30,23 @@ Assistant: There is no dogs in this image. The picture shows a tennis player jum
28
  BAN_TOKENS = "<image>;<fake_token_around_image>"
29
  EOS_TOKENS = "</s>;User"
30
 
 
 
 
 
 
31
  from accelerate.utils import get_max_memory
 
32
  from transformers import AutoTokenizer
 
33
  from m4.models.vllama.configuration_vllama import VLlamaConfig
34
  from m4.models.vllama.modeling_vllama import VLlamaForCausalLM
35
-
36
- import logging
37
- from PIL import Image
38
- from io import BytesIO
39
  from m4.training.packing import image_attention_mask_for_packed_input_ids, incremental_to_binary_attention_mask
40
  from m4.training.utils import build_image_transform
41
- import torch
42
- import re
43
 
44
  TOKENIZER_FAST = True
45
- MAX_SEQ_LEN = 1024
46
 
47
  logging.basicConfig(level=logging.INFO)
48
  logger = logging.getLogger()
@@ -52,9 +56,10 @@ def load_tokenizer_model(model_name):
52
  tokenizer = AutoTokenizer.from_pretrained(
53
  model_name,
54
  use_fast=TOKENIZER_FAST,
55
- use_auth_token=os.getenv("HF_AUTH_TOKEN", True), # `use_fast=False` for 1B3 OPT, True for all the other models
 
56
  )
57
- tokenizer.padding_side = "left"
58
 
59
  config = VLlamaConfig.from_pretrained(model_name, use_auth_token=os.getenv("HF_AUTH_TOKEN", True))
60
  max_memory_map = get_max_memory()
@@ -82,7 +87,12 @@ def load_tokenizer_model(model_name):
82
 
83
 
84
  def fetch_images(url_images):
85
- headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"}
 
 
 
 
 
86
  images = []
87
  for url in url_images:
88
  if isinstance(url, str):
@@ -117,7 +127,7 @@ def model_generation(
117
  tokens = tokenizer(
118
  [prompt],
119
  truncation=True,
120
- max_length=MAX_SEQ_LEN,
121
  padding=True,
122
  add_special_tokens=False,
123
  )
@@ -240,9 +250,11 @@ def model_generation(
240
  f"----Tokens ids - prompt + generation: `{generated_tokens[0].tolist()}`\n"
241
  f"----Tokens converted - prompt + generation: `{tokens}`\n"
242
  f"----String decoded with skipped special tokens - prompt + generation: `{decoded_skip_special_tokens}`\n"
243
- f"----Token ids - generation: `{actual_generated_tokens[0].tolist()}`"
244
- f"----Tokens converted - generation: `{tokenizer.convert_ids_to_tokens(actual_generated_tokens[0])}`"
 
245
  f"----String decoded with skipped special tokens - generation: `{generated_text}`\n"
 
246
  f"----Generation mode: `{decoding_strategy}`\n"
247
  f"----Generation parameters: `{generation_args}`\n"
248
  )
@@ -252,13 +264,18 @@ def model_generation(
252
 
253
  textbox = gr.Textbox(
254
  show_label=False,
255
- value="<fake_token_around_image><image:https://m.media-amazon.com/images/M/MV5BMjE4MTcwMTM1Nl5BMl5BanBnXkFtZTcwMTIwMzMzMw@@._V1_.jpg><fake_token_around_image>Describe all of the parts of this image.",
256
- placeholder="To input images, use the following syntax: `<fake_token_around_image><image:URL_IMAGE><fake_token_around_image>textexttext`",
 
 
 
 
 
 
257
  visible=True,
258
- container=False
259
  )
260
  with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
261
- # with gr.Blocks(title="IDEFICS") as demo:
262
  # state = gr.State()
263
 
264
  with gr.Row():
@@ -269,15 +286,46 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
269
  value=models[0] if len(models) > 0 else "",
270
  interactive=True,
271
  show_label=False,
272
- container=False)
 
273
  tokenizer, model = load_tokenizer_model(model_selector.value)
274
 
275
- imagebox = gr.Image(type="pil", label="Image input - This image box is not supported yet! To include images, do so through the text by adding `<fake_token_around_image><image:IMAGE_URL><fake_token_around_image>`. The backend takes care of parsing that <image:URL> and downloads the corresponding image. That way, you can technically interleave as many images and texts as you want. No need to add space before and after `<fake_token_around_image>`")
 
 
 
 
 
 
 
 
 
276
 
277
  with gr.Accordion("Parameters", open=False, visible=True) as parameter_row:
278
- temperature = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature",)
279
- top_p = gr.Slider(minimum=0.0, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P",)
280
- max_output_tokens = gr.Slider(minimum=0, maximum=1024, value=512, step=64, interactive=True, label="Max output tokens",)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
281
 
282
  with gr.Column(scale=6):
283
  chatbot = gr.Chatbot(
@@ -287,15 +335,36 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
287
  height=550,
288
  value=[
289
  [
290
- "Where can we typically find animals like this one?<fake_token_around_image><image:https://upload.wikimedia.org/wikipedia/commons/thumb/d/db/Alpaca_%2831562329701%29.jpg/640px-Alpaca_%2831562329701%29.jpg><fake_token_around_image>",
291
- "Animals like the one in the image, which is a llama, can typically be found in rural areas, such as farms or ranches. Llamas are often used as pack animals in mountainous regions, as they are well-adapted to the challenging terrain and can carry heavy loads. They are also valued for their wool, which is used to make clothing and other textiles. In addition, llamas are sometimes kept as pets or for their therapeutic benefits, as they are known to be calm and gentle animals."
 
 
 
 
 
 
 
 
 
 
292
  ],
293
  [
294
- "How many of these animals can we fit into an engine like that<fake_token_around_image><image:https://upload.wikimedia.org/wikipedia/commons/thumb/4/4e/Nassau_County_Police_Bell_407.jpg/1200px-Nassau_County_Police_Bell_407.jpg><fake_token_around_image>?",
295
- "The image shows a helicopter with a large engine, but it is not possible to determine the exact number of animals that can fit into it based on the image alone. The size and capacity of the helicopter's engine would depend on various factors, such as the size of the animals, the weight of the animals, and the size of the helicopter itself. However, it is safe to assume that the helicopter is designed to carry a limited number of animals, and it is not intended to be used as a means of transporting large groups of animals."
296
- ]
297
- ]
298
- )
 
 
 
 
 
 
 
 
 
 
 
299
  with gr.Row():
300
  with gr.Column(scale=8):
301
  textbox.render()
@@ -321,8 +390,8 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
321
  resulting_text = SYSTEM_PROMPT
322
  for turn in history:
323
  user_utterance, assistant_utterance = turn
324
- resulting_text += f"\nUser:{user_utterance}</s>\nAssistant:{assistant_utterance}"
325
- resulting_text += f"\nUser:{current_user_prompt}</s>\nAssistant:"
326
  return resulting_text
327
 
328
  def model_inference(
@@ -333,15 +402,15 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
333
 
334
  temperature = 1.0
335
  no_repeat_ngram_size = 0
336
- max_new_tokens = 256
337
  min_length = 16
338
  force_words = ""
339
  repetition_penalty = 1.0
340
  hide_special_tokens = False
341
  decoding_strategy = "greedy"
342
  num_beams = 3
343
- length_penalty = 1.
344
- top_k = 50,
345
  top_p = 0.95
346
  penalty_alpha = 0.95
347
 
@@ -378,10 +447,9 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
378
  penalty_alpha=penalty_alpha,
379
  )
380
 
381
- chat_history.append((user_prompt, generated_text.strip()))
382
  return "", chat_history
383
 
384
-
385
  textbox.submit(
386
  fn=model_inference,
387
  inputs=[textbox, chatbot],
 
1
  import os
2
+
3
  import gradio as gr
4
  import requests
5
 
6
 
7
  models = [
8
  "HuggingFaceM4/tr_209_ift_mixture_opt_step-14000"
9
+ # "HuggingFaceM4/tr_210_ift_mixture_opt_step-2500",
10
  ]
11
 
12
  SYSTEM_PROMPT = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will do its best to answer User’s questions. Assistant was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. When prompted with an image, it does not make up facts.
 
30
  BAN_TOKENS = "<image>;<fake_token_around_image>"
31
  EOS_TOKENS = "</s>;User"
32
 
33
+ import logging
34
+ import re
35
+ from io import BytesIO
36
+
37
+ import torch
38
  from accelerate.utils import get_max_memory
39
+ from PIL import Image
40
  from transformers import AutoTokenizer
41
+
42
  from m4.models.vllama.configuration_vllama import VLlamaConfig
43
  from m4.models.vllama.modeling_vllama import VLlamaForCausalLM
 
 
 
 
44
  from m4.training.packing import image_attention_mask_for_packed_input_ids, incremental_to_binary_attention_mask
45
  from m4.training.utils import build_image_transform
46
+
 
47
 
48
  TOKENIZER_FAST = True
49
+ MAX_SEQ_LEN = 2048
50
 
51
  logging.basicConfig(level=logging.INFO)
52
  logger = logging.getLogger()
 
56
  tokenizer = AutoTokenizer.from_pretrained(
57
  model_name,
58
  use_fast=TOKENIZER_FAST,
59
+ use_auth_token=os.getenv("HF_AUTH_TOKEN", True),
60
+ truncation_side="left",
61
  )
62
+ # tokenizer.padding_side = "left" -> we don't need that, do we?
63
 
64
  config = VLlamaConfig.from_pretrained(model_name, use_auth_token=os.getenv("HF_AUTH_TOKEN", True))
65
  max_memory_map = get_max_memory()
 
87
 
88
 
89
  def fetch_images(url_images):
90
+ headers = {
91
+ "User-Agent": (
92
+ "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0"
93
+ " Safari/537.36"
94
+ )
95
+ }
96
  images = []
97
  for url in url_images:
98
  if isinstance(url, str):
 
127
  tokens = tokenizer(
128
  [prompt],
129
  truncation=True,
130
+ max_length=MAX_SEQ_LEN - 512, # TODO: replace the 512 value with `max_new_tokens`
131
  padding=True,
132
  add_special_tokens=False,
133
  )
 
250
  f"----Tokens ids - prompt + generation: `{generated_tokens[0].tolist()}`\n"
251
  f"----Tokens converted - prompt + generation: `{tokens}`\n"
252
  f"----String decoded with skipped special tokens - prompt + generation: `{decoded_skip_special_tokens}`\n"
253
+ f"----Total length - prompt + generation `{len(generated_tokens[0].tolist())}`\n"
254
+ f"----Token ids - generation: `{actual_generated_tokens[0].tolist()}`\n"
255
+ f"----Tokens converted - generation: `{tokenizer.convert_ids_to_tokens(actual_generated_tokens[0])}`\n"
256
  f"----String decoded with skipped special tokens - generation: `{generated_text}`\n"
257
+ f"----Total length - generation: `{len(actual_generated_tokens[0].tolist())}`\n"
258
  f"----Generation mode: `{decoding_strategy}`\n"
259
  f"----Generation parameters: `{generation_args}`\n"
260
  )
 
264
 
265
  textbox = gr.Textbox(
266
  show_label=False,
267
+ value=(
268
+ "<fake_token_around_image><image:https://m.media-amazon.com/images/M/MV5BMjE4MTcwMTM1Nl5BMl5BanBnXkFtZTcwMTIwMzMzMw@@._V1_.jpg><fake_token_around_image>Describe"
269
+ " all of the parts of this image."
270
+ ),
271
+ placeholder=(
272
+ "To input images, use the following syntax:"
273
+ " `<fake_token_around_image><image:URL_IMAGE><fake_token_around_image>textexttext`"
274
+ ),
275
  visible=True,
276
+ container=False,
277
  )
278
  with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
 
279
  # state = gr.State()
280
 
281
  with gr.Row():
 
286
  value=models[0] if len(models) > 0 else "",
287
  interactive=True,
288
  show_label=False,
289
+ container=False,
290
+ )
291
  tokenizer, model = load_tokenizer_model(model_selector.value)
292
 
293
+ imagebox = gr.Image(
294
+ type="pil",
295
+ label=(
296
+ "Image input - This image box is not supported yet! To include images, do so through the text by"
297
+ " adding `<fake_token_around_image><image:IMAGE_URL><fake_token_around_image>`. The backend takes"
298
+ " care of parsing that <image:URL> and downloads the corresponding image. That way, you can"
299
+ " technically interleave as many images and texts as you want. No need to add space before and"
300
+ " after `<fake_token_around_image>`"
301
+ ),
302
+ )
303
 
304
  with gr.Accordion("Parameters", open=False, visible=True) as parameter_row:
305
+ temperature = gr.Slider(
306
+ minimum=0.0,
307
+ maximum=1.0,
308
+ value=0.2,
309
+ step=0.1,
310
+ interactive=True,
311
+ label="Temperature",
312
+ )
313
+ top_p = gr.Slider(
314
+ minimum=0.0,
315
+ maximum=1.0,
316
+ value=0.7,
317
+ step=0.1,
318
+ interactive=True,
319
+ label="Top P",
320
+ )
321
+ max_output_tokens = gr.Slider(
322
+ minimum=0,
323
+ maximum=1024,
324
+ value=512,
325
+ step=64,
326
+ interactive=True,
327
+ label="Max output tokens",
328
+ )
329
 
330
  with gr.Column(scale=6):
331
  chatbot = gr.Chatbot(
 
335
  height=550,
336
  value=[
337
  [
338
+ (
339
+ "Where can we typically find animals like this"
340
+ " one?<fake_token_around_image><image:https://upload.wikimedia.org/wikipedia/commons/thumb/d/db/Alpaca_%2831562329701%29.jpg/640px-Alpaca_%2831562329701%29.jpg><fake_token_around_image>"
341
+ ),
342
+ (
343
+ "Animals like the one in the image, which is a llama, can typically be found in rural"
344
+ " areas, such as farms or ranches. Llamas are often used as pack animals in mountainous"
345
+ " regions, as they are well-adapted to the challenging terrain and can carry heavy loads."
346
+ " They are also valued for their wool, which is used to make clothing and other textiles."
347
+ " In addition, llamas are sometimes kept as pets or for their therapeutic benefits, as"
348
+ " they are known to be calm and gentle animals."
349
+ ),
350
  ],
351
  [
352
+ (
353
+ "How many of these animals can we fit into an engine like"
354
+ " that<fake_token_around_image><image:https://upload.wikimedia.org/wikipedia/commons/thumb/4/4e/Nassau_County_Police_Bell_407.jpg/1200px-Nassau_County_Police_Bell_407.jpg><fake_token_around_image>?"
355
+ ),
356
+ (
357
+ "The image shows a helicopter with a large engine, but it is not possible to determine the"
358
+ " exact number of animals that can fit into it based on the image alone. The size and"
359
+ " capacity of the helicopter's engine would depend on various factors, such as the size of"
360
+ " the animals, the weight of the animals, and the size of the helicopter itself. However,"
361
+ " it is safe to assume that the helicopter is designed to carry a limited number of"
362
+ " animals, and it is not intended to be used as a means of transporting large groups of"
363
+ " animals."
364
+ ),
365
+ ],
366
+ ],
367
+ )
368
  with gr.Row():
369
  with gr.Column(scale=8):
370
  textbox.render()
 
390
  resulting_text = SYSTEM_PROMPT
391
  for turn in history:
392
  user_utterance, assistant_utterance = turn
393
+ resulting_text += f"\nUser: {user_utterance}</s>\nAssistant: {assistant_utterance}</s>"
394
+ resulting_text += f"\nUser: {current_user_prompt}</s>\nAssistant:"
395
  return resulting_text
396
 
397
  def model_inference(
 
402
 
403
  temperature = 1.0
404
  no_repeat_ngram_size = 0
405
+ max_new_tokens = 512
406
  min_length = 16
407
  force_words = ""
408
  repetition_penalty = 1.0
409
  hide_special_tokens = False
410
  decoding_strategy = "greedy"
411
  num_beams = 3
412
+ length_penalty = 1.0
413
+ top_k = 50
414
  top_p = 0.95
415
  penalty_alpha = 0.95
416
 
 
447
  penalty_alpha=penalty_alpha,
448
  )
449
 
450
+ chat_history.append((user_prompt, generated_text.strip("</s>")))
451
  return "", chat_history
452
 
 
453
  textbox.submit(
454
  fn=model_inference,
455
  inputs=[textbox, chatbot],
elon_musk.md CHANGED
@@ -20,4 +20,4 @@ Bot: Yes, it is possible. If we continue to pollute the environment and destroy
20
  User: I think this is a lost cause. <fake_token_around_image> <image> <fake_token_around_image> Like Elon Musk, I think we should instead all move to another planet now.
21
  Bot: El ón Mus ́k is the CEO of SpaceX and Tesla Motors. Space X is developing a reusable rocket system that could be used to transport people and cargo to Mars. Mus k believes that colonizing Mars is essential for the survival of humanity, because it would provide a backup plan in case something goes wrong on earth. However, many people are skeptical about the feasibility of such a project. Some argue that it will take too long and cost too much money to establish a colony on Mars, while others say that the risks are too great. Nevertheless, Mus K remains optimistic about his vision for Mars colonization. "I think there is good reason to believe that we can build a self-sustaining city on mars," he said in an interview with Time magazine. “It would be quite an adventure.”
22
  ----
23
- The project is still in development. I'm currently working on improving the accuracy of image recognition and adding more features to it.</s>
 
20
  User: I think this is a lost cause. <fake_token_around_image> <image> <fake_token_around_image> Like Elon Musk, I think we should instead all move to another planet now.
21
  Bot: El ón Mus ́k is the CEO of SpaceX and Tesla Motors. Space X is developing a reusable rocket system that could be used to transport people and cargo to Mars. Mus k believes that colonizing Mars is essential for the survival of humanity, because it would provide a backup plan in case something goes wrong on earth. However, many people are skeptical about the feasibility of such a project. Some argue that it will take too long and cost too much money to establish a colony on Mars, while others say that the risks are too great. Nevertheless, Mus K remains optimistic about his vision for Mars colonization. "I think there is good reason to believe that we can build a self-sustaining city on mars," he said in an interview with Time magazine. “It would be quite an adventure.”
22
  ----
23
+ The project is still in development. I'm currently working on improving the accuracy of image recognition and adding more features to it.</s>