VictorSanh committed • Commit 58777cc • 1 Parent(s): 7036801

Update visualization

Browse files:
- app_dialogue.py +104 -36
- elon_musk.md +1 -1
app_dialogue.py
CHANGED
@@ -1,10 +1,12 @@
 import os
+
 import gradio as gr
 import requests
 
 
 models = [
     "HuggingFaceM4/tr_209_ift_mixture_opt_step-14000"
+    # "HuggingFaceM4/tr_210_ift_mixture_opt_step-2500",
 ]
 
 SYSTEM_PROMPT = """The following is a conversation between a highly knowledgeable and intelligent AI assistant, called Assistant, and a human user, called User. In the following interactions, User and Assistant will converse in natural language, and Assistant will do its best to answer User’s questions. Assistant was built to be respectful, polite and inclusive. It knows a lot, and always tells the truth. When prompted with an image, it does not make up facts.
@@ -28,21 +30,23 @@ Assistant: There is no dogs in this image. The picture shows a tennis player jum
 BAN_TOKENS = "<image>;<fake_token_around_image>"
 EOS_TOKENS = "</s>;User"
 
+import logging
+import re
+from io import BytesIO
+
+import torch
 from accelerate.utils import get_max_memory
+from PIL import Image
 from transformers import AutoTokenizer
+
 from m4.models.vllama.configuration_vllama import VLlamaConfig
 from m4.models.vllama.modeling_vllama import VLlamaForCausalLM
-
-import logging
-from PIL import Image
-from io import BytesIO
 from m4.training.packing import image_attention_mask_for_packed_input_ids, incremental_to_binary_attention_mask
 from m4.training.utils import build_image_transform
-
-import re
+
 
 TOKENIZER_FAST = True
-MAX_SEQ_LEN =
+MAX_SEQ_LEN = 2048
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger()
@@ -52,9 +56,10 @@ def load_tokenizer_model(model_name):
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
         use_fast=TOKENIZER_FAST,
-        use_auth_token=os.getenv("HF_AUTH_TOKEN", True),
+        use_auth_token=os.getenv("HF_AUTH_TOKEN", True),
+        truncation_side="left",
     )
-    tokenizer.padding_side = "left"
+    # tokenizer.padding_side = "left" -> we don't need that, do we?
 
     config = VLlamaConfig.from_pretrained(model_name, use_auth_token=os.getenv("HF_AUTH_TOKEN", True))
     max_memory_map = get_max_memory()
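Note on the new `truncation_side="left"`: in a chat prompt the newest turns sit at the end of the string, so truncating from the left drops the oldest context first instead of cutting off the latest user message. A minimal sketch of the behavior ("gpt2" is only an illustrative stand-in for the model above):

    from transformers import AutoTokenizer

    # With truncation_side="left", over-long inputs keep their END.
    tok = AutoTokenizer.from_pretrained("gpt2", truncation_side="left")
    ids = tok("turn1 turn2 turn3 turn4 turn5", truncation=True, max_length=3)["input_ids"]
    print(tok.decode(ids))  # prints the last few tokens, not the first ones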
@@ -82,7 +87,12 @@
 
 
 def fetch_images(url_images):
-    headers={
+    headers = {
+        "User-Agent": (
+            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0"
+            " Safari/537.36"
+        )
+    }
     images = []
     for url in url_images:
         if isinstance(url, str):
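The User-Agent header added above makes the image downloads look like a regular browser, which can avoid 403s from hosts (e.g. Wikimedia) that reject the default requests user agent. The hunk cuts off right after the loop head; a hedged sketch of what the rest of fetch_images presumably does, inferred from the requests/PIL/BytesIO imports in this file (the loop body below is an assumption, not part of the visible diff):

    import requests
    from io import BytesIO
    from PIL import Image

    headers = {"User-Agent": "Mozilla/5.0 ..."}  # the dict defined in the hunk above

    def fetch_images(url_images):
        images = []
        for url in url_images:
            if isinstance(url, str):
                # Assumed: download the bytes with the spoofed UA and decode with PIL.
                response = requests.get(url, stream=True, headers=headers)
                response.raise_for_status()
                images.append(Image.open(BytesIO(response.content)))
            else:
                # Assumed: already a PIL image (e.g. from an image widget).
                images.append(url)
        return images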
@@ -117,7 +127,7 @@ def model_generation(
     tokens = tokenizer(
         [prompt],
         truncation=True,
-        max_length=MAX_SEQ_LEN,
+        max_length=MAX_SEQ_LEN - 512,  # TODO: replace the 512 value with `max_new_tokens`
         padding=True,
         add_special_tokens=False,
     )
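The new `MAX_SEQ_LEN - 512` reserves headroom so that prompt plus generation always fits in the context window; the hard-coded 512 mirrors `max_new_tokens = 512` set in `model_inference` further down, hence the TODO to wire the two together. The arithmetic:

    MAX_SEQ_LEN = 2048    # defined at the top of this file
    max_new_tokens = 512  # defined in model_inference below

    prompt_budget = MAX_SEQ_LEN - 512        # 1536 tokens left for the (left-truncated) prompt
    assert prompt_budget + max_new_tokens <= MAX_SEQ_LEN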
@@ -240,9 +250,11 @@ def model_generation(
         f"----Tokens ids - prompt + generation: `{generated_tokens[0].tolist()}`\n"
         f"----Tokens converted - prompt + generation: `{tokens}`\n"
         f"----String decoded with skipped special tokens - prompt + generation: `{decoded_skip_special_tokens}`\n"
-        f"----
-        f"----
+        f"----Total length - prompt + generation `{len(generated_tokens[0].tolist())}`\n"
+        f"----Token ids - generation: `{actual_generated_tokens[0].tolist()}`\n"
+        f"----Tokens converted - generation: `{tokenizer.convert_ids_to_tokens(actual_generated_tokens[0])}`\n"
         f"----String decoded with skipped special tokens - generation: `{generated_text}`\n"
+        f"----Total length - generation: `{len(actual_generated_tokens[0].tolist())}`\n"
         f"----Generation mode: `{decoding_strategy}`\n"
         f"----Generation parameters: `{generation_args}`\n"
     )
@@ -252,13 +264,18 @@
 
 textbox = gr.Textbox(
     show_label=False,
-    value=
-
+    value=(
+        "<fake_token_around_image><image:https://m.media-amazon.com/images/M/MV5BMjE4MTcwMTM1Nl5BMl5BanBnXkFtZTcwMTIwMzMzMw@@._V1_.jpg><fake_token_around_image>Describe"
+        " all of the parts of this image."
+    ),
+    placeholder=(
+        "To input images, use the following syntax:"
+        " `<fake_token_around_image><image:URL_IMAGE><fake_token_around_image>textexttext`"
+    ),
     visible=True,
-    container=False
+    container=False,
 )
 with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
-    # with gr.Blocks(title="IDEFICS") as demo:
     # state = gr.State()
 
     with gr.Row():
@@ -269,15 +286,46 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
                 value=models[0] if len(models) > 0 else "",
                 interactive=True,
                 show_label=False,
-                container=False
+                container=False,
+            )
             tokenizer, model = load_tokenizer_model(model_selector.value)
 
-            imagebox = gr.Image(
+            imagebox = gr.Image(
+                type="pil",
+                label=(
+                    "Image input - This image box is not supported yet! To include images, do through the text by"
+                    " adding `<fake_token_around_image><image:IMAGE_URL><fake_token_around_image>`. The backend takes"
+                    " care of parsing that <image:URL> and download the correponding image. That way, you can"
+                    " technically interleave as many images and texts as you want. No need to add space before and"
+                    " after `<fake_token_around_image>`"
+                ),
+            )
 
             with gr.Accordion("Parameters", open=False, visible=True) as parameter_row:
-                temperature = gr.Slider(
-
-
+                temperature = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.2,
+                    step=0.1,
+                    interactive=True,
+                    label="Temperature",
+                )
+                top_p = gr.Slider(
+                    minimum=0.0,
+                    maximum=1.0,
+                    value=0.7,
+                    step=0.1,
+                    interactive=True,
+                    label="Top P",
+                )
+                max_output_tokens = gr.Slider(
+                    minimum=0,
+                    maximum=1024,
+                    value=512,
+                    step=64,
+                    interactive=True,
+                    label="Max output tokens",
+                )
 
         with gr.Column(scale=6):
             chatbot = gr.Chatbot(
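The imagebox label above explains the mechanism: images are not taken from the widget but parsed out of the text via the `<fake_token_around_image><image:URL><fake_token_around_image>` syntax, downloaded, and interleaved with the text. The parsing helper itself is outside this diff; a hedged sketch of what it plausibly looks like, given the `import re` added at the top (names here are illustrative):

    import re

    IMAGE_PATTERN = re.compile(r"<image:([^>]+)>")

    def split_images_from_prompt(prompt):
        """Extract <image:URL> targets and replace them with bare <image> tokens."""
        urls = IMAGE_PATTERN.findall(prompt)
        text = IMAGE_PATTERN.sub("<image>", prompt)
        return text, urls

    text, urls = split_images_from_prompt(
        "<fake_token_around_image><image:https://example.com/cat.jpg><fake_token_around_image>Describe this."
    )
    # text -> "<fake_token_around_image><image><fake_token_around_image>Describe this."
    # urls -> ["https://example.com/cat.jpg"]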
@@ -287,15 +335,36 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
                 height=550,
                 value=[
                     [
-
-
+                        (
+                            "Where can we typically find animals like this"
+                            " one?<fake_token_around_image><image:https://upload.wikimedia.org/wikipedia/commons/thumb/d/db/Alpaca_%2831562329701%29.jpg/640px-Alpaca_%2831562329701%29.jpg><fake_token_around_image>"
+                        ),
+                        (
+                            "Animals like the one in the image, which is a llama, can typically be found in rural"
+                            " areas, such as farms or ranches. Llamas are often used as pack animals in mountainous"
+                            " regions, as they are well-adapted to the challenging terrain and can carry heavy loads."
+                            " They are also valued for their wool, which is used to make clothing and other textiles."
+                            " In addition, llamas are sometimes kept as pets or for their therapeutic benefits, as"
+                            " they are known to be calm and gentle animals."
+                        ),
                     ],
                     [
-
-
-
-
-
+                        (
+                            "How many of these animals can we fit into an engine like"
+                            " that<fake_token_around_image><image:https://upload.wikimedia.org/wikipedia/commons/thumb/4/4e/Nassau_County_Police_Bell_407.jpg/1200px-Nassau_County_Police_Bell_407.jpg><fake_token_around_image>?"
+                        ),
+                        (
+                            "The image shows a helicopter with a large engine, but it is not possible to determine the"
+                            " exact number of animals that can fit into it based on the image alone. The size and"
+                            " capacity of the helicopter's engine would depend on various factors, such as the size of"
+                            " the animals, the weight of the animals, and the size of the helicopter itself. However,"
+                            " it is safe to assume that the helicopter is designed to carry a limited number of"
+                            " animals, and it is not intended to be used as a means of transporting large groups of"
+                            " animals."
+                        ),
+                    ],
+                ],
+            )
             with gr.Row():
                 with gr.Column(scale=8):
                     textbox.render()
@@ -321,8 +390,8 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
         resulting_text = SYSTEM_PROMPT
         for turn in history:
             user_utterance, assistant_utterance = turn
-            resulting_text += f"\nUser:{user_utterance}</s>\nAssistant:{assistant_utterance}"
-        resulting_text += f"\nUser:{current_user_prompt}</s>\nAssistant:"
+            resulting_text += f"\nUser: {user_utterance}</s>\nAssistant: {assistant_utterance}</s>"
+        resulting_text += f"\nUser: {current_user_prompt}</s>\nAssistant:"
         return resulting_text
 
     def model_inference(
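The reformatted turn template now closes the assistant utterance with `</s>` too and puts a space after each role tag, so EOS cleanly separates every turn. For a one-turn history the builder yields (sketch reusing the exact f-strings from the hunk; the sample utterances are illustrative):

    SYSTEM_PROMPT = "..."  # the long system prompt defined at the top of the file

    history = [("Hi there", "Hello! How can I help?")]
    current_user_prompt = "What is a llama?"

    resulting_text = SYSTEM_PROMPT
    for user_utterance, assistant_utterance in history:
        resulting_text += f"\nUser: {user_utterance}</s>\nAssistant: {assistant_utterance}</s>"
    resulting_text += f"\nUser: {current_user_prompt}</s>\nAssistant:"
    # ...system prompt...
    # User: Hi there</s>
    # Assistant: Hello! How can I help?</s>
    # User: What is a llama?</s>
    # Assistant: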
@@ -333,15 +402,15 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
 
         temperature = 1.0
         no_repeat_ngram_size = 0
-        max_new_tokens =
+        max_new_tokens = 512
         min_length = 16
        force_words = ""
         repetition_penalty = 1.0
         hide_special_tokens = False
         decoding_strategy = "greedy"
         num_beams = 3
-        length_penalty = 1.
-        top_k = 50
+        length_penalty = 1.0
+        top_k = 50
         top_p = 0.95
         penalty_alpha = 0.95
 
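Two small things worth noting in this hunk: `length_penalty = 1.` and `1.0` are the same float, so that edit is purely cosmetic, while pinning `max_new_tokens = 512` is exactly the value the `MAX_SEQ_LEN - 512` prompt budget above assumes:

    assert 1. == 1.0                    # the length_penalty change is cosmetic
    assert (2048 - 512) + 512 == 2048   # prompt budget + max_new_tokens == MAX_SEQ_LEN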
@@ -378,10 +447,9 @@ with gr.Blocks(title="IDEFICS", theme=gr.themes.Base()) as demo:
             penalty_alpha=penalty_alpha,
         )
 
-        chat_history.append((user_prompt, generated_text.strip()))
+        chat_history.append((user_prompt, generated_text.strip("</s>")))
         return "", chat_history
 
-
     textbox.submit(
         fn=model_inference,
         inputs=[textbox, chatbot],
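One caveat on the new `strip("</s>")`: str.strip takes a set of characters, not a suffix, so it also trims any legitimate trailing `s`, `<`, `/` or `>` from the reply. A suffix-safe alternative is sketched below (the helper name is illustrative, not part of this commit):

    "Llamas</s>".strip("</s>")  # -> "Llama": the trailing "s" of "Llamas" is eaten too

    def strip_eos(text, eos="</s>"):
        # Remove only the literal EOS suffix.
        return text[: -len(eos)] if text.endswith(eos) else text

    strip_eos("Llamas</s>")  # -> "Llamas"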
elon_musk.md
CHANGED
@@ -20,4 +20,4 @@ Bot: Yes, it is possible. If we continue to pollute the environment and destroy
 User: I think this is a lost cause. <fake_token_around_image> <image> <fake_token_around_image> Like Elon Musk, I think we should instead all move to another planet now.
 Bot: El ón Mus ́k is the CEO of SpaceX and Tesla Motors. Space X is developing a reusable rocket system that could be used to transport people and cargo to Mars. Mus k believes that colonizing Mars is essential for the survival of humanity, because it would provide a backup plan in case something goes wrong on earth. However, many people are skeptical about the feasibility of such a project. Some argue that it will take too long and cost too much money to establish a colony on Mars, while others say that the risks are too great. Nevertheless, Mus K remains optimistic about his vision for Mars colonization. "I think there is good reason to believe that we can build a self-sustaining city on mars," he said in an interview with Time magazine. “It would be quite an adventure.”
 ----
-The project is still in development. I'm currently working on improving the accuracy of image recognition and adding more features to it.</s>
+The project is still in development. I'm currently working on improving the accuracy of image recognition and adding more features to it.</s>