ysharma HF staff commited on
Commit
5225464
1 Parent(s): b479823

Update app_dialogue.py

Browse files
Files changed (1) hide show
  1. app_dialogue.py +44 -2
app_dialogue.py CHANGED
@@ -115,7 +115,26 @@ def convert_to_rgb(filepath_or_pilimg):
115
 
116
  return temp_file_path # Return the path to the saved image
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
 
119
  def base64_to_pil(encoded_image):
120
  decoded_image = base64.b64decode(encoded_image)
121
  pil_image = Image.open(BytesIO(decoded_image))
@@ -322,37 +341,60 @@ def format_user_prompt_with_im_history_and_system_conditioning(
322
  Produces the resulting list that needs to go inside the processor.
323
  It handles the potential image box input, the history and the system conditionning.
324
  """
 
 
 
 
 
325
  resulting_list = copy.deepcopy(SYSTEM_PROMPT)
326
 
327
  # Format history
328
  for turn in history:
 
329
  user_utterance, assistant_utterance = turn
 
330
  splitted_user_utterance = split_str_on_im_markdown(user_utterance)
 
331
  splitted_user_utterance = [
332
  im_markdown_to_pil(s) if s.startswith('<img src="data:image/png;base64,') else s
333
  for s in splitted_user_utterance
334
  if s != ""
335
  ]
 
 
336
  if isinstance(splitted_user_utterance[0], str):
337
  resulting_list.append("\nUser: ")
338
  else:
339
  resulting_list.append("\nUser:")
 
340
  resulting_list.extend(splitted_user_utterance)
 
341
  resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
 
 
342
 
343
  # Format current input
344
  current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
 
 
345
  if current_image is None:
 
346
  if "<img src=data:image/png;base64" in current_user_prompt_str:
347
  raise ValueError("The UI does not support inputing via the text box an image in base64.")
348
  current_user_prompt_list = handle_manual_images_in_user_prompt(current_user_prompt_str)
 
349
  resulting_list.append("\nUser: ")
 
350
  resulting_list.extend(current_user_prompt_list)
 
351
  resulting_list.append("<end_of_utterance>\nAssistant:")
 
352
  return resulting_list, current_user_prompt_list
353
  else:
 
354
  # Choosing to put the image first when the image is inputted through the UI, but this is an arbiratrary choice.
355
- resulting_list.extend(["\nUser:", current_image, f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"])
 
356
  return resulting_list, [current_user_prompt_str]
357
 
358
 
@@ -535,7 +577,7 @@ with gr.Blocks(title="IDEFICS-Chat", theme=gr.themes.Base()) as demo:
535
  )
536
  processor, tokenizer, model = load_processor_tokenizer_model(model_selector.value)
537
 
538
- imagebox = gr.Image(type="pil", label="Image input")
539
 
540
  with gr.Accordion("Advanced parameters", open=False, visible=True) as parameter_row:
541
  max_new_tokens = gr.Slider(
 
115
 
116
  return temp_file_path # Return the path to the saved image
117
 
118
def pil_to_markdown_im(image):
    """
    Convert a PIL image into a markdown image reference.

    The image is written to a uniquely named local JPEG file and the
    returned markdown string embeds it via Gradio's ``/file=`` URL
    convention, e.g. ``![](/file=<uuid>.jpg)``.

    Parameters
    ----------
    image : PIL.Image.Image
        Image to persist and reference. Only its ``save(path)`` method
        is used.

    Returns
    -------
    str
        Markdown snippet pointing at the saved file.
    """
    # UUID filename so concurrent calls never clobber each other's files.
    filename = f"{uuid.uuid4()}.jpg"
    # NOTE(review): the original assigned a placeholder path here and never
    # used `filename`; saving under the generated name is the evident intent —
    # confirm whether a specific directory prefix was intended.
    local_path = filename
    image.save(local_path)
    return f"![](/file={local_path})"
136
 
137
+
138
  def base64_to_pil(encoded_image):
139
  decoded_image = base64.b64decode(encoded_image)
140
  pil_image = Image.open(BytesIO(decoded_image))
 
341
  Produces the resulting list that needs to go inside the processor.
342
  It handles the potential image box input, the history and the system conditionning.
343
  """
344
+ print(f"*********format_user_prompt_with_im_history_and_system_conditioning*********")
345
+ print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_user_prompt_str is - {current_user_prompt_str} ")
346
+ print(f"format_user_prompt_with_im_history_and_system_conditioning -- param current_image is - {current_image} ")
347
+ print(f"format_user_prompt_with_im_history_and_system_conditioning -- param history is - {history} ")
348
+
349
  resulting_list = copy.deepcopy(SYSTEM_PROMPT)
350
 
351
  # Format history
352
  for turn in history:
353
+ print(f"inside for loop, turn is - {turn}")
354
  user_utterance, assistant_utterance = turn
355
+ print("calling split_str_on_im_markdown from inside for loop inside format_user_prompt_with_im_history_and_system_conditioning")
356
  splitted_user_utterance = split_str_on_im_markdown(user_utterance)
357
+ print(f"splitted_user_utterance from split_str_on_im_markdown is - {splitted_user_utterance} ")
358
  splitted_user_utterance = [
359
  im_markdown_to_pil(s) if s.startswith('<img src="data:image/png;base64,') else s
360
  for s in splitted_user_utterance
361
  if s != ""
362
  ]
363
+ print(f"splitted_user_utterance after im_markdown_to_pil() is - {splitted_user_utterance} ")
364
+
365
  if isinstance(splitted_user_utterance[0], str):
366
  resulting_list.append("\nUser: ")
367
  else:
368
  resulting_list.append("\nUser:")
369
+ print(f"resulting_list after if..else block is - {resulting_list}")
370
  resulting_list.extend(splitted_user_utterance)
371
+ print(f"resulting_list after extend is - {resulting_list}")
372
  resulting_list.append(f"<end_of_utterance>\nAssistant: {assistant_utterance}")
373
+ print(f"resulting_list after append is - {resulting_list}")
374
+
375
 
376
  # Format current input
377
  current_user_prompt_str = remove_spaces_around_token(current_user_prompt_str)
378
+ print(f"current_user_prompt_str is - {current_user_prompt_str}")
379
+
380
  if current_image is None:
381
+ print("inside IF : current_image is NONE")
382
  if "<img src=data:image/png;base64" in current_user_prompt_str:
383
  raise ValueError("The UI does not support inputing via the text box an image in base64.")
384
  current_user_prompt_list = handle_manual_images_in_user_prompt(current_user_prompt_str)
385
+ print(f"current_user_prompt_list (or [user_prompt]/resulting_user_prompt((most likely this one)) from handle_manual_images_in_user_prompt ) is - {current_user_prompt_list}")
386
  resulting_list.append("\nUser: ")
387
+ print(f"resulting_list with append user - {resulting_list}")
388
  resulting_list.extend(current_user_prompt_list)
389
+ print(f"resulting_list after extend with current_user_prompt_list is - {resulting_list}")
390
  resulting_list.append("<end_of_utterance>\nAssistant:")
391
+ print(f"resulting_list after append with end_of_utteranceAssistant is - {resulting_list}")
392
  return resulting_list, current_user_prompt_list
393
  else:
394
+ print("inside ELSE : current_image is not NONE")
395
  # Choosing to put the image first when the image is inputted through the UI, but this is an arbiratrary choice.
396
+ resulting_list.extend(["\nUser:", Image.open(current_image), f"{current_user_prompt_str}<end_of_utterance>\nAssistant:"]) #current_image
397
+ print(f"final resulting_list passed on to calling function is - {resulting_list}")
398
  return resulting_list, [current_user_prompt_str]
399
 
400
 
 
577
  )
578
  processor, tokenizer, model = load_processor_tokenizer_model(model_selector.value)
579
 
580
+ imagebox = gr.Image(type="filepath", label="Image input")
581
 
582
  with gr.Accordion("Advanced parameters", open=False, visible=True) as parameter_row:
583
  max_new_tokens = gr.Slider(