AdrienB134 committed · Commit 79fd59c · 1 Parent(s): cc33a9b
feazrgf
app.py CHANGED
@@ -13,7 +13,83 @@ from pdf2image import convert_from_path
 from PIL import Image
 from torch.utils.data import DataLoader
 from tqdm import tqdm
-from transformers import AutoProcessor
+from transformers import AutoProcessor, Idefics3ForConditionalGeneration
+import re
+import time
+from PIL import Image
+import torch
+import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+
+## Load idefics
+id_processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
+
+id_model = Idefics3ForConditionalGeneration.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3",
+                                                            torch_dtype=torch.bfloat16,
+                                                            #_attn_implementation="flash_attention_2"
+                                                            ).to("cuda")
+
+BAD_WORDS_IDS = processor.tokenizer(["<image>", "<fake_token_around_image>"], add_special_tokens=False).input_ids
+EOS_WORDS_IDS = [processor.tokenizer.eos_token_id]
+
+@spaces.GPU
+def model_inference(
+    images, text, assistant_prefix= None, decoding_strategy = "Greedy", temperature= 0.4, max_new_tokens=512,
+    repetition_penalty=1.2, top_p=0.8
+):
+    if text == "" and not images:
+        gr.Error("Please input a query and optionally image(s).")
+
+    if text == "" and images:
+        gr.Error("Please input a text query along the image(s).")
+
+    if isinstance(images, Image.Image):
+        images = [images]
+
+
+    resulting_messages = [
+        {
+            "role": "user",
+            "content": [{"type": "image"}] + [
+                {"type": "text", "text": text}
+            ]
+        }
+    ]
+
+    if assistant_prefix:
+        text = f"{assistant_prefix} {text}"
+
+
+    prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
+    inputs = processor(text=prompt, images=[images], return_tensors="pt")
+    inputs = {k: v.to("cuda") for k, v in inputs.items()}
+
+    generation_args = {
+        "max_new_tokens": max_new_tokens,
+        "repetition_penalty": repetition_penalty,
+
+    }
+
+    assert decoding_strategy in [
+        "Greedy",
+        "Top P Sampling",
+    ]
+    if decoding_strategy == "Greedy":
+        generation_args["do_sample"] = False
+    elif decoding_strategy == "Top P Sampling":
+        generation_args["temperature"] = temperature
+        generation_args["do_sample"] = True
+        generation_args["top_p"] = top_p
+
+
+    generation_args.update(inputs)
+
+    # Generate
+    generated_ids = model.generate(**generation_args)
+
+    generated_texts = processor.batch_decode(generated_ids[:, generation_args["input_ids"].size(1):], skip_special_tokens=True)
+    return generated_texts[0]
 
 # Load model
 model_name = "vidore/colpali-v1.2"
@@ -96,7 +172,7 @@ def index_gpu(images, ds):
             embeddings_doc = model(**batch_doc)
         ds.extend(list(torch.unbind(embeddings_doc.to("cpu"))))
     return f"Uploaded and converted {len(images)} pages", ds, images
-
+
 @spaces.GPU
 def answer_gpu():
     return 0
@@ -116,6 +192,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             message = gr.Textbox("Files not yet uploaded", label="Status")
             embeds = gr.State(value=[])
             imgs = gr.State(value=[])
+            img_chunk = gr.State(value=[])
 
         with gr.Column(scale=3):
            gr.Markdown("## 2️⃣ Search")
@@ -133,10 +210,11 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     output_gallery = gr.Gallery(label="Retrieved Documents", height=600, show_label=True)
 
     convert_button.click(index, inputs=[file, embeds], outputs=[message, embeds, imgs])
-    search_button.click(search, inputs=[query, embeds, imgs, k], outputs=[output_gallery])
+    search_button.click(search, inputs=[query, embeds, imgs, k], outputs=[output_gallery, img_chunk])
 
     answer_button = gr.Button("Answer", variant="primary")
-
+    output = gr.Textbox(label="Output")
+    answer_button.click(model_inference, inputs=[img_chunk, query], outputs=output)
 
 if __name__ == "__main__":
     demo.queue(max_size=10).launch(debug=True)
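Two observations on the new code, for anyone debugging this Space. First, BAD_WORDS_IDS, EOS_WORDS_IDS, and the body of model_inference call processor and model, which in app.py are the ColPali retriever objects loaded only after the "# Load model" comment further down, while the Idefics3 objects added in this commit are named id_processor and id_model; as written, the module-level references would likely raise a NameError at startup, and generation would appear to go through the retriever rather than the Idefics3 model just loaded. Second, the new answer_button.click(model_inference, inputs=[img_chunk, query], outputs=output) wiring passes img_chunk as images and query as text, so search is expected to return the retrieved page images as its second output. Below is a minimal sketch of the generation path routed through the Idefics3 objects; it reuses the checkpoint and variable names from this commit, but the helper answer_with_idefics and its defaults are illustrative assumptions, not part of the diff.

import torch
from PIL import Image
from transformers import AutoProcessor, Idefics3ForConditionalGeneration

# Same checkpoint as the commit; id_processor / id_model match the names defined in app.py.
id_processor = AutoProcessor.from_pretrained("HuggingFaceM4/Idefics3-8B-Llama3")
id_model = Idefics3ForConditionalGeneration.from_pretrained(
    "HuggingFaceM4/Idefics3-8B-Llama3", torch_dtype=torch.bfloat16
).to("cuda")

def answer_with_idefics(images, text, max_new_tokens=512, repetition_penalty=1.2):
    """Hypothetical helper: answer `text` about the retrieved page images with Idefics3."""
    if isinstance(images, Image.Image):
        images = [images]
    # One <image> placeholder per retrieved page, followed by the text query.
    messages = [{
        "role": "user",
        "content": [{"type": "image"}] * len(images) + [{"type": "text", "text": text}],
    }]
    prompt = id_processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = id_processor(text=prompt, images=[images], return_tensors="pt")
    inputs = {k: v.to("cuda") for k, v in inputs.items()}
    generated_ids = id_model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        repetition_penalty=repetition_penalty,
        do_sample=False,  # greedy decoding, matching the commit's default strategy
    )
    # Strip the prompt tokens before decoding, as the committed code does.
    return id_processor.batch_decode(
        generated_ids[:, inputs["input_ids"].size(1):], skip_special_tokens=True
    )[0]

Wired as answer_button.click(answer_with_idefics, inputs=[img_chunk, query], outputs=output), this would match the signature the commit already passes to the Answer button.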