Spaces:

pranshh
/

ocr-assignment

Running

App Files Community

pranshh committed on Sep 30, 2024

Commit

bf86837

verified ·

1 Parent(s): d21030f

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -24

app.py CHANGED Viewed

@@ -4,9 +4,11 @@ import gradio as gr
 from PIL import Image
 from byaldi import RAGMultiModalModel
 from qwen_vl_utils import process_vision_info
 # Load ColPali model
-RAG = RAGMultiModalModel.from_pretrained("vidore/colpali")
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
@@ -16,30 +18,33 @@ def load_model():
 vlm = load_model()
 def ocr_image(image, keyword=""):
-    # Convert PIL Image to file-like object
-    import io
-    img_byte_arr = io.BytesIO()
-    image.save(img_byte_arr, format='PNG')
-    img_byte_arr = img_byte_arr.getvalue()
-    # Index the image
-    RAG.index(input_data=img_byte_arr, index_name="temp_index", overwrite=True)
-    # Retrieve text from the image
-    results = RAG.search("Extract all text from this image", k=1)
-    # Extract text from results
-    output_text = results[0].get('text', '')
-    if keyword:
-        keyword_lower = keyword.lower()
-        if keyword_lower in output_text.lower():
-            highlighted_text = output_text.replace(keyword, f"**{keyword}**")
-            return f"Keyword '{keyword}' found in the text:\n\n{highlighted_text}"
         else:
-            return f"Keyword '{keyword}' not found in the text:\n\n{output_text}"
-    else:
-        return output_text
 def process_image(image, keyword=""):
     max_size = 1024

 from PIL import Image
 from byaldi import RAGMultiModalModel
 from qwen_vl_utils import process_vision_info
+import os
+import tempfile
 # Load ColPali model
+RAG = RAGMultiModalModel.from_pretrained("vidore/colpali", device_map="cpu", torch_dtype=torch.float32)
 processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
 vlm = load_model()
 def ocr_image(image, keyword=""):
+    # Save the image to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp_file:
+        image.save(temp_file, format='PNG')
+        temp_file_path = temp_file.name
+    try:
+        # Index the image
+        RAG.index(input_path=temp_file_path, index_name="temp_index", overwrite=True)
+        # Retrieve text from the image
+        results = RAG.search("Extract all text from this image", k=1)
+        # Extract text from results
+        output_text = results[0].get('text', '')
+        if keyword:
+            keyword_lower = keyword.lower()
+            if keyword_lower in output_text.lower():
+                highlighted_text = output_text.replace(keyword, f"**{keyword}**")
+                return f"Keyword '{keyword}' found in the text:\n\n{highlighted_text}"
+            else:
+                return f"Keyword '{keyword}' not found in the text:\n\n{output_text}"
         else:
+            return output_text
+    finally:
+        # Clean up the temporary file
+        os.unlink(temp_file_path)
 def process_image(image, keyword=""):
     max_size = 1024