sprakhil committed
Commit 1512254 · 1 Parent(s): 96c0816

adding pipeline

Files changed (1)
app.py  +6 −10
app.py CHANGED
@@ -1,7 +1,7 @@
 import streamlit as st
 from PIL import Image
 import torch
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoModelForImageToText
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, pipeline
 from colpali_engine.models import ColPali, ColPaliProcessor
 from huggingface_hub import login
 import os
@@ -15,10 +15,9 @@ hf_token = os.getenv('HF_TOKEN')
 # Log in to Hugging Face Hub (this will authenticate globally)
 login(token=hf_token)
 
-# Load the processor and image-to-text model directly
+# Use pipeline for image-to-text task
 try:
-    processor_img_to_text = AutoProcessor.from_pretrained("google/paligemma-3b-mix-448")
-    model_img_to_text = AutoModelForImageToText.from_pretrained("google/paligemma-3b-mix-448").to(device)
+    image_to_text_pipeline = pipeline("image-to-text", model="google/paligemma-3b-mix-448", device=0 if torch.cuda.is_available() else -1)
 except Exception as e:
     st.error(f"Error loading image-to-text model: {e}")
     st.stop()
@@ -52,11 +51,8 @@ if uploaded_file is not None:
     st.image(image, caption='Uploaded Image.', use_column_width=True)
     st.write("")
 
-    # Use the image-to-text model to extract text from the image
-    inputs_img_to_text = processor_img_to_text(images=image, return_tensors="pt").to(device)
-    with torch.no_grad():
-        generated_ids_img_to_text = model_img_to_text.generate(**inputs_img_to_text, max_new_tokens=128)
-    output_text_img_to_text = processor_img_to_text.batch_decode(generated_ids_img_to_text, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+    # Use the image-to-text pipeline to extract text from the image
+    output_text_img_to_text = image_to_text_pipeline(image)
 
     st.write("Extracted Text from Image:")
     st.write(output_text_img_to_text)
@@ -78,7 +74,7 @@ if uploaded_file is not None:
     # Keyword search in the extracted text
     keyword = st.text_input("Enter a keyword to search in the extracted text:")
     if keyword:
-        if keyword.lower() in output_text_img_to_text[0].lower():
+        if keyword.lower() in output_text_img_to_text[0]['generated_text'].lower():
             st.write(f"Keyword '{keyword}' found in the text.")
         else:
             st.write(f"Keyword '{keyword}' not found in the text.")
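Note on the new output shape (not part of the commit itself): the transformers "image-to-text" pipeline returns a list of dicts keyed by "generated_text", which is why the keyword check changes from output_text_img_to_text[0].lower() to output_text_img_to_text[0]['generated_text'].lower(). A minimal sketch of the new flow outside Streamlit, assuming a local test image at the hypothetical path "sample.png"; whether PaliGemma loads under this pipeline task depends on the installed transformers version, and the gated checkpoint still needs an authenticated Hugging Face login as the app does with login(token=hf_token):

import torch
from PIL import Image
from transformers import pipeline

# Mirrors the commit: GPU (device 0) if available, otherwise CPU (-1).
image_to_text_pipeline = pipeline(
    "image-to-text",
    model="google/paligemma-3b-mix-448",
    device=0 if torch.cuda.is_available() else -1,
)

image = Image.open("sample.png")         # hypothetical test image
outputs = image_to_text_pipeline(image)  # e.g. [{"generated_text": "..."}]
text = outputs[0]["generated_text"]

keyword = "receipt"                      # hypothetical search term
print(keyword.lower() in text.lower())

Compared with the removed processor/generate/batch_decode code, the pipeline folds preprocessing, generation, and decoding into a single call, at the cost of less direct control over generation arguments such as max_new_tokens.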