sprakhil committed
Commit 1512254 · 1 Parent(s): 96c0816

adding pipeline

Files changed (1)
app.py  +6 −10
app.py CHANGED
@@ -1,7 +1,7 @@
 import streamlit as st
 from PIL import Image
 import torch
-from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, AutoModelForImageToText
+from transformers import Qwen2VLForConditionalGeneration, AutoProcessor, pipeline
 from colpali_engine.models import ColPali, ColPaliProcessor
 from huggingface_hub import login
 import os
@@ -15,10 +15,9 @@ hf_token = os.getenv('HF_TOKEN')
 # Log in to Hugging Face Hub (this will authenticate globally)
 login(token=hf_token)
 
-# Load the processor and image-to-text model directly
+# Use pipeline for image-to-text task
 try:
-    processor_img_to_text = AutoProcessor.from_pretrained("google/paligemma-3b-mix-448")
-    model_img_to_text = AutoModelForImageToText.from_pretrained("google/paligemma-3b-mix-448").to(device)
+    image_to_text_pipeline = pipeline("image-to-text", model="google/paligemma-3b-mix-448", device=0 if torch.cuda.is_available() else -1)
 except Exception as e:
     st.error(f"Error loading image-to-text model: {e}")
     st.stop()
@@ -52,11 +51,8 @@ if uploaded_file is not None:
     st.image(image, caption='Uploaded Image.', use_column_width=True)
     st.write("")
 
-    # Use the image-to-text model to extract text from the image
-    inputs_img_to_text = processor_img_to_text(images=image, return_tensors="pt").to(device)
-    with torch.no_grad():
-        generated_ids_img_to_text = model_img_to_text.generate(**inputs_img_to_text, max_new_tokens=128)
-    output_text_img_to_text = processor_img_to_text.batch_decode(generated_ids_img_to_text, skip_special_tokens=True, clean_up_tokenization_spaces=True)
+    # Use the image-to-text pipeline to extract text from the image
+    output_text_img_to_text = image_to_text_pipeline(image)
 
     st.write("Extracted Text from Image:")
     st.write(output_text_img_to_text)
@@ -78,7 +74,7 @@ if uploaded_file is not None:
     # Keyword search in the extracted text
     keyword = st.text_input("Enter a keyword to search in the extracted text:")
     if keyword:
-        if keyword.lower() in output_text_img_to_text[0].lower():
+        if keyword.lower() in output_text_img_to_text[0]['generated_text'].lower():
             st.write(f"Keyword '{keyword}' found in the text.")
         else:
             st.write(f"Keyword '{keyword}' not found in the text.")
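Note on the new output shape (not part of the commit itself): the transformers "image-to-text" pipeline returns a list of dicts keyed by "generated_text", which is why the keyword check changes from output_text_img_to_text[0].lower() to output_text_img_to_text[0]['generated_text'].lower(). A minimal sketch of the new flow outside Streamlit, assuming a local test image at the hypothetical path "sample.png"; whether PaliGemma loads under this pipeline task depends on the installed transformers version, and the gated checkpoint still needs an authenticated Hugging Face login as the app does with login(token=hf_token):

import torch
from PIL import Image
from transformers import pipeline

# Mirrors the commit: GPU (device 0) if available, otherwise CPU (-1).
image_to_text_pipeline = pipeline(
    "image-to-text",
    model="google/paligemma-3b-mix-448",
    device=0 if torch.cuda.is_available() else -1,
)

image = Image.open("sample.png")         # hypothetical test image
outputs = image_to_text_pipeline(image)  # e.g. [{"generated_text": "..."}]
text = outputs[0]["generated_text"]

keyword = "receipt"                      # hypothetical search term
print(keyword.lower() in text.lower())

Compared with the removed processor/generate/batch_decode code, the pipeline folds preprocessing, generation, and decoding into a single call, at the cost of less direct control over generation arguments such as max_new_tokens.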