Spaces:

coolfrxcrazy
/

YOLO_MODEL_DETECTION

Sleeping

coolfrxcrazy committed on Oct 5, 2024

Commit

27ce2a0

verified ·

1 Parent(s): 87d5df7

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -193,22 +193,26 @@ def ocr(image):
     # # Return the extracted text
     # return res
     try:
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
         model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).to(device)
         # Ensure the /tmp directory exists
         temp_dir = "/tmp"
         if not os.path.exists(temp_dir):
             os.makedirs(temp_dir)
         # Save the image to a temporary file in /tmp directory
         temp_image_path = os.path.join(temp_dir, "temp_image.jpg")
         image.save(temp_image_path, format='JPEG')
         # Perform OCR on the image using the file path
         res = model.chat(tokenizer=tokenizer, image=temp_image_path, ocr_type='ocr')  # Pass the file path here
         # Return the extracted text
         return res['text']  # Adjust this based on the actual return structure
     except Exception as e:

     # # Return the extracted text
     # return res
     try:
+        # Convert image to PIL Image if it's a NumPy array
+        if isinstance(image, np.ndarray):
+            image = Image.fromarray(image)
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
         tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
         model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id).to(device)
         # Ensure the /tmp directory exists
         temp_dir = "/tmp"
         if not os.path.exists(temp_dir):
             os.makedirs(temp_dir)
         # Save the image to a temporary file in /tmp directory
         temp_image_path = os.path.join(temp_dir, "temp_image.jpg")
         image.save(temp_image_path, format='JPEG')
         # Perform OCR on the image using the file path
         res = model.chat(tokenizer=tokenizer, image=temp_image_path, ocr_type='ocr')  # Pass the file path here
         # Return the extracted text
         return res['text']  # Adjust this based on the actual return structure
     except Exception as e: