Spaces:

ShahzainHaider
/

OCR

Build error

App Files Files Community

ShahzainHaider committed on Jun 8, 2023

Commit

c11535f

1 Parent(s): e8fc64a

Upload folder using huggingface_hub

Browse files

Files changed (7) hide show

app/__pycache__/app.cpython-38.pyc +0 -0
app/app.py +1 -1
app/extract_country/__pycache__/country_dictionary.cpython-38.pyc +0 -0
app/extract_country/country_dictionary.py +1 -1
app/images/idcards/input.jpg +0 -0
deploy.py +28 -3
test.py +8 -15

app/__pycache__/app.cpython-38.pyc CHANGED Viewed

Binary files a/app/__pycache__/app.cpython-38.pyc and b/app/__pycache__/app.cpython-38.pyc differ

app/app.py CHANGED Viewed

@@ -23,7 +23,7 @@ async def ocr(Id_card: UploadFile = File(...)):
     with open(file_path, "wb+") as file_object:
         file_object.write(Id_card.file.read())
-    card_type = "passport"
     dictionary = custom_ocr(file_path, card_type)
     if '' in list(dictionary.values()):

     with open(file_path, "wb+") as file_object:
         file_object.write(Id_card.file.read())
+    card_type = "id_card"
     dictionary = custom_ocr(file_path, card_type)
     if '' in list(dictionary.values()):

app/extract_country/__pycache__/country_dictionary.cpython-38.pyc CHANGED Viewed

Binary files a/app/extract_country/__pycache__/country_dictionary.cpython-38.pyc and b/app/extract_country/__pycache__/country_dictionary.cpython-38.pyc differ

app/extract_country/country_dictionary.py CHANGED Viewed

@@ -26,7 +26,7 @@ country_data = {
     'GUATEMALA' : ['GUATEMALA'],
     'Bolivia' : ['Bolivariano', 'Bolivia'],
     'El Salvador' : ['Salvador'],
-    'Dominican Republic' : ['REPUBLICADOMINICANA']
 }
 def find_country(ocr_list):

     'GUATEMALA' : ['GUATEMALA'],
     'Bolivia' : ['Bolivariano', 'Bolivia'],
     'El Salvador' : ['Salvador'],
+    'Dominican Republic' : ['REPUBLICA DOMINICANA']
 }
 def find_country(ocr_list):

app/images/idcards/input.jpg CHANGED Viewed

deploy.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import gradio as gr
 from app.extract_country.country_dictionary import find_country
 from app.extract_dates.date_engine import date_extractor
 from app.extract_gender.gender_extractor import gender_extract
@@ -7,6 +8,8 @@ from app.ocr_engine.ocr import OCR
 from paddleocr import PaddleOCR
 from app.layoutLM_api.api import custom_ocr
 import cv2
 def ocr(image_input, webcam_input, card_type_input):
@@ -16,17 +19,39 @@ def ocr(image_input, webcam_input, card_type_input):
     print("Webcam Image : ", type(webcam_input))
     if image_input is not None and image_input.any():
-        print("In input image")
         cv2.imwrite(file_path, image_input)
     else:
         print("In webcam")
         cv2.imwrite(file_path, cv2.flip(webcam_input, 1))
     # print(type(input_img))
     ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
     # with open(file_path, "wb+") as file_object:
     #     file_object.write(input_img.file.read())

 import gradio as gr
+import numpy as np
 from app.extract_country.country_dictionary import find_country
 from app.extract_dates.date_engine import date_extractor
 from app.extract_gender.gender_extractor import gender_extract
 from paddleocr import PaddleOCR
 from app.layoutLM_api.api import custom_ocr
 import cv2
+from PIL import Image, ImageFilter
 def ocr(image_input, webcam_input, card_type_input):
     print("Webcam Image : ", type(webcam_input))
     if image_input is not None and image_input.any():
+        print("In input image")
         cv2.imwrite(file_path, image_input)
     else:
         print("In webcam")
         cv2.imwrite(file_path, cv2.flip(webcam_input, 1))
+    image = Image.open(file_path)
+    upscaled_image = image.resize((image.width*2, image.height*2), resample=Image.Resampling.BILINEAR)
+    sharpened_image = upscaled_image.filter(ImageFilter.SHARPEN)
+    sharpened_image.save(file_path)
+    sharpened_image = cv2.imread(file_path)
+    # Apply smoothing
+    smoothed_image = cv2.GaussianBlur(sharpened_image, (5, 5), 0)
+    # Set the desired output level range
+    output_min = 55
+    output_max = 255
+    # Calculate the minimum and maximum pixel values in the image
+    min_value = np.min(image)
+    max_value = np.max(image)
+    # Normalize the image to the desired output level range
+    normalized_image = cv2.normalize(smoothed_image, None, output_min, output_max, cv2.NORM_MINMAX)
+    # Save the upscaled, smoothed, and sharpened image
+    cv2.imwrite(file_path, normalized_image)
     # print(type(input_img))
     ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
     # with open(file_path, "wb+") as file_object:
     #     file_object.write(input_img.file.read())

test.py CHANGED Viewed

@@ -1,19 +1,12 @@
-import re
-def identify_document_id(data_list):
-    keywords = ["Document ID", "Document Number", "Passport Number", "ID Number"]  # Add other possible keywords
-    for item in data_list:
-        for keyword in keywords:
-            if keyword in item:
-                # Extract document ID based on format and length
-                document_id = re.findall(r'\b[A-Za-z0-9]+\b', item)
-                # Additional checks for format and length can be added here
-                return document_id[0] if document_id else None
-    return None
-# Test the function with the given data list
-data_list = ["Govermment of the People's Republic of Bangladesh", 'NationalIDCard', '12May 1975', 'HETH', 'Caaaat', 'Name', 'ROMANARAHMAN', 'Date of tn 12 May 1975', '8673674936']
-document_id = identify_document_id(data_list)
-print(document_id)

+from datetime import datetime
+# Sample date of birth written with a two-digit year (YY-MM-DD).
+dob = "56-01-02"
+# Parse with %y: %Y expects a four-digit year and raises ValueError on "56-01-02".
+date_object = datetime.strptime(dob, "%y-%m-%d")
+# %y maps 00-68 to 2000-2068; a date of birth cannot lie in the future,
+# so move such a result back one century (2056 -> 1956).
+if date_object > datetime.now():
+    date_object = date_object.replace(year=date_object.year - 100)
+# Format the date object using strftime
+formatted_date = date_object.strftime("%B %d, %Y")
+# Print the formatted date
+print(formatted_date)