Spaces:
Build error
Build error
ShahzainHaider
committed on
Commit
·
c11535f
1
Parent(s):
e8fc64a
Upload folder using huggingface_hub
Browse files
- app/__pycache__/app.cpython-38.pyc +0 -0
- app/app.py +1 -1
- app/extract_country/__pycache__/country_dictionary.cpython-38.pyc +0 -0
- app/extract_country/country_dictionary.py +1 -1
- app/images/idcards/input.jpg +0 -0
- deploy.py +28 -3
- test.py +8 -15
app/__pycache__/app.cpython-38.pyc
CHANGED
Binary files a/app/__pycache__/app.cpython-38.pyc and b/app/__pycache__/app.cpython-38.pyc differ
|
|
app/app.py
CHANGED
@@ -23,7 +23,7 @@ async def ocr(Id_card: UploadFile = File(...)):
|
|
23 |
with open(file_path, "wb+") as file_object:
|
24 |
file_object.write(Id_card.file.read())
|
25 |
|
26 |
-
card_type = "
|
27 |
dictionary = custom_ocr(file_path, card_type)
|
28 |
|
29 |
if '' in list(dictionary.values()):
|
|
|
23 |
with open(file_path, "wb+") as file_object:
|
24 |
file_object.write(Id_card.file.read())
|
25 |
|
26 |
+
card_type = "id_card"
|
27 |
dictionary = custom_ocr(file_path, card_type)
|
28 |
|
29 |
if '' in list(dictionary.values()):
|
app/extract_country/__pycache__/country_dictionary.cpython-38.pyc
CHANGED
Binary files a/app/extract_country/__pycache__/country_dictionary.cpython-38.pyc and b/app/extract_country/__pycache__/country_dictionary.cpython-38.pyc differ
|
|
app/extract_country/country_dictionary.py
CHANGED
@@ -26,7 +26,7 @@ country_data = {
|
|
26 |
'GUATEMALA' : ['GUATEMALA'],
|
27 |
'Bolivia' : ['Bolivariano', 'Bolivia'],
|
28 |
'El Salvador' : ['Salvador'],
|
29 |
-
'Dominican Republic' : ['
|
30 |
}
|
31 |
|
32 |
def find_country(ocr_list):
|
|
|
26 |
'GUATEMALA' : ['GUATEMALA'],
|
27 |
'Bolivia' : ['Bolivariano', 'Bolivia'],
|
28 |
'El Salvador' : ['Salvador'],
|
29 |
+
'Dominican Republic' : ['REPUBLICA DOMINICANA']
|
30 |
}
|
31 |
|
32 |
def find_country(ocr_list):
|
app/images/idcards/input.jpg
CHANGED
deploy.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
from app.extract_country.country_dictionary import find_country
|
3 |
from app.extract_dates.date_engine import date_extractor
|
4 |
from app.extract_gender.gender_extractor import gender_extract
|
@@ -7,6 +8,8 @@ from app.ocr_engine.ocr import OCR
|
|
7 |
from paddleocr import PaddleOCR
|
8 |
from app.layoutLM_api.api import custom_ocr
|
9 |
import cv2
|
|
|
|
|
10 |
|
11 |
def ocr(image_input, webcam_input, card_type_input):
|
12 |
|
@@ -16,17 +19,39 @@ def ocr(image_input, webcam_input, card_type_input):
|
|
16 |
print("Webcam Image : ", type(webcam_input))
|
17 |
|
18 |
if image_input is not None and image_input.any():
|
19 |
-
print("In input image")
|
20 |
cv2.imwrite(file_path, image_input)
|
|
|
21 |
else:
|
22 |
print("In webcam")
|
23 |
cv2.imwrite(file_path, cv2.flip(webcam_input, 1))
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
# print(type(input_img))
|
26 |
ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
|
27 |
|
28 |
-
|
29 |
-
|
30 |
# with open(file_path, "wb+") as file_object:
|
31 |
# file_object.write(input_img.file.read())
|
32 |
|
|
|
1 |
import gradio as gr
|
2 |
+
import numpy as np
|
3 |
from app.extract_country.country_dictionary import find_country
|
4 |
from app.extract_dates.date_engine import date_extractor
|
5 |
from app.extract_gender.gender_extractor import gender_extract
|
|
|
8 |
from paddleocr import PaddleOCR
|
9 |
from app.layoutLM_api.api import custom_ocr
|
10 |
import cv2
|
11 |
+
from PIL import Image, ImageFilter
|
12 |
+
|
13 |
|
14 |
def ocr(image_input, webcam_input, card_type_input):
|
15 |
|
|
|
19 |
print("Webcam Image : ", type(webcam_input))
|
20 |
|
21 |
if image_input is not None and image_input.any():
|
22 |
+
print("In input image")
|
23 |
cv2.imwrite(file_path, image_input)
|
24 |
+
|
25 |
else:
|
26 |
print("In webcam")
|
27 |
cv2.imwrite(file_path, cv2.flip(webcam_input, 1))
|
28 |
|
29 |
+
image = Image.open(file_path)
|
30 |
+
upscaled_image = image.resize((image.width*2, image.height*2), resample=Image.Resampling.BILINEAR)
|
31 |
+
sharpened_image = upscaled_image.filter(ImageFilter.SHARPEN)
|
32 |
+
sharpened_image.save(file_path)
|
33 |
+
|
34 |
+
sharpened_image = cv2.imread(file_path)
|
35 |
+
# Apply smoothing
|
36 |
+
smoothed_image = cv2.GaussianBlur(sharpened_image, (5, 5), 0)
|
37 |
+
# Set the desired output level range
|
38 |
+
output_min = 55
|
39 |
+
output_max = 255
|
40 |
+
|
41 |
+
# Calculate the minimum and maximum pixel values in the image
|
42 |
+
min_value = np.min(image)
|
43 |
+
max_value = np.max(image)
|
44 |
+
|
45 |
+
# Normalize the image to the desired output level range
|
46 |
+
normalized_image = cv2.normalize(smoothed_image, None, output_min, output_max, cv2.NORM_MINMAX)
|
47 |
+
|
48 |
+
|
49 |
+
# Save the upscaled, smoothed, and sharpened image
|
50 |
+
cv2.imwrite(file_path, normalized_image)
|
51 |
+
|
52 |
# print(type(input_img))
|
53 |
ocr = PaddleOCR(use_angle_cls=True, lang='en') # need to run only once to download and load model into memory
|
54 |
|
|
|
|
|
55 |
# with open(file_path, "wb+") as file_object:
|
56 |
# file_object.write(input_img.file.read())
|
57 |
|
test.py
CHANGED
@@ -1,19 +1,12 @@
|
|
1 |
-
import
|
2 |
|
3 |
-
|
4 |
-
keywords = ["Document ID", "Document Number", "Passport Number", "ID Number"] # Add other possible keywords
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
if keyword in item:
|
9 |
-
# Extract document ID based on format and length
|
10 |
-
document_id = re.findall(r'\b[A-Za-z0-9]+\b', item)
|
11 |
-
# Additional checks for format and length can be added here
|
12 |
-
return document_id[0] if document_id else None
|
13 |
|
14 |
-
|
|
|
15 |
|
16 |
-
#
|
17 |
-
|
18 |
-
document_id = identify_document_id(data_list)
|
19 |
-
print(document_id)
|
|
|
1 |
+
from datetime import datetime
|
2 |
|
3 |
+
dob = "56-01-02"
|
|
|
4 |
|
5 |
+
# Parse the string date into a datetime object
|
6 |
+
date_object = datetime.strptime(dob, "%Y-%m-%d")
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
+
# Format the date object using strftime
|
9 |
+
formatted_date = date_object.strftime("%B %d, %Y")
|
10 |
|
11 |
+
# Print the formatted date
|
12 |
+
print(formatted_date)
|
|
|
|