Spaces:
Build error
Build error
ShahzainHaider
committed on
Commit
·
6a33dbc
1
Parent(s):
53d07c1
Upload folder using huggingface_hub
Browse files
- .env +2 -1
- app/__pycache__/app.cpython-38.pyc +0 -0
- app/app.py +3 -2
- app/images/idcards/input.jpg +0 -0
- app/layoutLM_api/__pycache__/api.cpython-38.pyc +0 -0
- app/layoutLM_api/api.py +34 -31
- deploy.py +5 -4
.env
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
api_key = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJnb29nbGUtb2F1dGgyfDExNjY1NDE1MzQ0MDY1NjEzNTI5MSIsImVtYWlsIjoic2hhaHphaW5oYWlkZXJuYXF2aUBnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiaWF0IjoxNjg1MDAxMjgzMDExfQ.Fb5ODO7KUchlLnrK0KBvSR4pkfIAfYiECRVWXj44RTQ'
|
2 |
-
|
|
|
|
1 |
api_key = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJnb29nbGUtb2F1dGgyfDExNjY1NDE1MzQ0MDY1NjEzNTI5MSIsImVtYWlsIjoic2hhaHphaW5oYWlkZXJuYXF2aUBnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiaWF0IjoxNjg1MDAxMjgzMDExfQ.Fb5ODO7KUchlLnrK0KBvSR4pkfIAfYiECRVWXj44RTQ'
|
2 |
+
id_key = 'c0f9e6f8-73d0-42f9-bd4f-700bdf002c04'
|
3 |
+
passport_key = 'daf046a8-109a-4d2d-a301-0ca90256b0db'
|
app/__pycache__/app.cpython-38.pyc
CHANGED
Binary files a/app/__pycache__/app.cpython-38.pyc and b/app/__pycache__/app.cpython-38.pyc differ
|
|
app/app.py
CHANGED
@@ -22,8 +22,9 @@ async def ocr(Id_card: UploadFile = File(...)):
|
|
22 |
|
23 |
with open(file_path, "wb+") as file_object:
|
24 |
file_object.write(Id_card.file.read())
|
25 |
-
|
26 |
-
|
|
|
27 |
|
28 |
if '' in list(dictionary.values()):
|
29 |
print("Missing value found in Dic")
|
|
|
22 |
|
23 |
with open(file_path, "wb+") as file_object:
|
24 |
file_object.write(Id_card.file.read())
|
25 |
+
|
26 |
+
card_type = "passport"
|
27 |
+
dictionary = custom_ocr(file_path, card_type)
|
28 |
|
29 |
if '' in list(dictionary.values()):
|
30 |
print("Missing value found in Dic")
|
app/images/idcards/input.jpg
CHANGED
app/layoutLM_api/__pycache__/api.cpython-38.pyc
CHANGED
Binary files a/app/layoutLM_api/__pycache__/api.cpython-38.pyc and b/app/layoutLM_api/__pycache__/api.cpython-38.pyc differ
|
|
app/layoutLM_api/api.py
CHANGED
@@ -7,48 +7,51 @@ from dotenv import load_dotenv
|
|
7 |
mimetypes.init()
|
8 |
load_dotenv()
|
9 |
|
10 |
-
api_key =
|
11 |
-
|
|
|
12 |
|
|
|
13 |
|
14 |
-
def custom_ocr(image_path):
|
15 |
-
|
16 |
extracted_field = {
|
17 |
-
'name':
|
18 |
-
'dob':
|
19 |
-
'country':
|
20 |
-
'gender':
|
21 |
-
'document_number':
|
22 |
}
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
|
|
27 |
|
28 |
-
# Response is a strongly typed object
|
29 |
-
response = Client(api_key).extract_document(queue_id, image_path)
|
30 |
dictionary = response.to_dict()
|
31 |
-
print("dictionary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
for field in dictionary['formFields']:
|
|
|
34 |
|
35 |
-
if
|
36 |
-
|
37 |
-
if field['fieldName'] == 'Last Name':
|
38 |
-
extracted_field['name'] = field['value']
|
39 |
-
elif field['fieldName'] == 'First Name':
|
40 |
extracted_field['name'] = field['value']
|
41 |
-
elif field['fieldName'] == 'Middle Name':
|
42 |
-
extracted_field['name'] = field['value']
|
43 |
-
|
44 |
-
if field['fieldName'] == 'Document Number':
|
45 |
-
extracted_field['document_number'] = field['value']
|
46 |
-
|
47 |
-
if field['fieldName'] == 'Date of Birth':
|
48 |
-
extracted_field['dob'] = field['value']
|
49 |
|
50 |
-
if
|
51 |
-
|
|
|
|
|
52 |
|
53 |
-
print("MODEL EXTRACTED FIELDS
|
54 |
return extracted_field
|
|
|
7 |
mimetypes.init()
|
8 |
load_dotenv()
|
9 |
|
10 |
+
api_key = os.getenv('api_key')
|
11 |
+
id_key = os.getenv('id_key')
|
12 |
+
passport_key = os.getenv('passport_key')
|
13 |
|
14 |
+
def custom_ocr(image_path, card_type):
|
15 |
|
|
|
|
|
16 |
extracted_field = {
|
17 |
+
'name': '',
|
18 |
+
'dob': '',
|
19 |
+
'country': '',
|
20 |
+
'gender': '',
|
21 |
+
'document_number': '',
|
22 |
}
|
23 |
|
24 |
+
if card_type == "passport":
|
25 |
+
response = Client(api_key).extract_document(passport_key, image_path)
|
26 |
+
elif card_type == "id_card":
|
27 |
+
response = Client(api_key).extract_document(id_key, image_path)
|
28 |
|
|
|
|
|
29 |
dictionary = response.to_dict()
|
30 |
+
print("dictionary: ", dictionary)
|
31 |
+
|
32 |
+
name_fields = {'Last Name', 'First Name', 'Middle Name'}
|
33 |
+
field_mapping = {
|
34 |
+
'Document Number': 'document_number',
|
35 |
+
'Date of Birth': 'dob',
|
36 |
+
'Birth Date': 'dob',
|
37 |
+
'State': 'country',
|
38 |
+
'County': 'country',
|
39 |
+
'Place of Birth': 'country',
|
40 |
+
'Nationality': 'country',
|
41 |
+
'Sex': 'gender'
|
42 |
+
}
|
43 |
|
44 |
for field in dictionary['formFields']:
|
45 |
+
field_name = field['fieldName']
|
46 |
|
47 |
+
if 'Name' in field_name and extracted_field['name'] == '':
|
48 |
+
if field_name in name_fields:
|
|
|
|
|
|
|
49 |
extracted_field['name'] = field['value']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
+
if field_name in field_mapping:
|
52 |
+
field_key = field_mapping[field_name]
|
53 |
+
if len(field['value']) > 0:
|
54 |
+
extracted_field[field_key] = field['value']
|
55 |
|
56 |
+
print("MODEL EXTRACTED FIELDS: ", extracted_field)
|
57 |
return extracted_field
|
deploy.py
CHANGED
@@ -8,7 +8,7 @@ from paddleocr import PaddleOCR
|
|
8 |
from app.layoutLM_api.api import custom_ocr
|
9 |
import cv2
|
10 |
|
11 |
-
def ocr(image_input, webcam_input):
|
12 |
|
13 |
file_path = 'app/images/idcards/input.jpg'
|
14 |
|
@@ -29,8 +29,8 @@ def ocr(image_input, webcam_input):
|
|
29 |
|
30 |
# with open(file_path, "wb+") as file_object:
|
31 |
# file_object.write(input_img.file.read())
|
32 |
-
|
33 |
-
dictionary = custom_ocr(file_path)
|
34 |
|
35 |
if '' in list(dictionary.values()):
|
36 |
print("Missing value found in Dic")
|
@@ -72,7 +72,8 @@ def ocr(image_input, webcam_input):
|
|
72 |
# Define the input objects
|
73 |
image_input = gr.inputs.Image(label="Upload Image")
|
74 |
webcam_input = gr.inputs.Image(label="Webcam", source="webcam")
|
|
|
75 |
|
76 |
# Create the Gradio interface
|
77 |
-
interface = gr.Interface(fn=ocr, inputs=[image_input, webcam_input], outputs="json")
|
78 |
interface.launch()
|
|
|
8 |
from app.layoutLM_api.api import custom_ocr
|
9 |
import cv2
|
10 |
|
11 |
+
def ocr(image_input, webcam_input, card_type_input):
|
12 |
|
13 |
file_path = 'app/images/idcards/input.jpg'
|
14 |
|
|
|
29 |
|
30 |
# with open(file_path, "wb+") as file_object:
|
31 |
# file_object.write(input_img.file.read())
|
32 |
+
|
33 |
+
dictionary = custom_ocr(file_path, card_type_input)
|
34 |
|
35 |
if '' in list(dictionary.values()):
|
36 |
print("Missing value found in Dic")
|
|
|
72 |
# Define the input objects
|
73 |
image_input = gr.inputs.Image(label="Upload Image")
|
74 |
webcam_input = gr.inputs.Image(label="Webcam", source="webcam")
|
75 |
+
card_type_input = gr.inputs.Dropdown(["passport", "id_card"], label="Card Type")
|
76 |
|
77 |
# Create the Gradio interface
|
78 |
+
interface = gr.Interface(fn=ocr, inputs=[image_input, webcam_input, card_type_input], outputs="json")
|
79 |
interface.launch()
|