ShahzainHaider committed on
Commit
6a33dbc
·
1 Parent(s): 53d07c1

Upload folder using huggingface_hub

Browse files
.env CHANGED
@@ -1,2 +1,3 @@
1
  api_key = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJnb29nbGUtb2F1dGgyfDExNjY1NDE1MzQ0MDY1NjEzNTI5MSIsImVtYWlsIjoic2hhaHphaW5oYWlkZXJuYXF2aUBnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiaWF0IjoxNjg1MDAxMjgzMDExfQ.Fb5ODO7KUchlLnrK0KBvSR4pkfIAfYiECRVWXj44RTQ'
2
- queue_id = 'c0f9e6f8-73d0-42f9-bd4f-700bdf002c04'
 
 
1
  api_key = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJnb29nbGUtb2F1dGgyfDExNjY1NDE1MzQ0MDY1NjEzNTI5MSIsImVtYWlsIjoic2hhaHphaW5oYWlkZXJuYXF2aUBnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiaWF0IjoxNjg1MDAxMjgzMDExfQ.Fb5ODO7KUchlLnrK0KBvSR4pkfIAfYiECRVWXj44RTQ'
2
+ id_key = 'c0f9e6f8-73d0-42f9-bd4f-700bdf002c04'
3
+ passport_key = 'daf046a8-109a-4d2d-a301-0ca90256b0db'
app/__pycache__/app.cpython-38.pyc CHANGED
Binary files a/app/__pycache__/app.cpython-38.pyc and b/app/__pycache__/app.cpython-38.pyc differ
 
app/app.py CHANGED
@@ -22,8 +22,9 @@ async def ocr(Id_card: UploadFile = File(...)):
22
 
23
  with open(file_path, "wb+") as file_object:
24
  file_object.write(Id_card.file.read())
25
-
26
- dictionary = custom_ocr(file_path)
 
27
 
28
  if '' in list(dictionary.values()):
29
  print("Missing value found in Dic")
 
22
 
23
  with open(file_path, "wb+") as file_object:
24
  file_object.write(Id_card.file.read())
25
+
26
+ card_type = "passport"
27
+ dictionary = custom_ocr(file_path, card_type)
28
 
29
  if '' in list(dictionary.values()):
30
  print("Missing value found in Dic")
app/images/idcards/input.jpg CHANGED
app/layoutLM_api/__pycache__/api.cpython-38.pyc CHANGED
Binary files a/app/layoutLM_api/__pycache__/api.cpython-38.pyc and b/app/layoutLM_api/__pycache__/api.cpython-38.pyc differ
 
app/layoutLM_api/api.py CHANGED
@@ -7,48 +7,51 @@ from dotenv import load_dotenv
7
  mimetypes.init()
8
  load_dotenv()
9
 
10
- api_key = os.getenv('api_key')
11
- queue_id = os.getenv('queue_id')
 
12
 
 
13
 
14
- def custom_ocr(image_path):
15
-
16
  extracted_field = {
17
- 'name': '',
18
- 'dob': '',
19
- 'country': '',
20
- 'gender': '',
21
- 'document_number': '',
22
  }
23
 
24
- # PUT THIS IN ENV FILE
25
- # Make sure to add your API Key to the auth headers
26
-
 
27
 
28
- # Response is a strongly typed object
29
- response = Client(api_key).extract_document(queue_id, image_path)
30
  dictionary = response.to_dict()
31
- print("dictionary : ", dictionary)
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  for field in dictionary['formFields']:
 
34
 
35
- if ('Name' in field['fieldName']) and (extracted_field['name'] == ''):
36
-
37
- if field['fieldName'] == 'Last Name':
38
- extracted_field['name'] = field['value']
39
- elif field['fieldName'] == 'First Name':
40
  extracted_field['name'] = field['value']
41
- elif field['fieldName'] == 'Middle Name':
42
- extracted_field['name'] = field['value']
43
-
44
- if field['fieldName'] == 'Document Number':
45
- extracted_field['document_number'] = field['value']
46
-
47
- if field['fieldName'] == 'Date of Birth':
48
- extracted_field['dob'] = field['value']
49
 
50
- if (field['fieldName'] == 'State' or field['fieldName'] == 'County' or field['fieldName'] == 'Place of Birth' ) and (len(field['value']) > 0):
51
- extracted_field['country'] = field['value']
 
 
52
 
53
- print("MODEL EXTRACTED FIELDS : ", extracted_field)
54
  return extracted_field
 
7
  mimetypes.init()
8
  load_dotenv()
9
 
10
+ api_key = os.getenv('api_key')
11
+ id_key = os.getenv('id_key')
12
+ passport_key = os.getenv('passport_key')
13
 
14
# Labels of the name components reported by the extraction service.
_NAME_FIELDS = frozenset({'Last Name', 'First Name', 'Middle Name'})

# Extraction-service field label -> key of the dictionary returned by custom_ocr.
# Several labels map to the same key; the last non-empty one seen wins.
_FIELD_MAPPING = {
    'Document Number': 'document_number',
    'Date of Birth': 'dob',
    'Birth Date': 'dob',
    'State': 'country',
    'County': 'country',
    'Place of Birth': 'country',
    'Nationality': 'country',
    'Sex': 'gender',
}


def custom_ocr(image_path, card_type):
    """Extract identity fields from a document image.

    The image is sent through ``Client(api_key).extract_document`` using the
    key that matches ``card_type`` (``passport_key`` or ``id_key``), and the
    ``formFields`` of the response are folded into a flat dictionary.

    Args:
        image_path: Path of the image file to submit.
        card_type: Either ``"passport"`` or ``"id_card"``.

    Returns:
        A dict with the keys ``name``, ``dob``, ``country``, ``gender`` and
        ``document_number``. A key keeps the value ``''`` when the response
        carried no non-empty value for it.

    Raises:
        ValueError: If ``card_type`` is not one of the supported values.
            (Previously this surfaced as an ``UnboundLocalError`` because
            ``response`` was never assigned.)
    """
    # Validate before touching the service so a bad card type fails fast
    # with a clear message.
    if card_type == "passport":
        queue_key = passport_key
    elif card_type == "id_card":
        queue_key = id_key
    else:
        raise ValueError(
            "Unsupported card_type {!r}; expected 'passport' or 'id_card'".format(card_type)
        )

    extracted_field = {
        'name': '',
        'dob': '',
        'country': '',
        'gender': '',
        'document_number': '',
    }

    response = Client(api_key).extract_document(queue_key, image_path)
    dictionary = response.to_dict()
    # NOTE(review): this dumps the full extraction result (personal data) to
    # stdout -- confirm this is acceptable outside of local debugging.
    print("dictionary: ", dictionary)

    for field in dictionary['formFields']:
        field_name = field['fieldName']
        value = field['value']

        # Skip empty *and* None values: an empty value must never overwrite
        # a field, and None would break the callers' `'' in values` check.
        if not value:
            continue

        # Only the first non-empty name component is kept; later components
        # do not overwrite or extend it.
        if field_name in _NAME_FIELDS and extracted_field['name'] == '':
            extracted_field['name'] = value

        if field_name in _FIELD_MAPPING:
            extracted_field[_FIELD_MAPPING[field_name]] = value

    print("MODEL EXTRACTED FIELDS: ", extracted_field)
    return extracted_field
deploy.py CHANGED
@@ -8,7 +8,7 @@ from paddleocr import PaddleOCR
8
  from app.layoutLM_api.api import custom_ocr
9
  import cv2
10
 
11
- def ocr(image_input, webcam_input):
12
 
13
  file_path = 'app/images/idcards/input.jpg'
14
 
@@ -29,8 +29,8 @@ def ocr(image_input, webcam_input):
29
 
30
  # with open(file_path, "wb+") as file_object:
31
  # file_object.write(input_img.file.read())
32
-
33
- dictionary = custom_ocr(file_path)
34
 
35
  if '' in list(dictionary.values()):
36
  print("Missing value found in Dic")
@@ -72,7 +72,8 @@ def ocr(image_input, webcam_input):
72
  # Define the input objects
73
  image_input = gr.inputs.Image(label="Upload Image")
74
  webcam_input = gr.inputs.Image(label="Webcam", source="webcam")
 
75
 
76
  # Create the Gradio interface
77
- interface = gr.Interface(fn=ocr, inputs=[image_input, webcam_input], outputs="json")
78
  interface.launch()
 
8
  from app.layoutLM_api.api import custom_ocr
9
  import cv2
10
 
11
+ def ocr(image_input, webcam_input, card_type_input):
12
 
13
  file_path = 'app/images/idcards/input.jpg'
14
 
 
29
 
30
  # with open(file_path, "wb+") as file_object:
31
  # file_object.write(input_img.file.read())
32
+
33
+ dictionary = custom_ocr(file_path, card_type_input)
34
 
35
  if '' in list(dictionary.values()):
36
  print("Missing value found in Dic")
 
72
  # Define the input objects
73
  image_input = gr.inputs.Image(label="Upload Image")
74
  webcam_input = gr.inputs.Image(label="Webcam", source="webcam")
75
+ card_type_input = gr.inputs.Dropdown(["passport", "id_card"], label="Card Type")
76
 
77
  # Create the Gradio interface
78
+ interface = gr.Interface(fn=ocr, inputs=[image_input, webcam_input, card_type_input], outputs="json")
79
  interface.launch()