Spaces:
Build error
Build error
ShahzainHaider
committed on
Commit
·
7bbae49
1
Parent(s):
b74a4db
Upload folder using huggingface_hub
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .env +2 -0
- .gitattributes +8 -0
- .gitignore +2 -0
- README.md +1 -7
- __pycache__/gradio.cpython-38.pyc +0 -0
- app/.gitignore +4 -0
- app/__pycache__/app.cpython-38.pyc +0 -0
- app/app.py +64 -0
- app/constants/__pycache__/paths.cpython-38.pyc +0 -0
- app/constants/paths.py +2 -0
- app/constants/regex_expressions.py +1 -0
- app/custome.py +0 -0
- app/extract_country/__pycache__/country_dictionary.cpython-38.pyc +0 -0
- app/extract_country/__pycache__/country_extract.cpython-38.pyc +0 -0
- app/extract_country/__pycache__/country_validator.cpython-38.pyc +0 -0
- app/extract_country/country_dictionary.py +44 -0
- app/extract_country/country_extract.py +23 -0
- app/extract_country/country_validator.py +15 -0
- app/extract_dates/__pycache__/date_engine.cpython-38.pyc +0 -0
- app/extract_dates/__pycache__/validate_date.cpython-38.pyc +0 -0
- app/extract_dates/date_engine.py +34 -0
- app/extract_dates/validate_date.py +21 -0
- app/extract_gender/__pycache__/gender_extractor.cpython-38.pyc +0 -0
- app/extract_gender/gender_extractor.py +26 -0
- app/extract_identity_number/__pycache__/doc_number_extractor.cpython-38.pyc +0 -0
- app/extract_identity_number/__pycache__/identity_number.cpython-38.pyc +0 -0
- app/extract_identity_number/doc_number_extractor.py +13 -0
- app/extract_identity_number/identity_number.py +2 -0
- app/extract_mrz/__pycache__/mrz_detect.cpython-38.pyc +0 -0
- app/extract_mrz/__pycache__/mrz_engine.cpython-38.pyc +0 -0
- app/extract_mrz/mrz_detect.py +34 -0
- app/extract_mrz/mrz_engine.py +72 -0
- app/images/idcards/1.png +0 -0
- app/images/idcards/BlacksSharpen.jpg +0 -0
- app/images/idcards/EO3kzEEUcAEeNbf.jpg +0 -0
- app/images/idcards/Genunie-ID-Card-Online.jpg +0 -0
- app/images/idcards/IMG_20221130_180809.jpg +3 -0
- app/images/idcards/IMG_20230210_171555.jpg +0 -0
- app/images/idcards/IMG_20230210_171610.jpg +0 -0
- app/images/idcards/ShahzainCNIC.jpg +3 -0
- app/images/idcards/_13.jpg +0 -0
- app/images/idcards/aadhaar backside image .jpg +3 -0
- app/images/idcards/aadhaar frontside image.jpg +3 -0
- app/images/idcards/cnic.jpg +0 -0
- app/images/idcards/d.jpg +3 -0
- app/images/idcards/driving licence backside image.jpg +3 -0
- app/images/idcards/driving licence frontside image.jpg +3 -0
- app/images/idcards/e4d62a4127719bb07ed88275c3802bf907f026978d1de2a531c1e7967e60bea9.webp +0 -0
- app/images/idcards/front.jpg +3 -0
- app/images/idcards/image.jpg +0 -0
.env
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
api_key = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJnb29nbGUtb2F1dGgyfDExNjY1NDE1MzQ0MDY1NjEzNTI5MSIsImVtYWlsIjoic2hhaHphaW5oYWlkZXJuYXF2aUBnbWFpbC5jb20iLCJlbWFpbF92ZXJpZmllZCI6dHJ1ZSwiaWF0IjoxNjg1MDAxMjgzMDExfQ.Fb5ODO7KUchlLnrK0KBvSR4pkfIAfYiECRVWXj44RTQ'
|
2 |
+
queue_id = 'c0f9e6f8-73d0-42f9-bd4f-700bdf002c04'
|
.gitattributes
CHANGED
@@ -32,3 +32,11 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
app/images/idcards/IMG_20221130_180809.jpg filter=lfs diff=lfs merge=lfs -text
|
36 |
+
app/images/idcards/ShahzainCNIC.jpg filter=lfs diff=lfs merge=lfs -text
|
37 |
+
app/images/idcards/aadhaar[[:space:]]backside[[:space:]]image .jpg filter=lfs diff=lfs merge=lfs -text
|
38 |
+
app/images/idcards/aadhaar[[:space:]]frontside[[:space:]]image.jpg filter=lfs diff=lfs merge=lfs -text
|
39 |
+
app/images/idcards/d.jpg filter=lfs diff=lfs merge=lfs -text
|
40 |
+
app/images/idcards/driving[[:space:]]licence[[:space:]]backside[[:space:]]image.jpg filter=lfs diff=lfs merge=lfs -text
|
41 |
+
app/images/idcards/driving[[:space:]]licence[[:space:]]frontside[[:space:]]image.jpg filter=lfs diff=lfs merge=lfs -text
|
42 |
+
app/images/idcards/front.jpg filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
__pycache__/app.cpython-38.pyc
|
2 |
+
*pyc
|
README.md
CHANGED
@@ -1,12 +1,6 @@
|
|
1 |
---
|
2 |
title: OCR
|
3 |
-
|
4 |
-
colorFrom: yellow
|
5 |
-
colorTo: pink
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.33.1
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: OCR
|
3 |
+
app_file: deploy.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
sdk_version: 3.33.1
|
|
|
|
|
6 |
---
|
|
|
|
__pycache__/gradio.cpython-38.pyc
ADDED
Binary file (441 Bytes). View file
|
|
app/.gitignore
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
images/*
|
2 |
+
output/*
|
3 |
+
*.pyc
|
4 |
+
.vscode
|
app/__pycache__/app.cpython-38.pyc
ADDED
Binary file (2.05 kB). View file
|
|
app/app.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import FastAPI
|
2 |
+
from fastapi.middleware.cors import CORSMiddleware
|
3 |
+
from fastapi import FastAPI, File, UploadFile
|
4 |
+
from app.extract_country.country_dictionary import find_country
|
5 |
+
from app.extract_dates.date_engine import date_extractor
|
6 |
+
from app.extract_gender.gender_extractor import gender_extract
|
7 |
+
from app.extract_identity_number.doc_number_extractor import doc_number
|
8 |
+
from app.ocr_engine.ocr import OCR
|
9 |
+
from paddleocr import PaddleOCR
|
10 |
+
from app.layoutLM_api.api import custom_ocr
|
11 |
+
|
12 |
+
app = FastAPI()

# Origins allowed to call this API from a browser; "*" means any origin.
origins = ["*"]

# CORSMiddleware is imported and `origins` is defined, but without this
# registration neither has any effect and cross-origin browser calls fail.
# NOTE(review): credentials are deliberately not enabled because the origin
# list is a wildcard — tighten `origins` before turning them on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
15 |
+
|
16 |
+
|
17 |
+
# PaddleOCR engine shared by every request; building it loads the models,
# so it is created once on first use instead of once per request.
_OCR_ENGINE = None

# Fields that fall back to rule-based extraction when the primary extractor
# leaves them empty: (dictionary key, log message, extractor).
# Order matters: date_extractor removes date strings from the OCR text list,
# and doc_number runs afterwards on that same list.
_FALLBACK_EXTRACTORS = (
    ("gender", "Gender Missing", gender_extract),
    ("dob", "Dob Missing", date_extractor),
    ("country", "Country Missing", find_country),
    ("document_number", "document Number missing", doc_number),
)


def _get_ocr_engine():
    """Return the shared PaddleOCR engine, creating it on first use."""
    global _OCR_ENGINE
    if _OCR_ENGINE is None:
        _OCR_ENGINE = PaddleOCR(use_angle_cls=True, lang='en')
    return _OCR_ENGINE


def _read_text_lines(image_path):
    """Run PaddleOCR on *image_path* and return the recognised strings."""
    result = _get_ocr_engine().ocr(image_path, cls=True)
    # Each entry is indexed as line[1][0] for its text, exactly as before.
    # NOTE(review): some PaddleOCR versions appear to return None for a page
    # with no detections, so falsy pages are skipped — confirm against the
    # pinned version.
    return [line[1][0] for page in (result or []) if page for line in page]


@app.post('/extract_info')
async def ocr(Id_card: UploadFile = File(...)):
    """Extract identity fields from an uploaded ID-card image.

    The upload is written to a private temporary file, passed to
    ``custom_ocr``, and any of gender / dob / country / document_number that
    comes back empty is filled in from plain OCR text by the rule-based
    extractors. A field that cannot be recovered is set to ``None``.

    Args:
        Id_card: The uploaded image file.

    Returns:
        ``{"Status": 200, "OCR": <field dictionary>}``.
    """
    import os
    import tempfile

    contents = await Id_card.read()

    # A per-request temporary file replaces the single shared
    # 'app/images/idcards/input.jpg', which concurrent requests overwrote and
    # which left every uploaded ID document on disk.
    # NOTE(review): assumes custom_ocr accepts an arbitrary image path — confirm.
    with tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) as file_object:
        file_object.write(contents)
        file_path = file_object.name

    try:
        dictionary = custom_ocr(file_path)

        # Treat None, '' and absent keys alike as "missing".
        missing = {
            field for field, _message, _extractor in _FALLBACK_EXTRACTORS
            if not dictionary.get(field)
        }

        if missing:
            print("Missing value found in Dic")
            extract_text = _read_text_lines(file_path)
            print("extract_text", extract_text)

            for field, message, extractor in _FALLBACK_EXTRACTORS:
                if field not in missing:
                    continue
                print(message)
                found, value, _ocr_list = extractor(extract_text)
                dictionary[field] = value if found else None
    finally:
        os.remove(file_path)

    print("Updated Dict ", dictionary)
    return {"Status": 200, "OCR": dictionary}
|
app/constants/__pycache__/paths.cpython-38.pyc
ADDED
Binary file (251 Bytes). View file
|
|
app/constants/paths.py
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
# Relative path of an ID-card image (presumably the default input image —
# confirm against the callers; it is resolved against the working directory).
IMAGE_PATH = 'app/images/idcards/front.jpg'
|
2 |
+
# Relative path where MRZ_detector writes the grayscale MRZ crop via cv2.imwrite.
EXTRACTED_MRZ_PATH = "output/mrz_image.jpg"
|
app/constants/regex_expressions.py
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
# Matches a word-bounded, dot-separated numeric date of the form NN.NN.NNNN
# (e.g. 31.12.1999) anywhere inside a string.
# NOTE(review): the name says DATA but the pattern is date-shaped — presumably
# DATE_PATTERN was intended; confirm before renaming.
DATA_PATTERN = r"\b\d{2}\.\d{2}\.\d{4}\b"
|
app/custome.py
ADDED
File without changes
|
app/extract_country/__pycache__/country_dictionary.cpython-38.pyc
ADDED
Binary file (1.61 kB). View file
|
|
app/extract_country/__pycache__/country_extract.cpython-38.pyc
ADDED
Binary file (868 Bytes). View file
|
|
app/extract_country/__pycache__/country_validator.cpython-38.pyc
ADDED
Binary file (563 Bytes). View file
|
|
app/extract_country/country_dictionary.py
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Maps the country name returned to callers to the text fragments that
# identify that country when found inside an OCR line. Fragments are compared
# case-insensitively and as substrings; several are written without spaces,
# matching entries exactly as they are listed here.
country_data = {
    'PAKISTAN' : ['PAK', 'PAKISTAN'],
    'SWITZERLAND' : ['SCHWEIZERISCHE EIDGENOSSENSCHAFT', 'CONFEDERATION SUISSE', 'CONFEDERAZIONESVIZZERA', 'CONFEDERAZIUN SVIZRA', 'SWISS CONFEDERATION'],
    'INDIA' : ['INDIA', 'Government of India'],
    'GERMANY' : ['Bundesrepublik Deutschland','Germany', 'Westdeutschland' , 'Ostdeutschland'],
    'BANGLADESH' : ['Bangladesh'],
    'UNITED KINGDOM' : ['British Citizen', 'UNITED KINGDOM'],
    'NETHERLANDS' : ['NETHERLANDS', 'NEDERLANDSE', 'NEDERLANDEN'],
    'CANADA' : ['canada'],
    'UNITED ARAB EMIRATES' : ['Arab Emirates', 'UAE'],
    'OMAN' : ['OMAN'],
    'JORDAN' : ['jordan'],
    'BAHRAIN' : ['BAHRAIN'],
    'KUWAIT' : ['KUWAIT'],
    'QATAR' : ['Qatar'],
    # NOTE(review): this alias does not look Libyan — verify the mapping.
    'LIBYA' : ['AFRiN MAHALLi MECLiSi'],
    'SOUTH SUDAN' : ['Akon'],
    # Key corrected from the misspelling 'CHILLE'.
    'CHILE' : ['DECHILE'],
    'COLOMBIA' : ['COLOMBIA'],
    'BRAZIL' : ['BRAZIL', 'BRASIL'],
    # NOTE(review): 'CASADO' is a marital-status word, not a country marker;
    # it will tag any card printing it as Peru — verify before relying on it.
    'PERU' : ['DELPERU', 'CASADO'],
    'URUGUAY' : ['DELURUGUAY'],
    # Key corrected from the misspelling 'Coasta Rica'.
    'Costa Rica' : ['COSTARICA'],
    'PARAGUAY' : ['PARAGUAY'],
    'ECUADOR' : ['ECUADOR'],
    'GUATEMALA' : ['GUATEMALA'],
    'Bolivia' : ['Bolivariano', 'Bolivia'],
    'El Salvador' : ['Salvador'],
    'Dominican Republic' : ['REPUBLICADOMINICANA']
}


def find_country(ocr_list):
    """Find the first country whose alias occurs in any OCR line.

    OCR lines are scanned in order; for each line the countries in
    ``country_data`` are tried in declaration order and the first alias found
    as a case-insensitive substring wins. "Not found" is reported only after
    every line and every alias has been checked. Entries that are not strings
    are skipped instead of aborting the whole search.

    Args:
        ocr_list: Iterable of OCR text lines (may be empty or ``None``).

    Returns:
        ``(found, country_name_or_None, ocr_list)``; ``ocr_list`` is returned
        unchanged.
    """
    try:
        # Upper-case every alias once per call rather than once per OCR line.
        aliases_by_country = [
            (country, [alias.upper() for alias in aliases])
            for country, aliases in country_data.items()
        ]

        for text in ocr_list or ():
            if not isinstance(text, str):
                continue
            normalized = text.upper()
            for country, aliases in aliases_by_country:
                if any(alias in normalized for alias in aliases):
                    return True, country, ocr_list

        return False, None, ocr_list
    except Exception as e:
        # Best-effort extractor: report and fall through to "not found".
        print(f"An error occurred: {e}")
        return False, None, ocr_list
|
app/extract_country/country_extract.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from extract_country.country_validator import extract_country_name
|
2 |
+
|
3 |
+
|
4 |
+
def country_extractor(ocr_list):
    """Return the first country recognised in the OCR text.

    Each OCR line is offered to ``extract_country_name`` first as a whole, so
    that multi-word country names can match, and then word by word as before.
    Entries that are not strings are skipped.

    Args:
        ocr_list: Iterable of OCR text lines (may be empty or ``None``).

    Returns:
        ``(country_found, country_name_or_None, ocr_list)``; ``ocr_list`` is
        returned unchanged.
    """
    try:
        for text in ocr_list or ():
            if not isinstance(text, str):
                continue

            words = text.split()
            # A single-word line is its own only candidate; otherwise try the
            # whitespace-normalised line before its individual words.
            candidates = words if len(words) < 2 else [" ".join(words), *words]

            for candidate in candidates:
                country_found, country_name = extract_country_name(candidate)
                if country_found:
                    return True, country_name, ocr_list

        return False, None, ocr_list

    except Exception as e:
        # Best-effort extractor: report and fall through to "not found".
        print("[Exception in country_extractor ] ", str(e))
        return False, None, ocr_list
|
app/extract_country/country_validator.py
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pycountry
|
2 |
+
import re
|
3 |
+
|
4 |
+
def extract_country_name(string):
    """Look up the first letter/space run in *string* that names a country.

    Runs of ASCII letters and spaces are taken from *string* in order,
    stripped of surrounding whitespace, and passed to
    ``pycountry.countries.lookup``; the first successful lookup wins.

    Args:
        string: Text to search. Anything that is not a ``str`` yields no match.

    Returns:
        ``(True, country_name)`` on the first hit, otherwise ``(False, None)``.
    """
    if not isinstance(string, str):
        return False, None

    pattern = r"\b[A-Za-z ]+\b"  # Matches any sequence of letters and spaces
    for match in re.findall(pattern, string):
        # The pattern can capture leading spaces or a spaces-only run (e.g. the
        # gap between two numbers); those can never be a country name.
        candidate = match.strip()
        if not candidate:
            continue
        try:
            # NOTE(review): lookup presumably also accepts ISO codes, so short
            # ordinary words may resolve to a country — verify.
            country = pycountry.countries.lookup(candidate)
        except LookupError:
            continue
        print("COUNTRY : ", country.name)
        return True, country.name

    return False, None
|
15 |
+
|
app/extract_dates/__pycache__/date_engine.cpython-38.pyc
ADDED
Binary file (805 Bytes). View file
|
|
app/extract_dates/__pycache__/validate_date.cpython-38.pyc
ADDED
Binary file (880 Bytes). View file
|
|
app/extract_dates/date_engine.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from app.extract_dates.validate_date import find_smallest_date, validate_date
|
2 |
+
|
3 |
+
|
4 |
+
def date_extractor(ocr_list):
    """Pull date strings out of the OCR text and report the earliest as DOB.

    Every entry accepted by ``validate_date`` is removed from *ocr_list*
    (in place, so later extractors working on the same list no longer see
    the dates) and the earliest one, as chosen by ``find_smallest_date``, is
    returned as the date of birth.

    The previous implementation popped from the list while enumerating it,
    which skipped the entry following every date, so back-to-back dates were
    missed. It also declared ``dob_found`` as a module global; it is a plain
    local now.

    Args:
        ocr_list: Mutable list of OCR text lines.

    Returns:
        ``(dob_found, dob, ocr_list)`` where ``dob`` is the formatted date
        string, or ``''`` when no date was found or an error occurred.
    """
    dob_found = False
    dob = ''

    try:
        dates_list = []
        remaining = []
        for word in ocr_list:
            # Non-string entries can never be dates; keep them untouched.
            is_date = isinstance(word, str) and validate_date(word)[0]
            (dates_list if is_date else remaining).append(word)

        # Replace the contents in place so the caller's list loses the dates.
        ocr_list[:] = remaining

        if dates_list:
            dob = find_smallest_date(dates_list)  # smallest date will be DOB
            dob_found = True
            print("DATE OF BIRTH : ", dob)
        else:
            print("Date not found")

    except Exception as e:
        # Best-effort extractor: report and fall through to "not found".
        print("[Exception in date_extractor] : ", str(e))
        dob_found = False
        dob = ''

    return dob_found, dob, ocr_list
|
app/extract_dates/validate_date.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
from dateutil import parser
|
3 |
+
|
4 |
+
def find_smallest_date(dates):
    """Return the earliest of *dates*, formatted as ``DD-MM-YYYY``.

    ``validate_date`` in this module documents ``-`` and ``.`` separated dates
    as day-first and ``/`` separated dates as month-first. Parsing follows
    that: without ``dayfirst`` an ambiguous day-first date such as
    ``05.12.1990`` would be read month-first and come back as ``12-05-1990``,
    with day and month swapped.

    Args:
        dates: Non-empty iterable of date strings.

    Returns:
        The earliest date as a ``'%d-%m-%Y'`` string.

    Raises:
        ValueError: If *dates* is empty or a string cannot be parsed.
    """
    parsed_dates = [
        # NOTE(review): '/' dates keep the month-first reading stated in
        # validate_date's pattern comments — confirm for the supported cards.
        parser.parse(date, dayfirst='/' not in date)
        for date in dates
    ]
    smallest_date = min(parsed_dates).strftime('%d-%m-%Y')
    print(smallest_date)
    return smallest_date
|
9 |
+
|
10 |
+
def validate_date(date_string):
    """Check whether *date_string* is exactly one of the supported date shapes.

    Supported shapes are ``NN/NN/NNNN``, ``NN-NN-NNNN`` and ``NN.NN.NNNN``.
    The dots in the last pattern are now escaped; unescaped they matched any
    character, so strings such as ``12a05b1990`` were accepted. The whole
    string must match, so a trailing newline is rejected too.

    Args:
        date_string: Candidate text. Anything that is not a ``str`` is invalid.

    Returns:
        ``(True, pattern)`` with the regex that matched, or ``(False, None)``.
        Only the shape is checked; the calendar values are not validated.
    """
    patterns = [
        r'^\d{2}/\d{2}/\d{4}$',  # MM/DD/YYYY
        r'^\d{2}-\d{2}-\d{4}$',  # DD-MM-YYYY
        r'^\d{2}\.\d{2}\.\d{4}$',  # DD.MM.YYYY
    ]

    if not isinstance(date_string, str):
        return False, None

    for pattern in patterns:
        if re.fullmatch(pattern, date_string):
            return True, pattern

    # No pattern matched, so there is no pattern to report.
    return False, None
|
app/extract_gender/__pycache__/gender_extractor.cpython-38.pyc
ADDED
Binary file (810 Bytes). View file
|
|
app/extract_gender/gender_extractor.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Tokens accepted verbatim as a gender marker (compared after upper-casing).
# "MASCULINO" is added alongside the existing "MASCULINA" so the masculine
# counterpart of "FEMENINO" is recognised as well.
gender_labels = ["M", "MALE", "F", "FEMALE", "V/F", "N/F", "FEMENINO", "MASCULINO", "MASCULINA"]

# Fragments looked for inside a longer token (e.g. "SEX:MALE"), checked in
# this order; "FEMALE" must precede "MALE" because it contains it.
_GENDER_FRAGMENTS = (
    ("FEMALE", 'F'),
    ("MALE", 'M'),
    ("FEMENINO", "F"),
    ("MASCULINO", "M"),
    ("MASCULINA", "M"),
)


def gender_extract(ocr_list):
    """Find the first gender marker in the OCR text.

    Each line is split on whitespace and every token is upper-cased. A token
    that is exactly one of ``gender_labels`` is returned as is; a token that
    merely contains a known fragment is reported as ``'F'`` or ``'M'``.
    Entries that are not strings are skipped instead of aborting the search.

    Args:
        ocr_list: Iterable of OCR text lines (may be empty or ``None``).

    Returns:
        ``(found, gender_or_None, ocr_list)``; ``ocr_list`` is returned
        unchanged. ``found`` is ``False`` on error as well, matching the other
        extractors in this project.
    """
    try:
        for words in ocr_list or ():
            if not isinstance(words, str):
                continue
            for word in words.split():
                word = word.upper()
                if word in gender_labels:
                    return True, word, ocr_list
                for fragment, gender in _GENDER_FRAGMENTS:
                    if fragment in word:
                        return True, gender, ocr_list

        return False, None, ocr_list
    except Exception as e:
        # Best-effort extractor: report and fall through to "not found".
        print("[Exception in gender_extract] ", str(e))
        return False, None, ocr_list
|
app/extract_identity_number/__pycache__/doc_number_extractor.cpython-38.pyc
ADDED
Binary file (625 Bytes). View file
|
|
app/extract_identity_number/__pycache__/identity_number.cpython-38.pyc
ADDED
Binary file (300 Bytes). View file
|
|
app/extract_identity_number/doc_number_extractor.py
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
|
3 |
+
def doc_number(ocr_list):
    """Return the first OCR entry that looks like a document number.

    An entry qualifies when it is at least six characters long and either
    starts with a digit or starts with a letter followed by a digit. Entries
    shaped like a date (``NN.NN.NNNN``, ``NN-NN-NNNN``, ``NN/NN/NNNN``) are
    skipped: they satisfy the same rule and were otherwise returned as the
    document number whenever they appeared first. Entries that are not
    strings are skipped as well.

    Args:
        ocr_list: Iterable of OCR text lines.

    Returns:
        ``(found, document_number_or_None, ocr_list)``; ``ocr_list`` is
        returned unchanged.
    """
    date_like = re.compile(r'\d{2}[./-]\d{2}[./-]\d{4}')

    try:
        for item in ocr_list:
            if not isinstance(item, str) or len(item) < 6:
                continue
            if date_like.fullmatch(item):
                continue

            starts_with_digit = item[0].isnumeric()
            letter_then_digit = item[0].isalpha() and item[1].isnumeric()
            if starts_with_digit or letter_then_digit:
                print("DOCUMENT NUMBRE IS : ", item)
                return True, item, ocr_list

        return False, None, ocr_list
    except Exception as e:
        # Best-effort extractor: report and fall through to "not found".
        print("Exception in doc_number : " ,str(e))
        return False, None, ocr_list
|
app/extract_identity_number/identity_number.py
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
def identity_card_extractor(ocr_list):
    """Placeholder for identity-card number extraction; not implemented.

    Args:
        ocr_list: Accepted but currently unused.

    Returns:
        Always ``None``.
    """
    return None
|
app/extract_mrz/__pycache__/mrz_detect.cpython-38.pyc
ADDED
Binary file (849 Bytes). View file
|
|
app/extract_mrz/__pycache__/mrz_engine.cpython-38.pyc
ADDED
Binary file (1.59 kB). View file
|
|
app/extract_mrz/mrz_detect.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import cv2
|
2 |
+
from readmrz import MrzDetector
|
3 |
+
|
4 |
+
from constants.paths import EXTRACTED_MRZ_PATH
|
5 |
+
|
6 |
+
|
7 |
+
def MRZ_detector(img_path):
    """Locate the MRZ area of an ID image and save a grayscale crop of it.

    The image is read and cropped with ``MrzDetector``, converted to
    grayscale and, when a crop is considered found, written to
    ``EXTRACTED_MRZ_PATH``.

    The interactive ``cv2.imshow`` / ``cv2.waitKey(0)`` preview was removed:
    it waits for a key press and needs a display, neither of which exists
    when this runs inside a server.

    Args:
        img_path: Path of the image to analyse.

    Returns:
        ``(mrz_found, gray)`` where ``gray`` is the grayscale crop, or ``[]``
        when the crop could not be produced.
    """
    import os

    mrz_found = False
    gray = []

    try:
        detector = MrzDetector()

        image = detector.read(img_path)
        cropped = detector.crop_area(image)
        gray = cv2.cvtColor(cropped, cv2.COLOR_BGR2GRAY)

        # NOTE(review): a crop with exactly 13 rows is treated as "no MRZ";
        # the origin of that sentinel is not visible here — confirm.
        if len(cropped) != 13:
            mrz_found = True

            # cv2.imwrite does not create missing directories.
            output_dir = os.path.dirname(EXTRACTED_MRZ_PATH)
            if output_dir:
                os.makedirs(output_dir, exist_ok=True)

            if not cv2.imwrite(EXTRACTED_MRZ_PATH, gray):
                print("[MRZ_detector] could not write ", EXTRACTED_MRZ_PATH)
            print("MRZ FOUND")

    except Exception as e:
        # Best-effort detector: report and return whatever was produced.
        print("[Exception in MRZ_detector] : ", str(e))

    return mrz_found, gray
|
app/extract_mrz/mrz_engine.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mrz.checker.td1 import TD1CodeChecker
|
2 |
+
from mrz.checker.td2 import TD2CodeChecker
|
3 |
+
from mrz.checker.td3 import TD3CodeChecker
|
4 |
+
|
5 |
+
def mrz_engine(mrz_list, lenghtOfChars):
    """Validate MRZ lines with the checker chosen by total length.

    A total of 90 characters selects ``TD1CodeChecker`` over the first three
    lines; 72 selects ``TD2CodeChecker`` and 88 selects ``TD3CodeChecker``,
    each over the first two lines. Any other total is rejected outright.

    Args:
        mrz_list: Sequence of MRZ lines.
        lenghtOfChars: Total number of MRZ characters across the lines.

    Returns:
        The checker's ``fields()`` result when the MRZ validates, otherwise
        an empty dict (also on any error).
    """
    user_data = {}

    try:
        if lenghtOfChars == 90:
            mrz_text = "\n".join(f"{mrz_list[i]}" for i in range(3))
            check = TD1CodeChecker(mrz_text)
        elif lenghtOfChars == 72:
            mrz_text = "\n".join(f"{mrz_list[i]}" for i in range(2))
            check = TD2CodeChecker(mrz_text)
        elif lenghtOfChars == 88:
            mrz_text = "\n".join(f"{mrz_list[i]}" for i in range(2))
            check = TD3CodeChecker(mrz_text)
        else:
            check = False

        # The checker's truth value is its validation verdict.
        if bool(check):
            user_data = check.fields()
        else:
            print("FAILED")

    except Exception as e:
        print("[Exception in mrz_engine] : ", str(e))

    return user_data
|
37 |
+
|
38 |
+
def mrz_corrector(capital_strings, lenghtOfCharaters):
    """Pad short MRZ lines with ``<`` up to a standard line width.

    * Three lines whose total is not 90 are padded to 30 characters each.
    * Two lines whose total is neither 72 nor 88 are padded to 36 characters
      each when the longest line fits, otherwise to 44.
    * Anything else is returned untouched.

    The two-line branch previously indexed a lookup table with a total that
    the branch condition guaranteed was not in it, so it always failed and
    returned ``(0, [])``.

    Lines are padded in place; lines already at or beyond the target width
    are left as they are.

    Args:
        capital_strings: Mutable list of MRZ lines.
        lenghtOfCharaters: Total character count as measured by the caller.

    Returns:
        ``(total_length, capital_strings)`` with the total recomputed after
        padding, or ``(0, [])`` on error.
    """
    try:
        lines = len(capital_strings)
        target = None

        if lines == 2 and lenghtOfCharaters not in (72, 88):
            longest = max(len(line) for line in capital_strings)
            # Smallest two-line width that can hold the longest line.
            # NOTE(review): a 44-wide line that lost many characters in OCR is
            # indistinguishable from a 36-wide one and is padded to 36.
            target = next((width for width in (36, 44) if longest <= width), None)
        elif lines == 3 and lenghtOfCharaters != 90:
            target = 30

        if target is None:
            return lenghtOfCharaters, capital_strings

        for index, line in enumerate(capital_strings):
            if len(line) < target:
                capital_strings[index] = line + (target - len(line)) * '<'

        lenghtOfCharaters = sum(len(line) for line in capital_strings)
        return lenghtOfCharaters, capital_strings

    except Exception as e:
        print("[Exception in mrz_corrector] : ", str(e))
        lenghtOfCharaters = 0
        capital_strings = []
        return lenghtOfCharaters, capital_strings
|
app/images/idcards/1.png
ADDED
app/images/idcards/BlacksSharpen.jpg
ADDED
app/images/idcards/EO3kzEEUcAEeNbf.jpg
ADDED
app/images/idcards/Genunie-ID-Card-Online.jpg
ADDED
app/images/idcards/IMG_20221130_180809.jpg
ADDED
Git LFS Details
|
app/images/idcards/IMG_20230210_171555.jpg
ADDED
app/images/idcards/IMG_20230210_171610.jpg
ADDED
app/images/idcards/ShahzainCNIC.jpg
ADDED
Git LFS Details
|
app/images/idcards/_13.jpg
ADDED
app/images/idcards/aadhaar backside image .jpg
ADDED
Git LFS Details
|
app/images/idcards/aadhaar frontside image.jpg
ADDED
Git LFS Details
|
app/images/idcards/cnic.jpg
ADDED
app/images/idcards/d.jpg
ADDED
Git LFS Details
|
app/images/idcards/driving licence backside image.jpg
ADDED
Git LFS Details
|
app/images/idcards/driving licence frontside image.jpg
ADDED
Git LFS Details
|
app/images/idcards/e4d62a4127719bb07ed88275c3802bf907f026978d1de2a531c1e7967e60bea9.webp
ADDED
app/images/idcards/front.jpg
ADDED
Git LFS Details
|
app/images/idcards/image.jpg
ADDED