Spaces:
Build error
Build error
import gradio as gr | |
from app.NER.NER import name_entity_recognizer | |
from app.extract_country.country_dictionary import find_country | |
from app.extract_dates.date_engine import date_extractor | |
from app.extract_gender.gender_extractor import gender_extract | |
from app.extract_identity_number.doc_number_extractor import doc_number | |
import cv2 | |
from app.functions import get_index, get_latest_value_from_csv, image_enhacement | |
import json | |
def ocr(card_type_input, image_input, webcam_input):
    """Run OCR on an ID-card image and return the extracted fields.

    The uploaded image is used when it is present and ``image_input.any()`` is
    true; otherwise the webcam frame, flipped horizontally, is used. The chosen
    image is written to a fixed path, passed to ``image_enhacement`` and then to
    ``name_entity_recognizer``. Every field that is still empty afterwards is
    looked up with its fallback extractor on the space-joined OCR text.

    Args:
        card_type_input: Selected card type; not used by this function.
        image_input: Uploaded image, or ``None`` (presumably a NumPy array,
            since ``.any()`` is called on it -- confirm against the caller).
        webcam_input: Webcam frame, or ``None``.

    Returns:
        A ``(response, extract_text)`` tuple. ``response`` is
        ``{"Status": 200, "OCR": dictionary}`` and ``extract_text`` is the
        space-joined recognised text.

    Raises:
        ValueError: If the uploaded image is unusable and no webcam frame was
            supplied.
    """
    # The result dictionary is published as a module-level global, as in the
    # original implementation.
    global dictionary
    dictionary = {
        "NAME": "",
        "DOB": "",
        "COUNTRY": "",
        "GENDER": "",
        "DOCUMENT NUMBER": "",
    }
    file_path = "app/images/idcards/input.jpg"

    if image_input is not None and image_input.any():
        print("In input image")
        cv2.imwrite(file_path, image_input)
    else:
        print("In webcam")
        if webcam_input is None:
            # Fail with a readable message instead of an opaque OpenCV error.
            raise ValueError(
                "No image provided: upload an image or capture one from the webcam."
            )
        cv2.imwrite(file_path, cv2.flip(webcam_input, 1))
    image_enhacement(file_path)

    dictionary, _ner_check, result = name_entity_recognizer(file_path, dictionary)
    extract_text = " ".join(line[1][0] for res in result for line in res)
    print("extract_text", extract_text)

    if "" in dictionary.values():
        print("Missing value found in Dic")
        # (field key, log message, fallback extractor). Each extractor returns
        # a (found, value, ocr_list) triple; only the first two are used.
        fallbacks = (
            ("GENDER", "Gender Missing", gender_extract),
            ("DOB", "Dob Missing", date_extractor),
            ("COUNTRY", "Country Missing", find_country),
            ("DOCUMENT NUMBER", "document Number missing", doc_number),
        )
        for key, message, extractor in fallbacks:
            if dictionary[key]:
                continue
            print(message)
            found, value, _ocr_list = extractor(extract_text)
            # Write back under the same upper-case key that was checked, so the
            # fallback value actually fills the reported field.
            dictionary[key] = value if found else None

    # bool_check , training_data = get_latest_value_from_csv(extract_text)
    # print(training_data)
    print("Updated Dict ", dictionary)
    response = {"Status": 200, "OCR": dictionary}
    return response, extract_text
def save_traning_data(
    name_input, dob_input, country_input, gender_input, doc_num_input, string_output
):
    """Append one annotated OCR sample to the training-data JSON file.

    For every non-empty field value, ``get_index`` is called with the OCR text
    and the value, and a ``[start_index, end_index, LABEL]`` entry is recorded.
    The sample ``[string_output, {"entities": [...]}]`` is then appended to the
    ``"annotations"`` list of ``app/training_data/annotated_data.json`` and the
    file is rewritten.

    Args:
        name_input: Corrected name, or an empty value to skip the field.
        dob_input: Corrected date of birth, or an empty value to skip.
        country_input: Corrected country, or an empty value to skip.
        gender_input: Corrected gender, or an empty value to skip.
        doc_num_input: Corrected document number, or an empty value to skip.
        string_output: The OCR text the field values are located in.

    Returns:
        None.
    """
    print(name_input, dob_input, country_input, gender_input, doc_num_input)
    print("Saving data")

    labelled_values = (
        ("NAME", name_input),
        ("DOB", dob_input),
        ("COUNTRY", country_input),
        ("GENDER", gender_input),
        ("DOCUMENT NUMBER", doc_num_input),
    )
    entities_list = []
    for label, value in labelled_values:
        if not value:
            continue
        start_index, end_index = get_index(string_output, value)
        entities_list.append([start_index, end_index, label])
    print("entities_list", entities_list)

    annotations_path = "app/training_data/annotated_data.json"
    # Explicit encoding keeps reads and writes independent of the platform default.
    with open(annotations_path, encoding="utf-8") as f:
        data = json.load(f)

    data["annotations"].append([string_output, {"entities": entities_list}])

    with open(annotations_path, "w", encoding="utf-8") as f:
        json.dump(data, f)
# text = "PAKISTAN Nationalldentity Card ISLAMIC REPUBLIC OF PAKISTAN Name Muhammad Owais Siddiqui Father Name Zia Ud Din Siddiqui Country of Stay Gender M Pakistan Identity Number Date of Birth 42201-1177064-3 11.08.2001 Date of Expiry Date of Issue 24.09.2019 24.09.2029 Holder's Signature" | |
# save_traning_data( | |
# "Muhammad Owais Siddiqui", "Card", "REPUBLIC", "PAKISTAN", "Nationalldentity",text | |
# ) | |
# Build the Gradio UI: image inputs and OCR outputs, followed by the editable
# fields and the button used to save a corrected sample as training data.
# Components are created from the top-level ``gr`` namespace; the
# ``gr.inputs`` / ``gr.outputs`` wrappers are deprecated in Gradio 3.
# NOTE(review): the Row/Column nesting below was reconstructed from a source
# whose indentation was lost -- confirm the intended layout.
with gr.Blocks() as interface:
    with gr.Row():
        with gr.Column():
            card_type_input = gr.Dropdown(
                ["passport", "id_card"], label="Card Type", value="id_card"
            )
            image_input = gr.Image(label="Upload Image")
            webcam_input = gr.Image(label="Webcam", source="webcam")
        with gr.Column():
            # Define the output components
            json_output = gr.JSON(label="JSON Output")
            string_output = gr.Textbox(label="OCR Extracted Text")

    ocr_btn = gr.Button("Process OCR")

    # Correction fields; any of them may be left empty.
    name_input = gr.Textbox(label="NAME", type="text")
    dob_input = gr.Textbox(label="DOB", type="text")
    country_input = gr.Textbox(label="COUNTRY", type="text")
    gender_input = gr.Textbox(label="GENDER", type="text")
    doc_num_input = gr.Textbox(label="DOCUMENT NUMBER", type="text")
    save_btn = gr.Button("Save Data")

    ocr_btn.click(
        fn=ocr,
        inputs=[card_type_input, image_input, webcam_input],
        outputs=[json_output, string_output],
    )
    # The OCR text box doubles as an input so the saved sample uses the text
    # currently shown to the user.
    save_btn.click(
        fn=save_traning_data,
        inputs=[
            name_input,
            dob_input,
            country_input,
            gender_input,
            doc_num_input,
            string_output,
        ],
    )

interface.launch(share=True, show_tips=True, debug=True)