# Spaces: ShahzainHaider / OCR -- Build error -- File size: 2,235 Bytes

from fastapi import FastAPI
from fastapi import FastAPI, File, UploadFile
from app.extract_country.country_dictionary import find_country
from app.extract_dates.date_engine import date_extractor
from app.extract_gender.gender_extractor import gender_extract
from app.extract_identity_number.doc_number_extractor import doc_number
from app.ocr_engine.ocr import OCR
from paddleocr import PaddleOCR
from app.layoutLM_api.api import custom_ocr

# FastAPI application instance; the `/extract_info` route is registered on it.
app = FastAPI() 

# NOTE(review): `origins` is not referenced anywhere in the visible code --
# presumably intended as a CORS allow-list; confirm it is used or remove it.
origins = ["*"] 


# Process-wide PaddleOCR engine, created lazily by _get_paddle_ocr().
_paddle_engine = None


def _get_paddle_ocr():
    """Return the shared PaddleOCR engine, building it on first use.

    Constructing PaddleOCR loads (and on first run downloads) the model, so
    it is done once per process instead of once per request.
    """
    global _paddle_engine
    if _paddle_engine is None:
        _paddle_engine = PaddleOCR(use_angle_cls=True, lang='en')
    return _paddle_engine


def _read_text_lines(file_path):
    """Run PaddleOCR on ``file_path`` and return the recognised text strings."""
    result = _get_paddle_ocr().ocr(file_path, cls=True)
    # NOTE(review): some PaddleOCR versions appear to yield None for a page
    # with no detected text; guard so an empty image gives [] rather than a
    # TypeError -- confirm against the installed version.
    return [line[1][0] for page in (result or []) for line in (page or [])]


@app.post('/extract_info')
async def ocr(Id_card: UploadFile = File(...)):
    """Extract identity fields from an uploaded ID-card image.

    The upload is written to disk and passed to ``custom_ocr``. Any of the
    fields ``gender``, ``dob``, ``country`` or ``document_number`` that come
    back empty, ``None`` or absent are then filled from raw PaddleOCR text
    using the dedicated fallback extractors; a field the fallback cannot
    find is set to ``None``.

    Args:
        Id_card: The uploaded image file.

    Returns:
        A dict ``{"Status": 200, "OCR": <field dictionary>}``. ``Status`` is
        a field of the payload, not the HTTP status code.
    """
    # NOTE(review): every request writes to the same path; with several
    # worker processes concurrent uploads could overwrite each other.
    file_path = 'app/images/idcards/input.jpg'

    with open(file_path, "wb") as file_object:
        file_object.write(await Id_card.read())

    card_type = "passport"
    dictionary = custom_ocr(file_path, card_type)

    # (dictionary key, log message, fallback extractor). Each extractor takes
    # the list of OCR text lines and returns (found, value, ocr_list).
    # The order matches the original field-by-field checks.
    fallbacks = (
        ("gender", "Gender Missing", gender_extract),
        ("dob", "Dob Missing", date_extractor),
        ("country", "Country Missing", find_country),
        ("document_number", "document Number missing", doc_number),
    )
    # Treat '', None and absent keys alike as "missing".
    missing = [entry for entry in fallbacks if not dictionary.get(entry[0])]

    if missing:
        print("Missing value found in Dic")

        # Only pay for the PaddleOCR pass when a fallback is actually needed.
        extract_text = _read_text_lines(file_path)
        print("extract_text", extract_text)

        for key, message, extractor in missing:
            print(message)
            found, value, _ = extractor(extract_text)
            dictionary[key] = value if found else None

    print("Updated Dict ", dictionary)
    return {"Status": 200, "OCR": dictionary}