Spaces:

ShahzainHaider
/

OCR

Build error

OCR

File size: 834 Bytes

7bbae49

import re

def identify_document_id(data_list):
    """Return the document ID that follows a known label in OCR text lines.

    Each entry of ``data_list`` is scanned for one of the known labels
    ("Document ID", "Document Number", "Passport Number", "ID Number").
    The first alphanumeric token that appears *after* the label is returned.
    If the label is the last thing in its entry, the first token of the next
    entry is used instead (a heuristic for output where label and value are
    separate list entries).

    Args:
        data_list: Iterable of text strings. ``None`` is treated as empty and
            non-string entries are skipped.

    Returns:
        The extracted ID as a string, or ``None`` when no label with a
        following value is found.
    """
    keywords = ["Document ID", "Document Number", "Passport Number", "ID Number"]  # Add other possible keywords
    token_pattern = r'\b[A-Za-z0-9]+\b'

    entries = list(data_list or ())
    for index, item in enumerate(entries):
        if not isinstance(item, str):
            continue

        # End offsets of every label present in this entry. Using the label
        # that ends latest keeps overlapping labels from leaking into the
        # value, e.g. "Document ID Number: 42" must not yield "Number".
        label_ends = [item.find(keyword) + len(keyword) for keyword in keywords if keyword in item]
        if not label_ends:
            continue

        # Search only the text after the label; searching the whole entry
        # would return the first word of the label itself.
        found = re.search(token_pattern, item[max(label_ends):])
        if found:
            return found.group(0)

        # Label with no value in the same entry: try the following entry.
        if index + 1 < len(entries) and isinstance(entries[index + 1], str):
            found = re.search(token_pattern, entries[index + 1])
            if found:
                return found.group(0)

    return None

# Sample run: OCR text lines read from a national ID card, one string per
# detected text region. None of these entries contains a keyword label, so
# the lookup below prints None.
data_list = [
    "Govermment of the People's Republic of Bangladesh",
    "NationalIDCard",
    "12May 1975",
    "HETH",
    "Caaaat",
    "Name",
    "ROMANARAHMAN",
    "Date of tn 12 May 1975",
    "8673674936",
]
document_id = identify_document_id(data_list)
print(document_id)