Spaces:

ShahzainHaider
/

OCR

Build error

OCR / test.py

Upload folder using huggingface_hub

7bbae49 over 1 year ago

834 Bytes

	import re

	def identify_document_id(data_list):
	    """Return the identifier that follows a known label in *data_list*.

	    Each entry of ``data_list`` is expected to be a string. The entries are
	    scanned in order for one of the label keywords ("Document ID",
	    "Passport Number", ...); the first alphanumeric token found *after* the
	    label in that same entry is returned.

	    Args:
	        data_list: Iterable of text fragments to search.

	    Returns:
	        The first alphanumeric token following a label, or ``None`` when no
	        entry contains a label followed by such a token.
	    """
	    keywords = ["Document ID", "Document Number", "Passport Number", "ID Number"]  # Add other possible keywords
	    token_pattern = re.compile(r'\b[A-Za-z0-9]+\b')

	    for item in data_list:
	        # End offsets of every label present in this entry.
	        label_ends = [
	            item.find(keyword) + len(keyword)
	            for keyword in keywords
	            if keyword in item
	        ]
	        if not label_ends:
	            continue

	        # Search only the text after the label; scanning the whole entry
	        # would return a word of the label itself (e.g. "Document").
	        # Using the furthest end handles overlapping labels such as
	        # "Document ID Number", where "Number" is still part of the label.
	        remainder = item[max(label_ends):]
	        match = token_pattern.search(remainder)
	        # Additional checks for format and length can be added here
	        if match:
	            return match.group(0)
	        # Label without a value in this entry: keep looking in later entries.

	    return None

	# Demo run: feed a sample list of text fragments (apparently OCR output
	# from an ID card) to identify_document_id and print whatever it returns.
	data_list = [
	    "Govermment of the People's Republic of Bangladesh",
	    "NationalIDCard",
	    "12May 1975",
	    "HETH",
	    "Caaaat",
	    "Name",
	    "ROMANARAHMAN",
	    "Date of tn 12 May 1975",
	    "8673674936",
	]
	document_id = identify_document_id(data_list)
	print(document_id)