OCR / app /extract_country /country_dictionary.py
ShahzainHaider's picture
Upload folder using huggingface_hub
7bbae49
raw
history blame
1.73 kB
# Lookup table used by find_country().
#
# Each key is the country label handed back to the caller; each value is the
# list of substrings that identify that country.  find_country() upper-cases
# both the alias and the OCR text before testing containment, so the letter
# case of the aliases below does not affect matching.  Entry order matters:
# the first alias that matches wins.
#
# NOTE(review): several aliases have no spaces (e.g. "DECHILE", "COSTARICA");
# presumably this mirrors how the OCR engine emits the text -- confirm.
# NOTE(review): the keys "CHILLE" and "Coasta Rica" look like misspellings of
# Chile / Costa Rica.  They are left untouched because the key is returned to
# callers; confirm nothing depends on the current spelling before renaming.
# NOTE(review): the aliases for LIBYA, SOUTH SUDAN and the "CASADO" alias for
# PERU do not obviously name those countries -- verify against sample
# documents.
country_data = {
    "PAKISTAN": ["PAK", "PAKISTAN"],
    "SWITZERLAND": [
        "SCHWEIZERISCHE EIDGENOSSENSCHAFT",
        "CONFEDERATION SUISSE",
        "CONFEDERAZIONESVIZZERA",
        "CONFEDERAZIUN SVIZRA",
        "SWISS CONFEDERATION",
    ],
    "INDIA": ["INDIA", "Government of India"],
    "GERMANY": [
        "Bundesrepublik Deutschland",
        "Germany",
        "Westdeutschland",
        "Ostdeutschland",
    ],
    "BANGLADESH": ["Bangladesh"],
    "UNITED KINGDOM": ["British Citizen", "UNITED KINGDOM"],
    "NETHERLANDS": ["NETHERLANDS", "NEDERLANDSE", "NEDERLANDEN"],
    "CANADA": ["canada"],
    "UNITED ARAB EMIRATES": ["Arab Emirates", "UAE"],
    "OMAN": ["OMAN"],
    "JORDAN": ["jordan"],
    "BAHRAIN": ["BAHRAIN"],
    "KUWAIT": ["KUWAIT"],
    "QATAR": ["Qatar"],
    "LIBYA": ["AFRiN MAHALLi MECLiSi"],
    "SOUTH SUDAN": ["Akon"],
    "CHILLE": ["DECHILE"],
    "COLOMBIA": ["COLOMBIA"],
    "BRAZIL": ["BRAZIL", "BRASIL"],
    "PERU": ["DELPERU", "CASADO"],
    "URUGUAY": ["DELURUGUAY"],
    "Coasta Rica": ["COSTARICA"],
    "PARAGUAY": ["PARAGUAY"],
    "ECUADOR": ["ECUADOR"],
    "GUATEMALA": ["GUATEMALA"],
    "Bolivia": ["Bolivariano", "Bolivia"],
    "El Salvador": ["Salvador"],
    "Dominican Republic": ["REPUBLICADOMINICANA"],
}
def find_country(ocr_list, country_aliases=None):
    """Return the first country whose alias occurs in any OCR text fragment.

    Fragments are scanned in order; for each fragment the countries are tried
    in table order and their aliases in list order.  Matching is a
    case-insensitive substring test (both sides are upper-cased).

    Args:
        ocr_list: Iterable of text fragments to search.  Entries that are not
            strings are skipped.  Presumably this is the list of lines
            produced by the OCR step -- confirm against the caller.
        country_aliases: Optional mapping of country label -> list of alias
            strings.  When omitted, the module-level ``country_data`` table
            is used.

    Returns:
        A 3-tuple ``(found, country, ocr_list)``: ``(True, <label>, ocr_list)``
        for the first match, otherwise ``(False, None, ocr_list)``.  The
        input is always handed back unchanged as the third element.

    This function never raises: any error (for example ``ocr_list`` being
    ``None``) is printed and reported as "not found", so callers can treat
    it as a best-effort lookup.
    """
    try:
        table = country_data if country_aliases is None else country_aliases

        # Upper-case every alias once per call rather than once per fragment.
        # Empty aliases are dropped: "" is a substring of every string and
        # would make the first country match unconditionally.
        candidates = [
            (country, alias.upper())
            for country, aliases in table.items()
            for alias in aliases
            if alias
        ]

        for fragment in ocr_list:
            if not isinstance(fragment, str):
                # A stray non-text entry should not abort the whole search.
                continue
            haystack = fragment.upper()
            for country, needle in candidates:
                if needle in haystack:
                    return True, country, ocr_list
    except Exception as e:
        # Deliberate catch-all: preserves the original "never raise" contract.
        print(f"An error occurred: {e}")

    # Reached only after every fragment and alias was checked (or on error).
    return False, None, ocr_list