Spaces:
Runtime error
Runtime error
import re | |
import string | |
# ----------------------------- indic_num.py ----------------------------- | |
# Two-letter language codes accepted by num_to_word / normalize_nums.
supported_lang = {"en", "hi", "gu", "mr", "bn", "te", "ta", "kn", "or", "pa"}
# supported_lang = {'eng', 'hin', 'guj', 'mar', 'ben', 'tel', 'tam', 'kan', 'ori', 'pan'} # Three alphabet lang code

# Native digits of every supported language; the list index is the digit's
# value, so all_num[lang][7] is that script's "7".
all_num = {
    "en": ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"],
    "hi": ["०", "१", "२", "३", "४", "५", "६", "७", "८", "९"],
    "gu": ["૦", "૧", "૨", "૩", "૪", "૫", "૬", "૭", "૮", "૯"],
    "mr": ["०", "१", "२", "३", "४", "५", "६", "७", "८", "९"],
    "bn": ["০", "১", "২", "৩", "৪", "৫", "৬", "৭", "৮", "৯"],
    "te": ["౦", "౧", "౨", "౩", "౪", "౫", "౬", "౭", "౮", "౯"],
    # Index 0 is TAMIL DIGIT ZERO (U+0BE6), written as an escape so it cannot
    # be confused with the look-alike zeros of neighbouring scripts.  It used
    # to be an ASCII "0", which left a native Tamil zero unconverted.
    # The 11th entry is the Tamil sign for ten; num_to_word only reads the
    # first ten entries, it is kept so the list shape does not change.
    "ta": ["\u0be6", "௧", "௨", "௩", "௪", "௫", "௬", "௭", "௮", "௯", "௰"],
    "kn": ["೦", "೧", "೨", "೩", "೪", "೫", "೬", "೭", "೮", "೯"],
    "or": ["୦", "୧", "୨", "୩", "୪", "୫", "୬", "୭", "୮", "୯"],
    "pa": ["੦", "੧", "੨", "੩", "੪", "੫", "੬", "੭", "੮", "੯"],
}
# Per-language lookup: decimal string ("0".."99" and the magnitude values)
# -> the word used for it.  Tables for the other languages are added below.
num_dict = {}


def _english_number_words():
    """Build the English (Indian numbering) table for 0-99 plus magnitudes."""
    below_twenty = (
        "zero one two three four five six seven eight nine ten eleven "
        "twelve thirteen fourteen fifteen sixteen seventeen eighteen nineteen"
    ).split()
    tens_words = "twenty thirty forty fifty sixty seventy eighty ninety".split()

    table = {str(value): word for value, word in enumerate(below_twenty)}
    for tens_digit, tens_word in enumerate(tens_words, start=2):
        table[str(tens_digit * 10)] = tens_word
        for unit in range(1, 10):
            # Compound numbers are hyphenated: "twenty-one", "ninety-nine".
            table[str(tens_digit * 10 + unit)] = tens_word + "-" + below_twenty[unit]

    # Magnitude words follow the Indian system (lac, crore, arab).
    table["100"] = "hundred"
    table["1000"] = "thousand"
    table["100000"] = "lac"
    table["10000000"] = "crore"
    table["1000000000"] = "arab"
    return table


num_dict["en"] = _english_number_words()  # English-India
num_dict["hi"] = { | |
"0": "शून्य", | |
"1": "एक", | |
"2": "दो", | |
"3": "तीन", | |
"4": "चार", | |
"5": "पाँच", | |
"6": "छः", | |
"7": "सात", | |
"8": "आठ", | |
"9": "नौ", | |
"10": "दस", | |
"11": "ग्यारह", | |
"12": "बारह", | |
"13": "तेरह", | |
"14": "चौदह", | |
"15": "पंद्रह", | |
"16": "सोलह", | |
"17": "सत्रह", | |
"18": "अट्ठारह", | |
"19": "उन्नीस", | |
"20": "बीस", | |
"21": "इक्कीस", | |
"22": "बाईस", | |
"23": "तेईस", | |
"24": "चौबिस", | |
"25": "पच्चीस", | |
"26": "छब्बीस", | |
"27": "सत्ताईस", | |
"28": "अट्ठाईस", | |
"29": "उनतीस", | |
"30": "तीस", | |
"31": "इकतीस", | |
"32": "बत्तीस", | |
"33": "तैंतीस", | |
"34": "चौंतीस", | |
"35": "पैंतीस", | |
"36": "छत्तीस", | |
"37": "सैंतीस", | |
"38": "अड़तीस", | |
"39": "उनतालीस", | |
"40": "चालीस", | |
"41": "इकतालीस", | |
"42": "बयालीस", | |
"43": "तैंतालीस", | |
"44": "चौंतालीस", | |
"45": "पैंतालीस", | |
"46": "छियालीस", | |
"47": "सैंतालीस", | |
"48": "अड़तालीस", | |
"49": "उनचास", | |
"50": "पचास", | |
"51": "इक्यावन", | |
"52": "बावन", | |
"53": "तिरेपन", | |
"54": "चौवन", | |
"55": "पचपन", | |
"56": "छप्पन", | |
"57": "सत्तावन", | |
"58": "अट्ठावन", | |
"59": "उनसठ", | |
"60": "साठ", | |
"61": "इकसठ", | |
"62": "बासठ", | |
"63": "तिरेसठ", | |
"64": "चौंसठ", | |
"65": "पैंसठ", | |
"66": "छयासठ", | |
"67": "सरसठ", | |
"68": "अड़सठ", | |
"69": "उनहत्तर", | |
"70": "सत्तर", | |
"71": "इकहत्तर", | |
"72": "बहत्तर", | |
"73": "तिहत्तर", | |
"74": "चौहत्तर", | |
"75": "पचहत्तर", | |
"76": "छिहत्तर", | |
"77": "सतहत्तर", | |
"78": "अठहत्तर", | |
"79": "उन्यासी", | |
"80": "अस्सी", | |
"81": "इक्यासी", | |
"82": "बयासी", | |
"83": "तिरासी", | |
"84": "चौरासी", | |
"85": "पचासी", | |
"86": "छियासी", | |
"87": "सत्तासी", | |
"88": "अठासी", | |
"89": "नवासी", | |
"90": "नब्बे", | |
"91": "इक्यानवे", | |
"92": "बानवे", | |
"93": "तिरानवे", | |
"94": "चौरानवे", | |
"95": "पचानवे", | |
"96": "छियानवे", | |
"97": "सत्तानवे", | |
"98": "अट्ठानवे", | |
"99": "निन्यानवे", | |
"100": "सौ", | |
"1000": "हज़ार", | |
"100000": "लाख", | |
"10000000": "करोड़", | |
"1000000000": "अरब", | |
} # Hindi | |
num_dict["gu"] = { | |
"0": "શૂન્ય", | |
"1": "એક", | |
"2": "બે", | |
"3": "ત્રણ", | |
"4": "ચાર", | |
"5": "પાંચ", | |
"6": "છ", | |
"7": "સાત", | |
"8": "આઠ", | |
"9": "નવ", | |
"10": "દસ", | |
"11": "અગિયાર", | |
"12": "બાર", | |
"13": "તેર", | |
"14": "ચૌદ", | |
"15": "પંદર", | |
"16": "સોળ", | |
"17": "સત્તર", | |
"18": "અઢાર", | |
"19": "ઓગણિસ", | |
"20": "વીસ", | |
"21": "એકવીસ", | |
"22": "બાવીસ", | |
"23": "તેવીસ", | |
"24": "ચોવીસ", | |
"25": "પચ્ચીસ", | |
"26": "છવીસ", | |
"27": "સત્તાવીસ", | |
"28": "અઠ્ઠાવીસ", | |
"29": "ઓગણત્રીસ", | |
"30": "ત્રીસ", | |
"31": "એકત્રીસ", | |
"32": "બત્રીસ", | |
"33": "તેત્રીસ", | |
"34": "ચોત્રીસ", | |
"35": "પાંત્રીસ", | |
"36": "છત્રીસ", | |
"37": "સડત્રીસ", | |
"38": "અડત્રીસ", | |
"39": "ઓગણચાલીસ", | |
"40": "ચાલીસ", | |
"41": "એકતાલીસ", | |
"42": "બેતાલીસ", | |
"43": "ત્રેતાલીસ", | |
"44": "ચુંમાલીસ", | |
"45": "પિસ્તાલીસ", | |
"46": "છેતાલીસ", | |
"47": "સુડતાલીસ", | |
"48": "અડતાલીસ", | |
"49": "ઓગણપચાસ", | |
"50": "પચાસ", | |
"51": "એકાવન", | |
"52": "બાવન", | |
"53": "ત્રેપન", | |
"54": "ચોપન", | |
"55": "પંચાવન", | |
"56": "છપ્પન", | |
"57": "સત્તાવન", | |
"58": "અઠ્ઠાવન", | |
"59": "ઓગણસાઠ", | |
"60": "સાઈઠ", | |
"61": "એકસઠ", | |
"62": "બાસઠ", | |
"63": "ત્રેસઠ", | |
"64": "ચોસઠ", | |
"65": "પાંસઠ", | |
"66": "છાસઠ", | |
"67": "સડસઠ", | |
"68": "અડસઠ", | |
"69": "અગણોસિત્તેર", | |
"70": "સિત્તેર", | |
"71": "એકોતેર", | |
"72": "બોતેર", | |
"73": "તોતેર", | |
"74": "ચુમોતેર", | |
"75": "પંચોતેર", | |
"76": "છોતેર", | |
"77": "સિત્યોતેર", | |
"78": "ઇઠ્યોતેર", | |
"79": "ઓગણાએંસી", | |
"80": "એંસી", | |
"81": "એક્યાસી", | |
"82": "બ્યાસી", | |
"83": "ત્યાસી", | |
"84": "ચોર્યાસી", | |
"85": "પંચાસી", | |
"86": "છ્યાસી", | |
"87": "સિત્યાસી", | |
"88": "ઈઠ્યાસી", | |
"89": "નેવ્યાસી", | |
"90": "નેવું", | |
"91": "એકાણું", | |
"92": "બાણું", | |
"93": "ત્રાણું", | |
"94": "ચોરાણું", | |
"95": "પંચાણું", | |
"96": "છન્નું", | |
"97": "સત્તાણું", | |
"98": "અઠ્ઠાણું", | |
"99": "નવ્વાણું", | |
"100": "સો", | |
"1000": "હજાર", | |
"100000": "લાખ", | |
"1000000": "દસ લાખ", | |
"10000000": "કરોડ઼", | |
} # Gujarati | |
num_dict["mr"] = { | |
"0": "शून्य", | |
"1": "एक", | |
"2": "दोन", | |
"3": "तीन", | |
"4": "चार", | |
"5": "पाच", | |
"6": "सहा", | |
"7": "सात", | |
"8": "आठ", | |
"9": "नऊ", | |
"10": "दहा", | |
"11": "अकरा", | |
"12": "बारा", | |
"13": "तेरा", | |
"14": "चौदा", | |
"15": "पंधरा", | |
"16": "सोळा", | |
"17": "सतरा", | |
"18": "अठरा", | |
"19": "एकोणीस", | |
"20": "वीस", | |
"21": "एकवीस", | |
"22": "बावीस", | |
"23": "तेवीस", | |
"24": "चोवीस", | |
"25": "पंचवीस", | |
"26": "सव्वीस", | |
"27": "सत्तावीस", | |
"28": "अठ्ठावीस", | |
"29": "एकोणतीस", | |
"30": "तीस", | |
"31": "एकतीस", | |
"32": "बत्तीस", | |
"33": "तेहेतीस", | |
"34": "चौतीस", | |
"35": "पस्तीस", | |
"36": "छत्तीस", | |
"37": "सदतीस", | |
"38": "अडतीस", | |
"39": "एकोणचाळीस", | |
"40": "चाळीस", | |
"41": "एक्केचाळीस", | |
"42": "बेचाळीस", | |
"43": "त्रेचाळीस", | |
"44": "चव्वेचाळीस", | |
"45": "पंचेचाळीस", | |
"46": "सेहेचाळीस", | |
"47": "सत्तेचाळीस", | |
"48": "अठ्ठेचाळीस", | |
"49": "एकोणपन्नास", | |
"50": "पन्नास", | |
"51": "एक्कावन्न", | |
"52": "बावन्न", | |
"53": "त्रेपन्न", | |
"54": "चोपन्न", | |
"55": "पंचावन्न", | |
"56": "छप्पन्न", | |
"57": "सत्तावन्न", | |
"58": "अठ्ठावन्न", | |
"59": "एकोणसाठ", | |
"60": "साठ", | |
"61": "एकसष्ठ", | |
"62": "बासष्ठ", | |
"63": "त्रेसष्ठ", | |
"64": "चौसष्ठ", | |
"65": "पासष्ठ", | |
"66": "सहासष्ठ", | |
"67": "सदुसष्ठ", | |
"68": "अडुसष्ठ", | |
"69": "एकोणसत्तर", | |
"70": "सत्तर", | |
"71": "एक्काहत्तर", | |
"72": "बाहत्तर", | |
"73": "त्र्याहत्तर", | |
"74": "चौर्याहत्तर", | |
"75": "पंच्याहत्तर", | |
"76": "शहात्तर", | |
"77": "सत्याहत्तर", | |
"78": "अठ्ठ्याहत्तर", | |
"79": "एकोण ऐंशी", | |
"80": "ऐंशी", | |
"81": "एक्क्याऐंशी", | |
"82": "ब्याऐंशी", | |
"83": "त्र्याऐंशी", | |
"84": "चौऱ्याऐंशी", | |
"85": "पंच्याऐंशी", | |
"86": "शहाऐंशी", | |
"87": "सत्त्याऐंशी", | |
"88": "अठ्ठ्याऐंशी", | |
"89": "एकोणनव्वद", | |
"90": "नव्वद", | |
"91": "एक्क्याण्णव", | |
"92": "ब्याण्णव", | |
"93": "त्र्याण्णव", | |
"94": "चौऱ्याण्णव", | |
"95": "पंच्याण्णव", | |
"96": "शहाण्णव", | |
"97": "सत्त्याण्णव", | |
"98": "अठ्ठ्याण्णव", | |
"99": "नव्व्याण्णव", | |
"100": "शे", | |
"1000": "हजार", | |
"100000": "लाख", | |
"10000000": "कोटी", | |
"1000000000": "अब्ज", | |
} # Marathi | |
num_dict["bn"] = { | |
"0": "শূন্য", | |
"1": "এক", | |
"2": "দুই", | |
"3": "তিন", | |
"4": "চার", | |
"5": "পাঁচ", | |
"6": "ছয়", | |
"7": "সাত", | |
"8": "আট", | |
"9": "নয়", | |
"10": "দশ", | |
"11": "এগার", | |
"12": "বার", | |
"13": "তের", | |
"14": "চৌদ্দ", | |
"15": "পনের", | |
"16": "ষোল", | |
"17": "সতের", | |
"18": "আঠার", | |
"19": "ঊনিশ", | |
"20": "বিশ", | |
"21": "একুশ", | |
"22": "বাইশ", | |
"23": "তেইশ", | |
"24": "চব্বিশ", | |
"25": "পঁচিশ", | |
"26": "ছাব্বিশ", | |
"27": "সাতাশ", | |
"28": "আঠাশ", | |
"29": "ঊনত্রিশ", | |
"30": "ত্রিশ", | |
"31": "একত্রিশ", | |
"32": "বত্রিশ", | |
"33": "তেত্রিশ", | |
"34": "চৌত্রিশ", | |
"35": "পঁয়ত্রিশ", | |
"36": "ছত্রিশ", | |
"37": "সাঁইত্রিশ", | |
"38": "আটত্রিশ", | |
"39": "ঊনচল্লিশ", | |
"40": "চল্লিশ", | |
"41": "একচল্লিশ", | |
"42": "বিয়াল্লিশ", | |
"43": "তেতাল্লিশ", | |
"44": "চুয়াল্লিশ", | |
"45": "পঁয়তাল্লিশ", | |
"46": "ছেচল্লিশ", | |
"47": "সাতচল্লিশ", | |
"48": "আটচল্লিশ", | |
"49": "ঊনপঞ্চাশ", | |
"50": "পঞ্চাশ", | |
"51": "একান্ন", | |
"52": "বায়ান্ন", | |
"53": "তিপ্পান্ন", | |
"54": "চুয়ান্ন", | |
"55": "পঞ্চান্ন", | |
"56": "ছাপ্পান্ন", | |
"57": "সাতান্ন", | |
"58": "আটান্ন", | |
"59": "ঊনষাট", | |
"60": "ষাট", | |
"61": "একষট্টি", | |
"62": "বাষট্টি", | |
"63": "তেষট্টি", | |
"64": "চৌষট্টি", | |
"65": "পঁয়ষট্টি", | |
"66": "ছেষট্টি", | |
"67": "সাতষট্টি", | |
"68": "আটষট্টি", | |
"69": "ঊনসত্তর", | |
"70": "সত্তর", | |
"71": "একাত্তর", | |
"72": "বাহাত্তর", | |
"73": "তিয়াত্তর", | |
"74": "চুয়াত্তর", | |
"75": "পঁচাত্তর", | |
"76": "ছিয়াত্তর", | |
"77": "সাতাত্তর", | |
"78": "আটাত্তর", | |
"79": "ঊনআশি", | |
"80": "আশি", | |
"81": "একাশি", | |
"82": "বিরাশি", | |
"83": "তিরাশি", | |
"84": "চুরাশি", | |
"85": "পঁচাশি", | |
"86": "ছিয়াশি", | |
"87": "সাতাশি", | |
"88": "আটাশি", | |
"89": "ঊননব্বই", | |
"90": "নব্বই", | |
"91": "একানব্বই", | |
"92": "বিরানব্বই", | |
"93": "তিরানব্বই", | |
"94": "চুরানব্বই", | |
"95": "পঁচানব্বই", | |
"96": "ছিয়ানব্বই", | |
"97": "সাতানব্বই", | |
"98": "আটানব্বই", | |
"99": "নিরানব্বই", | |
"100": "শো", | |
"1000": "হাজার", | |
"100000": "লাখ", | |
"10000000": "কোটি", | |
"1000000000": "একশ’ কোটি", | |
} # Bengali | |
def _telugu_number_words():
    """Build the Telugu table for 0-99 plus the magnitude words."""
    below_twenty = [
        "సున్నా",
        "ఒకటి",
        "రెండు",
        "మూడు",
        "నాలుగు",
        "ఐదు",
        "ఆరు",
        "ఏడు",
        "ఎనిమిది",
        "తొమ్మిది",
        "పది",
        "పదకొండు",
        "పన్నెండు",
        "పదమూడు",
        "పద్నాలుగు",
        "పదిహేను",
        "పదహారు",
        "పదిహేడు",
        "పద్దెనిమిది",
        "పందొమ్మిది",
    ]
    tens_words = ["ఇరవై", "ముప్పై", "నలభై", "యాభై", "అరవై", "డెబ్బై", "ఎనభై", "తొంభై"]

    table = {str(value): word for value, word in enumerate(below_twenty)}
    for tens_digit, tens_word in enumerate(tens_words, start=2):
        table[str(tens_digit * 10)] = tens_word
        for unit in range(1, 10):
            # 21-99 are the tens word, a space, then the unit word.
            table[str(tens_digit * 10 + unit)] = tens_word + " " + below_twenty[unit]

    # Magnitude words as stored here are the forms used after a count;
    # language_specific_exception rewrites them where another form is needed.
    table["100"] = "వందల"
    table["1000"] = "వేల"
    table["100000"] = "లక్షల"
    table["10000000"] = "కోట్ల"
    table["1000000000"] = "బిలియన్"
    return table


num_dict["te"] = _telugu_number_words()  # Telugu
def _tamil_number_words():
    """Build the Tamil table for 0-99 plus the magnitude words.

    21-79 follow one pattern (combining form of the tens word, a space, the
    unit word) and are generated; 80-99 use fused spellings and are listed.
    Compared with the previous literal table this corrects:
    "21" (used the bare tens word instead of its combining form), "47" (had a
    leading space), "73" (unit word was not "three"), and "84"/"85" (did not
    follow the spelling pattern of the neighbouring 8x entries).
    """
    below_twenty = [
        "பூஜ்ஜியம்",
        "ஒன்று",
        "இரண்டு",
        "மூன்று",
        "நான்கு",
        "ஐந்து",
        "ஆறு",
        "ஏழு",
        "எட்டு",
        "ஒன்பது",
        "பத்து",
        "பதினொன்று",
        "பன்னிரண்டு",
        "பதிமூன்று",
        "பதினான்கு",
        "பதினைந்து",
        "பதினாறு",
        "பதினேழு",
        "பதினெட்டு",
        "பத்தொன்பது",
    ]
    # (word for the round ten, combining form used before a unit) for 20..70.
    tens_forms = [
        ("இருபது", "இருபத்து"),
        ("முப்பது", "முப்பத்து"),
        ("நாற்பது", "நாற்பத்து"),
        ("ஐம்பது", "ஐம்பத்து"),
        ("அறுபது", "அறுபத்து"),
        ("எழுபது", "எழுபத்தி"),
    ]
    eighty_to_ninety_nine = [
        "எண்பது",
        "எண்பத்தியொன்று",
        "எண்பத்திரண்டு",
        "எண்பத்திமூன்று",
        "எண்பத்திநான்கு",
        "எண்பத்திஐந்து",
        "எண்பத்திஆறு",
        "எண்பத்திஏழு",
        "எண்பத்தியெட்டு",
        "எண்பத்தியொன்பது",
        "தொன்னூறு",
        "தொண்ணூற்றியொன்று",
        "தொண்ணூற்றிரண்டு",
        "தொண்ணூற்றிமூன்று",
        "தொண்ணூற்றிநான்கு",
        "தொண்ணூற்றிஐந்து",
        "தொண்ணூற்றியாறு",
        "தொண்ணூற்றியேழு",
        "தொண்ணூற்றியெட்டு",
        "தொண்ணூற்றிஒன்பது",
    ]

    table = {str(value): word for value, word in enumerate(below_twenty)}
    for tens_digit, (round_word, combining) in enumerate(tens_forms, start=2):
        table[str(tens_digit * 10)] = round_word
        for unit in range(1, 10):
            table[str(tens_digit * 10 + unit)] = combining + " " + below_twenty[unit]
    for value, word in enumerate(eighty_to_ninety_nine, start=80):
        table[str(value)] = word

    table["100"] = "நூறு"
    table["1000"] = "ஆயிரம்"
    table["100000"] = "இலட்சம்"
    table["10000000"] = "கோடி"
    table["1000000000"] = "பில்லியன்"
    return table


num_dict["ta"] = _tamil_number_words()  # Tamil
num_dict["kn"] = { | |
"0": "ಸೊನ್ನೆ", | |
"1": "ಒಂದು", | |
"2": "ಎರಡು", | |
"3": "ಮೂರು", | |
"4": "ನಾಲ್ಕು", | |
"5": "ಅಯ್ದು", | |
"6": "ಆರು", | |
"7": "ಏಳು", | |
"8": "ಎಂಟು", | |
"9": "ಒಂಬತ್ತು", | |
"10": "ಹತ್ತು", | |
"11": "ಹನ್ನೊಂದು", | |
"12": "ಹನ್ನೆರಡು", | |
"13": "ಹದಿಮೂರು", | |
"14": "ಹದಿನಾಲ್ಕು", | |
"15": "ಹದಿನೈದು", | |
"16": "ಹದಿನಾರು", | |
"17": "ಹದಿನೇಳು", | |
"18": "ಹದಿನೆಂಟು", | |
"19": "ಹತ್ತೊಂಬತ್ತು", | |
"20": "ಇಪ್ಪತ್ತು", | |
"21": "ಇಪ್ಪತ್ತ್’ಒಂದು", | |
"22": "ಇಪ್ಪತ್ತ್’ಎರಡು", | |
"23": "ಇಪ್ಪತ್ತ್’ಮೂರು", | |
"24": "ಇಪ್ಪತ್ತ್’ನಾಲ್ಕು", | |
"25": "ಇಪ್ಪತ್ತ್’ಐದು", | |
"26": "ಇಪ್ಪತ್ತ್’ಆರು", | |
"27": "ಇಪ್ಪತ್ತ್’ಏಳು", | |
"28": "ಇಪ್ಪತ್ತ್’ಎಂಟು", | |
"29": "ಇಪ್ಪತ್ತ್’ಒಂಬತ್ತು", | |
"30": "ಮೂವತ್ತು", | |
"31": "ಮುವತ್ತ್’ಒಂದು", | |
"32": "ಮುವತ್ತ್’ಎರಡು", | |
"33": "ಮುವತ್ತ್’ಮೂರು", | |
"34": "ಮೂವತ್ತ್’ನಾಲ್ಕು", | |
"35": "ಮೂವತ್ತ್’ಐದು", | |
"36": "ಮೂವತ್ತ್’ಆರು", | |
"37": "ಮೂವತ್ತ್’ಏಳು", | |
"38": "ಮೂವತ್ತ್’ಎಂಟು", | |
"39": "ಮೂವತ್ತ್’ಒಂಬತ್ತು", | |
"40": "ನಲವತ್ತು", | |
"41": "ನಲವತ್ತೊಂದು", | |
"42": "ನಲವತ್ತ್ ಎರಡು", | |
"43": "ನಲವತ್ತ್ ಮೂರು", | |
"44": "ನಲವತ್ತ್ ನಾಲ್ಕು", | |
"45": "ನಲವತ್ತೈದು", | |
"46": "ನಲವತ್ತಾರು", | |
"47": "ನಲವತ್ತೇಳು", | |
"48": "ನಲವತ್ತೆಂಟು", | |
"49": "ನಲವತ್ತೊಂಬತ್ತು", | |
"50": "ಐವತ್ತು", | |
"51": "ಐವತ್ತೊಂದು", | |
"52": "ಐವತ್ತೆರಡು", | |
"53": "ಐವತ್ತಮೂರು", | |
"54": "ಐವತ್ತ್ನಾಲ್ಕು", | |
"55": "ಐವತ್ತೈದು", | |
"56": "ಐವತ್ತಾರು", | |
"57": "ಐವತ್ತೇಳು", | |
"58": "ಐವತ್ತೆಂಟು", | |
"59": "ಐವತ್ತೊಂಬತ್ತು", | |
"60": "ಅರವತ್ತು", | |
"61": "ಅರವತ್ತೊಂದು", | |
"62": "ಅರವತ್ತೆರಡು", | |
"63": "ಅರವತ್ತ್ ಮೂರು", | |
"64": "ಅರವತ್ತ್ ನಾಲ್ಕು", | |
"65": "ಅರವತ್ತೈದು", | |
"66": "ಅರವತ್ತಾರು", | |
"67": "ಅರವತ್ತೇಳು", | |
"68": "ಅರವತ್ತೆಂಟು", | |
"69": "ಅರವತ್ತೊಂಬತ್ತು", | |
"70": "ಎಪ್ಪತ್ತು", | |
"71": "ಎಪ್ಪತ್ತೊಂದು", | |
"72": "ಎಪ್ಪತ್ತೆರಡು", | |
"73": "ಎಪ್ಪತ್ತ್ ಮೂರು", | |
"74": "ಎಪ್ಪತ್ತ್ ನಾಲ್ಕು", | |
"75": "ಎಪ್ಪತ್ತೈದು", | |
"76": "ಎಪ್ಪತ್ತಾರು", | |
"77": "ಎಪ್ಪತ್ತೇಳು", | |
"78": "ಎಪ್ಪತ್ತೆಂಟು", | |
"79": "ಎಪ್ಪತ್ತೊಂಬತ್ತು", | |
"80": "ಎಂಬತ್ತು", | |
"81": "ಎಂಬತ್ತೊಂದು", | |
"82": "ಎಂಬತ್ತೆರಡು", | |
"83": "ಎಂಬತ್ತ್ ಮೂರು", | |
"84": "ಎಂಬತ್ತ್ ನಾಲ್ಕು", | |
"85": "ಎಂಬತ್ತೈದು", | |
"86": "ಎಂಬತ್ತಾರು", | |
"87": "ಎಂಬತ್ತೇಳು", | |
"88": "ಎಂಬತ್ತೆಂಟು", | |
"89": "ಎಂಬತ್ತೊಂಬತ್ತು", | |
"90": "ತೊಂಬತ್ತು", | |
"91": "ತೊಂಬತ್ತೊಂದು", | |
"92": "ತೊಂಬತ್ತೆರಡು", | |
"93": "ತೊಂಬತ್ತ ಮೂರು", | |
"94": "ತೊಂಬತ್ತ ನಾಲ್ಕು", | |
"95": "ತೊಂಬತ್ತೈದು", | |
"96": "ತೊಂಬತ್ತಾರು", | |
"97": "ತೊಂಬತ್ತೇಳು", | |
"98": "ತೊಂಬತ್ತೆಂಟು", | |
"99": "ತೊಂಬತ್ತೊಂಬತ್ತು", | |
"100": "ನೂರ", | |
"1000": "ಸಾವಿರದ", | |
"100000": "ಲಕ್ಷದ", | |
"10000000": "ಕೋಟಿ", | |
"1000000000": "ಶತಕೋಟಿ", | |
} # Kannada | |
# Odia number words, keyed by the decimal string they spell out.
# "10" previously repeated the word for nine; it is now the word for ten.
num_dict["or"] = {
    "0": "ଶୁନ୍ୟ",
    "1": "ଏକ",
    "2": "ଦୁଇ",
    "3": "ତିନି",
    "4": "ଚାରି",
    "5": "ପାଞ୍ଚ",
    "6": "ଛଅ",
    "7": "ସାତ",
    "8": "ଆଠ",
    "9": "ନଅ",
    "10": "ଦଶ",
    "11": "ଏଗାର",
    "12": "ବାର",
    "13": "ତେର",
    "14": "ଚଉଦ",
    "15": "ପନ୍ଦର",
    "16": "ଷୋହଳ",
    "17": "ସତର",
    "18": "ଅଠର",
    "19": "ଊଣାଇଶ",
    "20": "କୋଡିଏ",
    "21": "ଏକୋଇଶି",
    "22": "ବାଇଶି",
    "23": "ତେଇଶି",
    "24": "ଚବିଶି",
    "25": "ପଚିଶି",
    "26": "ଛବିଶି",
    "27": "ସତାଇଶି",
    "28": "ଅଠାଇଶି",
    "29": "ଅଣତିରିଶି",
    "30": "ତିରିଶି",
    "31": "ଏକତିରିଶି",
    "32": "ବତିଶି",
    "33": "ତେତିଶି",
    "34": "ଚଉତିରିଶି",
    "35": "ପଞ୍ଚତିରିଶି",
    "36": "ଛତିଶି",
    "37": "ସଂଇତିରିଶି",
    "38": "ଅଠତିରିଶି",
    "39": "ଅଣଚାଳିଶି",
    "40": "ଚାଳିଶି",
    "41": "ଏକଚାଳିଶି",
    "42": "ବୟାଳିଶି",
    "43": "ତେୟାଳିଶି",
    "44": "ଚଉରାଳିଶି",
    "45": "ପଞ୍ଚଚାଳିଶି",
    "46": "ଛୟାଳିଶି",
    "47": "ସତଚାଳିଶି",
    "48": "ଅଠଚାଳିଶି",
    "49": "ଅଣଚାଶ",
    "50": "ପଚାଶ",
    "51": "ଏକାବନ",
    "52": "ବାଉନ",
    "53": "ତେପନ",
    "54": "ଚଉବନ",
    "55": "ପଞ୍ଚାବନ",
    "56": "ଛପନ",
    "57": "ସତାବନ",
    "58": "ଅଠାବନ",
    "59": "ଅଣଷଠି",
    "60": "ଷାଠିଏ",
    "61": "ଏକଷଠି",
    "62": "ବାଷଠି",
    "63": "ତେଷଠି",
    "64": "ଚଉଷଠି",
    "65": "ପଞ୍ଚଷଠି",
    "66": "ଛଅଷଠି",
    "67": "ସତଷଠି",
    "68": "ଅଠଷଠି",
    "69": "ଅଣସ୍ତରୀ",
    "70": "ସତୂରୀ",
    "71": "ଏକସ୍ତରୀ",
    "72": "ବାସ୍ତରୀ",
    "73": "ତେସ୍ତରୀ",
    "74": "ଚଉସ୍ତରୀ",
    "75": "ପଞ୍ଚସ୍ତରୀ",
    "76": "ଛଅସ୍ତରୀ",
    "77": "ସତସ୍ତରୀ",
    "78": "ଅଠସ୍ତରୀ",
    "79": "ଅଣାଅଶୀ",
    "80": "ଅଶୀ",
    "81": "ଏକାଅଶୀ",
    "82": "ବୟାଅଶୀ",
    "83": "ତେୟାଅଶୀ",
    "84": "ଚଉରାଅଶୀ",
    "85": "ପଞ୍ଚାଅଶୀ",
    "86": "ଛୟାଅଶୀ",
    "87": "ସତାଅଶୀ",
    "88": "ଅଠାଅଶୀ",
    "89": "ଅଣାନବେ",
    "90": "ନବେ",
    "91": "ଏକାନବେ",
    "92": "ବୟାନବେ",
    "93": "ତେୟାନବେ",
    "94": "ଚଉରାନବେ",
    "95": "ପଞ୍ଚାନବେ",
    "96": "ଛୟାନବେ",
    "97": "ସତାନବେ",
    "98": "ଅଠାନବେ",
    "99": "ଅନେଶତ",
    "100": "ଶହେ",
    "1000": "ହଜାର",
    "100000": "ଲକ୍ଷ",
    "10000000": "କୋଟି",
    # NOTE(review): same word as 10000000 above — looks like a placeholder;
    # confirm the intended word for 10^9.
    "1000000000": "କୋଟି",
}  # Oriya
num_dict["pa"] = { | |
"0": "ਸਿਫਰ ", | |
"1": "ਇੱਕ", | |
"2": "ਦੋ", | |
"3": "ਤਿੰਨ", | |
"4": "ਚਾਰ", | |
"5": "ਪੰਜ", | |
"6": "ਛੇ", | |
"7": "ਸੱਤ", | |
"8": "ਅੱਠ", | |
"9": "ਨੌਂ", | |
"10": "ਦੱਸ", | |
"11": "ਗਿਆਰਾਂ", | |
"12": "ਬਾਰਾਂ", | |
"13": "ਤੇਰਾਂ", | |
"14": "ਚੌਦਾਂ", | |
"15": "ਪੰਦਰਾਂ", | |
"16": "ਸੋਲ਼ਾਂ", | |
"17": "ਸਤਾਰਾਂ", | |
"18": "ਅਠਾਰਾਂ", | |
"19": "ਉਨੀ", | |
"20": "ਵੀਹ", | |
"21": "ਇੱਕੀ", | |
"22": "ਬਾਈ", | |
"23": "ਤੇਈ", | |
"24": "ਚੌਵੀ", | |
"25": "ਪੰਝੀ", | |
"26": "ਛੱਬੀ", | |
"27": "ਸਤਾਈ", | |
"28": "ਅਠਾਈ", | |
"29": "ਉਨੱਤੀ", | |
"30": "ਤੀਹ", | |
"31": "ਇਕੱਤੀ", | |
"32": "ਬੱਤੀ", | |
"33": "ਤੇਤੀ", | |
"34": "ਚੌਂਤੀ", | |
"35": "ਪੈਂਤੀ", | |
"36": "ਛੱਤੀ", | |
"37": "ਸੈਂਤੀ", | |
"38": "ਅਠੱਤੀ", | |
"39": "ਉਨਤਾਲੀ", | |
"40": "ਚਾਲੀ", | |
"41": "ਇਕਤਾਲੀ", | |
"42": "ਬਤਾਲੀ", | |
"43": "ਤਰਤਾਲੀ", | |
"44": "ਚੌਤਾਲੀ", | |
"45": "ਪੰਜਤਾਲੀ", | |
"46": "ਛਿਆਲੀ", | |
"47": "ਸੰਤਾਲੀ", | |
"48": "ਅੱਠਤਾਲੀ", | |
"49": "ਉਣਿੰਜਾ", | |
"50": "ਪੰਜਾਹ", | |
"51": "ਇਕਵਿੰਜਾ", | |
"52": "ਬਵਿੰਜਾ", | |
"53": "ਤਰਵਿੰਜਾ", | |
"54": "ਚਰਿੰਜਾ", | |
"55": "ਪਚਵਿੰਜਾ", | |
"56": "ਛਪਿੰਜਾ", | |
"57": "ਸਤਵਿੰਜਾ", | |
"58": "ਅੱਠਵਿੰਜਾ", | |
"59": "ਉਣਾਠ", | |
"60": "ਸੱਠ", | |
"61": "ਇਕਾਠ", | |
"62": "ਬਾਠ੍ਹ", | |
"63": "ਤਰੇਠ੍ਹ", | |
"64": "ਚੌਠ੍ਹ", | |
"65": "ਪੈਂਠ", | |
"66": "ਛਿਆਠ", | |
"67": "ਸਤਾਹਠ", | |
"68": "ਅੱਠਾਠ", | |
"69": "ਉਣੱਤਰ", | |
"70": "ਸੱਤਰ", | |
"71": "ਇਕ੍ਹੱਤਰ", | |
"72": "ਬਹੱਤਰ", | |
"73": "ਤਹੱਤਰ", | |
"74": "ਚੌਹੱਤਰ", | |
"75": "ਪੰਜੱਤਰ", | |
"76": "ਛਿਹੱਤਰ", | |
"77": "ਸਤੱਤਰ", | |
"78": "ਅਠੱਤਰ", | |
"79": "ਉਣਾਸੀ", | |
"80": "ਅੱਸੀ", | |
"81": "ਇਕਾਸੀ", | |
"82": "ਬਿਆਸੀ", | |
"83": "ਤਰਾਸੀ", | |
"84": "ਚਰਾਸੀ", | |
"85": "ਪੰਜਾਸੀ", | |
"86": "ਛਿਆਸੀ", | |
"87": "ਸਤਾਸੀ", | |
"88": "ਅਠਾਸੀ", | |
"89": "ਉਣਾਨਵੇਂ", | |
"90": "ਨੱਬੇ", | |
"91": "ਇਕਾਨਵੇਂ", | |
"92": "ਬਿਆਨਵੇਂ", | |
"93": "ਤਰਾਨਵੇਂ", | |
"94": "ਚਰਾਨਵੇਂ", | |
"95": "ਪਚਾਨਵੇਂ", | |
"96": "ਛਿਆਨਵੇਂ", | |
"97": "ਸਤਾਨਵੇਂ", | |
"98": "ਅਠਾਨਵੇਂ", | |
"99": "ਨਿੜਾਨਵੇਂ", | |
"100": "ਸੌ", | |
"1000": "ਹਜਾਰ", | |
"100000": "ਲੱਖ", | |
"10000000": "ਕਰੋੜ", | |
"1000000000": "ਅਰਬ", | |
} # Punjabi | |
# --------------------------- num_to_word.py ------------------------------ | |
""" | |
Convert numbers to words
for Indian languages.
Use cases:
1) Speech-recognition pre-processing
2) Language-modelling data pre-processing
-------------------------
See the num_dict and all_num tables (indic_numbers.py)
to add support for another Indian language.
""" | |
def language_specific_exception(words, lang, combiner):
    """
    Apply per-language rewrites to an already spelled-out number.

    :param words: number in words as assembled by num_to_word
    :param lang: language code; only "mr", "gu", "te" and "kn" are rewritten
    :param combiner: string that joins a count to its magnitude word
    :return: the adjusted wording (``words`` unchanged for other languages)
    """
    if lang == "mr":
        # "one hundred" has its own word.
        return words.replace("एक" + combiner + "शे", "शंभर")

    if lang == "gu":
        # "two hundred" has its own word.
        return words.replace("બે" + combiner + "સો", "બસ્સો")

    if lang == "te":
        # Keys without "+" replace "one <magnitude>"; keys with "+" replace a
        # magnitude word that closes the number.
        replacements = {
            "1": "ఒక",
            "100": "వంద",
            "100+": "వందలు",
            "1000": "వెయ్యి",
            "1000+": "వేలు",
            "100000": "లక్ష",
            "100000+": "లక్షలు",
            "10000000": "కోటి",
            "10000000+": "కోట్లు",
        }
        telugu_one = num_dict["te"]["1"]
        for magnitude in ("100", "1000", "100000", "10000000"):
            magnitude_word = num_dict["te"][magnitude]
            one_of_magnitude = telugu_one + combiner + magnitude_word
            if words == one_of_magnitude:
                # for numbers like : 100, 1000, 100000
                return replacements[magnitude]
            if words.endswith(magnitude_word):
                # for numbers like : 200, 4000, 800000
                words = words.replace(magnitude_word, replacements[magnitude + "+"])
            elif not words.endswith(one_of_magnitude):
                # for numbers like : 105, 1076, 123993
                words = words.replace(
                    one_of_magnitude,
                    replacements["1"] + combiner + replacements[magnitude],
                )
        # Exception case for 101...199
        return words.replace("ఒక" + combiner + "వంద", "నూట")

    if lang == "kn":
        # special case for 100
        if words == "ಒಂದು" + combiner + "ನೂರ":
            return "ನೂರು"
        # A magnitude word that closes the number takes a different form from
        # the one stored in the table.
        closing_forms = (
            ("ನೂರ", "ನೂರು"),
            ("ಸಾವಿರದ", "ಸಾವಿರ"),
            ("ಲಕ್ಷದ", "ಲಕ್ಷ"),
            ("ಕೋಟಿಯ", "ಕೋಟಿ"),
        )
        for table_form, closing_form in closing_forms:
            if words.endswith(table_form):
                words = words.replace(table_form, closing_form)

    return words
def num_to_word(num, lang, separator=", ", combiner=" "):
    """
    Main Method: spell out a number in the requested language.

    Numbers of up to nine digits are read with the Indian grouping
    (hundred / thousand / lakh / crore).  Longer numbers are read out two
    digits at a time.

    :param num: Number (int or str); digits may be ASCII or the native digits
        of any supported language. "," and " " inside it are ignored
    :param lang: Language Code from supported Language (case-insensitive)
    :param separator: Separator character i.e. separator = '-' --> 'two hundred-sixty'
    :param combiner: combine number with position i.e. combiner = '-' --> 'two-hundred sixty'.
        For "en" the default " " is replaced by "-"
    :return: UTF-8 String of numbers in words. Input that is not made of
        decimal digits only is returned as text (with "," and " " removed)
    :raises AssertionError: for an unsupported language, or when a digit
        cannot be mapped onto 0-9
    """
    lang = lang.lower()

    # Load dictionary according to language code
    assert lang in supported_lang, "Language not supported"
    num_dic = num_dict[lang]

    # dash default combiner for english-india
    if lang == "en" and combiner == " ":
        combiner = "-"

    # Remove punctuations from numbers
    num = str(num).replace(",", "").replace(" ", "")

    # return word as it is if not number
    if not num.isdecimal():
        return num

    # Replace native language numbers with english digits; zip stops after
    # the ten ASCII digits, so extra entries in a native list are ignored.
    for language in supported_lang:
        for native_digit, ascii_digit in zip(all_num[language], all_num["en"]):
            num = num.replace(native_digit, ascii_digit)

    # Assert that input contains only integer number
    for digit in num:
        assert digit in all_num["en"], "Give proper input"

    def pair_words(pair):
        """Words for one chunk (two, one or zero digits) of a long number."""
        if len(pair) <= 1:  # Provided only one/zero digit
            return num_dic.get(pair, "")
        if pair[0] == "0":  # "0x" and "00": each digit is read on its own
            return num_dic["0"] + separator + num_dic[pair[1]]
        return num_dic[pair]  # Both digit provided

    def group_words(digits):
        """Word groups, most significant first, for at most nine digits."""
        groups = []
        digits = digits.lstrip("0")
        while len(digits) > 3:
            length = len(digits)
            # Odd lengths start with a two-digit count, even with one digit.
            lead = (length % 2) + 1
            magnitude = str(10 ** ((length // 2) * 2 - 1))
            groups.append(num_dic[digits[:lead]] + combiner + num_dic[magnitude])
            digits = digits[lead:].lstrip("0")
        if len(digits) == 3:
            groups.append(num_dic[digits[0]] + combiner + num_dic["100"])
            digits = digits[1:].lstrip("0")
        if digits:
            groups.append(num_dic[digits])
        return groups

    num = num.lstrip("0")
    if not num:
        output = num_dic["0"]
    elif len(num) <= 9:
        # Joining the groups (instead of appending a separator after each one
        # and stripping afterwards) never leaves a dangling separator.  The
        # old ``output.strip(separator)`` removed a *set of characters*, so a
        # separator such as " and " also ate letters off the words.
        output = separator.join(group_words(num))
    else:
        pair_count = round(len(num) / 2)
        pieces = [pair_words(num[2 * i : 2 * i + 2]) for i in range(pair_count)]
        # round() may round an odd length down; pick up the digit left over.
        leftover = pair_words(num[2 * pair_count :])
        if leftover:
            pieces.append(leftover)
        output = separator.join(pieces)

    # Guard against stray blanks around individual dictionary entries.
    output = output.strip()
    return language_specific_exception(output, lang, combiner)
# --------------------------------- num_to_word_on_a_sent --------------------------------- | |
def is_digit(word, digit_pattern):
    """
    Look for ``digit_pattern`` anywhere inside ``word``.

    :param word: token to inspect
    :param digit_pattern: regular expression describing a digit
    :return: the match object of the first hit, or None when there is none
    """
    matcher = re.compile(digit_pattern)
    return matcher.search(word)
def remove_punct(sent):
    """
    Replace every ASCII punctuation character with a blank and collapse
    whitespace.

    :param sent: input sentence
    :return: the words of ``sent`` joined by single spaces
    """
    blank_out = str.maketrans(string.punctuation, " " * len(string.punctuation))
    return " ".join(sent.translate(blank_out).split())
def normalize_nums(text, lang):
    """
    Spell out the numeric tokens of a sentence.

    Hyphens are padded with spaces so hyphen-joined tokens split apart, the
    text is split on whitespace, and every token containing an ASCII digit is
    handed to num_to_word (separator and combiner both a single space).  The
    tokens are joined back with single spaces, so runs of whitespace in the
    input are collapsed.

    text: str
    lang: lang code ['en', 'hi']; when it is not in supported_lang the text
        is returned untouched
    returns: str
    """
    if lang not in supported_lang:
        return text

    digit_pattern = "[0123456789]"
    spoken = []
    # One pass over the tokens; the earlier version first collected the
    # indices of numeric tokens in a list and then searched that list for
    # every token, which is quadratic in the number of tokens.
    for word in text.replace("-", " - ").split():  # space separate hyphen
        if is_digit(word, digit_pattern):
            word = num_to_word(word, lang, separator=" ", combiner=" ")
        spoken.append(word)
    return " ".join(spoken)
if __name__ == "__main__":
    # Demo: a Hindi sentence containing the number 16.
    sample_sentence = "रीटा के पास 16 बिल्लियाँ हैं।"
    print(normalize_nums(sample_sentence, "hi"))