Create cardinal_numbers.py
Browse files- cardinal_numbers.py +690 -0
cardinal_numbers.py
ADDED
@@ -0,0 +1,690 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
# coding: utf-8
|
3 |
+
|
4 |
+
"""From https://github.com/peresolb/number-conversion/"""
|
5 |
+
import sys
|
6 |
+
import os
|
7 |
+
import nltk
|
8 |
+
|
9 |
+
|
10 |
+
# Number words for 1-10 (default word forms).
b = dict(
    zip(
        range(1, 11),
        ("én", "to", "tre", "fire", "fem", "seks", "sju", "åtte", "ni", "ti"),
    )
)

# Same table for the ``nn`` word forms: only the word for 1 differs.
b_nn = dict(b)
b_nn[1] = "ein"

# Number words for the teens, 11-19.
t = dict(
    zip(
        range(11, 20),
        (
            "elleve",
            "tolv",
            "tretten",
            "fjorten",
            "femten",
            "seksten",
            "sytten",
            "atten",
            "nitten",
        ),
    )
)

# Number words for the round tens, 20-90.
do = dict(
    zip(
        range(20, 100, 10),
        ("tjue", "tretti", "førti", "femti", "seksti", "sytti", "åtti", "nitti"),
    )
)

# Number word for 100.
doo = {100: "hundre"}

# Number word for 1000.
dooo = {1000: "tusen"}
|
69 |
+
|
70 |
+
|
71 |
+
# Reverser function for primitive number dicts
|
72 |
+
def _revdict(numberdict):
|
73 |
+
newdict = {}
|
74 |
+
for k, v in numberdict.items():
|
75 |
+
newdict[v] = k
|
76 |
+
return newdict
|
77 |
+
|
78 |
+
|
79 |
+
# Inverse lookup tables (number word -> integer). Each one is named after
# the table it inverts, with an ``_r`` suffix.
b_r = _revdict(b)
b_nn_r = _revdict(b_nn)
t_r = _revdict(t)
do_r = _revdict(do)
doo_r = _revdict(doo)
dooo_r = _revdict(dooo)
|
89 |
+
|
90 |
+
|
91 |
+
def _oneten(nr, reverse=False, nn=False):
    """Convert between the integers 1-10 and their number words.

    Args:
        nr: An ``int`` (forward), or a number word ``str`` when
            ``reverse`` is true.
        reverse: When true, map a number word to its integer.
        nn: When true, use the ``b_nn`` / ``b_nn_r`` tables ("ein")
            instead of ``b`` / ``b_r`` ("én").

    Returns:
        The number word (forward) or the integer (reverse), or ``None``
        when ``nr`` has the wrong type or is not in the selected table.
    """
    if reverse:
        if type(nr) is not str:
            return None
        return (b_nn_r if nn else b_r).get(nr)
    # Exact type check on purpose: bool is a subclass of int and must not
    # be accepted as the number 1 or 0.
    if type(nr) is not int:
        return None
    # .get() rather than indexing: the table only holds 1-10, so 0,
    # negative and larger ints yield None instead of raising KeyError.
    return (b_nn if nn else b).get(nr)
|
111 |
+
|
112 |
+
|
113 |
+
def _onedig(nr, reverse=False, nn=False):
    """Convert between the single digits 1-9 and their number words.

    Thin wrapper around ``_oneten`` that rejects ten: forward, the int 10
    yields ``None``; in reverse, the word "ti" yields ``None``.

    Args:
        nr: An ``int`` (forward), or a number word ``str`` when
            ``reverse`` is true.
        reverse: When true, map a number word to its integer.
        nn: When true, use the ``nn`` word forms ("ein" instead of "én").

    Returns:
        The word or integer, or ``None`` when ``_oneten`` has no match or
        the match is ten.
    """
    value = _oneten(nr, reverse=reverse, nn=nn)
    # Ten is in the shared 1-10 tables but is not a single digit.
    if value == (10 if reverse else "ti"):
        return None
    return value
|
126 |
+
|
127 |
+
|
128 |
+
def _teen(nr, reverse=False):
    """Convert between the teens 11-19 and their number words.

    Args:
        nr: An ``int`` in 11-19 (forward), or a teen number word ``str``
            when ``reverse`` is true.
        reverse: When true, map the number word back to its integer.

    Returns:
        The number word (forward) or the integer (reverse), or ``None``
        when ``nr`` has the wrong type or is not in the table.
    """
    if reverse:
        return t_r.get(nr) if type(nr) is str else None
    # Exact type check: bool must not pass as an int.
    return t.get(nr) if type(nr) is int else None
|
141 |
+
|
142 |
+
|
143 |
+
def _twodig(nr, reverse=False):
    """Convert between the round tens 20, 30, ..., 90 and their number words.

    Args:
        nr: One of the ints 20-90 in steps of ten (forward), or the
            matching number word ``str`` when ``reverse`` is true.
        reverse: When true, map the number word back to its integer.

    Returns:
        The number word (forward) or the integer (reverse), or ``None``
        when ``nr`` has the wrong type or is not in the table.
    """
    if reverse:
        return do_r.get(nr) if type(nr) is str else None
    # Exact type check: bool must not pass as an int.
    return do.get(nr) if type(nr) is int else None
|
156 |
+
|
157 |
+
|
158 |
+
def _numparser(numword, nn=False):
    """Split a compound such as "trettifem" into its tens word and unit word.

    Args:
        numword: The candidate number word.
        nn: When true, the unit word is looked up in ``b_nn_r`` instead
            of ``b_r``.

    Returns:
        A ``(tens_word, unit_word)`` tuple when *numword* starts with a
        word from ``do_r`` and the remainder is a number word for 1-9.
        ``None`` otherwise, including when *numword* is not a ``str``.
    """
    if type(numword) is not str:
        return None
    units = b_nn_r if nn else b_r
    for tens_word in do_r:
        if not numword.startswith(tens_word):
            continue
        unit_word = numword[len(tens_word):]
        # "ti" (10) is in the units table but cannot follow a tens word;
        # the default of 10 makes unknown remainders fail the same test.
        if units.get(unit_word, 10) < 10:
            return (tens_word, unit_word)
    return None
|
181 |
+
|
182 |
+
|
183 |
+
def _one_to_nineteen(nr, reverse=False, nn=False):
    """Convert between the integers 1-19 and their number words.

    Args:
        nr: An ``int`` (forward), or a number word ``str`` when
            ``reverse`` is true.
        reverse: When true, map a number word to its integer.
        nn: When true, use the ``nn`` word forms for 1-10. The teen
            table is the same for both.

    Returns:
        The word or integer, or ``None`` when ``nr`` has the wrong type
        or lies outside 1-19.
    """
    if reverse:
        if type(nr) is not str:
            return None
        value = _oneten(nr, reverse=True, nn=nn)
        if type(value) is int:
            return value
        # _teen itself returns None for anything that is not a teen word,
        # so no further type check is needed here.
        return _teen(nr, reverse=True)
    # Reject 0 and negatives up front; the 1-10 table has no entry for them.
    if type(nr) is not int or nr < 1:
        return None
    if nr < 11:
        return _oneten(nr, nn=nn)
    if nr < 20:
        return _teen(nr)
    return None
|
209 |
+
|
210 |
+
|
211 |
+
def _one_to_nn(nr, reverse=False, nn=False):
    """Convert between the integers 1-99 and their number words.

    Compounds are written as one word: tens word directly followed by the
    unit word, e.g. 35 <-> "trettifem".

    Args:
        nr: An ``int`` (forward), or a number word ``str`` when
            ``reverse`` is true.
        reverse: When true, map a number word to its integer.
        nn: When true, use the ``nn`` word forms ("ein" instead of "én").

    Returns:
        The word or integer, or ``None`` when ``nr`` has the wrong type,
        lies outside 1-99, or is not a recognised number word.
    """
    if reverse:
        if type(nr) is not str:
            return None
        value = _one_to_nineteen(nr, reverse=True, nn=nn)
        if type(value) is int:
            return value
        value = _twodig(nr, reverse=True)
        if type(value) is int:
            return value
        # Neither a primitive nor a round ten: try tens word + unit word.
        parsed = _numparser(nr, nn=nn)
        if parsed is None:
            return None
        tens_word, unit_word = parsed
        tens = _twodig(tens_word, reverse=True)
        unit = _one_to_nineteen(unit_word, reverse=True, nn=nn)
        return tens + unit

    if type(nr) is not int or not 0 < nr < 100:
        return None
    if nr < 20:
        return _one_to_nineteen(nr, nn=nn)
    tens, unit = divmod(nr, 10)
    tens_word = _twodig(tens * 10)
    if unit == 0:
        return tens_word
    return tens_word + _onedig(unit, nn=nn)
|
263 |
+
|
264 |
+
|
265 |
+
def _one_to_nnn(nr, reverse=False, nn=False):
    """Convert between the integers 1-999 and their number phrases.

    Forward, 100 is rendered as a bare "hundre" and 101-199 as
    "hundre og <rest>"; other hundreds are "<digit> hundre" optionally
    followed by "og <rest>" (e.g. 435 -> "fire hundre og trettifem").
    In reverse, the phrase may also start with "ett hundre"
    ("eitt hundre" when ``nn`` is true).

    Args:
        nr: An ``int`` (forward), or a space-separated number phrase
            ``str`` when ``reverse`` is true.
        reverse: When true, map a number phrase to its integer.
        nn: When true, use the ``nn`` word forms.

    Returns:
        The phrase or integer, or ``None`` when ``nr`` has the wrong type,
        lies outside 1-999, or is not a recognised phrase.
    """
    if not reverse:
        if type(nr) is not int or nr == 0:
            return None
        if nr < 100:  # 1-99 (negatives are rejected by _one_to_nn)
            return _one_to_nn(nr, nn=nn)
        if nr >= 1000:
            return None
        hundreds, rest = divmod(nr, 100)
        if hundreds == 1:
            head = doo[100]  # "hundre", no leading digit word
        else:
            head = "%s %s" % (_onedig(hundreds, nn=nn), doo[100])
        if rest == 0:
            return head  # fire hundre
        return "%s og %s" % (head, _one_to_nn(rest, nn=nn))  # fire hundre og fem

    if type(nr) is not str:
        return None
    if nr in doo_r:  # hundre - 100
        return doo_r[nr]

    words = nr.split(" ")
    if len(words) == 1:  # førtifire
        return _one_to_nn(nr, reverse=True, nn=nn)

    # The neuter form of "one" used in front of "hundre"; it is not in the
    # 1-10 tables and therefore needs explicit handling.
    one = "eitt" if nn else "ett"

    if len(words) == 2:  # to hundre
        if words[1] != "hundre":
            return None
        if words[0] == one:
            return 100
        hundreds = _one_to_nn(words[0], reverse=True, nn=nn)
        # Only a single digit may precede "hundre" here, matching the
        # four-word branch below and keeping the result within 1-999.
        if type(hundreds) is int and hundreds < 10:
            return hundreds * 100
        return None

    if len(words) == 3:  # hundre og tre
        if words[:2] != ["hundre", "og"]:
            return None
        rest = _one_to_nn(words[2], reverse=True, nn=nn)
        return 100 + rest if type(rest) is int else None

    if len(words) == 4:  # ett hundre og trettifire, fire hundre og åtte
        if words[1:3] != ["hundre", "og"]:
            return None
        rest = _one_to_nn(words[3], reverse=True, nn=nn)
        if type(rest) is not int:
            return None
        if words[0] == one:
            return 100 + rest
        hundreds = _one_to_nn(words[0], reverse=True, nn=nn)
        if type(hundreds) is int and hundreds < 10:
            return hundreds * 100 + rest
        return None

    return None
|
399 |
+
|
400 |
+
|
401 |
+
def _high_hundred(nr, nn=False):
    """Parse a "teen hundred" phrase such as "tolv hundre og nittiåtte".

    In Norwegian, as in English, the numbers 1100-1999 can be expressed
    with hundreds (/twelve hundred and ninety-eight/). Such phrases only
    need to be read, never produced, so this function goes from strings
    to integers only.

    Args:
        nr: The phrase, either "<teen> hundre" or
            "<teen> hundre og <1-99 word>", space-separated.
        nn: When true, the trailing 1-99 word is parsed with the ``nn``
            word forms.

    Returns:
        The integer value, or ``None`` when *nr* is not a ``str`` or does
        not have one of the two shapes above.
    """
    if type(nr) is not str:
        return None
    words = nr.split(" ")
    if len(words) < 2 or words[1] != "hundre":
        return None
    hundreds = _teen(words[0], reverse=True)
    if type(hundreds) is not int:
        return None
    if len(words) == 2:  # femten hundre
        return hundreds * 100
    if len(words) == 4 and words[2] == "og":  # femten hundre og førtito
        rest = _one_to_nn(words[3], reverse=True, nn=nn)
        if type(rest) is int:
            return hundreds * 100 + rest
    return None
|
430 |
+
|
431 |
+
|
432 |
+
def _one_to_nnnnnn(nr, reverse=False, nn=False):
    """Convert between the integers 1-999999 and their number phrases.

    Forward, a number >= 1000 is rendered as "<thousands> <tail>", where
    the tail is "tusen", "tusen og <1-99>" or "tusen <100-999>". A
    thousands count or remainder in 100-199 gets an explicit leading
    "ett" ("eitt" when ``nn`` is true), e.g. "tusen ett hundre".
    In reverse, a phrase may also start directly with "tusen"
    (e.g. "tusen og tretti").

    Args:
        nr: An ``int`` (forward), or a space-separated number phrase
            ``str`` when ``reverse`` is true.
        reverse: When true, map a number phrase to its integer.
        nn: When true, use the ``nn`` word forms.

    Returns:
        The phrase or integer, or ``None`` when ``nr`` has the wrong type,
        lies outside 1-999999, or is not a recognised phrase.
    """
    # Neuter "one" as used before "hundre" / "tusen".
    one = "eitt" if nn else "ett"

    if not reverse:
        if type(nr) is not int or nr == 0:
            return None
        if nr < 1000:  # 1-999 (negatives are rejected further down)
            return _one_to_nnn(nr, nn=nn)
        if nr >= 1000000:
            return None

        thousands, rest = divmod(nr, 1000)  # e.g. 23456 -> (23, 456)

        if thousands == 1:  # 1001 starts with "ett"
            head = one
        elif 100 <= thousands < 200:  # 155555 starts with "ett hundre ..."
            head = "%s %s" % (one, _one_to_nnn(thousands, nn=nn))
        else:  # the remaining counts are purely compositional
            head = _one_to_nnn(thousands, nn=nn)

        if rest == 0:  # 000
            tail = "tusen"
        elif rest < 100:  # 002, 012 -> tusen og to, tusen og tolv
            tail = "tusen og %s" % _one_to_nnn(rest, nn=nn)
        elif rest < 200:  # 1xx -> tusen ett hundre ...
            tail = "tusen %s %s" % (one, _one_to_nnn(rest, nn=nn))
        else:  # 456 -> tusen fire hundre og femtiseks
            tail = "tusen %s" % _one_to_nnn(rest, nn=nn)

        return "%s %s" % (head, tail)

    if type(nr) is not str:
        return None
    value = _one_to_nnn(nr, reverse=True, nn=nn)
    if type(value) is int:  # anything below 1000
        return value
    if nr == "tusen":  # tusen - 1000
        return 1000

    # All other numbers consist of several words, one of them "tusen".
    words = nr.split(" ")
    if len(words) < 2 or "tusen" not in words:
        return None

    if words[-1] == "tusen":  # ett tusen, ett hundre tusen etc.
        head = words[:-1]
        count = _one_to_nnn(" ".join(head), reverse=True, nn=nn)
        if type(count) is int:
            return count * 1000
        if head == [one]:
            return 1000
        return None  # misspellings should not result in a return value

    # Split at the first "tusen": words before it are the thousands
    # count, words after it are the remainder below 1000.
    split_at = words.index("tusen")
    head = words[:split_at]
    tail = words[split_at + 1:]

    if len(tail) == 2 and tail[0] == "og":  # tusen og fire, tusen og førtifire
        rest = _one_to_nnn(tail[1], reverse=True, nn=nn)
    elif len(tail) in (2, 4) and tail[1] == "hundre":
        # tusen to hundre / tusen fire hundre og fem
        rest = _one_to_nnn(" ".join(tail), reverse=True, nn=nn)
    else:  # misspellings should not result in a return value
        return None

    if not head or head == [one]:  # "tusen og tretti" / "ett tusen og tretti"
        count = 1
    else:
        count = _one_to_nnn(" ".join(head), reverse=True, nn=nn)

    if type(count) is int and type(rest) is int:
        return count * 1000 + rest
    return None
|
638 |
+
|
639 |
+
|
640 |
+
def convert_nums(nr, reverse=False, nn=False):
    """Convert an integer to its Norwegian number phrase, or back.

    Only works for numbers in the range 0-999999 for now. Zero is "null".
    In reverse, phrases of the form "<teen> hundre [og <1-99>]"
    (1100-1999 expressed in hundreds) are accepted as well.

    Args:
        nr: An ``int`` (forward), or a space-separated number phrase
            ``str`` when ``reverse`` is true.
        reverse: When true, convert a number phrase to an integer.
        nn: When true, use the ``nn`` word forms ("ein"/"eitt") instead
            of the default forms ("én"/"ett").

    Returns:
        The phrase (forward) or the integer (reverse), or ``None`` when
        ``nr`` has the wrong type, is out of range, or is not recognised.
    """
    if reverse:
        if type(nr) is not str:
            return None
        if nr == "null":
            return 0
        value = _one_to_nnnnnn(nr, reverse=True, nn=nn)
        if type(value) is int:
            return value
        # Fall back to the "teen hundred" reading; it returns an int or None.
        return _high_hundred(nr, nn=nn)

    # Exact type check: bool must not pass as an int.
    if type(nr) is not int:
        return None
    if nr == 0:
        return "null"
    if nr < 1000000:
        return _one_to_nnnnnn(nr, nn=nn)
    return None
|
671 |
+
|
672 |
+
|
673 |
+
if __name__ == "__main__":
    # Manual smoke test: one int -> phrase and one phrase -> int
    # conversion for the default ("bm") forms and for the "nn" forms.
    bm_phrase = convert_nums(243564)
    bm_number = convert_nums(
        "hundre og atten tusen fire hundre og trettién", reverse=True
    )
    print(
        "Digit conversion bm: %s\nString conversion bm: %s" % (bm_phrase, bm_number)
    )

    nn_phrase = convert_nums(34381, nn=True)
    nn_number = convert_nums("tre hundre og førtiein", nn=True, reverse=True)
    print(
        "Digit conversion nn: %s\nString conversion nn: %s" % (nn_phrase, nn_number)
    )
|