ionite34's h2p_parser and dep required for English
- requirements.txt +3 -0
- resources/app/python/xvapitch/text/h2p_parser/__init__.py +22 -0
- resources/app/python/xvapitch/text/h2p_parser/__main__.py +185 -0
- resources/app/python/xvapitch/text/h2p_parser/cmudictext.py +253 -0
- resources/app/python/xvapitch/text/h2p_parser/compat/__init__.py +7 -0
- resources/app/python/xvapitch/text/h2p_parser/compat/cmudict.py +19 -0
- resources/app/python/xvapitch/text/h2p_parser/data/__init__.py +0 -0
- resources/app/python/xvapitch/text/h2p_parser/data/cmudict-0.7b.txt +0 -0
- resources/app/python/xvapitch/text/h2p_parser/data/cmudict.dict +0 -0
- resources/app/python/xvapitch/text/h2p_parser/data/dict.json +1500 -0
- resources/app/python/xvapitch/text/h2p_parser/data/example.json +16 -0
- resources/app/python/xvapitch/text/h2p_parser/dict_reader.py +109 -0
- resources/app/python/xvapitch/text/h2p_parser/dictionary.py +85 -0
- resources/app/python/xvapitch/text/h2p_parser/filter.py +34 -0
- resources/app/python/xvapitch/text/h2p_parser/format_ph.py +99 -0
- resources/app/python/xvapitch/text/h2p_parser/h2p.py +123 -0
- resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/PKG-INFO +14 -0
- resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/SOURCES.txt +19 -0
- resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/dependency_links.txt +1 -0
- resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/requires.txt +2 -0
- resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/top_level.txt +1 -0
- resources/app/python/xvapitch/text/h2p_parser/pos_parser.py +17 -0
- resources/app/python/xvapitch/text/h2p_parser/processors.py +392 -0
- resources/app/python/xvapitch/text/h2p_parser/symbols.py +82 -0
- resources/app/python/xvapitch/text/h2p_parser/text/__init__.py +0 -0
- resources/app/python/xvapitch/text/h2p_parser/text/numbers.py +166 -0
- resources/app/python/xvapitch/text/h2p_parser/utils/__init__.py +0 -0
- resources/app/python/xvapitch/text/h2p_parser/utils/converter.py +79 -0
- resources/app/python/xvapitch/text/h2p_parser/utils/parser.py +133 -0
requirements.txt
CHANGED
@@ -26,9 +26,11 @@ idna==2.10
 importlib-metadata==2.0.0
 importlib-resources==5.2.2
 inflect==4.1.0
+inquirerpy~=0.3.3
 jaconv==0.3
 joblib==0.17.0
 librosa
+nltk~=3.7
 num2words==0.5.10
 numpy
 omegaconf==2.1.1
@@ -43,6 +45,7 @@ pydub==0.25.1
 pykakasi==2.2.1
 pyparsing==2.4.7
 python-crfsuite==0.9.8
+pywordsegment~=0.2.1
 PyYAML
 regex==2021.8.28
 requests==2.25.1
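The three added pins back the new parser: inquirerpy drives the interactive CLI menus in __main__.py, nltk supplies the WordNet data and stemmers used by cmudictext.py, and pywordsegment handles compound-word splitting. A minimal sanity check (hypothetical, not part of this commit) that the pins resolve to importable modules; note the import names differ from the pip package names:

# Hypothetical sanity check; not part of this commit.
import InquirerPy      # pip: inquirerpy~=0.3.3
import nltk            # pip: nltk~=3.7
import pywordsegment   # pip: pywordsegment~=0.2.1

print(nltk.__version__)  # expect a 3.7.x release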
resources/app/python/xvapitch/text/h2p_parser/__init__.py
ADDED
@@ -0,0 +1,22 @@
"""
h2p_parser

Heteronym to Phoneme Parser

"""

import sys

if sys.version_info < (3, 9):
    # In Python versions below 3.9, this is needed
    from importlib_resources import files
else:
    # Since Python 3.9+, importlib.resources.files is built-in
    from importlib.resources import files

__version__ = "1.0.0"

# Data module
DATA_PATH = files(__name__ + '.data')
# Iterable collection of all files in data.
DATA_FILES = DATA_PATH.iterdir()
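For reference, a sketch of how the DATA_PATH and DATA_FILES handles above can be consumed (hypothetical usage, not part of the commit):

# Hypothetical usage of the module-level data handles defined above.
import h2p_parser

# DATA_PATH is an importlib.resources Traversable for the h2p_parser.data package
print(h2p_parser.DATA_PATH.joinpath('dict.json').is_file())

# DATA_FILES is a one-shot iterator over the entries of the data package
for entry in h2p_parser.DATA_FILES:
    print(entry.name)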
resources/app/python/xvapitch/text/h2p_parser/__main__.py
ADDED
@@ -0,0 +1,185 @@
from collections import Counter

from InquirerPy import inquirer
from InquirerPy.utils import patched_print, color_print
from InquirerPy.base.control import Choice
from InquirerPy.validator import PathValidator
from h2p_parser.utils import converter
from h2p_parser.utils import parser


def convert_h2p(input_file, output_file, delimiter):
    """
    Converts a h2p dictionary file from one format to another.
    """
    converter.bin_delim_to_json(input_file, output_file, delimiter)
    print('Converted h2p_dict to json.')


def prompt_action() -> str:
    action = inquirer.select(
        message='Select action:',
        choices=[
            "Convert",
            "Parse",
            Choice(value=None, name='Exit')
        ],
        default=0,
    ).execute()
    if not action:
        exit(0)
    return action


def prompt_f_input():
    """
    Prompts for input file.
    """
    return inquirer.filepath(
        message='Select input file:',
        validate=PathValidator(is_file=True, message='Input must be a file.')
    ).execute()


def prompt_f_output():
    """
    Prompts for output file.
    """
    return inquirer.filepath(
        message='Select output file:',
        validate=PathValidator(is_file=True, message='Output must be a file.')
    ).execute()


def action_convert():
    """
    Converts a h2p dictionary file from one format to another.
    """
    # Select input file
    input_file = prompt_f_input()
    if not input_file:
        return

    # Select output file
    output_file = prompt_f_output()
    if not output_file:
        return

    # Ask for delimiter
    delimiter = inquirer.text(
        message='Enter delimiter:',
        default='|'
    ).execute()
    if not delimiter:
        return

    # Run Process
    convert_h2p(input_file, output_file, delimiter)


def action_parse_file():
    """
    Parses a metadata.csv file and checks for dictionary coverage
    :return:
    """
    # Select input file
    input_file = prompt_f_input()
    if not input_file:
        return

    # Ask for delimiter
    delimiter = inquirer.text(
        message='Enter delimiter:',
        default='|'
    ).execute()
    if not delimiter:
        return

    # Run Process
    result = parser.check_lines(parser.read_file(input_file, delimiter))

    # Print results
    color_print([("#e5c07b", "Unresolved Words")])
    color_print([("#d21205", "[All]: "),
                 ("#ffffff", f"{len(result.unres_all_words)}/{len(result.all_words)}")])
    color_print([("#7e3b41", "[Unique]: "),
                 ("#ffffff", f"{len(result.unres_words)}/{len(result.words)}")])

    color_print([("#4ce5c8", "-" * 10)])

    color_print([("#e5c07b", "Unresolved Lines")])
    color_print([("#d21205", "[All]: "),
                 ("#ffffff", f"{len(result.unres_all_lines)}/{len(result.all_lines)}")])
    color_print([("#7e3b41", "[Unique]: "),
                 ("#ffffff", f"{len(result.unres_lines)}/{len(result.lines)}")])

    color_print([("#4ce5c8", "-" * 10)])

    color_print([("#e5c07b", "Expected Coverage")])
    color_print([("#d21205", "[Lines]: "),
                 ("#ffffff", f"{result.line_coverage()}%")])
    color_print([("#7e3b41", "[Words]: "),
                 ("#ffffff", f"{result.word_coverage()}%")])

    color_print([("#4ce5c8", "-" * 10)])

    color_print([("#e5c07b", "H2p parser")])
    color_print([("#d21205", "[Lines with Heteronyms]: "),
                 ("#ffffff", f"{len(result.all_lines_cont_het)}/{len(result.all_lines)}"
                             f" | {result.percent_line_het()}%")])
    color_print([("#7e3b41", "[Words Resolved by H2p]: "),
                 ("#ffffff", f"{result.n_words_het}/{result.n_words_res}"
                             f" | {result.percent_word_h2p()}%")])
    # Calcs
    feature_res = result.n_words_fet
    feature_percent = round(feature_res / result.n_words_res * 100, 2)
    cmu_res = result.n_words_cmu
    cmu_percent = round(cmu_res / result.n_words_res * 100, 2)
    color_print([("#c8bd20", "[Transformed Resolves]: "),
                 ("#ffffff", f"{feature_res}/{result.n_words_res}"
                             f" | {feature_percent}%")])
    color_print([("#25a0c8", "[Words in CMUDict]: "),
                 ("#ffffff", f"{cmu_res}/{result.n_words_res}"
                             f" | {cmu_percent}%")])

    color_print([("#4ce5c8", "-" * 10)])

    color_print([("#e5c07b", "Feature Usage")])

    # Loop through feature results
    for ft in result.ft_stats:
        color_print([("#d21205", f"{ft}: "),
                     ("#ffffff", f"{result.ft_stats[ft]}/{result.n_words_res}"
                                 f" | {round(result.ft_stats[ft]/result.n_words_res*100, 2)}%")])

    color_print([("#4ce5c8", "-" * 10)])

    # Print 100 sampled unresolved words by frequency
    color_print([("#e5c07b", "Top 100 most frequent unresolved words")])
    # Count frequency of words
    word_freq = Counter(result.unres_all_words)
    # Sort by frequency
    word_freq = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
    # Print top 100
    for word, freq in word_freq[:100]:
        color_print([("#d21205", f"{word}: "),
                     ("#ffffff", f"{freq}")])


def entry():
    """
    Prompts for an action and dispatches it.
    """
    # Select action type
    action = prompt_action()
    if action == 'Convert':
        action_convert()
    elif action == 'Parse':
        action_parse_file()


if __name__ == "__main__":
    entry()
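Because entry() is guarded by the __main__ check, the menu can be launched with python -m h2p_parser wherever the package is importable (here it is vendored under resources/app/python/xvapitch/text). The conversion step can also be driven without the InquirerPy prompts; a sketch with hypothetical file names:

# Hypothetical non-interactive use of the converter above.
from h2p_parser.__main__ import convert_h2p

# Convert a '|'-delimited heteronym dictionary into the JSON form of dict.json
convert_h2p('heteronyms.txt', 'heteronyms.json', '|')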
resources/app/python/xvapitch/text/h2p_parser/cmudictext.py
ADDED
@@ -0,0 +1,253 @@
# Extended Grapheme to Phoneme conversion using CMU Dictionary and Heteronym parsing.
from __future__ import annotations

import re
from typing import Optional

import pywordsegment
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.stem.snowball import SnowballStemmer
from .h2p import H2p
from .h2p import replace_first
from . import format_ph as ph
from .dict_reader import DictReader
from .text.numbers import normalize_numbers
from .filter import filter_text
from .processors import Processor
from copy import deepcopy

re_digit = re.compile(r"\((\d+)\)")
re_bracket_with_digit = re.compile(r"\(.*\)")

# Check that the nltk data is downloaded, if not, download it
try:
    nltk.data.find('corpora/wordnet.zip')
    nltk.data.find('corpora/omw-1.4.zip')
except LookupError:
    nltk.download('wordnet')
    nltk.download('omw-1.4')


class CMUDictExt:
    def __init__(self, cmu_dict_path: str = None, h2p_dict_path: str = None, cmu_multi_mode: int = 0,
                 process_numbers: bool = True, phoneme_brackets: bool = True, unresolved_mode: str = 'keep'):
        # noinspection GrazieInspection
        """
        Initialize CMUDictExt - Extended Grapheme to Phoneme conversion using CMU Dictionary with Heteronym parsing.

        CMU multi-entry resolution modes:
            - -2 : Raw entry (i.e. 'A' resolves to 'AH0' and 'A(1)' to 'EY1')
            - -1 : Skip resolving any entry with multiple pronunciations.
            - 0 : Resolve using default un-numbered pronunciation.
            - 1 : Resolve using (1) numbered pronunciation.
            - n : Resolve using (n) numbered pronunciation.
            - If a higher number is specified than available for the word, the highest available number is used.

        Unresolved word resolution modes:
            - keep : Keep the text-form word in the output.
            - remove : Remove the text-form word from the output.
            - drop : Return the line as None if any word is unresolved.

        :param cmu_dict_path: Path to CMU dictionary file (.txt)
        :type: str
        :param h2p_dict_path: Path to Custom H2p dictionary (.json)
        :type: str
        :param cmu_multi_mode: CMU resolution mode for entries with multiple pronunciations.
        :type: int
        """

        # Check valid unresolved_mode argument
        if unresolved_mode not in ['keep', 'remove', 'drop']:
            raise ValueError('Invalid value for unresolved_mode: {}'.format(unresolved_mode))
        self.unresolved_mode = unresolved_mode

        self.cmu_dict_path = cmu_dict_path  # Path to CMU dictionary file (.txt), if None, uses built-in
        self.h2p_dict_path = h2p_dict_path  # Path to Custom H2p dictionary (.json), if None, uses built-in
        self.cmu_multi_mode = cmu_multi_mode  # CMU multi-entry resolution mode
        self.process_numbers = process_numbers  # Normalize numbers to text form, if enabled
        self.phoneme_brackets = phoneme_brackets  # If True, phonemes are wrapped in curly brackets.
        self.dict = DictReader(self.cmu_dict_path).dict  # CMU Dictionary
        self.h2p = H2p(self.h2p_dict_path, preload=True)  # H2p parser
        self.lemmatize = WordNetLemmatizer().lemmatize  # WordNet Lemmatizer - used to find singular form
        self.stem = SnowballStemmer('english').stem  # Snowball Stemmer - used to find stem root of words
        self.segment = pywordsegment.WordSegmenter().segment  # Word Segmenter
        self.p = Processor(self)  # Processor for processing text

        # Features
        # Auto pluralization and de-pluralization
        self.ft_auto_plural = True
        # Auto splits and infers possessive forms of original words
        self.ft_auto_pos = True
        # Auto splits 'll
        self.ft_auto_ll = True
        # Auto splits and infers hyphenated words
        self.ft_auto_hyphenated = True
        # Auto splits possible compound words
        self.ft_auto_compound = True
        # Analyzes word root stem and infers pronunciation separately
        # i.e. 'generously' -> 'generous' + 'ly'
        self.ft_stem = True
        # Forces compound words using manual lookup
        self.ft_auto_compound_l2 = True

    def lookup(self, text: str, pos: str = None, ph_format: str = 'sds') -> str | list | None:
        # noinspection GrazieInspection
        """
        Gets the CMU Dictionary entry for a word.

        Options for ph_format:

        - 'sds' space delimited string
        - 'sds_b' space delimited string with curly brackets
        - 'list' list of phoneme strings

        :param pos: Part of speech tag (Optional)
        :param ph_format: Format of the phonemes to return:
        :type: str
        :param text: Word to lookup
        :type: str
        """

        def format_as(in_phoneme):
            if ph_format == 'sds':
                output = ph.to_sds(in_phoneme)
            elif ph_format == 'sds_b':
                output = ph.with_cb(ph.to_sds(in_phoneme))
            elif ph_format == 'list':
                output = ph.to_list(in_phoneme)
            else:
                raise ValueError('Invalid value for ph_format: {}'.format(ph_format))
            return output

        # Get the CMU Dictionary entry for the word
        word = text.lower()
        entry = deepcopy(self.dict.get(word))  # Ensure safe copy of entry

        # Has entry, return it directly
        if entry is not None:
            return format_as(entry)

        # Auto Possessive Processor
        if self.ft_auto_pos:
            res = self.p.auto_possessives(word)
            if res is not None:
                return format_as(res)

        # Auto Contractions for "ll" or "d"
        if self.ft_auto_ll:
            res = self.p.auto_contractions(word)
            if res is not None:
                return format_as(res)

        # Check for hyphenated words
        if self.ft_auto_hyphenated:
            res = self.p.auto_hyphenated(word)
            if res is not None:
                return format_as(res)

        # Check for compound words
        if self.ft_auto_compound:
            res = self.p.auto_compound(word)
            if res is not None:
                return format_as(res)

        # No entry, detect if this is a multi-word entry
        if '(' in word and ')' in word and any(char.isdigit() for char in word):
            # Parse the integer from the word using regex
            num = int(re.findall(re_digit, word)[0])
            # If found
            if num is not None:
                # Remove the integer and bracket from the word
                actual_word = re.sub(re_bracket_with_digit, "", word)
                # See if this is a valid entry
                result = deepcopy(self.dict.get(actual_word))  # Ensure safe copy of entry
                # If found:
                if result is not None:
                    # Translate the integer to index, clamped at 0
                    # (was min(num - 1, 0), which always yielded index 0)
                    index = max(num - 1, 0)
                    # Check if index is less than the number of pronunciations
                    if index < len(result):
                        # Return the entry using the provided num index
                        return format_as(result[index])
                    # If entry is higher
                    else:
                        # Return the highest available entry
                        return format_as(result[-1])

        # Auto de-pluralization
        # This is placed near the end because we need to do a pos-tag process
        if self.ft_auto_plural:
            res = self.p.auto_plural(word, pos)
            if res is not None:
                return format_as(res)

        # Stem check
        # noinspection SpellCheckingInspection
        """
        Supported modes for words ending in:
        "ing", "ingly", "ly"
        """
        if self.ft_stem:
            res = self.p.auto_stem(word)
            if res is not None:
                return format_as(res)

        # Force compounding
        if self.ft_auto_compound_l2:
            res = self.p.auto_compound_l2(word)
            if res is not None:
                return format_as(res)

        # If not found
        return None

    def convert(self, text: str) -> str | None:
        # noinspection GrazieInspection
        """
        Replace a grapheme text line with phonemes.

        :param text: Text line to be converted
        :type: str
        """

        # Check valid unresolved_mode argument
        if self.unresolved_mode not in ['keep', 'remove', 'drop']:
            raise ValueError('Invalid value for unresolved_mode: {}'.format(self.unresolved_mode))
        ur_mode = self.unresolved_mode

        # Normalize numbers, if enabled
        if self.process_numbers:
            text = normalize_numbers(text)
        # Filter and Tokenize
        f_text = filter_text(text, preserve_case=True)
        words = self.h2p.tokenize(f_text)
        # Run POS tagging
        tags = self.h2p.get_tags(words)

        # Loop through words and pos tags
        for word, pos in tags:
            # Skip punctuation
            if word == '.':
                continue
            # If word not in h2p dict, check CMU dict
            if not self.h2p.dict.contains(word):
                entry = self.lookup(word, pos)
                if entry is None:
                    if ur_mode == 'drop':
                        return None
                    if ur_mode == 'remove':
                        text = replace_first(word, '', text)
                    continue
                # Do replace
                f_ph = ph.with_cb(ph.to_sds(entry))
                text = replace_first(word, f_ph, text)
                continue
            # For word in h2p dict, get phonemes
            phonemes = self.h2p.dict.get_phoneme(word, pos)
            # Format phonemes
            f_ph = ph.with_cb(ph.to_sds(phonemes))
            # Replace word with phonemes
            text = replace_first(word, f_ph, text)
        # Return text
        return text
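A minimal usage sketch for CMUDictExt (hypothetical, assuming both dictionary paths are left as None so the bundled data files are used):

# Hypothetical usage; relies on the bundled CMU and H2p dictionaries.
from h2p_parser.cmudictext import CMUDictExt

ext = CMUDictExt()

# Word lookup as a space-delimited phoneme string ('sds' format)
print(ext.lookup('hello'))   # e.g. 'HH AH0 L OW1'

# Line conversion: heteronyms such as 'read' resolve by POS tag,
# and each resolved word is wrapped in {curly brackets}
print(ext.convert('I read the book.'))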
resources/app/python/xvapitch/text/h2p_parser/compat/__init__.py
ADDED
@@ -0,0 +1,7 @@
"""
Compatibility module.

This module contains compatibility wrappers for existing
implementations of CMUDict and other dictionaries.

"""
resources/app/python/xvapitch/text/h2p_parser/compat/cmudict.py
ADDED
@@ -0,0 +1,19 @@
# Compatibility layer for using CMUDictExt with CMUDict-like API calls.
# Designed to be compatible with the implementation of CMUDict in:
# https://github.com/NVIDIA/DeepLearningExamples/
#
# Example usage:
# from h2p_parser.compat.cmudict import CMUDict

from h2p_parser.cmudictext import CMUDictExt


class CMUDict(CMUDictExt):
    def __init__(self, file_or_path=None, heteronyms_path=None, keep_ambiguous=True):
        # Parameter Mapping:
        # file_or_path => Mapped to cmu_dict_path
        # heteronyms_path => Dropped as CMUDictExt uses H2p for heteronym parsing.
        # keep_ambiguous => Mapped to cmu_multi_mode | True => -2, False => -1
        super().__init__(file_or_path, heteronyms_path)
        self._entries = {}
        self.heteronyms = []
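A sketch of the intended drop-in use at NVIDIA-style call sites (hypothetical; only the constructor signature is remapped):

# Hypothetical drop-in for code written against the NVIDIA CMUDict API.
from h2p_parser.compat.cmudict import CMUDict

cmudict = CMUDict(file_or_path=None)  # None -> bundled CMU dictionary
print(cmudict.lookup('perfect'))      # lookup() is inherited from CMUDictExt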
resources/app/python/xvapitch/text/h2p_parser/data/__init__.py
ADDED
File without changes

resources/app/python/xvapitch/text/h2p_parser/data/cmudict-0.7b.txt
ADDED
The diff for this file is too large to render. See raw diff.

resources/app/python/xvapitch/text/h2p_parser/data/cmudict.dict
ADDED
The diff for this file is too large to render. See raw diff.
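The dict.json file added below maps each heteronym to a dictionary of POS keys ("DEFAULT" plus overrides such as "VERB", "NOUN", or "ADJ") onto ARPAbet phoneme strings. A sketch of resolving a pronunciation from it (hypothetical helper, not part of the commit):

# Hypothetical resolver for the POS-keyed heteronym dictionary below.
import json

with open('dict.json', encoding='utf-8') as f:
    het_dict = json.load(f)

def phoneme_for(word: str, pos: str = 'DEFAULT') -> str:
    entry = het_dict[word.lower()]
    return entry.get(pos, entry['DEFAULT'])

print(phoneme_for('object'))          # 'AA1 B JH EH0 K T'
print(phoneme_for('object', 'VERB'))  # 'AH0 B JH EH1 K T'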
resources/app/python/xvapitch/text/h2p_parser/data/dict.json
ADDED
@@ -0,0 +1,1500 @@
{
  "absent": {"DEFAULT": "AE1 B S AH0 N T", "VERB": "AH1 B S AE1 N T"},
  "abstract": {"DEFAULT": "AE1 B S T R AE2 K T", "VERB": "AE0 B S T R AE1 K T"},
  "abstracts": {"DEFAULT": "AE1 B S T R AE0 K T S", "VERB": "AE0 B S T R AE1 K T S"},
  "abuse": {"DEFAULT": "AH0 B Y UW1 S", "VERB": "AH0 B Y UW1 Z"},
  "abuses": {"DEFAULT": "AH0 B Y UW1 S IH0 Z", "VERB": "AH0 B Y UW1 Z IH0 Z"},
  "accent": {"DEFAULT": "AE1 K S EH2 N T", "VERB": "AH0 K S EH1 N T"},
  "accents": {"DEFAULT": "AE1 K S EH0 N T S", "VERB": "AE1 K S EH0 N T S"},
  "addict": {"DEFAULT": "AE1 D IH2 K T", "VERB": "AH0 D IH1 K T"},
  "addicts": {"DEFAULT": "AE1 D IH2 K T S", "VERB": "AH0 D IH1 K T S"},
  "advocate": {"DEFAULT": "AE1 D V AH0 K AH0 T", "VERB": "AE1 D V AH0 K EY2 T"},
  "advocates": {"DEFAULT": "AE1 D V AH0 K AH0 T S", "VERB": "AE1 D V AH0 K EY2 T S"},
  "affect": {"DEFAULT": "AE1 F EH0 K T", "VERB": "AH0 F EH1 K T"},
  "affects": {"DEFAULT": "AE1 F EH0 K T S", "VERB": "AH0 F EH1 K T S"},
  "affix": {"DEFAULT": "AE1 F IH0 K S", "VERB": "AH0 F IH1 K S"},
  "affixes": {"DEFAULT": "AE1 F IH0 K S IH0 Z", "VERB": "AH0 F IH1 K S IH0 Z"},
  "agglomerate": {"DEFAULT": "AH0 G L AA1 M ER0 AH0 T", "VERB": "AH0 G L AA1 M ER0 EY2 T"},
  "aggregate": {"DEFAULT": "AE1 G R AH0 G AH0 T", "VERB": "AE1 G R AH0 G EY0 T"},
  "aggregates": {"DEFAULT": "AE1 G R AH0 G IH0 T S", "VERB": "AE1 G R AH0 G EY2 T S"},
  "allies": {"DEFAULT": "AE1 L AY0 Z", "VERB": "AH0 L AY1 Z"},
  "alloy": {"DEFAULT": "AE1 L OY2", "VERB": "AH0 L OY1"},
  "alloys": {"DEFAULT": "AE1 L OY2 Z", "VERB": "AH0 L OY1 Z"},
  "ally": {"DEFAULT": "AE1 L AY0", "VERB": "AH0 L AY1"},
  "alternate": {"DEFAULT": "AO0 L T ER1 N AH0 T", "VERB": "AO1 L T ER0 N EY2 T"},
  "analyses": {"DEFAULT": "AE1 N AH0 L AY0 Z IH2 Z", "VERB": "AH0 N AE1 L IH0 S IY2 Z"},
  "animate": {"DEFAULT": "AE1 N AH0 M AH0 T", "VERB": "AE1 N AH0 M EY2 T"},
  "annex": {"DEFAULT": "AE1 N EH2 K S", "VERB": "AH0 N EH1 K S"},
  "annexes": {"DEFAULT": "AE1 N EH2 K S IH0 Z", "VERB": "AH0 N EH1 K S IH0 Z"},
  "appropriate": {"DEFAULT": "AH0 P R OW1 P R IY0 AH0 T", "VERB": "AH0 P R OW1 P R IY0 EY2 T"},
  "approximate": {"DEFAULT": "AH0 P R AA1 K S AH0 M AH0 T", "VERB": "AH0 P R AA1 K S AH0 M EY2 T"},
  "articulate": {"DEFAULT": "AA0 R T IH1 K Y AH0 L EY2 T", "VERB": "AA0 R T IH1 K Y AH0 L AH0 T"},
  "aspirate": {"DEFAULT": "AE1 S P ER0 AH0 T", "VERB": "AE1 S P ER0 EY2 T"},
  "aspirates": {"DEFAULT": "AE1 S P ER0 AH0 T S", "VERB": "AE1 S P ER0 EY2 T S"},
  "associate": {"DEFAULT": "AH0 S OW1 S IY0 AH0 T", "VERB": "AH0 S OW1 S IY0 EY2 T"},
  "associates": {"DEFAULT": "AH0 S OW1 S IY0 AH0 T S", "VERB": "AH0 S OW1 S IY0 EY2 T S"},
  "attribute": {"DEFAULT": "AE1 T R IH0 B Y UW0 T", "VERB": "AH0 T R IH1 B Y UW2 T"},
  "attributes": {"DEFAULT": "AE1 T R IH0 B Y UW0 T S", "VERB": "AH0 T R IH1 B Y UW2 T S"},
  "baths": {"DEFAULT": "B AE1 DH Z", "VERB": "B AE1 TH S"},
  "blessed": {"DEFAULT": "B L EH1 S T", "VERB": "B L EH1 S IH0 D"},
  "certificate": {"DEFAULT": "S ER0 T IH1 F IH0 K EY2 T", "VERB": "S ER0 T IH1 F IH0 K AH0 T"},
  "certificates": {"DEFAULT": "S ER0 T IH1 F IH0 K AH0 T S", "VERB": "S ER0 T IH1 F IH0 K EY2 T S"},
  "close": {"DEFAULT": "K L OW1 S", "VERB": "K L OW1 Z"},
  "closer": {"DEFAULT": "K L OW1 S ER0", "NOUN": "K L OW1 Z ER0"},
  "closes": {"DEFAULT": "K L OW1 S IH0 Z", "VERB": "K L OW1 Z IH0 Z"},
  "collect": {"DEFAULT": "K AA1 L EH0 K T", "VERB": "K AH0 L EH1 K T"},
  "collects": {"DEFAULT": "K AA1 L EH0 K T S", "VERB": "K AH0 L EH1 K T S"},
  "combat": {"DEFAULT": "K AA1 M B AE0 T", "VERB": "K AH0 M B AE1 T"},
  "combats": {"DEFAULT": "K AH1 M B AE0 T S", "VERB": "K AH0 M B AE1 T S"},
  "combine": {"DEFAULT": "K AA1 M B AY0 N", "VERB": "K AH0 M B AY1 N"},
  "commune": {"DEFAULT": "K AA1 M Y UW0 N", "VERB": "K AH0 M Y UW1 N"},
  "communes": {"DEFAULT": "K AA1 M Y UW0 N Z", "VERB": "K AH0 M Y UW1 N Z"},
  "compact": {"DEFAULT": "K AA1 M P AE0 K T", "VERB": "K AH0 M P AE1 K T"},
  "compacts": {"DEFAULT": "K AA1 M P AE0 K T S", "VERB": "K AH0 M P AE1 K T S"},
  "complex": {"ADJ": "K AH0 M P L EH1 K S", "DEFAULT": " K AA1 M P L EH0 K S"},
  "compliment": {"DEFAULT": "K AA1 M P L AH0 M AH0 N T", "VERB": "K AA1 M P L AH0 M EH0 N T"},
  "compliments": {"DEFAULT": "K AA1 M P L AH0 M AH0 N T S", "VERB": "K AA1 M P L AH0 M EH0 N T S"},
  "compound": {"DEFAULT": "K AA1 M P AW0 N D", "VERB": "K AH0 M P AW1 N D"},
  "compounds": {"DEFAULT": "K AA1 M P AW0 N D Z", "VERB": "K AH0 M P AW1 N D Z"},
  "compress": {"DEFAULT": "K AA1 M P R EH0 S", "VERB": "K AH0 M P R EH1 S"},
  "compresses": {"DEFAULT": "K AA1 M P R EH0 S AH0 Z", "VERB": "K AH0 M P R EH1 S IH0 Z"},
  "concert": {"DEFAULT": "K AA1 N S ER0 T", "VERB": "K AH0 N S ER1 T"},
  "concerts": {"DEFAULT": "K AA1 N S ER0 T S", "VERB": "K AH0 N S ER1 T S"},
  "conduct": {"DEFAULT": "K AA1 N D AH0 K T", "VERB": "K AA0 N D AH1 K T"},
  "confederate": {"DEFAULT": "K AH0 N F EH1 D ER0 AH0 T", "VERB": "K AH0 N F EH1 D ER0 EY2 T"},
  "confederates": {"DEFAULT": "K AH0 N F EH1 D ER0 AH0 T S", "VERB": "K AH0 N F EH1 D ER0 EY2 T S"},
  "confines": {"DEFAULT": "K AA1 N F AY2 N Z", "VERB": "K AH0 N F AY1 N Z"},
  "conflict": {"DEFAULT": "K AA1 N F L IH0 K T", "VERB": "K AH0 N F L IH1 K T"},
  "conflicts": {"DEFAULT": "K AA1 N F L IH0 K T S", "VERB": "K AH0 N F L IH1 K T S"},
  "conglomerate": {"DEFAULT": "K AH0 N G L AA1 M ER0 AH0 T", "VERB": "K AH0 N G L AA1 M ER0 EY2 T"},
  "conglomerates": {"DEFAULT": "K AH0 N G L AA1 M ER0 AH0 T S", "VERB": "K AH0 N G L AA1 M ER0 EY2 T S"},
  "conscript": {"DEFAULT": "K AA1 N S K R IH0 P T", "VERB": "K AH0 N S K R IH1 P T"},
  "conscripts": {"DEFAULT": "K AA1 N S K R IH0 P T S", "VERB": "K AH0 N S K R IH1 P T S"},
  "console": {"DEFAULT": "K AA1 N S OW0 L", "VERB": "K AH0 N S OW1 L"},
  "consoles": {"DEFAULT": "K AA1 N S OW0 L Z", "VERB": "K AH0 N S OW1 L Z"},
  "consort": {"DEFAULT": "K AA1 N S AO0 R T", "VERB": "K AH0 N S AO1 R T"},
  "construct": {"DEFAULT": "K AA1 N S T R AH0 K T", "VERB": "K AH0 N S T R AH1 K T"},
  "constructs": {"DEFAULT": "K AA1 N S T R AH0 K T S", "VERB": "K AH0 N S T R AH1 K T S"},
  "consummate": {"DEFAULT": "K AA0 N S AH1 M AH0 T", "VERB": "K AA1 N S AH0 M EY2 T"},
  "content": {"DEFAULT": "K AH0 N T EH1 N T", "NOUN": "K AA1 N T EH0 N T"},
  "contents": {"DEFAULT": "K AA1 N T EH0 N T S", "VERB": "K AH0 N T EH1 N T S"},
  "contest": {"DEFAULT": "K AA1 N T EH0 S T", "VERB": "K AH0 N T EH1 S T"},
  "contests": {"DEFAULT": "K AA1 N T EH0 S T S", "VERB": "K AH0 N T EH1 S T S"},
  "contract": {"DEFAULT": "K AA1 N T R AE2 K T", "VERB": "K AH0 N T R AE1 K T"},
  "contracts": {"DEFAULT": "K AA1 N T R AE2 K T S", "VERB": "K AH0 N T R AE1 K T S"},
  "contrast": {"DEFAULT": "K AA1 N T R AE0 S T", "VERB": "K AH0 N T R AE1 S T"},
  "contrasts": {"DEFAULT": "K AA1 N T R AE0 S T S", "VERB": "K AH0 N T R AE1 S T S"},
  "converse": {"DEFAULT": "K AA1 N V ER0 S", "VERB": "K AH0 N V ER1 S"},
  "convert": {"DEFAULT": "K AA1 N V ER0 T", "VERB": "K AH0 N V ER1 T"},
  "converts": {"DEFAULT": "K AA1 N V ER0 T S", "VERB": "K AH0 N V ER1 T S"},
  "convict": {"DEFAULT": "K AA1 N V IH0 K T", "VERB": "K AH0 N V IH1 K T"},
  "convicts": {"DEFAULT": "K AA1 N V IH0 K T S", "VERB": "K AH0 N V IH1 K T S"},
  "coordinate": {"DEFAULT": "K OW0 AO1 R D AH0 N AH0 T", "VERB": "K OW0 AO1 R D AH0 N EY2 T"},
  "coordinates": {"DEFAULT": "K OW0 AO1 R D AH0 N AH0 T S", "VERB": "K OW0 AO1 R D AH0 N EY2 T S"},
  "counterbalance": {"DEFAULT": "K AW2 N T ER0 B AE1 L AH0 N S", "VERB": "K AW1 N T ER0 B AE2 L AH0 N S"},
  "counterbalances": {"DEFAULT": "K AW1 N T ER0 B AE2 L AH0 N S IH0 Z", "VERB": "K AW2 N T ER0 B AE1 L AH0 N S IH0 Z"},
  "crabbed": {"DEFAULT": "K R AE1 B IH0 D", "VERB": "K R AE1 B D"},
  "crooked": {"DEFAULT": "K R UH1 K AH0 D", "VERB": "K R UH1 K T"},
  "curate": {"DEFAULT": "K Y UH1 R AH0 T", "VERB": "K Y UH0 R AH1 T"},
  "cursed": {"DEFAULT": "K ER1 S IH0 D", "VERB": "K ER1 S T"},
  "decoy": {"DEFAULT": "D IY1 K OY0", "VERB": "D IY0 K OY1"},
  "decoys": {"DEFAULT": "D IY1 K OY0 Z", "VERB": "D IY0 K OY1 Z"},
  "decrease": {"DEFAULT": "D IY1 K R IY2 S", "VERB": "D IH0 K R IY1 S"},
  "decreases": {"DEFAULT": "D IY1 K R IY2 S IH0 Z", "VERB": "D IH0 K R IY1 S IH0 Z"},
  "defect": {"DEFAULT": "D IY1 F EH0 K T", "VERB": "D IH0 F EH1 K T"},
  "defects": {"DEFAULT": "D IY1 F EH0 K T S", "VERB": "D IH0 F EH1 K T S"},
  "degenerate": {"DEFAULT": "D IH0 JH EH1 N ER0 AH0 T", "VERB": "D IH0 JH EH1 N ER0 EY2 T"},
  "degenerates": {"DEFAULT": "D IH0 JH EH1 N ER0 AH0 T S", "VERB": "D IH0 JH EH1 N ER0 EY2 T S"},
  "delegate": {"DEFAULT": "D EH1 L AH0 G AH0 T", "VERB": "D EH1 L AH0 G EY2 T"},
  "delegates": {"DEFAULT": "D EH1 L AH0 G AH0 T S", "VERB": "D EH1 L AH0 G EY2 T S"},
  "deliberate": {"DEFAULT": "D IH0 L IH1 B ER0 AH0 T", "VERB": "D IH0 L IH1 B ER0 EY2 T"},
  "desert": {"DEFAULT": "D EH1 Z ER0 T", "VERB": "D IH0 Z ER1 T"},
  "deserts": {"DEFAULT": "D EH1 Z ER0 T S", "VERB": "D IH0 Z ER1 T S"},
  "desolate": {"DEFAULT": "D EH1 S AH0 L AH0 T", "VERB": "D EH1 S AH0 L EY2 T"},
  "diagnoses": {"DEFAULT": "D AY2 AH0 G N OW1 S IY0 Z", "VERB": "D AY1 AH0 G N OW2 Z IY0 Z"},
  "dictate": {"DEFAULT": "D IH1 K T EY2 T", "VERB": "D IH0 K T EY1 T"},
  "dictates": {"DEFAULT": "D IH1 K T EY2 T S", "VERB": "D IH0 K T EY1 T S"},
  "diffuse": {"DEFAULT": "D IH0 F Y UW1 S", "VERB": "D IH0 F Y UW1 Z"},
  "digest": {"DEFAULT": "D AY1 JH EH0 S T", "VERB": "D AY0 JH EH1 S T"},
  "digests": {"DEFAULT": "D AY1 JH EH0 S T S", "VERB": "D AY2 JH EH1 S T S"},
  "discard": {"DEFAULT": "D IH1 S K AA0 R D", "VERB": "D IH0 S K AA1 R D"},
  "discards": {"DEFAULT": "D IH1 S K AA0 R D Z", "VERB": "D IH0 S K AA1 R D Z"},
  "discharge": {"DEFAULT": "D IH1 S CH AA2 R JH", "VERB": "D IH0 S CH AA1 R JH"},
  "discharges": {"DEFAULT": "D IH1 S CH AA2 R JH AH0 Z", "VERB": "D IH0 S CH AA1 R JH AH0 Z"},
  "discount": {"DEFAULT": "D IH1 S K AW0 N T", "VERB": "D IH0 S K AW1 N T"},
  "discounts": {"DEFAULT": "D IH1 S K AW2 N T S", "VERB": "D IH0 S K AW1 N T S"},
  "discourse": {"DEFAULT": "D IH1 S K AO0 R S", "VERB": "D IH0 S K AO1 R S"},
  "discourses": {"DEFAULT": "D IH1 S K AO0 R S IH0 Z", "VERB": "D IH0 S K AO1 R S IH0 Z"},
  "document": {"DEFAULT": "D AA1 K Y AH0 M AH0 N T", "VERB": "D AA1 K Y UW0 M EH0 N T"},
  "documents": {"DEFAULT": "D AA1 K Y AH0 M AH0 N T S", "VERB": "D AA1 K Y UW0 M EH0 N T S"},
  "dogged": {"DEFAULT": "D AO1 G D", "VERB": "D AO1 G IH0 D"},
  "duplicate": {"DEFAULT": "D UW1 P L AH0 K AH0 T", "VERB": "D UW1 P L AH0 K EY2 T"},
  "duplicates": {"DEFAULT": "D UW1 P L AH0 K AH0 T S", "VERB": "D UW1 P L AH0 K EY2 T S"},
  "ejaculate": {"DEFAULT": "IH0 JH AE1 K Y UW0 L AH0 T", "VERB": "IH0 JH AE1 K Y UW0 L EY2 T"},
  "ejaculates": {"DEFAULT": "IH0 JH AE1 K Y UW0 L AH0 T S", "VERB": "IH0 JH AE1 K Y UW0 L EY2 T S"},
  "elaborate": {"DEFAULT": "IH0 L AE1 B R AH0 T", "VERB": "IH0 L AE1 B ER0 EY2 T"},
  "entrance": {"DEFAULT": "EH1 N T R AH0 N S", "VERB": "IH0 N T R AH1 N S"},
  "entrances": {"DEFAULT": "EH1 N T R AH0 N S AH0 Z", "VERB": "IH0 N T R AH1 N S AH0 Z"},
  "envelope": {"DEFAULT": "EH1 N V AH0 L OW2 P", "VERB": "IH0 N V EH1 L AH0 P"},
  "envelopes": {"DEFAULT": "EH1 N V AH0 L OW2 P S", "VERB": "IH0 N V EH1 L AH0 P S"},
  "escort": {"DEFAULT": "EH1 S K AO0 R T", "VERB": "EH0 S K AO1 R T"},
  "escorts": {"DEFAULT": "EH1 S K AO0 R T S", "VERB": "EH0 S K AO1 R T S"},
  "essay": {"DEFAULT": "EH1 S EY2", "VERB": "EH0 S EY1"},
  "essays": {"DEFAULT": "EH1 S EY2 Z", "VERB": "EH0 S EY1 Z"},
  "estimate": {"DEFAULT": "EH1 S T AH0 M AH0 T", "VERB": "EH1 S T AH0 M EY2 T"},
  "estimates": {"DEFAULT": "EH1 S T AH0 M AH0 T S", "VERB": "EH1 S T AH0 M EY2 T S"},
  "excess": {"DEFAULT": "EH1 K S EH2 S", "VERB": "IH0 K S EH1 S"},
  "excise": {"DEFAULT": "EH1 K S AY0 Z", "VERB": "EH0 K S AY1 S"},
  "excuse": {"DEFAULT": "IH0 K S K Y UW1 S", "VERB": "IH0 K S K Y UW1 Z"},
  "excuses": {"DEFAULT": "IH0 K S K Y UW1 S IH0 Z", "VERB": "IH0 K S K Y UW1 Z IH0 Z"},
  "expatriate": {"DEFAULT": "EH0 K S P EY1 T R IY0 AH0 T", "VERB": "EH0 K S P EY1 T R IY0 EY2 T"},
  "expatriates": {"DEFAULT": "EH0 K S P EY1 T R IY0 AH0 T S", "VERB": "EH0 K S P EY1 T R IY0 EY2 T S"},
  "exploit": {"DEFAULT": "EH2 K S P L OY1 T", "VERB": "EH1 K S P L OY2 T"},
  "exploits": {"DEFAULT": "EH2 K S P L OY1 T S", "VERB": "EH1 K S P L OY2 T S"},
  "export": {"DEFAULT": "EH1 K S P AO0 R T", "VERB": "IH0 K S P AO1 R T"},
  "exports": {"DEFAULT": "EH1 K S P AO0 R T S", "VERB": "IH0 K S P AO1 R T S"},
  "extract": {"DEFAULT": "EH1 K S T R AE2 K T", "VERB": "IH0 K S T R AE1 K T"},
  "extracts": {"DEFAULT": "EH1 K S T R AE2 K T S", "VERB": "IH0 K S T R AE1 K T S"},
  "ferment": {"DEFAULT": "F ER1 M EH0 N T", "VERB": "F ER0 M EH1 N T"},
  "ferments": {"DEFAULT": "F ER1 M EH0 N T S", "VERB": "F ER0 M EH1 N T S"},
  "fragment": {"DEFAULT": "F R AE0 G M EH1 N T", "VERB": "F R AE1 G M AH0 N T"},
  "fragments": {"DEFAULT": "F R AE1 G M AH0 N T S", "VERB": "F R AE0 G M EH1 N T S"},
  "frequent": {"DEFAULT": "F R IY1 K W AH0 N T", "VERB": "F R IY1 K W EH2 N T"},
  "graduate": {"DEFAULT": "G R AE1 JH AH0 W AH0 T", "VERB": "G R AE1 JH AH0 W EY2 T"},
  "graduates": {"DEFAULT": "G R AE1 JH AH0 W AH0 T S", "VERB": "G R AE1 JH AH0 W EY2 T S"},
  "house": {"DEFAULT": "HH AW1 S", "VERB": "HH AW1 Z"},
  "impact": {"DEFAULT": "IH1 M P AE0 K T", "VERB": "IH2 M P AE1 K T"},
  "impacts": {"DEFAULT": "IH1 M P AE0 K T S", "VERB": "IH2 M P AE1 K T S"},
  "implant": {"DEFAULT": "IH1 M P L AE2 N T", "VERB": "IH2 M P L AE1 N T"},
  "implants": {"DEFAULT": "IH1 M P L AE2 N T S", "VERB": "IH2 M P L AE1 N T S"},
  "implement": {"DEFAULT": "IH1 M P L AH0 M AH0 N T", "VERB": "IH1 M P L AH0 M EH0 N T"},
  "implements": {"DEFAULT": "IH1 M P L AH0 M AH0 N T S", "VERB": "IH1 M P L AH0 M EH0 N T S"},
  "import": {"DEFAULT": "IH1 M P AO2 R T", "VERB": "IH2 M P AO1 R T"},
  "imports": {"DEFAULT": "IH1 M P AO2 R T S", "VERB": "IH2 M P AO1 R T S"},
  "impress": {"DEFAULT": "IH1 M P R EH0 S", "VERB": "IH0 M P R EH1 S"},
  "imprint": {"DEFAULT": "IH2 M P R IH1 N T", "VERB": "IH1 M P R IH0 N T"},
  "imprints": {"DEFAULT": "IH1 M P R IH0 N T S", "VERB": "IH2 M P R IH1 N T S"},
  "incense": {"DEFAULT": "IH1 N S EH2 N S", "VERB": "IH2 N S EH1 N S"},
  "incline": {"DEFAULT": "IH1 N K L AY0 N", "VERB": "IH2 N K L AY1 N"},
  "inclines": {"DEFAULT": "IH1 N K L AY0 N Z", "VERB": "IH2 N K L AY1 N Z"},
  "incorporate": {"DEFAULT": "IH2 N K AO1 R P ER0 AH0 T", "VERB": "IH2 N K AO1 R P ER0 EY2 T"},
  "increase": {"DEFAULT": "IH1 N K R IY2 S", "VERB": "IH2 N K R IY1 S"},
  "increases": {"DEFAULT": "IH1 N K R IY2 S IH0 Z", "VERB": "IH2 N K R IY1 S IH0 Z"},
  "indent": {"DEFAULT": "IH1 N D EH0 N T", "VERB": "IH2 N D EH1 N T"},
  "indents": {"DEFAULT": "IH1 N D EH0 N T S", "VERB": "IH2 N D EH1 N T S"},
  "inebriate": {"DEFAULT": "IH2 N EH1 B R IY0 AH0 T", "VERB": "IH2 N EH1 B R IY0 EY2 T"},
  "inebriates": {"DEFAULT": "IH2 N EH1 B R IY0 AH0 T S", "VERB": "IH2 N EH1 B R IY0 EY2 T S"},
  "initiate": {"DEFAULT": "IH2 N IH1 SH IY0 AH0 T", "VERB": "IH2 N IH1 SH IY0 EY2 T"},
  "initiates": {"DEFAULT": "IH2 N IH1 SH IY0 AH0 T S", "VERB": "IH2 N IH1 SH IY0 EY2 T S"},
  "inlay": {"DEFAULT": "IH1 N L EY2", "VERB": "IH2 N L EY1"},
  "inlays": {"DEFAULT": "IH1 N L EY2 Z", "VERB": "IH2 N L EY1 Z"},
  "insert": {"DEFAULT": "IH1 N S ER2 T", "VERB": "IH2 N S ER1 T"},
  "inserts": {"DEFAULT": "IH1 N S ER2 T S", "VERB": "IH2 N S ER1 T S"},
  "inset": {"DEFAULT": "IH1 N S EH2 T", "VERB": "IH2 N S EH1 T"},
  "insets": {"DEFAULT": "IH1 N S EH2 T S", "VERB": "IH2 N S EH1 T S"},
  "instinct": {"DEFAULT": "IH1 N S T IH0 NG K T", "VERB": "IH2 N S T IH1 NG K T"},
  "insult": {"DEFAULT": "IH1 N S AH2 L T", "VERB": "IH2 N S AH1 L T"},
  "insults": {"DEFAULT": "IH1 N S AH2 L T S", "VERB": "IH2 N S AH1 L T S"},
  "interchange": {"DEFAULT": "IH1 N T ER0 CH EY2 N JH", "VERB": "IH2 T ER0 CH EY1 N JH"},
  "interchanges": {"DEFAULT": "IH1 N T ER0 CH EY2 N JH IH0 Z", "VERB": "IH2 T ER0 CH EY1 N JH IH0 Z"},
  "interdict": {"DEFAULT": "IH1 N T ER0 D IH2 K T", "VERB": "IH2 N T ER0 D IH1 K T"},
  "interdicts": {"DEFAULT": "IH1 N T ER0 D IH2 K T S", "VERB": "IH2 N T ER0 D IH1 K T S"},
  "intern": {"DEFAULT": "IH1 N T ER0 N", "VERB": "IH0 N T ER1 N"},
  "interns": {"DEFAULT": "IH1 N T ER0 N Z", "VERB": "IH0 N T ER1 N Z"},
  "intimate": {"DEFAULT": "IH1 N T AH0 M AH0 T", "VERB": "IH1 N T IH0 M EY2 T"},
  "intimates": {"DEFAULT": "IH1 N T AH0 M AH0 T S", "VERB": "IH1 N T IH0 M EY2 T S"},
  "intrigue": {"DEFAULT": "IH1 N T R IY0 G", "VERB": "IH2 N T R IY1 G"},
  "introvert": {"DEFAULT": "IH1 N T R AO0 V ER2 T", "VERB": "IH2 N T R AO0 V ER1 T"},
  "introverts": {"DEFAULT": "IH1 N T R AO0 V ER2 T S", "VERB": "IH2 N T R AO0 V ER1 T S"},
  "inverse": {"DEFAULT": "IH2 N V ER1 S", "VERB": "IH1 N V ER0 S"},
  "invite": {"DEFAULT": "IH1 N V AY0 T", "VERB": "IH2 N V AY1 T"},
  "invites": {"DEFAULT": "IH1 N V AY0 T S", "VERB": "IH2 N V AY1 T S"},
  "jagged": {"DEFAULT": "JH AE1 G IH0 D", "VERB": "JH AE1 G D"},
  "learned": {"DEFAULT": "L ER1 N D", "VERB": "L ER1 N IH0 D"},
  "legitimate": {"DEFAULT": "L AH0 JH IH1 T AH0 M AH0 T", "VERB": "L AH0 JH IH1 T AH0 M EY2 T"},
  "live": {"DEFAULT": "L AY1 V", "VERB": "L IH1 V"},
  "lives": {"DEFAULT": "L AY1 V Z", "VERB": "L IH1 V Z"},
  "mandate": {"DEFAULT": "M AE2 N D EY1 T", "VERB": "M AE1 N D EY2 T"},
  "misconduct": {"DEFAULT": "M IH2 S K AA0 N D AH1 K T", "VERB": "M IH2 S K AA1 N D AH0 K T"},
  "misprint": {"DEFAULT": "M IH1 S P R IH0 N T", "VERB": "M IH2 S P R IH1 N T"},
  "misprints": {"DEFAULT": "M IH1 S P R IH0 N T S", "VERB": "M IH2 S P R IH1 N T S"},
  "misuse": {"DEFAULT": "M IH0 S Y UW1 Z", "VERB": "M IH0 S Y UW1 S"},
  "misuses": {"DEFAULT": "M IH0 S Y UW1 S IH0 Z", "VERB": "M IH0 S Y UW1 Z IH0 Z"},
  "moderate": {"DEFAULT": "M AA1 D ER0 AH0 T", "VERB": "M AA1 D ER0 EY2 T"},
  "moderates": {"DEFAULT": "M AA1 D ER0 AH0 T S", "VERB": "M AA1 D ER0 EY2 T S"},
  "mouth": {"DEFAULT": "M AW1 DH", "VERB": "M AW1 TH"},
  "mouths": {"DEFAULT": "M AW1 TH S", "VERB": "M AW1 DH Z"},
  "object": {"DEFAULT": "AA1 B JH EH0 K T", "VERB": "AH0 B JH EH1 K T"},
  "objects": {"DEFAULT": "AA1 B JH EH0 K T S", "VERB": "AH0 B JH EH1 K T S"},
  "ornament": {"DEFAULT": "AO1 R N AH0 M AH0 N T", "VERB": "AO1 R N AH0 M EH0 N T"},
  "ornaments": {"DEFAULT": "AO1 R N AH0 M AH0 N T S", "VERB": "AO1 R N AH0 M EH0 N T S"},
  "overcharge": {"DEFAULT": "OW1 V ER0 CH AA2 R JH", "VERB": "OW2 V ER0 CH AA1 R JH"},
  "overcharges": {"DEFAULT": "OW1 V ER0 CH AA2 R JH IH0 Z", "VERB": "OW2 V ER0 CH AA1 R JH IH0 Z"},
  "overflow": {"DEFAULT": "OW1 V ER0 F L OW2", "VERB": "OW2 V ER0 F L OW1"},
  "overflows": {"DEFAULT": "OW1 V ER0 F L OW2 Z", "VERB": "OW2 V ER0 F L OW1 Z"},
  "overhang": {"DEFAULT": "OW1 V ER0 HH AE2 NG", "VERB": "OW2 V ER0 HH AE1 NG"},
  "overhangs": {"DEFAULT": "OW1 V ER0 HH AE2 NG Z", "VERB": "OW2 V ER0 HH AE1 NG Z"},
  "overhaul": {"DEFAULT": "OW1 V ER0 HH AO2 L", "VERB": "OW2 V ER0 HH AO1 L"},
  "overhauls": {"DEFAULT": "OW1 V ER0 HH AO2 L Z", "VERB": "OW2 V ER0 HH AO1 L Z"},
  "overlap": {"DEFAULT": "OW1 V ER0 L AE2 P", "VERB": "OW2 V ER0 L AE1 P"},
  "overlaps": {"DEFAULT": "OW1 V ER0 L AE2 P S", "VERB": "OW2 V ER0 L AE1 P S"},
  "overlay": {"DEFAULT": "OW1 V ER0 L EY2", "VERB": "OW2 V ER0 L EY1"},
  "overlays": {"DEFAULT": "OW1 V ER0 L EY2 Z", "VERB": "OW2 V ER0 L EY1 Z"},
  "overwork": {"DEFAULT": "OW1 V ER0 W ER2 K", "VERB": "OW2 V ER0 W ER1 K"},
  "perfect": {"DEFAULT": "P ER1 F IH2 K T", "VERB": "P ER0 F EH1 K T"},
  "perfume": {"DEFAULT": "P ER1 F Y UW0 M", "VERB": "P ER0 F Y UW1 M"},
  "perfumes": {"DEFAULT": "P ER1 F Y UW0 M Z", "VERB": "P ER0 F Y UW1 M Z"},
  "permit": {"DEFAULT": "P ER1 M IH2 T", "VERB": "P ER0 M IH1 T"},
  "permits": {"DEFAULT": "P ER1 M IH2 T S", "VERB": "P ER0 M IH1 T S"},
  "pervert": {"DEFAULT": "P ER1 V ER0 T", "VERB": "P ER0 V ER1 T"},
  "perverts": {"DEFAULT": "P ER1 V ER0 T S", "VERB": "P ER0 V ER1 T S"},
  "pontificate": {"DEFAULT": "P AA0 N T IH1 F AH0 K EY2 T", "VERB": "P AA0 N T IH1 F AH0 K AH0 T"},
  "pontificates": {"DEFAULT": "P AA0 N T IH1 F AH0 K AH0 T S", "VERB": "P AA0 N T IH1 F AH0 K EY2 T S"},
  "precipitate": {"DEFAULT": "P R IH0 S IH1 P IH0 T EY2 T", "VERB": "P R IH0 S IH1 P IH0 T AH0 T"},
  "predicate": {"DEFAULT": "P R EH1 D AH0 K EY2 T", "VERB": "P R EH1 D IH0 K AH0 T"},
  "predicates": {"DEFAULT": "P R EH1 D IH0 K AH0 T S", "VERB": "P R EH1 D AH0 K EY2 T S"
|
1017 |
+
},
|
1018 |
+
"prefix": {
|
1019 |
+
"DEFAULT": "P R IY1 F IH0 K S",
|
1020 |
+
"VERB": "P R IY2 F IH1 K S"
|
1021 |
+
},
|
1022 |
+
"prefixes": {
|
1023 |
+
"DEFAULT": "P R IY1 F IH0 K S IH0 JH",
|
1024 |
+
"VERB": "P R IY2 F IH1 K S IH0 JH"
|
1025 |
+
},
|
1026 |
+
"presage": {
|
1027 |
+
"DEFAULT": "P R EH1 S IH0 JH",
|
1028 |
+
"VERB": "P R EH2 S IH1 JH"
|
1029 |
+
},
|
1030 |
+
"presages": {
|
1031 |
+
"DEFAULT": "P R EH1 S IH0 JH IH0 JH",
|
1032 |
+
"VERB": "P R EH2 S IH1 JH IH0 JH"
|
1033 |
+
},
|
1034 |
+
"present": {
|
1035 |
+
"DEFAULT": "P R EH1 Z AH0 N T",
|
1036 |
+
"VERB": "P R IY0 Z EH1 N T"
|
1037 |
+
},
|
1038 |
+
"presents": {
|
1039 |
+
"DEFAULT": "P R EH1 Z AH0 N T S",
|
1040 |
+
"VERB": "P R IY0 Z EH1 N T S"
|
1041 |
+
},
|
1042 |
+
"proceeds": {
|
1043 |
+
"DEFAULT": "P R OW1 S IY0 D Z",
|
1044 |
+
"VERB": "P R AH0 S IY1 D Z"
|
1045 |
+
},
|
1046 |
+
"process": {
|
1047 |
+
"DEFAULT": "P R AA1 S EH2 S",
|
1048 |
+
"VERB": "P R AO2 S EH1 S"
|
1049 |
+
},
|
1050 |
+
"processes": {
|
1051 |
+
"DEFAULT": "P R AO2 S EH1 S AH0 Z",
|
1052 |
+
"VERB": "P R AA1 S EH0 S AH0 Z"
|
1053 |
+
},
|
1054 |
+
"processing": {
|
1055 |
+
"DEFAULT": "P R AA1 S EH0 S IH0 NG",
|
1056 |
+
"VERB": "P R AA0 S EH1 S IH0 NG"
|
1057 |
+
},
|
1058 |
+
"produce": {
|
1059 |
+
"DEFAULT": "P R OW1 D UW0 S",
|
1060 |
+
"VERB": "P R AH0 D UW1 S"
|
1061 |
+
},
|
1062 |
+
"progress": {
|
1063 |
+
"DEFAULT": "P R AA1 G R EH2 S",
|
1064 |
+
"VERB": "P R AH0 G R EH1 S"
|
1065 |
+
},
|
1066 |
+
"progresses": {
|
1067 |
+
"DEFAULT": "P R AA1 G R EH2 S AH0 Z",
|
1068 |
+
"VERB": "P R OW0 G R EH1 S AH0 Z"
|
1069 |
+
},
|
1070 |
+
"project": {
|
1071 |
+
"DEFAULT": "P R AA1 JH EH0 K T",
|
1072 |
+
"VERB": "P R AA0 JH EH1 K T"
|
1073 |
+
},
|
1074 |
+
"projects": {
|
1075 |
+
"DEFAULT": "P R AA1 JH EH0 K T S",
|
1076 |
+
"VERB": "P R AA0 JH EH1 K T S"
|
1077 |
+
},
|
1078 |
+
"prospect": {
|
1079 |
+
"DEFAULT": "P R AA1 S P EH0 K T",
|
1080 |
+
"VERB": "P R AH2 S P EH1 K T"
|
1081 |
+
},
|
1082 |
+
"prospects": {
|
1083 |
+
"DEFAULT": "P R AA1 S P EH0 K T S",
|
1084 |
+
"VERB": "P R AH2 S P EH1 K T S"
|
1085 |
+
},
|
1086 |
+
"prostrate": {
|
1087 |
+
"DEFAULT": "P R AA1 S T R EY0 T",
|
1088 |
+
"VERB": "P R AA0 S T R EY1 T"
|
1089 |
+
},
|
1090 |
+
"protest": {
|
1091 |
+
"DEFAULT": "P R OW1 T EH2 S T",
|
1092 |
+
"VERB": "P R AH0 T EH1 S T"
|
1093 |
+
},
|
1094 |
+
"protests": {
|
1095 |
+
"DEFAULT": "P R OW1 T EH2 S T S",
|
1096 |
+
"VERB": "P R AH0 T EH1 S T S"
|
1097 |
+
},
|
1098 |
+
"purport": {
|
1099 |
+
"DEFAULT": "P ER1 P AO2 R T",
|
1100 |
+
"VERB": "P ER0 P AO1 R T"
|
1101 |
+
},
|
1102 |
+
"quadruple": {
|
1103 |
+
"DEFAULT": "K W AA0 D R UW1 P AH0 L",
|
1104 |
+
"VERB": "K W AA1 D R UW0 P AH0 L"
|
1105 |
+
},
|
1106 |
+
"quadruples": {
|
1107 |
+
"DEFAULT": "K W AA1 D R UW0 P AH0 L Z",
|
1108 |
+
"VERB": "K W AA0 D R UW1 P AH0 L Z"
|
1109 |
+
},
|
1110 |
+
"ragged": {
|
1111 |
+
"DEFAULT": "R AE1 G AH0 D",
|
1112 |
+
"VERB": "R AE1 G D"
|
1113 |
+
},
|
1114 |
+
"rampage": {
|
1115 |
+
"DEFAULT": "R AE1 M P EY2 JH",
|
1116 |
+
"VERB": "R AE2 M P EY1 JH"
|
1117 |
+
},
|
1118 |
+
"rampages": {
|
1119 |
+
"DEFAULT": "R AE1 M P EY2 JH IH0 Z",
|
1120 |
+
"VERB": "R AE2 M P EY1 JH IH0 Z"
|
1121 |
+
},
|
1122 |
+
"read": {
|
1123 |
+
"DEFAULT": "R IY1 D",
|
1124 |
+
"VBD": "R EH1 D",
|
1125 |
+
"VBN": "R EH1 D",
|
1126 |
+
"VBP": "R EH1 D"
|
1127 |
+
},
|
1128 |
+
"rebel": {
|
1129 |
+
"DEFAULT": "R IH0 B EH1 L",
|
1130 |
+
"VERB": "R EH1 B AH0 L"
|
1131 |
+
},
|
1132 |
+
"rebels": {
|
1133 |
+
"DEFAULT": "R EH1 B AH0 L Z",
|
1134 |
+
"VERB": "R IH0 B EH1 L Z"
|
1135 |
+
},
|
1136 |
+
"rebound": {
|
1137 |
+
"DEFAULT": "R IY1 B AW0 N D",
|
1138 |
+
"VERB": "R IY0 B AW1 N D"
|
1139 |
+
},
|
1140 |
+
"rebounds": {
|
1141 |
+
"DEFAULT": "R IY1 B AW0 N D Z",
|
1142 |
+
"VERB": "R IY0 B AW1 N D Z"
|
1143 |
+
},
|
1144 |
+
"recall": {
|
1145 |
+
"DEFAULT": "R IY1 K AO2 L",
|
1146 |
+
"VERB": "R IH0 K AO1 L"
|
1147 |
+
},
|
1148 |
+
"recalls": {
|
1149 |
+
"DEFAULT": "R IY1 K AO2 L Z",
|
1150 |
+
"VERB": "R IH0 K AO1 L Z"
|
1151 |
+
},
|
1152 |
+
"recap": {
|
1153 |
+
"DEFAULT": "R IY1 K AE2 P",
|
1154 |
+
"VERB": "R IH0 K AE1 P"
|
1155 |
+
},
|
1156 |
+
"recapped": {
|
1157 |
+
"DEFAULT": "R IY1 K AE2 P T",
|
1158 |
+
"VERB": "R IH0 K AE1 P T"
|
1159 |
+
},
|
1160 |
+
"recapping": {
|
1161 |
+
"DEFAULT": "R IY1 K AE2 P IH0 NG",
|
1162 |
+
"VERB": "R IH0 K AE1 P IH0 NG"
|
1163 |
+
},
|
1164 |
+
"recaps": {
|
1165 |
+
"DEFAULT": "R IY1 K AE2 P S",
|
1166 |
+
"VERB": "R IH0 K AE1 P S"
|
1167 |
+
},
|
1168 |
+
"record": {
|
1169 |
+
"DEFAULT": "R EH1 K ER0 D",
|
1170 |
+
"VERB": "R IH0 K AO1 R D"
|
1171 |
+
},
|
1172 |
+
"records": {
|
1173 |
+
"DEFAULT": "R EH1 K ER0 D Z",
|
1174 |
+
"VERB": "R IH0 K AO1 R D Z"
|
1175 |
+
},
|
1176 |
+
"recount": {
|
1177 |
+
"DEFAULT": " R IH1 K AW0 N T",
|
1178 |
+
"VERB": "R IY2 K AW1 N T"
|
1179 |
+
},
|
1180 |
+
"recounts": {
|
1181 |
+
"DEFAULT": " R IH1 K AW0 N T S",
|
1182 |
+
"VERB": "R IY2 K AW1 N T S"
|
1183 |
+
},
|
1184 |
+
"refill": {
|
1185 |
+
"DEFAULT": "R IY1 F IH0 L",
|
1186 |
+
"VERB": "R IY0 F IH1 L"
|
1187 |
+
},
|
1188 |
+
"refills": {
|
1189 |
+
"DEFAULT": "R IY1 F IH0 L Z",
|
1190 |
+
"VERB": "R IY0 F IH1 L Z"
|
1191 |
+
},
|
1192 |
+
"refit": {
|
1193 |
+
"DEFAULT": "R IY1 F IH0 T",
|
1194 |
+
"VERB": "R IY0 F IH1 T"
|
1195 |
+
},
|
1196 |
+
"refits": {
|
1197 |
+
"DEFAULT": "R IY1 F IH0 T S",
|
1198 |
+
"VERB": "R IY0 F IH1 T S"
|
1199 |
+
},
|
1200 |
+
"refresh": {
|
1201 |
+
"DEFAULT": "R IH1 F R EH0 SH",
|
1202 |
+
"VERB": "R IH0 F R EH1 SH"
|
1203 |
+
},
|
1204 |
+
"refund": {
|
1205 |
+
"DEFAULT": "R IY1 F AH2 N D",
|
1206 |
+
"VERB": "R IH0 F AH1 N D"
|
1207 |
+
},
|
1208 |
+
"refunds": {
|
1209 |
+
"DEFAULT": "R IY1 F AH2 N D Z",
|
1210 |
+
"VERB": "R IH0 F AH1 N D Z"
|
1211 |
+
},
|
1212 |
+
"refuse": {
|
1213 |
+
"DEFAULT": "R EH1 F Y UW2 Z",
|
1214 |
+
"VERB": "R IH0 F Y UW1 Z"
|
1215 |
+
},
|
1216 |
+
"regenerate": {
|
1217 |
+
"DEFAULT": "R IY0 JH EH1 N ER0 AH0 T",
|
1218 |
+
"VERB": "R IY0 JH EH1 N ER0 EY2 T"
|
1219 |
+
},
|
1220 |
+
"rehash": {
|
1221 |
+
"DEFAULT": "R IY1 HH AE0 SH",
|
1222 |
+
"VERB": "R IY0 HH AE1 SH"
|
1223 |
+
},
|
1224 |
+
"rehashes": {
|
1225 |
+
"DEFAULT": "R IY1 HH AE0 SH IH0 Z",
|
1226 |
+
"VERB": "R IY0 HH AE1 SH IH0 Z"
|
1227 |
+
},
|
1228 |
+
"reincarnate": {
|
1229 |
+
"DEFAULT": "R IY2 IH0 N K AA1 R N AH0 T",
|
1230 |
+
"VERB": "R IY2 IH0 N K AA1 R N EY2 T"
|
1231 |
+
},
|
1232 |
+
"reject": {
|
1233 |
+
"DEFAULT": "R IY1 JH EH0 K T",
|
1234 |
+
"VERB": "R IH0 JH EH1 K T"
|
1235 |
+
},
|
1236 |
+
"rejects": {
|
1237 |
+
"DEFAULT": "R IY1 JH EH0 K T S",
|
1238 |
+
"VERB": "R IH0 JH EH1 K T S"
|
1239 |
+
},
|
1240 |
+
"relay": {
|
1241 |
+
"DEFAULT": "R IY1 L EY2",
|
1242 |
+
"VERB": "R IY2 L EY1"
|
1243 |
+
},
|
1244 |
+
"relaying": {
|
1245 |
+
"DEFAULT": "R IY1 L EY2 IH0 NG",
|
1246 |
+
"VERB": "R IY2 L EY1 IH0 NG"
|
1247 |
+
},
|
1248 |
+
"relays": {
|
1249 |
+
"DEFAULT": "R IY1 L EY2 Z",
|
1250 |
+
"VERB": "R IY2 L EY1 Z"
|
1251 |
+
},
|
1252 |
+
"remake": {
|
1253 |
+
"DEFAULT": "R IY1 M EY0 K",
|
1254 |
+
"VERB": "R IY2 M EY1 K"
|
1255 |
+
},
|
1256 |
+
"remakes": {
|
1257 |
+
"DEFAULT": "R IY1 M EY0 K S",
|
1258 |
+
"VERB": "R IY2 M EY1 K S"
|
1259 |
+
},
|
1260 |
+
"replay": {
|
1261 |
+
"DEFAULT": "R IY1 P L EY0",
|
1262 |
+
"VERB": "R IY0 P L EY1"
|
1263 |
+
},
|
1264 |
+
"replays": {
|
1265 |
+
"DEFAULT": "R IY1 P L EY0 Z",
|
1266 |
+
"VERB": "R IY0 P L EY1 Z"
|
1267 |
+
},
|
1268 |
+
"reprint": {
|
1269 |
+
"DEFAULT": "R IY1 P R IH0 N T",
|
1270 |
+
"VERB": "R IY0 P R IH1 N T"
|
1271 |
+
},
|
1272 |
+
"reprints": {
|
1273 |
+
"DEFAULT": "R IY1 P R IH0 N T S",
|
1274 |
+
"VERB": "R IY0 P R IH1 N T S"
|
1275 |
+
},
|
1276 |
+
"rerun": {
|
1277 |
+
"DEFAULT": "R IY1 R AH0 N",
|
1278 |
+
"VERB": "R IY2 R AH1 N"
|
1279 |
+
},
|
1280 |
+
"reruns": {
|
1281 |
+
"DEFAULT": "R IY1 R AH0 N Z",
|
1282 |
+
"VERB": "R IY2 R AH1 N Z"
|
1283 |
+
},
|
1284 |
+
"resume": {
|
1285 |
+
"DEFAULT": "R EH1 Z AH0 M EY2",
|
1286 |
+
"VERB": "R IY0 Z UW1 M"
|
1287 |
+
},
|
1288 |
+
"retake": {
|
1289 |
+
"DEFAULT": "R IY1 T EY0 K",
|
1290 |
+
"VERB": "R IY0 T EY1 K"
|
1291 |
+
},
|
1292 |
+
"retakes": {
|
1293 |
+
"DEFAULT": "R IY1 T EY0 K S",
|
1294 |
+
"VERB": "R IY0 T EY1 K S"
|
1295 |
+
},
|
1296 |
+
"rethink": {
|
1297 |
+
"DEFAULT": "R IY1 TH IH0 NG K",
|
1298 |
+
"VERB": "R IY2 TH IH1 NG K"
|
1299 |
+
},
|
1300 |
+
"rethinks": {
|
1301 |
+
"DEFAULT": "R IY1 TH IH0 NG K S",
|
1302 |
+
"VERB": "R IY2 TH IH1 NG K S"
|
1303 |
+
},
|
1304 |
+
"retread": {
|
1305 |
+
"DEFAULT": "R IY1 T R EH0 D",
|
1306 |
+
"VERB": "R IY2 T R EH1 D"
|
1307 |
+
},
|
1308 |
+
"retreads": {
|
1309 |
+
"DEFAULT": "R IY1 T R EH0 D Z",
|
1310 |
+
"VERB": "R IY2 T R EH1 D Z"
|
1311 |
+
},
|
1312 |
+
"rewrite": {
|
1313 |
+
"DEFAULT": "R IY1 R AY2 T",
|
1314 |
+
"VERB": "R IY0 R AY1 T"
|
1315 |
+
},
|
1316 |
+
"rewrites": {
|
1317 |
+
"DEFAULT": "R IY1 R AY2 T S",
|
1318 |
+
"VERB": "R IY0 R AY1 T S"
|
1319 |
+
},
|
1320 |
+
"segment": {
|
1321 |
+
"DEFAULT": "S EH2 G M EH1 N T",
|
1322 |
+
"VERB": "S EH1 G M AH0 N T"
|
1323 |
+
},
|
1324 |
+
"segments": {
|
1325 |
+
"DEFAULT": "S EH1 G M AH0 N T S",
|
1326 |
+
"VERB": "S EH2 G M EH1 N T S"
|
1327 |
+
},
|
1328 |
+
"separate": {
|
1329 |
+
"DEFAULT": "S EH1 P ER0 IH0 T",
|
1330 |
+
"VERB": "S EH1 P ER0 EY2 T"
|
1331 |
+
},
|
1332 |
+
"separates": {
|
1333 |
+
"DEFAULT": "S EH1 P ER0 IH0 T S",
|
1334 |
+
"VERB": "S EH1 P ER0 EY2 T S"
|
1335 |
+
},
|
1336 |
+
"subcontract": {
|
1337 |
+
"DEFAULT": "S AH2 B K AA0 N T R AE1 K T",
|
1338 |
+
"VERB": "S AH0 B K AA1 N T R AE2 K T"
|
1339 |
+
},
|
1340 |
+
"subcontracts": {
|
1341 |
+
"DEFAULT": "S AH0 B K AA1 N T R AE2 K T S",
|
1342 |
+
"VERB": "S AH2 B K AA0 N T R AE1 K T S"
|
1343 |
+
},
|
1344 |
+
"subject": {
|
1345 |
+
"DEFAULT": "S AH1 B JH IH0 K T",
|
1346 |
+
"VERB": "S AH0 B JH EH1 K T"
|
1347 |
+
},
|
1348 |
+
"subjects": {
|
1349 |
+
"DEFAULT": "S AH1 B JH IH0 K T S",
|
1350 |
+
"VERB": "S AH0 B JH EH1 K T S"
|
1351 |
+
},
|
1352 |
+
"subordinate": {
|
1353 |
+
"DEFAULT": "S AH0 B AO1 R D AH0 N AH0 T",
|
1354 |
+
"VERB": "S AH0 B AO1 R D AH0 N EY2 T"
|
1355 |
+
},
|
1356 |
+
"subordinates": {
|
1357 |
+
"DEFAULT": "S AH0 B AO1 R D AH0 N AH0 T S",
|
1358 |
+
"VERB": "S AH0 B AO1 R D AH0 N EY2 T S"
|
1359 |
+
},
|
1360 |
+
"supplement": {
|
1361 |
+
"DEFAULT": "S AH1 P L AH0 M AH0 N T",
|
1362 |
+
"VERB": "S AH1 P L AH0 M EH0 N T"
|
1363 |
+
},
|
1364 |
+
"supplements": {
|
1365 |
+
"DEFAULT": "S AH1 P L AH0 M AH0 N T S",
|
1366 |
+
"VERB": "S AH1 P L AH0 M EH0 N T S"
|
1367 |
+
},
|
1368 |
+
"surmise": {
|
1369 |
+
"DEFAULT": "S ER1 M AY0 Z",
|
1370 |
+
"VERB": "S ER0 M AY1 Z"
|
1371 |
+
},
|
1372 |
+
"surmises": {
|
1373 |
+
"DEFAULT": "S ER1 M AY0 Z IH0 Z",
|
1374 |
+
"VERB": "S ER0 M AY1 Z IH0 Z"
|
1375 |
+
},
|
1376 |
+
"survey": {
|
1377 |
+
"DEFAULT": "S ER1 V EY2",
|
1378 |
+
"VERB": "S ER0 V EY1"
|
1379 |
+
},
|
1380 |
+
"surveys": {
|
1381 |
+
"DEFAULT": "S ER1 V EY2 Z",
|
1382 |
+
"VERB": "S ER0 V EY1 Z"
|
1383 |
+
},
|
1384 |
+
"suspect": {
|
1385 |
+
"DEFAULT": "S AH1 S P EH2 K T",
|
1386 |
+
"VERB": "S AH0 S P EH1 K T"
|
1387 |
+
},
|
1388 |
+
"suspects": {
|
1389 |
+
"DEFAULT": "S AH1 S P EH2 K T S",
|
1390 |
+
"VERB": "S AH0 S P EH1 K T S"
|
1391 |
+
},
|
1392 |
+
"syndicate": {
|
1393 |
+
"DEFAULT": "S IH1 N D IH0 K AH0 T",
|
1394 |
+
"VERB": "S IH1 N D AH0 K EY2 T"
|
1395 |
+
},
|
1396 |
+
"syndicates": {
|
1397 |
+
"DEFAULT": "S IH1 N D IH0 K AH0 T S",
|
1398 |
+
"VERB": "S IH1 N D IH0 K EY2 T S"
|
1399 |
+
},
|
1400 |
+
"torment": {
|
1401 |
+
"DEFAULT": "T AO0 R M EH1 N T",
|
1402 |
+
"VERB": "T AO1 R M EH2 N T"
|
1403 |
+
},
|
1404 |
+
"transfer": {
|
1405 |
+
"DEFAULT": "T R AE1 N S F ER0",
|
1406 |
+
"VERB": "T R AE0 N S F ER1"
|
1407 |
+
},
|
1408 |
+
"transfers": {
|
1409 |
+
"DEFAULT": "T R AE1 N S F ER0 Z",
|
1410 |
+
"VERB": "T R AE0 N S F ER1 Z"
|
1411 |
+
},
|
1412 |
+
"transplant": {
|
1413 |
+
"DEFAULT": "T R AE1 N S P L AE0 N T",
|
1414 |
+
"VERB": "T R AE0 N S P L AE1 N T"
|
1415 |
+
},
|
1416 |
+
"transplants": {
|
1417 |
+
"DEFAULT": "T R AE1 N S P L AE0 N T S",
|
1418 |
+
"VERB": "T R AE0 N S P L AE1 N T S"
|
1419 |
+
},
|
1420 |
+
"transport": {
|
1421 |
+
"DEFAULT": "T R AE1 N S P AO0 R T",
|
1422 |
+
"VERB": "T R AE0 N S P AO1 R T"
|
1423 |
+
},
|
1424 |
+
"transports": {
|
1425 |
+
"DEFAULT": "T R AE1 N S P AO0 R T S",
|
1426 |
+
"VERB": "T R AE0 N S P AO1 R T S"
|
1427 |
+
},
|
1428 |
+
"triplicate": {
|
1429 |
+
"DEFAULT": "T R IH1 P L IH0 K AH0 T",
|
1430 |
+
"VERB": "T R IH1 P L IH0 K EY2 T"
|
1431 |
+
},
|
1432 |
+
"triplicates": {
|
1433 |
+
"DEFAULT": "T R IH1 P L IH0 K AH0 T S",
|
1434 |
+
"VERB": "T R IH1 P L IH0 K EY2 T S"
|
1435 |
+
},
|
1436 |
+
"undercut": {
|
1437 |
+
"DEFAULT": "AH1 N D ER0 K AH2 T",
|
1438 |
+
"VERB": "AH2 N D ER0 K AH1 T"
|
1439 |
+
},
|
1440 |
+
"underestimate": {
|
1441 |
+
"DEFAULT": "AH1 N D ER0 EH1 S T AH0 M AH0 T",
|
1442 |
+
"VERB": "AH1 N D ER0 EH1 S T AH0 M EY2 T"
|
1443 |
+
},
|
1444 |
+
"underestimates": {
|
1445 |
+
"DEFAULT": "AH1 N D ER0 EH1 S T AH0 M AH0 T S",
|
1446 |
+
"VERB": "AH1 N D ER0 EH1 S T AH0 M EY2 T S"
|
1447 |
+
},
|
1448 |
+
"underline": {
|
1449 |
+
"DEFAULT": "AH1 N D ER0 L AY2 N",
|
1450 |
+
"VERB": "AH2 N D ER0 L AY1 N"
|
1451 |
+
},
|
1452 |
+
"underlines": {
|
1453 |
+
"DEFAULT": "AH1 N D ER0 L AY2 N Z",
|
1454 |
+
"VERB": "AH2 N D ER0 L AY1 N Z"
|
1455 |
+
},
|
1456 |
+
"undertaking": {
|
1457 |
+
"DEFAULT": "AH1 N D ER0 T EY2 K IH0 NG",
|
1458 |
+
"VERB": "AH2 N D ER0 T EY1 K IH0 NG"
|
1459 |
+
},
|
1460 |
+
"undertakings": {
|
1461 |
+
"DEFAULT": "AH1 N D ER0 T EY2 K IH0 NG Z",
|
1462 |
+
"VERB": "AH2 N D ER0 T EY1 K IH0 NG Z"
|
1463 |
+
},
|
1464 |
+
"unused": {
|
1465 |
+
"DEFAULT": "AH0 N Y UW1 S T",
|
1466 |
+
"VERB": "AH0 N Y UW1 Z D"
|
1467 |
+
},
|
1468 |
+
"upgrade": {
|
1469 |
+
"DEFAULT": "AH1 P G R EY0 D",
|
1470 |
+
"VERB": "AH0 P G R EY1 D"
|
1471 |
+
},
|
1472 |
+
"upgrades": {
|
1473 |
+
"DEFAULT": "AH1 P G R EY0 D Z",
|
1474 |
+
"VERB": "AH0 P G R EY1 D Z"
|
1475 |
+
},
|
1476 |
+
"uplift": {
|
1477 |
+
"DEFAULT": "AH1 P L IH0 F T",
|
1478 |
+
"VERB": "AH2 P L IH1 F T"
|
1479 |
+
},
|
1480 |
+
"upset": {
|
1481 |
+
"DEFAULT": "AH1 P S EH2 T",
|
1482 |
+
"VERB": "AH0 P S EH1 T"
|
1483 |
+
},
|
1484 |
+
"upsets": {
|
1485 |
+
"DEFAULT": "AH1 P S EH2 T S",
|
1486 |
+
"VERB": "AH0 P S EH1 T S"
|
1487 |
+
},
|
1488 |
+
"use": {
|
1489 |
+
"DEFAULT": "Y UW1 S",
|
1490 |
+
"VERB": "Y UW1 Z"
|
1491 |
+
},
|
1492 |
+
"used": {
|
1493 |
+
"DEFAULT": "Y UW1 S T",
|
1494 |
+
"VBN": "Y UW1 Z D"
|
1495 |
+
},
|
1496 |
+
"uses": {
|
1497 |
+
"DEFAULT": "Y UW1 S IH0 Z",
|
1498 |
+
"VERB": "Y UW1 Z IH0 Z"
|
1499 |
+
}
|
1500 |
+
}
|
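
Each entry above pairs a DEFAULT pronunciation with alternatives keyed by a POS parent tag (VERB) or an exact Penn Treebank tag (VBD, VBN, ...). As a minimal standalone sketch of how one entry resolves (the resolve helper here is illustrative, not code from this commit):

entry = {"DEFAULT": "R EH1 K ER0 D", "VERB": "R IH0 K AO1 R D"}

def resolve(pos_tag: str) -> str:
    # Penn Treebank verb tags (VB, VBD, VBZ, ...) map to the parent tag VERB;
    # every other tag falls through to DEFAULT.
    return entry["VERB"] if pos_tag.startswith("VB") else entry["DEFAULT"]

print(resolve("VBD"))  # R IH0 K AO1 R D  ("she recorded it")
print(resolve("NN"))   # R EH1 K ER0 D    ("a record")
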
resources/app/python/xvapitch/text/h2p_parser/data/example.json
ADDED
@@ -0,0 +1,16 @@
+ {
+   "absent": {
+     "VERB": "AH1 B S AE1 N T",
+     "DEFAULT": "AE1 B S AH0 N T"
+   },
+   "reject": {
+     "VERB": "R IH0 JH EH1 K T",
+     "DEFAULT": "R IY1 JH EH0 K T"
+   },
+   "read": {
+     "VBD": "R EH1 D",
+     "VBN": "R EH1 D",
+     "VBP": "R EH1 D",
+     "DEFAULT": "R IY1 D"
+   }
+ }
resources/app/python/xvapitch/text/h2p_parser/dict_reader.py
ADDED
@@ -0,0 +1,109 @@
+ # This reads a CMUDict formatted dictionary as a dictionary object
+ import re
+ import h2p_parser.format_ph as ph
+ from . import DATA_PATH
+
+
+ _dict_primary = 'cmudict.dict'
+
+
+ def read_dict(filename: str) -> list:
+     # Read the file
+     with open(filename, encoding='utf-8', mode='r') as f:
+         # Read the file into lines
+         lines = f.readlines()
+     # Remove any line starting with ";;;"
+     lines = [line for line in lines if not line.startswith(';;;')]
+     return lines
+
+
+ def parse_dict(lines: list) -> dict:
+     # Create a dictionary to store the parsed data
+     parsed_dict = {}
+     # Detect file format
+
+     # We will read the first 10 lines to determine the format
+     # Default to SSD format unless we find otherwise
+     dict_form = 'SSD'
+     for line in lines[:10]:
+         # Strip new lines
+         line = line.strip()
+         if line == '':
+             continue
+         """
+         Format 1 (Double Space Delimited):
+         - Comment allowed to start with ";;;"
+         WORD  W ER1 D
+
+         Format 2 (Single Space Delimited):
+         - Comment allowed at end of any line using "#"
+         WORD W ER1 D # Comment
+         """
+         if '  ' in line:
+             dict_form = 'DSD'
+             break
+
+     # Iterate over the lines
+     for line in lines:
+         # Skip empty lines and lines with no space
+         line = line.strip()
+         if line == '' or ' ' not in line:
+             continue
+
+         # Split depending on format
+         if dict_form == 'DSD':
+             pairs = line.split('  ')
+         else:
+             space_index = line.find(' ')
+             line_split = line[:space_index], line[space_index + 1:]
+             pairs = line_split[0], line_split[1].split('#')[0]
+
+         word = str.lower(pairs[0])  # Get word and lowercase it
+         phonemes = ph.to_list(pairs[1])  # Convert to list of phonemes
+         phonemes = [phonemes]  # Wrap in nested list
+         word_num = 0
+         word_orig = None
+
+         # Detect if this is a multi-word entry
+         if ('(' in word) and (')' in word) and any(char.isdigit() for char in word):
+             # Parse the integer from the word using regex
+             result = int(re.findall(r"\((\d+)\)", word)[0])
+             # If found
+             if result is not None:
+                 # Set the original word
+                 word_orig = word
+                 # Remove the integer and bracket from the word
+                 word = re.sub(r"\(.*\)", "", word)
+                 # Set the word number to the result
+                 word_num = result
+
+         # Check existing key
+         if word in parsed_dict:
+             # If word number is 0, ignore
+             if word_num == 0:
+                 continue
+             # If word number is not 0, add phoneme to existing key at index
+             parsed_dict[word].extend(phonemes)
+             # Also add the original word if it exists
+             if word_orig is not None:
+                 parsed_dict[word_orig] = phonemes
+         else:
+             # Create a new key
+             parsed_dict[word] = phonemes
+
+     # Return the dictionary
+     return parsed_dict
+
+
+ class DictReader:
+     def __init__(self, filename=None):
+         self.filename = filename
+         self.dict = {}
+         # If filename is None, use the default dictionary
+         # default = 'data' uses the dictionary file in the data module
+         # default = 'nltk' uses the nltk cmudict
+         if filename is not None:
+             self.dict = parse_dict(read_dict(filename))
+         else:
+             with DATA_PATH.joinpath(_dict_primary) as f:
+                 self.dict = parse_dict(read_dict(f))
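
A short sketch of what parse_dict() produces for the double-space-delimited format described in its docstring. The input lines are made up, and the import path assumes the package is importable as h2p_parser:

from h2p_parser.dict_reader import parse_dict  # import path assumed

lines = [
    "HELLO  HH AH0 L OW1",     # double-space delimited (DSD) entry
    "HELLO(1)  HH EH0 L OW1",  # numbered alternate pronunciation
]
parsed = parse_dict(lines)
print(parsed["hello"])     # [['HH', 'AH0', 'L', 'OW1'], ['HH', 'EH0', 'L', 'OW1']]
print(parsed["hello(1)"])  # [['HH', 'EH0', 'L', 'OW1']]
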
resources/app/python/xvapitch/text/h2p_parser/dictionary.py
ADDED
@@ -0,0 +1,85 @@
+ # dictionary.py
+
+ # Defines a dictionary class that can be used to store and retrieve from the json file
+ import sys
+ if sys.version_info < (3, 9):
+     # In Python versions below 3.9, this is needed
+     import importlib_resources as pkg_resources
+ else:
+     # Since python 3.9+, importlib.resources.files is built-in
+     import importlib.resources as pkg_resources
+ from os.path import exists
+ import json
+ import h2p_parser.pos_parser as pos_parser
+
+
+ # Method to get data path
+ def get_data_path():
+     data_path = pkg_resources.files('h2p_parser.data')
+     if data_path is None:
+         raise FileNotFoundError("Data folder not found")
+     return data_path
+
+
+ # Dictionary class
+ class Dictionary:
+     def __init__(self, file_name=None):
+         # If a file name is not provided, use the default file name
+         self.file_name = file_name
+         if file_name is None:
+             self.file_name = 'dict.json'
+             self.use_default = True
+         else:
+             self.file_name = file_name
+             self.use_default = False
+         self.dictionary = {}
+         self.dictionary = self.load_dictionary(file_name)
+
+     # Loads the dictionary from the json file
+     def load_dictionary(self, path=None):
+         if path is None:
+             data_path = get_data_path()
+             dict_path = data_path.joinpath(self.file_name)
+             with open(str(dict_path)) as def_file:
+                 read_dict = json.load(def_file)
+         else:
+             if not exists(path):
+                 raise FileNotFoundError(f'Dictionary {self.file_name} file not found')
+             with open(path) as file:
+                 try:
+                     read_dict = json.load(file)
+                 except json.decoder.JSONDecodeError:
+                     raise ValueError(f'Dictionary {self.file_name} file is not valid JSON')
+         # Check dictionary has at least one entry
+         if len(read_dict) == 0:
+             raise ValueError('Dictionary is empty or invalid')
+         return read_dict
+
+     # Check if a word is in the dictionary
+     def contains(self, word):
+         word = word.lower()
+         return word in self.dictionary
+
+     # Get the phonetic pronunciation of a word using Part of Speech tag
+     def get_phoneme(self, word, pos):
+         # Get the sub-dictionary at dictionary[word]
+         sub_dict = self.dictionary[word.lower()]
+
+         # First, check if the exact pos is a key
+         if pos in sub_dict:
+             return sub_dict[pos]
+
+         # If not, use the parent pos of the pos tag
+         parent_pos = pos_parser.get_parent_pos(pos)
+
+         if parent_pos is not None:
+             # Check if the sub_dict contains the parent pos
+             if parent_pos in sub_dict:
+                 return sub_dict[parent_pos]
+
+         # If not, check if the sub_dict contains a DEFAULT key
+         if 'DEFAULT' in sub_dict:
+             return sub_dict['DEFAULT']
+
+         # If no matches, return None
+         return None
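
Typical usage of the Dictionary class, assuming the package is importable as h2p_parser and the bundled data/dict.json above is present:

from h2p_parser.dictionary import Dictionary  # import path assumed

d = Dictionary()                       # loads the bundled data/dict.json
print(d.contains("Record"))            # True  (lookups are case-insensitive)
print(d.get_phoneme("record", "VBD"))  # 'R IH0 K AO1 R D' via the parent tag VERB
print(d.get_phoneme("record", "NN"))   # 'R EH1 K ER0 D'   via the DEFAULT key
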
resources/app/python/xvapitch/text/h2p_parser/filter.py
ADDED
@@ -0,0 +1,34 @@
+ from unicodedata import normalize
+ import re
+
+ # Pre-compile regex
+ re_filter = re.compile(r"[^ A-Za-z'.,?!()\-]")
+ re_filter_with_num = re.compile(r"[^ A-Za-z\d'.,?!()\-]")
+ re_multi_space = re.compile(r"\s\s+")
+
+
+ # Filters text before parsing
+ # @param text: text to be filtered
+ # @return: filtered text
+ def filter_text(text: str, allow_num: bool = False, preserve_case: bool = False) -> str:
+     """
+     Filters text before parsing
+     :param preserve_case:
+     :param allow_num: True if numbers are allowed
+     :param text: Input raw text
+     :return: Text after stripped accents, lower-cased, and invalid punctuation removed
+     """
+     # Strip accents
+     text = normalize('NFD', text)
+     # To lowercase
+     if not preserve_case:
+         text = text.lower()
+     # Remove all invalid punctuation
+     if allow_num:
+         text = re.sub(re_filter_with_num, '', text)
+     else:
+         text = re.sub(re_filter, "", text)
+     # Remove all spaces more than 1
+     text = re.sub(re_multi_space, " ", text)
+     # Return
+     return text
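
Example of filter_text() behavior (import path assumed):

from h2p_parser.filter import filter_text  # import path assumed

print(filter_text("Héllo,   world! 123"))
# 'hello, world! '  (accent stripped, lower-cased, digits removed, spaces collapsed)
print(filter_text("Héllo,   world! 123", allow_num=True))
# 'hello, world! 123'
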
resources/app/python/xvapitch/text/h2p_parser/format_ph.py
ADDED
@@ -0,0 +1,99 @@
+ from typing import overload, Optional, Union
+
+ # Converts and outputs various formats of phonemes
+
+
+ @overload
+ def to_sds(ph: str) -> str: ...
+
+
+ @overload
+ def to_sds(ph: list) -> str: ...
+
+
+ def to_sds(ph: Union[list, str]) -> Optional[str]:
+     """
+     Converts phonemes to space delimited string format
+
+     :param ph: Phoneme as str or list, supports nested lists
+     :return: Phoneme as space delimited string
+     """
+     # Return None if None
+     if ph is None:
+         return None
+
+     # Return directly if str
+     if isinstance(ph, str):
+         return ph
+     # If is list, convert each element
+     if isinstance(ph, list):
+         # If list empty, return None
+         if len(ph) == 0:
+             return None
+         # Case for further lists
+         if isinstance(ph[0], list):
+             return to_sds(ph[0])  # Recursive call
+         # Case if str at index 0, and size 1, return directly
+         elif isinstance(ph[0], str) and len(ph) == 1:
+             return ph[0]
+         # Case if str at index 0, above size 1, return with join
+         elif isinstance(ph[0], str):
+             return ' '.join(ph)
+         # Case for none
+         elif ph[0] is None:
+             return None
+         else:
+             raise TypeError('to_sds() encountered an unexpected nested element type')
+     # Error if no matches
+     raise TypeError('to_sds() expects a list or string')
+
+
+ @overload
+ def to_list(ph: str) -> list: ...
+
+
+ @overload
+ def to_list(ph: list) -> list: ...
+
+
+ def to_list(ph: Union[str, list]) -> Optional[list]:
+     """
+     Converts phonemes to list format
+
+     :param ph: Phoneme as str or list, supports nested lists
+     :return: Phoneme as list
+     """
+     # Return None if None
+     if ph is None:
+         return None
+
+     # Return directly if list and index 0 is str
+     if isinstance(ph, list) and len(ph) > 0 and isinstance(ph[0], str):
+         return ph
+
+     # If space delimited string, convert to list
+     if isinstance(ph, str):
+         return ph.split(' ')
+
+     # If nested list, convert each element
+     if isinstance(ph, list):
+         # If list empty or has None, return None
+         if len(ph) == 0 or ph[0] is None:
+             return None
+         # Case for further lists
+         if isinstance(ph[0], list):
+             return to_list(ph[0])  # Recursive call
+
+     # Error if no matches
+     raise TypeError('to_list() expects a list or string')
+
+
+ # Surrounds text with curly brackets
+ def with_cb(text: str) -> str:
+     """
+     Surrounds text with curly brackets
+
+     :param text: Text to surround
+     :return: Surrounded text
+     """
+     return '{' + text + '}'
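
Example of the three converters (import path assumed):

import h2p_parser.format_ph as ph  # import path assumed

print(ph.to_list("R EH1 D"))             # ['R', 'EH1', 'D']
print(ph.to_sds([["R", "EH1", "D"]]))    # 'R EH1 D'  (nested lists are unwrapped)
print(ph.with_cb(ph.to_sds("R EH1 D")))  # '{R EH1 D}'
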
resources/app/python/xvapitch/text/h2p_parser/h2p.py
ADDED
@@ -0,0 +1,123 @@
+ import nltk
+ import re
+ from nltk.tokenize import TweetTokenizer
+ from nltk import pos_tag
+ from nltk import pos_tag_sents
+ from .dictionary import Dictionary
+ from .filter import filter_text as ft
+ from . import format_ph as ph
+
+ # Check that the nltk data is downloaded, if not, download it
+ try:
+     nltk.data.find('taggers/averaged_perceptron_tagger.zip')
+ except LookupError:
+     nltk.download('averaged_perceptron_tagger')
+
+
+ # Method to use Regex to replace the first instance of a word with its phonemes
+ def replace_first(target, replacement, text):
+     # Skip if target invalid
+     if target is None or target == '':
+         return text
+     # Replace the first instance of a word with its phonemes
+     return re.sub(r'(?i)\b' + target + r'\b', replacement, text, 1)
+
+
+ class H2p:
+     def __init__(self, dict_path=None, preload=False, phoneme_format=''):
+         """
+         Creates a H2p parser
+
+         Supported phoneme formats:
+             - Space delimited
+             - Space delimited surrounded by { }
+
+         :param dict_path: Path to a heteronym dictionary json file. Built-in dictionary will be used if None
+         :type dict_path: str
+         :param preload: Preloads the tokenizer and tagger during initialization
+         :type preload: bool
+         """
+
+         # Supported phoneme formats
+         self.phoneme_format = phoneme_format
+         self.dict = Dictionary(dict_path)
+         self.tokenize = TweetTokenizer().tokenize
+         self.get_tags = pos_tag
+         if preload:
+             self.preload()
+
+     # Method to preload tokenizer and pos_tag
+     def preload(self):
+         tokens = self.tokenize('a')
+         assert tokens == ['a']
+         assert pos_tag(tokens)[0][0] == 'a'
+
+     # Method to check if a text line contains a heteronym
+     def contains_het(self, text):
+         # Filter the text
+         text = ft(text)
+         # Tokenize
+         words = self.tokenize(text)
+         # Check match with dictionary
+         hets = []
+         for word in words:
+             if self.dict.contains(word):
+                 hets.append(word)
+         return len(hets) > 0, hets
+
+     # Method to replace heteronyms in a text line to phonemes
+     def replace_het(self, text):
+         # Filter the text
+         working_text = ft(text, preserve_case=True)
+         # Tokenize
+         words = self.tokenize(working_text)
+         # Get pos tags
+         tags = pos_tag(words)
+         # Loop through words and pos tags
+         for word, pos in tags:
+             # Skip if word not in dictionary
+             if not self.dict.contains(word):
+                 continue
+             # Get phonemes
+             phonemes = self.dict.get_phoneme(word, pos)
+             # Format phonemes
+             f_ph = ph.with_cb(ph.to_sds(phonemes))
+             # Replace word with phonemes
+             text = replace_first(word, f_ph, text)
+         return text
+
+     # Replaces heteronyms in a list of text lines
+     # Slightly faster than replace_het() called on each line
+     def replace_het_list(self, text_list):
+         # Filter the text
+         working_text_list = [ft(text, preserve_case=True) for text in text_list]
+         # Tokenize
+         list_sentence_words = [self.tokenize(text) for text in working_text_list]
+         # Get pos tags list
+         tags_list = pos_tag_sents(list_sentence_words)
+         # Loop through lines
+         for index in range(len(tags_list)):
+             # Loop through words and pos tags in tags_list index
+             for word, pos in tags_list[index]:
+                 # Skip if word not in dictionary
+                 if not self.dict.contains(word):
+                     continue
+                 # Get phonemes
+                 phonemes = self.dict.get_phoneme(word, pos)
+                 # Format phonemes
+                 f_ph = ph.with_cb(ph.to_sds(phonemes))
+                 # Replace word with phonemes
+                 text_list[index] = replace_first(word, f_ph, text_list[index])
+         return text_list
+
+     # Method to tag a text line, returns a list of tags
+     def tag(self, text):
+         # Filter the text
+         working_text = ft(text, preserve_case=True)
+         # Tokenize
+         words = self.tokenize(working_text)
+         # Get pos tags
+         tags = pos_tag(words)
+         # Only return element 1 of each list
+         return [tag[1] for tag in tags]
+
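
Typical usage, assuming the package is importable as h2p_parser (the NLTK tagger is fetched on first run) and assuming the tagger labels "read" as past tense in this sentence:

from h2p_parser.h2p import H2p  # import path assumed

h2p = H2p(preload=True)
print(h2p.contains_het("I read the book yesterday."))
# (True, ['read'])
print(h2p.replace_het("I read the book yesterday."))
# 'I {R EH1 D} the book yesterday.'
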
resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,14 @@
+ Metadata-Version: 2.1
+ Name: h2p-parser
+ Version: 1.0.0
+ Summary: Heteronym to Phoneme Parser
+ Home-page: https://github.com/ionite34/h2p-parser
+ Author: ionite
+ Author-email: dev@ionite.io
+ License: Apache 2.0
+ Platform: UNKNOWN
+ Requires-Python: >=3.7
+ License-File: LICENSE
+
+ UNKNOWN
+
resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,19 @@
+ LICENSE
+ README.md
+ setup.py
+ h2p_parser/__init__.py
+ h2p_parser/__main__.py
+ h2p_parser/cmudictext.py
+ h2p_parser/dict_reader.py
+ h2p_parser/dictionary.py
+ h2p_parser/filter.py
+ h2p_parser/format_ph.py
+ h2p_parser/h2p.py
+ h2p_parser/pos_parser.py
+ h2p_parser/processors.py
+ h2p_parser/symbols.py
+ h2p_parser/h2p_parser.egg-info/PKG-INFO
+ h2p_parser/h2p_parser.egg-info/SOURCES.txt
+ h2p_parser/h2p_parser.egg-info/dependency_links.txt
+ h2p_parser/h2p_parser.egg-info/requires.txt
+ h2p_parser/h2p_parser.egg-info/top_level.txt
resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@
+
resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/requires.txt
ADDED
@@ -0,0 +1,2 @@
+ nltk
+ inflect
resources/app/python/xvapitch/text/h2p_parser/h2p_parser.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+ h2p_parser
resources/app/python/xvapitch/text/h2p_parser/pos_parser.py
ADDED
@@ -0,0 +1,17 @@
+ # Part of Speech Tag Operations
+
+ # Method to get the parent part of speech (VERB) or (NOUN) from a pos tag
+ # from __future__ import annotations
+
+ # def get_parent_pos(pos: str) -> str | None:
+ def get_parent_pos(pos):
+     # Get the parent part of speech from a pos tag
+     if pos.startswith('VB'):
+         return 'VERB'
+     elif pos.startswith('NN'):
+         return 'NOUN'
+     elif pos.startswith('RB'):
+         return 'ADVERB'
+     else:
+         return None
+
resources/app/python/xvapitch/text/h2p_parser/processors.py
ADDED
@@ -0,0 +1,392 @@
+ # Transformations of text sequences for matching
+ from __future__ import annotations
+ from typing import TYPE_CHECKING
+ from .symbols import consonants
+
+ import re
+
+ if TYPE_CHECKING:
+     from .cmudictext import CMUDictExt
+
+ _re_digit = re.compile(r'\d+')
+
+
+ class Processor:
+     def __init__(self, cde: CMUDictExt):
+         self._lookup = cde.lookup
+         self._cmu_get = cde.dict.get
+         self._segment = cde.segment
+         self._tag = cde.h2p.tag
+         self._stem = cde.stem
+         # Number of times respective methods were called
+         self.stat_hits = {
+             'plural': 0,
+             'possessives': 0,
+             'contractions': 0,
+             'hyphenated': 0,
+             'compound': 0,
+             'compound_l2': 0,
+             'stem': 0
+         }
+         # Number of times respective methods returned value (not None)
+         self.stat_resolves = {
+             'plural': 0,
+             'possessives': 0,
+             'contractions': 0,
+             'hyphenated': 0,
+             'compound': 0,
+             'compound_l2': 0,
+             'stem': 0
+         }
+         # Holds events when features encountered unexpected language syntax
+         self.stat_unexpected = {
+             'plural': [],
+             'possessives': [],
+             'contractions': [],
+             'hyphenated': [],
+             'compound': [],
+             'compound_l2': [],
+             'stem': []
+         }
+
+     def auto_possessives(self, word: str) -> str | None:
+         """
+         Auto-possessives
+         :param word: Input of possible possessive word
+         :return: Phoneme of word as SDS, or None if unresolvable
+         """
+         if not word.endswith("'s"):
+             return None
+         # If the word ends with "'s", register a hit
+         self.stat_hits['possessives'] += 1
+         """
+         There are 3 general cases:
+         1. Base words ending in one of 6 special consonants (sibilants)
+             - i.e. Tess's, Rose's, Butch's, Midge's, Rush's, Garage's
+             - With consonants ending of [s], [z], [ch], [j], [sh], [zh]
+             - In ARPAbet: {S}, {Z}, {CH}, {JH}, {SH}, {ZH}
+             - These require a suffix of {IH0 Z}
+         2. Base words ending in vowels and voiced consonants:
+             - i.e. Fay's, Hugh's, Bob's, Ted's, Meg's, Sam's, Dean's, Claire's, Paul's, Bing's
+             - In ARPAbet: {IY0}, {EY1}, {UW1}, {B}, {D}, {G}, {M}, {N}, {R}, {L}, {NG}
+             - Vowels need a wildcard match of any numbered variant
+             - These require a suffix of {Z}
+         3. Base words ending in voiceless consonants:
+             - i.e. Hope's, Pat's, Clark's, Ruth's
+             - In ARPAbet: {P}, {T}, {K}, {TH}
+             - These require a suffix of {S}
+         """
+
+         # Method to return phoneme and increment stat
+         def _resolve(phoneme: str) -> str:
+             self.stat_resolves['possessives'] += 1
+             return phoneme
+
+         core = word[:-2]  # Get core word without possessive
+         ph = self._lookup(core, ph_format='list')  # find core word using recursive search
+         if ph is None:
+             return None  # Core word not found
+         # [Case 1]
+         if ph[-1] in {'S', 'Z', 'CH', 'JH', 'SH', 'ZH'}:
+             ph += ['IH0', 'Z']
+             return _resolve(ph)
+         # [Case 2]
+         """
+         Valid for case 2:
+         'AA', 'AO', 'EY', 'OW', 'UW', 'AE', 'AW', 'EH', 'IH',
+         'OY', 'AH', 'AY', 'ER', 'IY', 'UH',
+         'B', 'D', 'G', 'M', 'N', 'R', 'L', 'NG'
+         To simplify matching, we will check for the listed single-letter variants and 'NG'
+         and then check for any numbered variant
+         """
+         if ph[-1] in {'B', 'D', 'G', 'M', 'N', 'R', 'L', 'NG'} or ph[-1][-1].isdigit():
+             ph += ['Z']
+             return _resolve(ph)
+         # [Case 3]
+         if ph[-1] in ['P', 'T', 'K', 'TH']:
+             ph += ['S']
+             return _resolve(ph)
+
+         return None  # No match found
+
+     def auto_contractions(self, word: str) -> str | None:
+         """
+         Auto contracts form and finds phonemes
+         :param word:
+         :return:
+         """
+         """
+         Supported contractions:
+         - 'll
+         - 'd
+         """
+         # First, check if the word is a contraction
+         parts = word.split("\'")  # Split on [']
+         if len(parts) == 1 or parts[1] not in {'ll', 'd'}:
+             return None  # No contraction found
+         if len(parts) > 2:
+             self.stat_unexpected['contractions'].append(word)
+             return None  # More than 2 parts, can't be a contraction
+         # If initial check passes, register a hit
+         self.stat_hits['contractions'] += 1
+
+         # Get the core word
+         core = parts[0]
+         # Get the phoneme for the core word recursively
+         ph = self._lookup(core, ph_format='list')
+         if ph is None:
+             return None  # Core word not found
+         # Add the phoneme with the appropriate suffix
+         if parts[1] == 'll':
+             ph += ['L']
+         elif parts[1] == 'd':
+             ph += ['D']
+         # Return the phoneme
+         self.stat_resolves['contractions'] += 1
+         return ph
+
+     def auto_hyphenated(self, word: str) -> str | None:
+         """
+         Splits hyphenated words and attempts to resolve components
+         :param word:
+         :return:
+         """
+         # First, check if the word is a hyphenated word
+         if '-' not in word:
+             return None  # No hyphen found
+         # If initial check passes, register a hit
+         self.stat_hits['hyphenated'] += 1
+         # Split the word into parts
+         parts = word.split('-')
+         # Get the phonemes for each part
+         ph = []
+         for part in parts:
+             ph_part = self._lookup(part, ph_format='sds')
+             if ph_part is None:
+                 return None  # Part not found
+             ph.append(ph_part)
+         # Join the phonemes
+         ph = ' '.join(ph)
+         # Return the phoneme
+         self.stat_resolves['hyphenated'] += 1
+         return ph
+
+     def auto_compound(self, word: str) -> str | None:
+         """
+         Splits compound words and attempts to resolve components
+         :param word:
+         :return:
+         """
+         # Split word into parts
+         parts = self._segment(word)
+         if len(parts) == 1:
+             return None  # No compound found
+         # If initial check passes, register a hit
+         self.stat_hits['compound'] += 1
+         # Get the phonemes for each part
+         ph = []
+         for part in parts:
+             ph_part = self._lookup(part, ph_format='sds')
+             if ph_part is None:
+                 return None  # Part not found
+             ph.append(ph_part)
+         # Join the phonemes
+         ph = ' '.join(ph)
+         # Return the phoneme
+         self.stat_resolves['compound'] += 1
+         return ph
+
+     def auto_plural(self, word: str, pos: str = None) -> str | None:
+         """
+         Finds singular form of plurals and attempts to resolve separately
+         Optionally a pos tag can be provided.
+         If no tags are provided, there will be a single word pos inference,
+         which is not ideal.
+         :param pos:
+         :param word:
+         :return:
+         """
+         # First, check if the word is a replaceable plural
+         # Needs to end in 's' or 'es'
+         if word[-1] != 's':
+             return None  # No plural found
+         # Now check if the word is a plural using pos
+         if pos is None:
+             pos = self._tag(word)
+         if pos is None or len(pos) == 0 or (pos[0] != 'NNS' and pos[0] != 'NNPS'):
+             return None  # No tag found
+         # If initial check passes, register a hit
+         self.stat_hits['plural'] += 1
+
+         """
+         Case 1:
+         > Word ends in 'oes'
+         > Remove the 'es' to get the singular
+         """
+         if len(word) > 3 and word[-3:] == 'oes':
+             singular = word[:-2]
+             # Look up the possessive form (since the pronunciation is the same)
+             ph = self.auto_possessives(singular + "'s")
+             if ph is not None:
+                 self.stat_resolves['plural'] += 1
+                 return ph  # Return the phoneme
+
+         """
+         Case 2:
+         > Word ends in 's'
+         > Remove the 's' to get the singular
+         """
+         if len(word) > 1 and word[-1] == 's':
+             singular = word[:-1]
+             # Look up the possessive form (since the pronunciation is the same)
+             ph = self.auto_possessives(singular + "'s")
+             if ph is not None:
+                 self.stat_resolves['plural'] += 1
+                 return ph  # Return the phoneme
+
+         # If no matches, return None
+         return None
+
+     def auto_stem(self, word: str) -> str | None:
+         """
+         Attempts to resolve using the root stem of a word.
+         Supported modes:
+             - "ing"
+             - "ingly"
+             - "ly"
+         :param word:
+         :return:
+         """
+
+         # noinspection SpellCheckingInspection
+         """
+         'ly' has no special rules, always add phoneme 'L IY0'
+
+         'ing' relevant rules:
+
+         > If the original verb ended in [e], remove it and add [ing]
+             - i.e. take -> taking, make -> making
+             - We will search once with the original verb, and once with [e] added
+             - 1st attempt: tak, mak
+             - 2nd attempt: take, make
+
+         > If the input word has a repeated consonant before [ing], it's likely that
+         the original verb has only 1 of the consonants
+             - i.e. running -> run, stopping -> stop
+             - We will search for repeated consonants, and perform 2 attempts:
+             - 1st attempt: without the repeated consonant (run, stop)
+             - 2nd attempt: with the repeated consonant (runn, stopp)
+         """
+         # Discontinue if word is too short
+         if len(word) < 3 or (not word.endswith('ly') and not word.endswith('ing')):
+             return None
+         # Register a hit
+         self.stat_hits['stem'] += 1  # Register hit
+
+         # For ly case
+         if word.endswith('ly'):
+             # Get the root word
+             root = word[:-2]
+             # Recursively get the root
+             ph_root = self._lookup(root, ph_format='sds')
+             # If not exist, return None
+             if ph_root is None:
+                 return None
+             ph_ly = 'L IY0'
+             ph_joined = ' '.join([ph_root, ph_ly])
+             self.stat_resolves['stem'] += 1
+             return ph_joined
+
+         # For ing case
+         if word.endswith('ing'):
+             # Get the root word
+             root = word[:-3]
+             # Recursively get the root
+             ph_root = self._lookup(root, ph_format='sds')
+             # If not exist, return None
+             if ph_root is None:
+                 return None
+             ph_ing = 'IH0 NG'
+             ph_joined = ' '.join([ph_root, ph_ing])
+             self.stat_resolves['stem'] += 1
+             return ph_joined
+
+     def auto_component(self, word: str) -> str | None:
+         """
+         Searches for target word as component of a larger word
+         :param word:
+         :return:
+         """
+
+         """
+         This processing step checks for words as a component of a larger word
+         - i.e. 'synth' is not in the cmu dictionary
+         - Stage 1: We will search for any word beginning with 'synth' (10 matches)
+             - This is because most unseen short words are likely shortened versions
+             - We will split
+         - Stage 2: Search for any word containing 'synth' (13 matches)
+
+         """
+         raise NotImplementedError
+
+     def auto_compound_l2(self, word: str, recursive: bool = True) -> str | None:
+         """
+         Searches for target word as a compound word.
+         > Does not use n-gram splitting like auto_compound()
+         > Splits words manually into every possible combination
+         > Returns the match with the highest length of both words
+         :param recursive: True to enable recursive lookups, otherwise only use base CMU dictionary
+         :param word:
+         :return:
+         """
+         # Word must be fully alphabetic
+         if not word.isalpha() or len(word) < 3:
+             return None
+         self.stat_hits['compound_l2'] += 1  # Register hit
+
+         # Define lookup mode
+         def _lu(search_word: str) -> str | None:
+             if recursive:
+                 return self._lookup(search_word, ph_format='sds')
+             else:
+                 return self._cmu_get(search_word)
+
+         # Check if the last part is a single character
+         # And that it is repeated in the last char of the first part
+         # This is likely silent so remove it
+         # i.e. 'Derakk' -> 'Derak'
+         # If the word contains a repeated consonant at the end, remove it
+         # First check repeated last 2 letters
+         if word[-2:][0] == word[-2:][1]:
+             # Remove the last char from the word
+             word = word[:-1]
+
+         # Holds all matches as tuples
+         # (len1, len2, p1, p2, ph1, ph2)
+         matches = []
+
+         # Splits the word into every possible combination
+         for i in range(1, len(word)):
+             p1 = word[:i]
+             p2 = word[i:]
+             # Looks up both words
+             ph1 = _lu(p1)
+             if ph1 is None:
+                 continue  # Skip if not found
+             ph2 = _lu(p2)
+             if ph2 is None:
+                 continue  # Skip if not found
+             # If both words exist, add to list as tuple
+             matches.append((len(p1), len(p2), p1, p2, ph1, ph2))
+
+         # Pick the match with the highest length of both words
+         if len(matches) == 0:
+             return None
+         else:
+             # Sort by the minimum of len1 and len2
+             matches.sort(key=lambda x: min(x[0], x[1]))
+             # Get the highest minimum length match
+             match = matches[-1]
+             # Otherwise, return the full joined match
+             self.stat_resolves['compound_l2'] += 1  # Register resolve
+             return match[4] + ' ' + match[5]
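
The three possessive cases in auto_possessives() reduce to a small suffix rule. A standalone sketch with illustrative phoneme lists (the helper below is not code from this commit):

SIBILANTS = {'S', 'Z', 'CH', 'JH', 'SH', 'ZH'}  # case 1 endings
VOICELESS = {'P', 'T', 'K', 'TH'}               # case 3 endings

def possessive_suffix(core_ph: list) -> list:
    last = core_ph[-1]
    if last in SIBILANTS:
        return core_ph + ['IH0', 'Z']  # Tess's -> T EH1 S IH0 Z
    if last in VOICELESS:
        return core_ph + ['S']         # Pat's  -> P AE1 T S
    return core_ph + ['Z']             # Bob's  -> B AA1 B Z (vowels and voiced consonants)

print(possessive_suffix(['T', 'EH1', 'S']))  # ['T', 'EH1', 'S', 'IH0', 'Z']
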
resources/app/python/xvapitch/text/h2p_parser/symbols.py
ADDED
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Holds symbols for graphemes, phonemes, and pos-tags.
# noinspection SpellCheckingInspection,GrazieInspection
"""
POS tag list:

CC    coordinating conjunction
CD    cardinal digit
DT    determiner
EX    existential there ("there is" -> "there exists")
FW    foreign word
IN    preposition/subordinating conjunction
JJ    adjective ('big')
JJR   adjective, comparative ('bigger')
JJS   adjective, superlative ('biggest')
LS    list marker ("1)", "2)", "3)")
MD    modal ('could', 'will')
NN    noun, singular
NNS   noun, plural
NNP   proper noun, singular ('Harrison')
NNPS  proper noun, plural ('Americans')
PDT   predeterminer ('all' in 'all the kids')
POS   possessive ending ("parent's")
PRP   personal pronoun ('I', 'he', 'she')
PRP$  possessive pronoun ('my', 'his', 'hers')
RB    adverb ('very', 'silently')
RBR   adverb, comparative ('better')
RBS   adverb, superlative ('best')
RP    particle ('give up')
TO    to ("go 'to' the store")
UH    interjection ("errrrrrrrm")
VB    verb, base form ('take')
VBD   verb, past tense ('took')
VBG   verb, gerund/present participle ('taking')
VBN   verb, past participle ('taken')
VBP   verb, sing. present, non-3rd person ('take')
VBZ   verb, 3rd person sing. present ('takes')
WDT   wh-determiner ('which')
WP    wh-pronoun ('who', 'what')
WP$   possessive wh-pronoun ('whose')
WRB   wh-adverb ('where', 'when')
"""

from __future__ import annotations

# noinspection SpellCheckingInspection,GrazieInspection
graphemes = list("abcdefghijklmnopqrstuvwxyz")
phonemes = ['AA0', 'AA1', 'AA2', 'AE0', 'AE1', 'AE2', 'AH0', 'AH1', 'AH2', 'AO0',
            'AO1', 'AO2', 'AW0', 'AW1', 'AW2', 'AY0', 'AY1', 'AY2', 'B', 'CH', 'D', 'DH',
            'EH0', 'EH1', 'EH2', 'ER0', 'ER1', 'ER2', 'EY0', 'EY1', 'EY2', 'F', 'G', 'HH',
            'IH0', 'IH1', 'IH2', 'IY0', 'IY1', 'IY2', 'JH', 'K', 'L', 'M', 'N', 'NG',
            'OW0', 'OW1', 'OW2', 'OY0', 'OY1', 'OY2', 'P', 'R', 'S', 'SH', 'T', 'TH',
            'UH0', 'UH1', 'UH2', 'UW', 'UW0', 'UW1', 'UW2', 'V', 'W', 'Y', 'Z', 'ZH']
pos_tags = ['CC', 'CD', 'DT', 'EX', 'FW', 'IN', 'JJ', 'JJR', 'JJS', 'LS', 'MD', 'NN', 'NNS',
            'NNP', 'NNPS', 'PDT', 'POS', 'PRP', 'PRP$', 'RB', 'RBR', 'RBS', 'RP', 'TO', 'UH',
            'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ', 'WDT', 'WP', 'WP$', 'WRB']
pos_type_tags = ['VERB', 'NOUN', 'PRON', 'ADJ', 'ADV']
pos_type_short_tags = ['V', 'N', 'P', 'A', 'R']
pos_type_form_dict = {'V': 'VERB', 'N': 'NOUN', 'P': 'PRON', 'A': 'ADJ', 'R': 'ADV'}
graphemes_set = set(graphemes)
phonemes_set = set(phonemes)
pos_tags_set = set(pos_tags)
pos_type_tags_set = set(pos_type_tags)
pos_type_short_tags_set = set(pos_type_short_tags)
punctuation = {'.', ',', ':', ';', '?', '!', '-', '_', '\'', '\"', '`', '~', '@', '#', '$'}
consonants = {'B', 'CH', 'D', 'DH', 'F', 'G', 'HH', 'JH', 'K', 'L', 'M', 'N', 'NG', 'P', 'R',
              'S', 'SH', 'T', 'TH', 'V', 'W', 'Y', 'Z', 'ZH'}


# Method to convert from short type tags to full type tags.
def to_full_type_tag(short_type_tag: str) -> str | None:
    # Mirrors pos_type_form_dict; returns None for unknown tags
    if short_type_tag == 'V':
        return 'VERB'
    elif short_type_tag == 'N':
        return 'NOUN'
    elif short_type_tag == 'P':
        return 'PRON'
    elif short_type_tag == 'A':
        return 'ADJ'
    elif short_type_tag == 'R':
        return 'ADV'
    else:
        return None
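A short standalone sketch of how the phoneme tables can be used; strip_stress and the vowel filter below are illustrative helpers, not part of the module (consonants is re-declared so the snippet runs on its own).

# Sketch: separating stress digits from ARPAbet phonemes
consonants = {'B', 'CH', 'D', 'DH', 'F', 'G', 'HH', 'JH', 'K', 'L', 'M', 'N', 'NG',
              'P', 'R', 'S', 'SH', 'T', 'TH', 'V', 'W', 'Y', 'Z', 'ZH'}

def strip_stress(phoneme: str) -> str:
    # 'AH0' -> 'AH'; consonants carry no stress digit
    return phoneme.rstrip('012')

tokens = 'R IH0 JH EH1 K T'.split()
vowels = [p for p in tokens if strip_stress(p) not in consonants]
print(vowels)  # ['IH0', 'EH1']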
resources/app/python/xvapitch/text/h2p_parser/text/__init__.py
ADDED
File without changes
resources/app/python/xvapitch/text/h2p_parser/text/numbers.py
ADDED
@@ -0,0 +1,166 @@
# Provides parsing of numbers to text
"""
This module provides parsing of numeric types in English to text.
Modified from https://github.com/keithito/tacotron
"""

import inflect
import re

_magnitudes = ['trillion', 'billion', 'million', 'thousand', 'hundred', 'm', 'b', 't']
_magnitudes_key = {'m': 'million', 'b': 'billion', 't': 'trillion'}
_measurements = '(f|c|k|d|m|km|ft)'
_measurements_key = {'f': 'fahrenheit',
                     'c': 'celsius',
                     'k': 'thousand',
                     'm': 'meters',
                     'km': 'kilometers',
                     'ft': 'feet'}
_currency_key = {'$': 'dollar', '£': 'pound', '€': 'euro', '₩': 'won'}
_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
_currency_re = re.compile(r'([$€£₩])([0-9.,]*[0-9]+)(?:[ ]?({})(?=[^a-zA-Z]|$))?'.format("|".join(_magnitudes)),
                          re.IGNORECASE)
_measurement_re = re.compile(r'([0-9.,]*[0-9]+(\s)?{}\b)'.format(_measurements), re.IGNORECASE)
_ordinal_re = re.compile(r'[0-9]+(st|nd|rd|th)')
_range_re = re.compile(r'(?<=[0-9])+(-)(?=[0-9])+.*?')
_roman_re = re.compile(r'\b(?=[MDCLXVI]+\b)M{0,4}(CM|CD|D?C{0,3})(XC|XL|L?X{0,3})(IX|IV|V?I{2,3})\b')  # avoids matching 'I'
_multiply_re = re.compile(r'(\b[0-9]+)(x)([0-9]+)')
_number_re = re.compile(r"[0-9]+'s|[0-9]+s|[0-9]+")


def _remove_commas(m):
    return m.group(1).replace(',', '')


def _expand_decimal_point(m):
    return m.group(1).replace('.', ' point ')


def _expand_currency(m):
    currency = _currency_key[m.group(1)]
    quantity = m.group(2)
    magnitude = m.group(3)

    # Remove commas from quantity to be able to convert to a number
    quantity = quantity.replace(',', '')

    # Check for million, billion, etc...
    if magnitude is not None and magnitude.lower() in _magnitudes:
        if len(magnitude) == 1:
            magnitude = _magnitudes_key[magnitude.lower()]
        return "{} {} {}".format(_expand_hundreds(quantity), magnitude, currency + 's')

    parts = quantity.split('.')
    if len(parts) > 2:
        return quantity + " " + currency + "s"  # Unexpected format

    dollars = int(parts[0]) if parts[0] else 0

    cents = int(parts[1]) if len(parts) > 1 and parts[1] else 0
    if dollars and cents:
        dollar_unit = currency if dollars == 1 else currency + 's'
        cent_unit = 'cent' if cents == 1 else 'cents'
        return "{} {}, {} {}".format(
            _expand_hundreds(dollars), dollar_unit,
            _inflect.number_to_words(cents), cent_unit)
    elif dollars:
        dollar_unit = currency if dollars == 1 else currency + 's'
        return "{} {}".format(_expand_hundreds(dollars), dollar_unit)
    elif cents:
        cent_unit = 'cent' if cents == 1 else 'cents'
        return "{} {}".format(_inflect.number_to_words(cents), cent_unit)
    else:
        return 'zero' + ' ' + currency + 's'


def _expand_hundreds(text):
    number = float(text)
    if 1000 < number < 10000 and (number % 100 == 0) and (number % 1000 != 0):
        return _inflect.number_to_words(int(number / 100)) + " hundred"
    else:
        return _inflect.number_to_words(text)


def _expand_ordinal(m):
    return _inflect.number_to_words(m.group(0))


def _expand_measurement(m):
    _, number, measurement = re.split(r'(\d+(?:\.\d+)?)', m.group(0))
    number = _inflect.number_to_words(number)
    measurement = "".join(measurement.split())
    measurement = _measurements_key[measurement.lower()]
    # If the measurement is plural and the number is singular, remove the 's'
    if number == "one" and str.endswith(measurement, "s"):
        # Remove the 's' from the end of the measurement
        measurement = measurement[:-1]
    return "{} {}".format(number, measurement)


def _expand_range(m):
    return ' to '


def _expand_multiply(m):
    left = m.group(1)
    right = m.group(3)
    return "{} by {}".format(left, right)


def _expand_roman(m):
    # From https://stackoverflow.com/questions/19308177/converting-roman-numerals-to-integers-in-python
    roman_numerals = {'I': 1, 'V': 5, 'X': 10, 'L': 50, 'C': 100, 'D': 500, 'M': 1000}
    result = 0
    num = m.group(0)
    for i, c in enumerate(num):
        if (i + 1) == len(num) or roman_numerals[c] >= roman_numerals[num[i + 1]]:
            result += roman_numerals[c]
        else:
            result -= roman_numerals[c]
    return str(result)


def _expand_number(m):
    _, number, suffix = re.split(r"(\d+(?:'?\d+)?)", m.group(0))
    number = int(number)
    if 1000 < number < 10000 and (number % 100 == 0) and (number % 1000 != 0):
        text = _inflect.number_to_words(number // 100) + " hundred"
    elif 1000 < number < 3000:
        if number == 2000:
            text = 'two thousand'
        elif 2000 < number < 2010:
            text = 'two thousand ' + _inflect.number_to_words(number % 100)
        elif number % 100 == 0:
            text = _inflect.number_to_words(number // 100) + ' hundred'
        else:
            number = _inflect.number_to_words(number, andword='', zero='oh', group=2).replace(', ', ' ')
            number = re.sub(r'-', ' ', number)
            text = number
    else:
        number = _inflect.number_to_words(number, andword='and')
        number = re.sub(r'-', ' ', number)
        number = re.sub(r',', '', number)
        text = number

    if suffix in ("'s", "s"):
        if text[-1] == 'y':
            text = text[:-1] + 'ies'
        else:
            text = text + suffix

    return text


def normalize_numbers(text):
    text = re.sub(_comma_number_re, _remove_commas, text)
    text = re.sub(_currency_re, _expand_currency, text)
    text = re.sub(_decimal_number_re, _expand_decimal_point, text)
    text = re.sub(_ordinal_re, _expand_ordinal, text)
    # text = re.sub(_range_re, _expand_range, text)
    text = re.sub(_measurement_re, _expand_measurement, text)
    text = re.sub(_roman_re, _expand_roman, text)
    text = re.sub(_multiply_re, _expand_multiply, text)
    text = re.sub(_number_re, _expand_number, text)
    return text
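A hedged usage sketch of normalize_numbers, assuming the package layout above is importable; the exact wording of the expansions depends on the installed inflect version.

from h2p_parser.text.numbers import normalize_numbers

print(normalize_numbers('It cost $5'))       # 'It cost five dollars'
print(normalize_numbers('Meet on the 3rd'))  # 'Meet on the third'
print(normalize_numbers('A 10x20 room'))     # 'A ten by twenty room'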
resources/app/python/xvapitch/text/h2p_parser/utils/__init__.py
ADDED
File without changes
resources/app/python/xvapitch/text/h2p_parser/utils/converter.py
ADDED
@@ -0,0 +1,79 @@
# Converts dictionary files
import json
import os

from .. import symbols
from .. import format_ph as ph
from tqdm import tqdm


def from_binary_delim(path, delimiter) -> dict:
    # Converts a delimited binary-state heteronym look-up dictionary to a dict format
    # Expected format: WORD|(Space Separated Phonemes Case)|(Space Separated Phonemes Default)|(Case)
    # Example: "REJECT|R IH0 JH EH1 K T|R IY1 JH EH0 K T|V"
    # Hashtag comments are allowed, but only at the start of a file

    # Import file
    result_dict = {}
    num_lines = sum(1 for line in open(path, 'r'))
    with open(path, 'r') as f:
        skipped_comments = False
        for line in tqdm(f, total=num_lines):
            # Skip comments
            if not skipped_comments:
                if line.startswith('#') or line == '\n':
                    continue
                else:
                    skipped_comments = True
            # Skip empty or newline lines
            if line.strip() == '' or line.strip() == '\n':
                continue
            # Parse line using the passed delimiter
            tokens = line.strip().split(delimiter)
            # Check for correct number of tokens
            if len(tokens) != 4:
                raise ValueError('Invalid number of tokens in line: ' + line)
            # Get word (token 0) and check validity (no spaces)
            word = tokens[0].lower()
            if ' ' in word:
                raise ValueError('Invalid word in line: ' + line)
            # Get phonemes and check validity (alphanumeric)
            ph_case = tokens[1]
            ph_default = tokens[2]
            if not ph_case.replace(' ', '').isalnum() or not ph_default.replace(' ', '').isalnum():
                raise ValueError('Invalid phonemes in line: ' + line)
            # Get case (token 3) and check validity
            # (alphanumeric, allowing the '$' used by tags like PRP$ and WP$)
            case = tokens[3]
            if not case.replace('$', '').isalnum():
                raise ValueError('Invalid case in line: ' + line)
            # Check if case is a full case or full type case
            if case in symbols.pos_tags_set or case in symbols.pos_type_tags_set:
                # Add to dictionary directly
                # Build sub-dictionary for each case
                sub_dict = result_dict.get(word, {})
                sub_dict[case] = ph.to_sds(ph_case)
                sub_dict['DEFAULT'] = ph.to_sds(ph_default)
                result_dict[word] = sub_dict
            # Check if case is a short type case
            elif case in symbols.pos_type_short_tags_set:
                # Need to convert to the full type case
                sub_dict = result_dict.get(word, {})
                case_full = symbols.pos_type_form_dict[case]
                sub_dict[case_full] = ph.to_sds(ph_case)
                sub_dict['DEFAULT'] = ph.to_sds(ph_default)
                result_dict[word] = sub_dict
            else:
                raise ValueError('Invalid case in line: ' + line)
    return result_dict


# Method to write a dict to a json file
def to_json(path, dict_to_write):
    # Writes a dictionary to a json file
    with open(path, 'w') as f:
        json.dump(dict_to_write, f, indent=4, sort_keys=True)


# Combined method to convert binary delimited files to json
def bin_delim_to_json(path, output_path, delimiter):
    to_json(output_path, from_binary_delim(path, delimiter))
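For illustration, the parsing step of from_binary_delim applied by hand to the example line from its comment. This inline sketch skips the ph.to_sds formatting and the validation the real function performs; the short tag 'V' expands to 'VERB' per pos_type_form_dict.

import json

line = "REJECT|R IH0 JH EH1 K T|R IY1 JH EH0 K T|V"
word, ph_case, ph_default, case = line.split('|')
entry = {word.lower(): {'VERB': ph_case, 'DEFAULT': ph_default}}
print(json.dumps(entry, indent=4))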
resources/app/python/xvapitch/text/h2p_parser/utils/parser.py
ADDED
@@ -0,0 +1,133 @@
# Parses annotation files for conversion of sentences to phonemes
from __future__ import annotations
from h2p_parser import cmudictext
from h2p_parser.filter import filter_text
from h2p_parser.text.numbers import normalize_numbers
from h2p_parser.symbols import punctuation
from tqdm import tqdm


# Reads a file into a list of lines
def read_file(file_name, delimiter) -> list:
    with open(file_name, 'r', encoding="utf-8") as f:
        result = []
        for line in f:
            line = line.split(delimiter)
            # Take the second element
            result.append(line[1].lower())
        return result


# Checks a list of lines for unresolvable words
# Returns a ParseResult holding unresolved lines/words and coverage stats
def check_lines(lines: list) -> ParseResult:
    cde = cmudictext.CMUDictExt()
    # Holds result
    result = ParseResult()
    # Loop with tqdm
    for line in tqdm(lines, desc='Checking lines'):
        # Add line to result
        result.all_lines.append(line)
        result.lines.add(line)
        # If line contains a heteronym, add to result
        if cde.h2p.contains_het(line):
            result.all_lines_cont_het.append(line)
        # Filter the line
        f_line = filter_text(line)
        # Normalize numbers
        f_line = normalize_numbers(f_line)
        # Tokenize
        tokens = cde.h2p.tokenize(f_line)
        for word in tokens:
            # Skip word if punctuation
            if word in punctuation:
                continue
            # Add word to result
            result.all_words.append(word)
            result.words.add(word)
            # Check if word is resolvable
            h2p_res = cde.h2p.contains_het(word)
            cmu_res = cde.dict.get(word) is not None
            fet_res = cde.lookup(word) is not None
            if not h2p_res and not cmu_res and not fet_res:
                # If word ends in "'s", remove it and record the base word
                if word.endswith("'s"):
                    word = word[:-2]
                result.unres_all_lines.append(line)
                result.unres_all_words.append(word)
                result.unres_lines.add(line)
                result.unres_words.add(word)
            elif h2p_res:
                result.n_words_res += 1
                result.n_words_het += 1
            elif cmu_res:
                result.n_words_res += 1
                result.n_words_cmu += 1
            elif fet_res:
                result.n_words_res += 1
                result.n_words_fet += 1

    # Also pass stats
    result.ft_stats = cde.p.stat_resolves

    return result


# Class to hold the result of a parse
class ParseResult:
    def __init__(self):
        # Lines
        self.all_lines = []
        self.all_lines_cont_het = []
        self.unres_all_lines = []
        self.lines = set()
        self.unres_lines = set()
        # Words
        self.all_words = []
        self.unres_all_words = []
        self.words = set()
        self.unres_words = set()
        # Numerical stats
        self.n_words_res = 0  # Number of total resolved words
        self.n_words_cmu = 0  # Resolved words from CMU
        self.n_words_fet = 0  # Resolved words from Features
        self.n_words_het = 0  # Resolved words from H2p
        # Stats from cmudictext
        self.ft_stats = None

    # Get percentage of unique lines covered
    def line_unique_coverage(self) -> float:
        dec = 1 - len(self.unres_lines) / len(self.lines)
        return round(dec * 100, 2)

    # Get percentage of unique words covered
    def word_unique_coverage(self) -> float:
        dec = 1 - len(self.unres_words) / len(self.words)
        return round(dec * 100, 2)

    # Get percentage of lines covered (All)
    def line_coverage(self) -> float:
        dec = 1 - len(self.unres_all_lines) / len(self.all_lines)
        return round(dec * 100, 2)

    # Get percentage of words covered (All)
    def word_coverage(self) -> float:
        dec = 1 - len(self.unres_all_words) / len(self.all_words)
        return round(dec * 100, 2)

    # Get percentage of lines containing heteronyms
    def percent_line_het(self) -> float:
        dec = len(self.all_lines_cont_het) / len(self.all_lines)
        return round(dec * 100, 2)

    # Get percentage of words resolved by H2p
    def percent_word_h2p(self) -> float:
        dec = self.n_words_het / self.n_words_res
        return round(dec * 100, 2)

    # Get percentage of words resolved by CMU
    def percent_word_cmu(self) -> float:
        dec = self.n_words_cmu / self.n_words_res
        return round(dec * 100, 2)
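Finally, a hedged end-to-end sketch of this parser module. 'metadata.csv' is a hypothetical LJSpeech-style 'id|text' annotation file, and the run assumes h2p_parser and its data files (CMU dictionary, heteronym dict) are installed; coverage methods report percentages, e.g. 975 resolved of 1000 unique words reads as 97.5.

from h2p_parser.utils import parser

# 'metadata.csv' is a hypothetical annotation file in 'id|text' form
lines = parser.read_file('metadata.csv', '|')
result = parser.check_lines(lines)
print(result.word_unique_coverage())  # e.g. 97.5
print(result.percent_line_het())      # share of lines containing a heteronym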