# core deps
numpy==1.23.0;python_version<="3.10"
numpy>=1.24.3;python_version>"3.10"
matplotlib
cython>=0.29.30
scipy>=1.11.2
torch>=2.1
torchaudio
transformers
gdown
trainer
soundfile>=0.12.0
librosa>=0.10.0
scikit-learn>=1.3.0
numba==0.55.1;python_version<"3.9"
numba>=0.57.0;python_version>="3.9"
inflect>=5.6.0
tqdm>=4.64.1
anyascii>=0.3.0
pyyaml>=6.0
fsspec>=2023.6.0 # <= 2023.9.1 makes aux tests fail
aiohttp>=3.8.1
packaging>=23.1
mutagen==1.47.0
librosa
# deps for examples
flask>=2.0.1
# deps for inference
pysbd>=0.3.4
# deps for notebooks
umap-learn>=0.5.1
pandas>=1.4,<2.0
# deps for training
matplotlib>=3.7.0
# coqui stack
trainer>=0.0.36
# config management
coqpit>=0.0.16
# chinese g2p deps
jieba
pypinyin
# deps for korean romanization
hangul_romanize
# gruut+supported langs
gruut[de,es,fr]==2.2.3
# deps for korean
jamo
nltk
g2pkk>=0.1.1
# deps for bangla
bangla
bnnumerizer
bnunicodenormalizer
# deps for tortoise
einops>=0.6.0
transformers>=4.45.2
# deps for bark
encodec>=0.1.1
# deps for XTTS
unidecode>=1.3.2
num2words
# spacy[ja]>=3
tokenizers==0.20.1
vinorm==2.0.7
underthesea==6.8.4
# deps for silence removal
hmmlearn==0.3.3
eyed3==0.9.7
pesq==0.0.4
pydub==0.25.1
pyAudioAnalysis==0.3.14
ffmpeg-python==0.2.0