Clonar-voz-guaratuba / requirements.txt
Shadhil's picture
voice-clone with single audio sample input
9b2107c
raw
history blame
977 Bytes
# core deps
numpy==1.22.0;python_version<="3.10"
numpy>=1.24.3;python_version>"3.10"
cython>=0.29.30
scipy>=1.11.2
torch>=2.1
torchaudio
soundfile>=0.12.0
librosa>=0.10.0
scikit-learn>=1.3.0
numba==0.55.1;python_version<"3.9"
numba>=0.57.0;python_version>="3.9"
inflect>=5.6.0
tqdm>=4.64.1
anyascii>=0.3.0
pyyaml>=6.0
fsspec>=2023.6.0 # <= 2023.9.1 makes aux tests fail
aiohttp>=3.8.1
packaging>=23.1
# deps for examples
flask>=2.0.1
# deps for inference
pysbd>=0.3.4
# deps for notebooks
umap-learn>=0.5.1
pandas>=1.4,<2.0
# deps for training
matplotlib>=3.7.0
# coqui stack
trainer>=0.0.32
# config management
coqpit>=0.0.16
# chinese g2p deps
jieba
pypinyin
# korean
hangul_romanize
# gruut+supported langs
gruut[de,es,fr]==2.2.3
# deps for korean
jamo
nltk
g2pkk>=0.1.1
# deps for bangla
bangla
bnnumerizer
bnunicodenormalizer
#deps for tortoise
einops>=0.6.0
transformers>=4.33.0
#deps for bark
encodec>=0.1.1
# deps for XTTS
unidecode>=1.3.2
num2words
spacy[ja]>=3
gradio