Spaces:

projecte-aina
/

matxa-alvocat-tts-ca

Running

wetdog committed on Mar 20

Commit

bea1338

•

1 Parent(s): 40b17fc

get models from hf_hub

Files changed (8) hide show

Dockerfile CHANGED Viewed

@@ -40,6 +40,10 @@ COPY --chown=user requirements.txt $HOME/app/
 RUN pip install -r requirements.txt
 COPY --chown=user . $HOME/app/
 # Fix ownership issues

 RUN pip install -r requirements.txt
+RUN huggingface-cli download BSC-LT/matcha-tts-cat-onnx matcha_multispeaker_cat_opset_15_10_steps_lastwords.onnx --local-dir $HOME/app/
+RUN huggingface-cli download BSC-LT/vocos-mel-22khz-onnx  mel_spec_22khz_v2.onnx --local-dir $HOME/app/
 COPY --chown=user . $HOME/app/
 # Fix ownership issues

infer_onnx.py CHANGED Viewed

@@ -30,7 +30,7 @@ def process_text(i: int, text: str, device: torch.device):
     print(x_phones)
     return x.numpy(), x_lengths.numpy()
-MODEL_PATH_MATCHA_MEL="matcha_multispeaker_cat_opset_15_10_steps.onnx"
 MODEL_PATH_MATCHA="matcha_hifigan_multispeaker_cat.onnx"
 MODEL_PATH_VOCOS="mel_spec_22khz_v2.onnx"
 CONFIG_PATH="config_22khz.yaml"
@@ -39,7 +39,8 @@ SPEAKER_ID_DICT="spk_to_id.json"
 sess_options = onnxruntime.SessionOptions()
 model_matcha_mel= onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL), sess_options=sess_options, providers=["CPUExecutionProvider"])
 model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
-model_matcha = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA), sess_options=sess_options, providers=["CPUExecutionProvider"])
 speaker_id_dict = json.load(open(SPEAKER_ID_DICT))
 speakers = [sp for sp in speaker_id_dict.keys()]
 speakers.sort()
@@ -90,7 +91,7 @@ def vocos_inference(mel,denoise):
         mag_spec_bias = torch.sqrt(spec_bias.pow(2).sum(-1))
         # substract
-        strength = 0.0005
         mag_spec_denoised = mag_spec - mag_spec_bias * strength
         mag_spec_denoised = torch.clamp(mag_spec_denoised, 0.0)

     print(x_phones)
     return x.numpy(), x_lengths.numpy()
+MODEL_PATH_MATCHA_MEL="matcha_multispeaker_cat_opset_15_10_steps_lastwords.onnx"
 MODEL_PATH_MATCHA="matcha_hifigan_multispeaker_cat.onnx"
 MODEL_PATH_VOCOS="mel_spec_22khz_v2.onnx"
 CONFIG_PATH="config_22khz.yaml"
 sess_options = onnxruntime.SessionOptions()
 model_matcha_mel= onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA_MEL), sess_options=sess_options, providers=["CPUExecutionProvider"])
 model_vocos = onnxruntime.InferenceSession(str(MODEL_PATH_VOCOS), sess_options=sess_options, providers=["CPUExecutionProvider"])
+#model_matcha = onnxruntime.InferenceSession(str(MODEL_PATH_MATCHA), sess_options=sess_options, providers=["CPUExecutionProvider"])
 speaker_id_dict = json.load(open(SPEAKER_ID_DICT))
 speakers = [sp for sp in speaker_id_dict.keys()]
 speakers.sort()
         mag_spec_bias = torch.sqrt(spec_bias.pow(2).sum(-1))
         # substract
+        strength = 0.0025
         mag_spec_denoised = mag_spec - mag_spec_bias * strength
         mag_spec_denoised = torch.clamp(mag_spec_denoised, 0.0)

matcha_hifigan_multispeaker_cat.onnx DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c5927b5a9a5f7890d4a8c353266ff00a1d9c4376eb1294020ffe43afa622b72f
-size 142073725

matcha_multispeaker_cat_opset_15.onnx DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:e5b53370f69b8f4ca3d510634b644f6d815f34ee7a2944d0fb3a5588f6286b88
-size 102285286

matcha_multispeaker_cat_opset_15_10_steps.onnx DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:f7ab5fb2e8d590d5cb610912d3c4e6480b32322cc4fa4bedf94eb0f8b8ce7570
-size 86049399

mel_spec_22khz.onnx DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:15485817350df1e1cf50f75058497ec4b5273acb8903591bb41c6b5fb62daf2b
-size 53870258

mel_spec_22khz_v2.onnx DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2b02c479881f89a8320024436e986f64b11e82b1fd48046d4b695c5fd9fb84e7
-size 53883652

requirements.txt CHANGED Viewed

@@ -3,4 +3,5 @@ phonemizer
 torch
 unidecode
 gradio
-soundfile

 torch
 unidecode
 gradio
+soundfile
+huggingface_hub[cli]