Spaces:

jlopez00
/

tts-service

Runtime error

App Files Community

jlopez00 committed on Nov 26

Commit

f017d24

•

1 Parent(s): a376e00

Upload folder using huggingface_hub

Browse files

Files changed (10) hide show

.devcontainer/docker-compose.yml +3 -3
.pre-commit-config.yaml +1 -0
.vscode/settings.json +5 -3
assets/flask/server.py +1 -1
assets/installation_checker.py +1 -1
core/__init__.py +12 -12
pyproject.toml +16 -3
rvc/infer/infer.py +7 -3
rvc/infer/pipeline.py +11 -8
rvc/lib/algorithm/synthesizers.py +1 -0

.devcontainer/docker-compose.yml CHANGED Viewed

@@ -1,9 +1,9 @@
 services:
   dev:
     profiles:
-      - devcontainer
     build: dev
     volumes:
-      - ../..:/workspaces:cached
-      - ..:/workspaces/tts-service:cached
     command: sleep infinity

 services:
   dev:
     profiles:
+    - devcontainer
     build: dev
     volumes:
+    - ../..:/workspaces:cached
+    - ..:/workspaces/tts-service:cached
     command: sleep infinity

.pre-commit-config.yaml CHANGED Viewed

@@ -31,6 +31,7 @@ repos:
   - id: mypy
     name: mypy
     entry: mypy
     language: system
     types: [python]
     pass_filenames: false

   - id: mypy
     name: mypy
     entry: mypy
+    args: ["--explicit-package-bases", "--namespace-packages"]
     language: system
     types: [python]
     pass_filenames: false

.vscode/settings.json CHANGED Viewed

@@ -13,6 +13,8 @@
     "files.trimTrailingWhitespace": true,
     "kubernetes-yaml-formatter-x.indentlessArrays": true,
     "kubernetes-yaml-formatter-x.retainLineBreaksSingle": true,
     "python.analysis.importFormat": "relative",
     "python.analysis.autoFormatStrings": true,
     "python.analysis.autoImportCompletions": true,
@@ -30,9 +32,9 @@
         "prettier.tabWidth": 4,
         "editor.defaultFormatter": "esbenp.prettier-vscode"
     },
-    "[python]": {
-        "editor.defaultFormatter": "charliermarsh.ruff"
-    },
     "[yaml]": {
         "editor.defaultFormatter": "kiliantyler.kubernetes-yaml-formatter-x"
     }

     "files.trimTrailingWhitespace": true,
     "kubernetes-yaml-formatter-x.indentlessArrays": true,
     "kubernetes-yaml-formatter-x.retainLineBreaksSingle": true,
+    "mypy.runUsingActiveInterpreter": true,
+    "mypy.extraArguments": ["--explicit-package-bases", "--namespace-packages"],
     "python.analysis.importFormat": "relative",
     "python.analysis.autoFormatStrings": true,
     "python.analysis.autoImportCompletions": true,
         "prettier.tabWidth": 4,
         "editor.defaultFormatter": "esbenp.prettier-vscode"
     },
+    // "[python]": {
+    //     "editor.defaultFormatter": "charliermarsh.ruff"
+    // },
     "[yaml]": {
         "editor.defaultFormatter": "kiliantyler.kubernetes-yaml-formatter-x"
     }

assets/flask/server.py CHANGED Viewed

@@ -34,7 +34,7 @@ def start_flask():
         try:
             subprocess.Popen(
                 [ENV_PATH, FLASK_SCRIPT_PATH],
-                creationflags=subprocess.CREATE_NEW_CONSOLE,
             )
         except Exception as error:
             print(f"An error occurred starting the Flask server: {error}")

         try:
             subprocess.Popen(
                 [ENV_PATH, FLASK_SCRIPT_PATH],
+                creationflags=getattr(subprocess, "CREATE_NEW_CONSOLE", 0),
             )
         except Exception as error:
             print(f"An error occurred starting the Flask server: {error}")

assets/installation_checker.py CHANGED Viewed

@@ -13,7 +13,7 @@ class InstallationError(Exception):
 def check_installation():
     try:
-        system_drive = os.getenv("SystemDrive")
         current_drive = os.path.splitdrive(now_dir)[0]
         if current_drive.upper() != system_drive.upper():
             raise InstallationError(

 def check_installation():
     try:
+        system_drive = os.getenv("SystemDrive", "")
         current_drive = os.path.splitdrive(now_dir)[0]
         if current_drive.upper() != system_drive.upper():
             raise InstallationError(

core/__init__.py CHANGED Viewed

@@ -72,7 +72,7 @@ def run_infer_script(
     upscale_audio: bool,
     f0_file: str,
     embedder_model: str,
-    embedder_model_custom: str = None,
     formant_shifting: bool = False,
     formant_qfrency: float = 1.0,
     formant_timbre: float = 1.0,
@@ -210,7 +210,7 @@ def run_batch_infer_script(
     upscale_audio: bool,
     f0_file: str,
     embedder_model: str,
-    embedder_model_custom: str = None,
     formant_shifting: bool = False,
     formant_qfrency: float = 1.0,
     formant_timbre: float = 1.0,
@@ -351,7 +351,7 @@ def run_tts_script(
     upscale_audio: bool,
     f0_file: str,
     embedder_model: str,
-    embedder_model_custom: str = None,
     sid: int = 0,
 ):
@@ -470,7 +470,7 @@ def run_extract_script(
     gpu: int,
     sample_rate: int,
     embedder_model: str,
-    embedder_model_custom: str = None,
 ):
     model_path = os.path.join(logs_path, model_name)
@@ -519,8 +519,8 @@ def run_train_script(
     index_algorithm: str = "Auto",
     cache_data_in_gpu: bool = False,
     custom_pretrained: bool = False,
-    g_pretrained_path: str = None,
-    d_pretrained_path: str = None,
 ):
     if pretrained == True:
@@ -737,15 +737,15 @@ def parse_arguments():
         default="rmvpe",
     )
     infer_parser.add_argument(
-        "--input_path",
         type=str,
-        help="Full path to the input audio file.",
         required=True,
     )
     infer_parser.add_argument(
-        "--output_path",
         type=str,
-        help="Full path to the output audio file.",
         required=True,
     )
     pth_path_description = "Full path to the RVC model file (.pth)."
@@ -2440,8 +2440,8 @@ def main():
                 protect=args.protect,
                 hop_length=args.hop_length,
                 f0_method=args.f0_method,
-                input_path=args.input_path,
-                output_path=args.output_path,
                 pth_path=args.pth_path,
                 index_path=args.index_path,
                 split_audio=args.split_audio,

     upscale_audio: bool,
     f0_file: str,
     embedder_model: str,
+    embedder_model_custom: str | None = None,
     formant_shifting: bool = False,
     formant_qfrency: float = 1.0,
     formant_timbre: float = 1.0,
     upscale_audio: bool,
     f0_file: str,
     embedder_model: str,
+    embedder_model_custom: str | None = None,
     formant_shifting: bool = False,
     formant_qfrency: float = 1.0,
     formant_timbre: float = 1.0,
     upscale_audio: bool,
     f0_file: str,
     embedder_model: str,
+    embedder_model_custom: str | None = None,
     sid: int = 0,
 ):
     gpu: int,
     sample_rate: int,
     embedder_model: str,
+    embedder_model_custom: str | None = None,
 ):
     model_path = os.path.join(logs_path, model_name)
     index_algorithm: str = "Auto",
     cache_data_in_gpu: bool = False,
     custom_pretrained: bool = False,
+    g_pretrained_path: str | None = None,
+    d_pretrained_path: str | None = None,
 ):
     if pretrained == True:
         default="rmvpe",
     )
     infer_parser.add_argument(
+        "--output_rvc_path",
         type=str,
+        help="Full path to the output RVC file.",
         required=True,
     )
     infer_parser.add_argument(
+        "--output_tts_path",
         type=str,
+        help="Full path to the output TTS audio file.",
         required=True,
     )
     pth_path_description = "Full path to the RVC model file (.pth)."
                 protect=args.protect,
                 hop_length=args.hop_length,
                 f0_method=args.f0_method,
+                output_rvc_path=args.output_rvc_path,
+                output_tts_path=args.output_tts_path,
                 pth_path=args.pth_path,
                 index_path=args.index_path,
                 split_audio=args.split_audio,

pyproject.toml CHANGED Viewed

@@ -112,12 +112,23 @@ select = [
 [tool.mypy]
 packages = "assets,core,rvc,tabs,tts_service,tests"
-#1181 errors
 [[tool.mypy.overrides]]
 module = [
-    "core.*",
-    "rvc.infer.infer",
     "rvc.infer.pipeline",
     "rvc.lib.algorithm.attentions",
     "rvc.lib.algorithm.commons",
@@ -127,6 +138,7 @@ module = [
     "rvc.train.train",
     "rvc.train.data_utils",
     "rvc.train.extract.extract",
     "rvc.train.preprocess.preprocess",
     "rvc.train.preprocess.slicer",
     "rvc.train.process.extract_small_model",
@@ -147,6 +159,7 @@ module = [
     "local_attention",
     "matplotlib.*",
     "noisereduce",
     "pydub",
     "pypresence",
     "resampy",

 [tool.mypy]
 packages = "assets,core,rvc,tabs,tts_service,tests"
+check_untyped_defs = true
+[[tool.mypy.overrides]]
+module = [
+    "rvc.lib.algorithm.generators",
+    "rvc.lib.algorithm.residuals",
+    "rvc.lib.predictors.RMVPE",
+    "rvc.lib.tools.gdown",
+    "rvc.lib.tools.model_download",
+    "rvc.train.losses",
+    "rvc.train.process.extract_model",
+    "rvc.train.process.model_blender",
+]
+check_untyped_defs = false
 [[tool.mypy.overrides]]
 module = [
     "rvc.infer.pipeline",
     "rvc.lib.algorithm.attentions",
     "rvc.lib.algorithm.commons",
     "rvc.train.train",
     "rvc.train.data_utils",
     "rvc.train.extract.extract",
+    "rvc.train.mel_processing",
     "rvc.train.preprocess.preprocess",
     "rvc.train.preprocess.slicer",
     "rvc.train.process.extract_small_model",
     "local_attention",
     "matplotlib.*",
     "noisereduce",
+    "pedalboard_native",
     "pydub",
     "pypresence",
     "resampy",

rvc/infer/infer.py CHANGED Viewed

@@ -10,6 +10,8 @@ import soundfile as sf
 import noisereduce as nr
 from pedalboard import (
     Pedalboard,
     Chorus,
     Distortion,
     Reverb,
@@ -60,7 +62,7 @@ class VoiceConverter:
         self.use_f0 = None  # Whether the model uses F0
         self.loaded_model = None
-    def load_hubert(self, embedder_model: str, embedder_model_custom: str = None):
         """
         Loads the HuBERT model for speaker embedding extraction.
@@ -201,7 +203,7 @@ class VoiceConverter:
         model_path: str,
         index_path: str,
         pitch: int = 0,
-        f0_file: str = None,
         f0_method: str = "rmvpe",
         index_rate: float = 0.75,
         volume_envelope: float = 1,
@@ -212,7 +214,7 @@ class VoiceConverter:
         f0_autotune_strength: float = 1,
         filter_radius: int = 3,
         embedder_model: str = "contentvec",
-        embedder_model_custom: str = None,
         clean_audio: bool = False,
         clean_strength: float = 0.5,
         export_format: str = "WAV",
@@ -294,6 +296,8 @@ class VoiceConverter:
             converted_chunks = []
             for c in chunks:
                 audio_opt = self.vc.pipeline(
                     model=self.hubert_model,
                     net_g=self.net_g,

 import noisereduce as nr
 from pedalboard import (
     Pedalboard,
+)
+from pedalboard_native import (
     Chorus,
     Distortion,
     Reverb,
         self.use_f0 = None  # Whether the model uses F0
         self.loaded_model = None
+    def load_hubert(self, embedder_model: str, embedder_model_custom: str | None = None):
         """
         Loads the HuBERT model for speaker embedding extraction.
         model_path: str,
         index_path: str,
         pitch: int = 0,
+        f0_file: str | None = None,
         f0_method: str = "rmvpe",
         index_rate: float = 0.75,
         volume_envelope: float = 1,
         f0_autotune_strength: float = 1,
         filter_radius: int = 3,
         embedder_model: str = "contentvec",
+        embedder_model_custom: str | None = None,
         clean_audio: bool = False,
         clean_strength: float = 0.5,
         export_format: str = "WAV",
             converted_chunks = []
             for c in chunks:
+                if self.vc is None:
+                    raise Exception("Voice conversion model not loaded.")
                 audio_opt = self.vc.pipeline(
                     model=self.hubert_model,
                     net_g=self.net_g,

rvc/infer/pipeline.py CHANGED Viewed

@@ -8,6 +8,7 @@ import torchcrepe
 import faiss
 import librosa
 import numpy as np
 from scipy import signal
 from torch import Tensor
@@ -29,7 +30,7 @@ bh, ah = signal.butter(
     N=FILTER_ORDER, Wn=CUTOFF_FREQUENCY, btype="high", fs=SAMPLE_RATE
 )
-input_audio_path2wav = {}
 class AudioProcessor:
@@ -37,6 +38,7 @@ class AudioProcessor:
     A class for processing audio signals, specifically for adjusting RMS levels.
     """
     def change_rms(
         source_audio: np.ndarray,
         source_rate: int,
@@ -292,9 +294,10 @@ class Pipeline:
         for method in methods:
             f0 = None
             if method == "crepe":
-                f0 = self.get_f0_crepe_computation(
-                    x, f0_min, f0_max, p_len, int(hop_length)
-                )
             elif method == "rmvpe":
                 f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
                 f0 = f0[1:]
@@ -323,8 +326,8 @@ class Pipeline:
     def get_f0(
         self,
-        input_audio_path,
-        x,
         p_len,
         pitch,
         f0_method,
@@ -382,7 +385,7 @@ class Pipeline:
             )
         if f0_autotune is True:
-            f0 = Autotune.autotune_f0(self, f0, f0_autotune_strength)
         f0 *= pow(2, pitch / 12)
         tf0 = self.sample_rate // self.window
@@ -404,7 +407,7 @@ class Pipeline:
         ) + 1
         f0_mel[f0_mel <= 1] = 1
         f0_mel[f0_mel > 255] = 255
-        f0_coarse = np.rint(f0_mel).astype(np.int)
         return f0_coarse, f0bak

 import faiss
 import librosa
 import numpy as np
+import numpy.typing as npt
 from scipy import signal
 from torch import Tensor
     N=FILTER_ORDER, Wn=CUTOFF_FREQUENCY, btype="high", fs=SAMPLE_RATE
 )
+input_audio_path2wav: dict[str, npt.NDArray] = {}
 class AudioProcessor:
     A class for processing audio signals, specifically for adjusting RMS levels.
     """
+    @staticmethod
     def change_rms(
         source_audio: np.ndarray,
         source_rate: int,
         for method in methods:
             f0 = None
             if method == "crepe":
+                raise ValueError("Crepe method is not supported in hybrid mode")
+                # f0 = self.get_f0_crepe_computation(
+                #     x, f0_min, f0_max, p_len, int(hop_length)
+                # )
             elif method == "rmvpe":
                 f0 = self.model_rmvpe.infer_from_audio(x, thred=0.03)
                 f0 = f0[1:]
     def get_f0(
         self,
+        input_audio_path: str,
+        x: npt.NDArray,
         p_len,
         pitch,
         f0_method,
             )
         if f0_autotune is True:
+            f0 = self.autotune.autotune_f0(f0, f0_autotune_strength)
         f0 *= pow(2, pitch / 12)
         tf0 = self.sample_rate // self.window
         ) + 1
         f0_mel[f0_mel <= 1] = 1
         f0_mel[f0_mel > 255] = 255
+        f0_coarse = np.rint(f0_mel).astype(np.int32)
         return f0_coarse, f0bak

rvc/lib/algorithm/synthesizers.py CHANGED Viewed

@@ -56,6 +56,7 @@ class Synthesizer(torch.nn.Module):
         spk_embed_dim,
         gin_channels,
         sr,
         use_f0,
         text_enc_hidden_dim=768,
         **kwargs

         spk_embed_dim,
         gin_channels,
         sr,
+        *,
         use_f0,
         text_enc_hidden_dim=768,
         **kwargs