Fix WHISPER_IMPLEMENTATION argument
- app.py +24 -19
- cli.py +7 -3
- dockerfile +12 -2
- requirements-fastWhisper.txt → requirements-fasterWhisper.txt +2 -1
- src/whisper/whisperFactory.py +2 -0
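
This commit threads the choice of Whisper implementation (`whisper` or `faster-whisper`) through the whole stack. app.py and cli.py now seed the `--whisper_implementation` argument default from a `WHISPER_IMPLEMENTATION` environment variable and restrict it to the two valid values; app.py also fixes a broken `app_config` reference (now `self.app_config`) when creating the whisper container. The dockerfile accepts the same name as a build argument, installs the matching requirements file, and exports the choice to the runtime environment; the faster-whisper requirements file is renamed to the name the dockerfile expects and gains `more_itertools`. Finally, the factory logs which implementation it constructs.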
app.py
CHANGED

```diff
@@ -125,7 +125,7 @@ class WhisperTranscriber:
         selectedLanguage = languageName.lower() if len(languageName) > 0 else None
         selectedModel = modelName if modelName is not None else "base"
 
-        model = create_whisper_container(whisper_implementation=app_config.whisper_implementation,
+        model = create_whisper_container(whisper_implementation=self.app_config.whisper_implementation,
                                          model_name=selectedModel, cache=self.model_cache, models=self.app_config.models)
 
         # Result
@@ -485,38 +485,43 @@ def create_ui(app_config: ApplicationConfig):
     ui.close()
 
 if __name__ == '__main__':
-    app_config = ApplicationConfig.create_default()
-    whisper_models = app_config.get_model_names()
+    default_app_config = ApplicationConfig.create_default()
+    whisper_models = default_app_config.get_model_names()
+
+    # Environment variable overrides
+    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", default_app_config.whisper_implementation)
 
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-    parser.add_argument("--input_audio_max_duration", type=int, default=app_config.input_audio_max_duration, \
+    parser.add_argument("--input_audio_max_duration", type=int, default=default_app_config.input_audio_max_duration, \
                         help="Maximum audio file length in seconds, or -1 for no limit.") # 600
-    parser.add_argument("--share", type=bool, default=app_config.share, \
+    parser.add_argument("--share", type=bool, default=default_app_config.share, \
                         help="True to share the app on HuggingFace.") # False
-    parser.add_argument("--server_name", type=str, default=app_config.server_name, \
+    parser.add_argument("--server_name", type=str, default=default_app_config.server_name, \
                         help="The host or IP to bind to. If None, bind to localhost.") # None
-    parser.add_argument("--server_port", type=int, default=app_config.server_port, \
+    parser.add_argument("--server_port", type=int, default=default_app_config.server_port, \
                         help="The port to bind to.") # 7860
-    parser.add_argument("--queue_concurrency_count", type=int, default=app_config.queue_concurrency_count, \
+    parser.add_argument("--queue_concurrency_count", type=int, default=default_app_config.queue_concurrency_count, \
                         help="The number of concurrent requests to process.") # 1
-    parser.add_argument("--default_model_name", type=str, choices=whisper_models, default=app_config.default_model_name, \
+    parser.add_argument("--default_model_name", type=str, choices=whisper_models, default=default_app_config.default_model_name, \
                        help="The default model name.") # medium
-    parser.add_argument("--default_vad", type=str, default=app_config.default_vad, \
+    parser.add_argument("--default_vad", type=str, default=default_app_config.default_vad, \
                        help="The default VAD.") # silero-vad
-    parser.add_argument("--vad_parallel_devices", type=str, default=app_config.vad_parallel_devices, \
+    parser.add_argument("--vad_parallel_devices", type=str, default=default_app_config.vad_parallel_devices, \
                        help="A comma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.") # ""
-    parser.add_argument("--vad_cpu_cores", type=int, default=app_config.vad_cpu_cores, \
+    parser.add_argument("--vad_cpu_cores", type=int, default=default_app_config.vad_cpu_cores, \
                        help="The number of CPU cores to use for VAD pre-processing.") # 1
-    parser.add_argument("--vad_process_timeout", type=float, default=app_config.vad_process_timeout, \
+    parser.add_argument("--vad_process_timeout", type=float, default=default_app_config.vad_process_timeout, \
                        help="The number of seconds before inactive processes are terminated. Use 0 to close processes immediately, or None for no timeout.") # 1800
-    parser.add_argument("--auto_parallel", type=bool, default=app_config.auto_parallel, \
+    parser.add_argument("--auto_parallel", type=bool, default=default_app_config.auto_parallel, \
                        help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.") # False
-    parser.add_argument("--output_dir", "-o", type=str, default=app_config.output_dir, \
-                        help="directory to save the outputs")
-    parser.add_argument("--whisper_implementation", type=str, default=app_config.whisper_implementation, \
-                        help="the Whisper implementation to use")
+    parser.add_argument("--output_dir", "-o", type=str, default=default_app_config.output_dir, \
+                        help="directory to save the outputs")
+    parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"], \
+                        help="the Whisper implementation to use")
 
     args = parser.parse_args().__dict__
 
-    updated_config = app_config.update(**args)
+    updated_config = default_app_config.update(**args)
+
+    print(f"Using {updated_config.whisper_implementation} for Whisper")
     create_ui(app_config=updated_config)
```
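The key change in app.py is that the `--whisper_implementation` default is now seeded from the `WHISPER_IMPLEMENTATION` environment variable, so the Docker `ENV` set below reaches the app without extra flags. A minimal standalone sketch of the resulting precedence; `config_default` is a hypothetical stand-in for `default_app_config.whisper_implementation`, which comes from the app's JSON config and is not shown in this diff:

```python
import argparse
import os

# Hypothetical stand-in for default_app_config.whisper_implementation.
config_default = "whisper"

# Env var overrides the config default; an explicit CLI flag overrides both.
default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", config_default)

parser = argparse.ArgumentParser()
parser.add_argument("--whisper_implementation", type=str,
                    default=default_whisper_implementation,
                    choices=["whisper", "faster-whisper"])
args = parser.parse_args()

# WHISPER_IMPLEMENTATION=faster-whisper python sketch.py
#   -> faster-whisper (env var wins over the config default)
# WHISPER_IMPLEMENTATION=faster-whisper python sketch.py --whisper_implementation whisper
#   -> whisper (explicit flag wins over the env var)
print(f"Using {args.whisper_implementation} for Whisper")
```
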
cli.py
CHANGED

```diff
@@ -20,6 +20,9 @@ def cli():
     # For the CLI, we fall back to saving the output to the current directory
     output_dir = app_config.output_dir if app_config.output_dir is not None else "."
 
+    # Environment variable overrides
+    default_whisper_implementation = os.environ.get("WHISPER_IMPLEMENTATION", app_config.whisper_implementation)
+
     parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
     parser.add_argument("audio", nargs="+", type=str, \
                         help="audio file(s) to transcribe")
@@ -32,9 +35,9 @@ def cli():
     parser.add_argument("--output_dir", "-o", type=str, default=output_dir, \
                         help="directory to save the outputs")
     parser.add_argument("--verbose", type=str2bool, default=app_config.verbose, \
-                        help="whether to print out the progress and debug messages")
-    parser.add_argument("--whisper_implementation", type=str, default=app_config.whisper_implementation, \
-                        help="the Whisper implementation to use")
+                        help="whether to print out the progress and debug messages")
+    parser.add_argument("--whisper_implementation", type=str, default=default_whisper_implementation, choices=["whisper", "faster-whisper"], \
+                        help="the Whisper implementation to use")
 
     parser.add_argument("--task", type=str, default=app_config.task, choices=["transcribe", "translate"], \
                         help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')")
@@ -95,6 +98,7 @@ def cli():
     os.makedirs(output_dir, exist_ok=True)
 
     whisper_implementation = args.pop("whisper_implementation")
+    print(f"Using {whisper_implementation} for Whisper")
 
     if model_name.endswith(".en") and args["language"] not in {"en", "English"}:
         warnings.warn(f"{model_name} is an English-only model but received '{args['language']}'; using English instead.")
```
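cli.py applies the same environment-variable default, then pops the value out of the parsed-argument dict before the remaining entries are forwarded as transcription options. A small sketch of that dict-handling pattern; the argument values here are made up, and the dict mirrors the `parser.parse_args().__dict__` shape shown in the app.py diff:

```python
# Parsed arguments as a plain dict (as in args = parser.parse_args().__dict__).
args = {
    "whisper_implementation": "faster-whisper",  # hypothetical values for illustration
    "language": "en",
    "task": "transcribe",
}

# pop() both reads the value and removes the key, so the remaining dict can be
# forwarded to code that does not accept a whisper_implementation keyword.
whisper_implementation = args.pop("whisper_implementation")
print(f"Using {whisper_implementation} for Whisper")

assert "whisper_implementation" not in args
```
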
dockerfile
CHANGED

```diff
@@ -1,13 +1,23 @@
+# docker build -t whisper-webui --build-arg WHISPER_IMPLEMENTATION=whisper .
+
 FROM huggingface/transformers-pytorch-gpu
 EXPOSE 7860
 
+ARG WHISPER_IMPLEMENTATION=whisper
+ENV WHISPER_IMPLEMENTATION=${WHISPER_IMPLEMENTATION}
+
 ADD . /opt/whisper-webui/
 
 # Latest version of transformers-pytorch-gpu seems to lack tk.
 # Further, pip install fails, so we must upgrade pip first.
 RUN apt-get -y install python3-tk
-RUN python3 -m pip install --upgrade pip
-RUN python3 -m pip install -r /opt/whisper-webui/requirements.txt
+RUN python3 -m pip install --upgrade pip
+
+RUN if [ "${WHISPER_IMPLEMENTATION}" = "whisper" ]; then \
+    python3 -m pip install -r /opt/whisper-webui/requirements.txt; \
+    else \
+    python3 -m pip install -r /opt/whisper-webui/requirements-fasterWhisper.txt; \
+    fi
 
 # Note: Models will be downloaded on demand to the directory /root/.cache/whisper.
 # You can also bind this directory in the container to somewhere on the host.
```
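With the `ARG`/`ENV` pair, the implementation is chosen once at build time: the build argument selects which requirements file pip installs, and the same value is baked into the runtime environment, where app.py's `os.environ.get` picks it up. Note that overriding it later (e.g. with a hypothetical `docker run -e WHISPER_IMPLEMENTATION=faster-whisper`) would change the default without reinstalling dependencies, so it would likely fail at import time unless the other implementation's packages happen to be present.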
requirements-fastWhisper.txt → requirements-fasterWhisper.txt
RENAMED

```diff
@@ -5,4 +5,5 @@ gradio==3.23.0
 yt-dlp
 json5
 torch
-torchaudio
+torchaudio
+more_itertools
```
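The rename matters for the dockerfile above, which installs the file by the exact name requirements-fasterWhisper.txt; `more_itertools` is added as a dependency, presumably needed by the faster-whisper code path.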
src/whisper/whisperFactory.py
CHANGED

```diff
@@ -6,6 +6,8 @@ from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
 def create_whisper_container(whisper_implementation: str,
                              model_name: str, device: str = None, download_root: str = None,
                              cache: modelCache = None, models: List[ModelConfig] = []) -> AbstractWhisperContainer:
+    print("Creating whisper container for " + whisper_implementation)
+
     if (whisper_implementation == "whisper"):
         from src.whisper.whisperContainer import WhisperContainer
         return WhisperContainer(model_name, device, download_root, cache, models)
```
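The factory now logs which implementation it is about to construct. Note the lazy import inside the branch: the `whisper` package is only imported when that implementation is actually selected, so an image built from requirements-fasterWhisper.txt never touches it. A usage sketch under the assumption that it runs from the repository root; the keyword arguments mirror the signature above, and the model/device values are illustrative:

```python
from src.whisper.whisperFactory import create_whisper_container

# Illustrative values; any model name configured in ApplicationConfig would do.
container = create_whisper_container(whisper_implementation="whisper",
                                     model_name="base",
                                     device="cuda")
```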