Add an option for automatically configuring parallel execution
- app.py +16 -1
- cli.py +3 -0
- src/vadParallel.py +4 -0
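This commit adds an auto_parallel option to both the Gradio app (app.py) and the CLI (cli.py). When enabled, the transcriber claims every CUDA device that torch reports and caps VAD pre-processing at MAX_AUTO_CPU_CORES (8) CPU cores; the existing --vad_parallel_devices and --vad_cpu_cores options remain for explicit control. A small guard in src/vadParallel.py also stops sub-second chunks from being dispatched to workers. Short illustrative sketches follow each diff below.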
app.py
CHANGED
@@ -6,6 +6,8 @@ from io import StringIO
 import os
 import pathlib
 import tempfile
+
+import torch
 from src.modelCache import ModelCache
 from src.vadParallel import ParallelContext, ParallelTranscription
 
@@ -29,6 +31,9 @@ DELETE_UPLOADED_FILES = True
 # Gradio seems to truncate files without keeping the extension, so we need to truncate the file prefix ourselves
 MAX_FILE_PREFIX_LENGTH = 17
 
+# Limit auto_parallel to a certain number of CPUs (specify vad_cpu_cores to get a higher number)
+MAX_AUTO_CPU_CORES = 8
+
 LANGUAGES = [
     "English", "Chinese", "German", "Spanish", "Russian", "Korean",
     "French", "Japanese", "Portuguese", "Turkish", "Polish", "Catalan",
@@ -65,6 +70,14 @@ class WhisperTranscriber:
     def set_parallel_devices(self, vad_parallel_devices: str):
         self.parallel_device_list = [ device.strip() for device in vad_parallel_devices.split(",") ] if vad_parallel_devices else None
 
+    def set_auto_parallel(self, auto_parallel: bool):
+        if auto_parallel:
+            if torch.cuda.is_available():
+                self.parallel_device_list = [ str(gpu_id) for gpu_id in range(torch.cuda.device_count())]
+
+            self.vad_cpu_cores = min(os.cpu_count(), MAX_AUTO_CPU_CORES)
+            print("[Auto parallel] Using GPU devices " + str(self.parallel_device_list) + " and " + str(self.vad_cpu_cores) + " CPU cores for VAD/transcription.")
+
     def transcribe_webui(self, modelName, languageName, urlData, uploadFile, microphoneData, task, vad, vadMergeWindow, vadMaxMergeSize, vadPadding, vadPromptWindow):
         try:
             source, sourceName = self.__get_source(urlData, uploadFile, microphoneData)
@@ -268,11 +281,12 @@ class WhisperTranscriber:
 
 
 def create_ui(input_audio_max_duration, share=False, server_name: str = None, server_port: int = 7860,
-              default_model_name: str = "medium", default_vad: str = None, vad_parallel_devices: str = None, vad_process_timeout: float = None, vad_cpu_cores: int = 1):
+              default_model_name: str = "medium", default_vad: str = None, vad_parallel_devices: str = None, vad_process_timeout: float = None, vad_cpu_cores: int = 1, auto_parallel: bool = False):
     ui = WhisperTranscriber(input_audio_max_duration, vad_process_timeout, vad_cpu_cores)
 
     # Specify a list of devices to use for parallel processing
     ui.set_parallel_devices(vad_parallel_devices)
+    ui.set_auto_parallel(auto_parallel)
 
     ui_description = "Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse "
     ui_description += " audio and is also a multi-task model that can perform multilingual speech recognition "
@@ -319,6 +333,7 @@ if __name__ == '__main__':
     parser.add_argument("--vad_parallel_devices", type=str, default="", help="A comma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
     parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
     parser.add_argument("--vad_process_timeout", type=float, default="1800", help="The number of seconds before inactive processes are terminated. Use 0 to close processes immediately, or None for no timeout.")
+    parser.add_argument("--auto_parallel", type=bool, default=False, help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.")
 
     args = parser.parse_args().__dict__
     create_ui(**args)
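The core of the change is set_auto_parallel, which is plain device introspection plus a cap. A minimal standalone sketch of the same logic, assuming a CUDA-enabled torch build (detect_parallel_config is a hypothetical helper, not part of the commit):

import os

import torch

MAX_AUTO_CPU_CORES = 8  # same cap as in app.py

def detect_parallel_config():
    # One transcription worker per visible CUDA device; None leaves GPU parallelism disabled.
    gpu_devices = [str(i) for i in range(torch.cuda.device_count())] if torch.cuda.is_available() else None
    # Cap CPU cores so large hosts do not spawn an excessive number of VAD processes.
    cpu_cores = min(os.cpu_count() or 1, MAX_AUTO_CPU_CORES)
    return gpu_devices, cpu_cores

print(detect_parallel_config())  # e.g. (['0', '1'], 8) on a dual-GPU, 16-core host

Note that create_ui calls set_auto_parallel after set_parallel_devices, so enabling auto_parallel overrides an explicit device list whenever CUDA is available.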
cli.py
CHANGED
@@ -34,6 +34,7 @@ def cli():
     parser.add_argument("--vad_prompt_window", type=optional_float, default=3, help="The window size of the prompt to pass to Whisper")
     parser.add_argument("--vad_cpu_cores", type=int, default=1, help="The number of CPU cores to use for VAD pre-processing.")
     parser.add_argument("--vad_parallel_devices", type=str, default="", help="A comma delimited list of CUDA devices to use for parallel processing. If None, disable parallel processing.")
+    parser.add_argument("--auto_parallel", type=bool, default=False, help="True to use all available GPUs and CPU cores for processing. Use vad_cpu_cores/vad_parallel_devices to specify the number of CPU cores/GPUs to use.")
 
     parser.add_argument("--temperature", type=float, default=0, help="temperature to use for sampling")
     parser.add_argument("--best_of", type=optional_int, default=5, help="number of candidates when sampling with non-zero temperature")
@@ -75,10 +76,12 @@ def cli():
     vad_padding = args.pop("vad_padding")
     vad_prompt_window = args.pop("vad_prompt_window")
     vad_cpu_cores = args.pop("vad_cpu_cores")
+    auto_parallel = args.pop("auto_parallel")
 
     model = WhisperContainer(model_name, device=device, download_root=model_dir)
     transcriber = WhisperTranscriber(delete_uploaded_files=False, vad_cpu_cores=vad_cpu_cores)
     transcriber.set_parallel_devices(args.pop("vad_parallel_devices"))
+    transcriber.set_auto_parallel(auto_parallel)
 
     if (transcriber._has_parallel_devices()):
         print("Using parallel devices:", transcriber.parallel_device_list)
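With the flag plumbed through cli.py, auto-parallel becomes a single switch on the command line. One caveat: argparse's type=bool calls bool() on the raw string, so any non-empty value (even "false") parses as True; omit the flag to keep the default of False. A hypothetical invocation (model and VAD choices are illustrative):

python cli.py --model medium --vad silero-vad --auto_parallel true audio.mp3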
src/vadParallel.py
CHANGED
@@ -176,6 +176,10 @@ class ParallelTranscription(AbstractTranscription):
         while (chunk_start < total_duration):
             chunk_end = min(chunk_start + chunk_size, total_duration)
 
+            if (chunk_end - chunk_start < 1):
+                # No need to process chunks that are less than 1 second
+                break
+
             print("Parallel VAD: Executing chunk from " + str(chunk_start) + " to " +
                   str(chunk_end) + " on CPU device " + str(cpu_device_id))
             parameters.append([audio, config, chunk_start, chunk_end]);
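The new guard matters when the audio length does not divide evenly across workers: the trailing sliver can be shorter than a second, and spawning a process for it costs more than it saves. A sketch of the partitioning loop with the early exit, where partition_chunks is illustrative and chunk_size stands in for whatever per-worker size the caller computes:

def partition_chunks(total_duration: float, chunk_size: float):
    # Split [0, total_duration) into fixed-size chunks, dropping a sub-second tail.
    chunks = []
    chunk_start = 0.0
    while chunk_start < total_duration:
        chunk_end = min(chunk_start + chunk_size, total_duration)
        if chunk_end - chunk_start < 1:
            # Mirrors the new guard: chunks shorter than 1 second are not worth a worker.
            break
        chunks.append((chunk_start, chunk_end))
        chunk_start = chunk_end
    return chunks

print(partition_chunks(10.5, 2.0))  # five 2-second chunks; the final 0.5 s tail is skipped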