Slower-whisper

Sleeping

App Files Files Community

Slower-whisper / src /whisper /whisperContainer.py

aadnk

Adding support for word timestamps

f55c594 over 1 year ago

raw

history blame

9.17 kB

	# External programs
	import abc
	import os
	import sys
	from typing import List
	from urllib.parse import urlparse
	import torch
	import urllib3
	from src.hooks.progressListener import ProgressListener

	import whisper
	from whisper import Whisper

	from src.config import ModelConfig, VadInitialPromptMode
	from src.hooks.whisperProgressHook import create_progress_listener_handle

	from src.modelCache import GLOBAL_MODEL_CACHE, ModelCache
	from src.utils import download_file
	from src.whisper.abstractWhisperContainer import AbstractWhisperCallback, AbstractWhisperContainer

	class WhisperContainer(AbstractWhisperContainer):
	def __init__(self, model_name: str, device: str = None, compute_type: str = "float16",
	download_root: str = None,
	cache: ModelCache = None, models: List[ModelConfig] = []):
	if device is None:
	device = "cuda" if torch.cuda.is_available() else "cpu"
	super().__init__(model_name, device, compute_type, download_root, cache, models)

	def ensure_downloaded(self):
	"""
	Ensure that the model is downloaded. This is useful if you want to ensure that the model is downloaded before
	passing the container to a subprocess.
	"""
	# Warning: Using private API here
	try:
	root_dir = self.download_root
	model_config = self._get_model_config()

	if root_dir is None:
	root_dir = os.path.join(os.path.expanduser("~"), ".cache", "whisper")

	if self.model_name in whisper._MODELS:
	whisper._download(whisper._MODELS[self.model_name], root_dir, False)
	else:
	# If the model is not in the official list, see if it needs to be downloaded
	model_config.download_url(root_dir)
	return True

	except Exception as e:
	# Given that the API is private, it could change at any time. We don't want to crash the program
	print("Error pre-downloading model: " + str(e))
	return False

	def _get_model_config(self) -> ModelConfig:
	"""
	Get the model configuration for the model.
	"""
	for model in self.models:
	if model.name == self.model_name:
	return model
	return None

	def _create_model(self):
	print("Loading whisper model " + self.model_name)
	model_config = self._get_model_config()

	# Note that the model will not be downloaded in the case of an official Whisper model
	model_path = self._get_model_path(model_config, self.download_root)

	return whisper.load_model(model_path, device=self.device, download_root=self.download_root)

	def create_callback(self, language: str = None, task: str = None, initial_prompt: str = None,
	initial_prompt_mode: VadInitialPromptMode = VadInitialPromptMode.PREPREND_FIRST_SEGMENT,
	**decodeOptions: dict) -> AbstractWhisperCallback:
	"""
	Create a WhisperCallback object that can be used to transcript audio files.

	Parameters
	----------
	language: str
	The target language of the transcription. If not specified, the language will be inferred from the audio content.
	task: str
	The task - either translate or transcribe.
	initial_prompt: str
	The initial prompt to use for the transcription.
	initial_prompt_mode: VadInitialPromptMode
	The mode to use for the initial prompt. If set to PREPEND_FIRST_SEGMENT, the initial prompt will be prepended to the first segment of audio.
	If set to PREPEND_ALL_SEGMENTS, the initial prompt will be prepended to all segments of audio.
	decodeOptions: dict
	Additional options to pass to the decoder. Must be pickleable.

	Returns
	-------
	A WhisperCallback object.
	"""
	return WhisperCallback(self, language=language, task=task, initial_prompt=initial_prompt, initial_prompt_mode=initial_prompt_mode, **decodeOptions)

	def _get_model_path(self, model_config: ModelConfig, root_dir: str = None):
	from src.conversion.hf_converter import convert_hf_whisper
	"""
	Download the model.

	Parameters
	----------
	model_config: ModelConfig
	The model configuration.
	"""
	# See if path is already set
	if model_config.path is not None:
	return model_config.path

	if root_dir is None:
	root_dir = os.path.join(os.path.expanduser("~"), ".cache", "whisper")

	model_type = model_config.type.lower() if model_config.type is not None else "whisper"

	if model_type in ["huggingface", "hf"]:
	model_config.path = model_config.url
	destination_target = os.path.join(root_dir, model_config.name + ".pt")

	# Convert from HuggingFace format to Whisper format
	if os.path.exists(destination_target):
	print(f"File {destination_target} already exists, skipping conversion")
	else:
	print("Saving HuggingFace model in Whisper format to " + destination_target)
	convert_hf_whisper(model_config.url, destination_target)

	model_config.path = destination_target

	elif model_type in ["whisper", "w"]:
	model_config.path = model_config.url

	# See if URL is just a file
	if model_config.url in whisper._MODELS:
	# No need to download anything - Whisper will handle it
	model_config.path = model_config.url
	elif model_config.url.startswith("file://"):
	# Get file path
	model_config.path = urlparse(model_config.url).path
	# See if it is an URL
	elif model_config.url.startswith("http://") or model_config.url.startswith("https://"):
	# Extension (or file name)
	extension = os.path.splitext(model_config.url)[-1]
	download_target = os.path.join(root_dir, model_config.name + extension)

	if os.path.exists(download_target) and not os.path.isfile(download_target):
	raise RuntimeError(f"{download_target} exists and is not a regular file")

	if not os.path.isfile(download_target):
	download_file(model_config.url, download_target)
	else:
	print(f"File {download_target} already exists, skipping download")

	model_config.path = download_target
	# Must be a local file
	else:
	model_config.path = model_config.url

	else:
	raise ValueError(f"Unknown model type {model_type}")

	return model_config.path

	class WhisperCallback(AbstractWhisperCallback):
	def __init__(self, model_container: WhisperContainer, language: str = None, task: str = None, initial_prompt: str = None,
	initial_prompt_mode: VadInitialPromptMode=VadInitialPromptMode.PREPREND_FIRST_SEGMENT, **decodeOptions: dict):
	self.model_container = model_container
	self.language = language
	self.task = task
	self.initial_prompt = initial_prompt
	self.initial_prompt_mode = initial_prompt_mode
	self.decodeOptions = decodeOptions

	def invoke(self, audio, segment_index: int, prompt: str, detected_language: str, progress_listener: ProgressListener = None):
	"""
	Peform the transcription of the given audio file or data.

	Parameters
	----------
	audio: Union[str, np.ndarray, torch.Tensor]
	The audio file to transcribe, or the audio data as a numpy array or torch tensor.
	segment_index: int
	The target language of the transcription. If not specified, the language will be inferred from the audio content.
	task: str
	The task - either translate or transcribe.
	progress_listener: ProgressListener
	A callback to receive progress updates.
	"""
	model = self.model_container.get_model()

	if progress_listener is not None:
	with create_progress_listener_handle(progress_listener):
	return self._transcribe(model, audio, segment_index, prompt, detected_language)
	else:
	return self._transcribe(model, audio, segment_index, prompt, detected_language)

	def _transcribe(self, model: Whisper, audio, segment_index: int, prompt: str, detected_language: str):
	decodeOptions = self.decodeOptions.copy()

	# Add fp16
	if self.model_container.compute_type in ["fp16", "float16"]:
	decodeOptions["fp16"] = True

	initial_prompt = self._get_initial_prompt(self.initial_prompt, self.initial_prompt_mode, prompt, segment_index)

	result = model.transcribe(audio, \
	language=self.language if self.language else detected_language, task=self.task, \
	initial_prompt=initial_prompt, \
	**decodeOptions
	)
	return result