nizarzerouale's picture
Upload 10 files
5397a6e verified
raw
history blame
1.34 kB
import librosa
import torch
def preprocess_audio(file_path, target_sr=16000):
    """
    Read an audio file and convert it to the requested sampling rate.

    The file is first loaded at its own (native) sampling rate and is then
    resampled to ``target_sr`` in a separate step.

    Parameters:
        file_path (str): Path to the audio file.
        target_sr (int): Target sampling rate. Defaults to 16000 Hz.

    Returns:
        resampled_audio (np.ndarray): Resampled audio data.
    """
    # sr=None asks librosa to keep the file's original sample rate on load.
    waveform, native_sr = librosa.load(file_path, sr=None)
    return librosa.resample(waveform, orig_sr=native_sr, target_sr=target_sr)
def transcribe_audio(model, processor, audio, target_sr=16000):
    """
    Transcribes the given audio using the Whisper model.

    Parameters:
        model: The Whisper model. Must provide ``generate``; if it exposes a
            ``device`` attribute, the input features are moved to that device
            before generation.
        processor: The processor used for preparing the input features. It is
            called on the audio and must also provide ``batch_decode``.
        audio (np.ndarray): The resampled audio data.
        target_sr (int): The sampling rate of ``audio``, forwarded to the
            processor. Defaults to 16000 Hz.

    Returns:
        transcription (str): The transcribed text from the audio (the first
        entry of the decoded batch).
    """
    input_features = processor(
        audio, sampling_rate=target_sr, return_tensors="pt"
    ).input_features

    # The features are created independently of the model, so they may live on
    # a different device (e.g. CPU features vs. a GPU model). Align them with
    # the model when it reports a device; otherwise leave them untouched.
    model_device = getattr(model, "device", None)
    if model_device is not None:
        input_features = input_features.to(model_device)

    # Inference only: gradient tracking is not needed for generation.
    with torch.no_grad():
        predicted_ids = model.generate(input_features)

    # A single clip is processed, so the first decoded string is the result.
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]