Spaces:
Sleeping
Sleeping
import os | |
import whisper | |
def has_intersection(t1, t2) -> bool:
    """Return whether two closed intervals overlap.

    Args:
        t1: Indexable pair ``(start, end)`` for the first interval.
        t2: Indexable pair ``(start, end)`` for the second interval.

    Returns:
        ``False`` when one interval ends strictly before the other begins,
        ``True`` otherwise. Intervals that merely touch at an endpoint
        (e.g. ``(0, 5)`` and ``(5, 8)``) count as intersecting.
    """
    # The intervals are disjoint exactly when one lies entirely before the
    # other; anything else is an overlap.
    return not (t1[1] < t2[0] or t2[1] < t1[0])
class AudioTranslator:
    """Transcribe speech with a Whisper model and look up text by time span.

    Calling an instance runs the transcription and returns the model's
    ``"segments"`` list; ``match`` then collects the text of the segments
    that overlap a given interval.
    """

    def __init__(self, model='base', device='cuda', language='zh'):
        """Load the Whisper checkpoint.

        Args:
            model: Checkpoint name passed to ``whisper.load_model``.
            device: Device the model is loaded onto.
            language: Language hint forwarded to ``transcribe``. Defaults to
                ``"zh"``, the value that used to be hard-coded. Per the
                Whisper documentation ``None`` requests auto-detection --
                confirm against the installed version before relying on it.
        """
        self.device = device
        self.language = language
        # Load straight onto the requested device rather than loading on the
        # library's default device first and moving the weights afterwards.
        self.model = whisper.load_model(model, device=device)

    def __call__(self, video_path):
        """Transcribe the media file at ``video_path``.

        Args:
            video_path: Path handed unchanged to ``self.model.transcribe``.

        Returns:
            The ``"segments"`` entry of the transcription result.
        """
        print("Extract the audio results.")
        transcription = self.model.transcribe(video_path, language=self.language)
        audio_results = transcription["segments"]
        print("Finished.")
        return audio_results

    def match(self, audio_results, start, end) -> str:
        """Return the text of every segment overlapping ``[start, end]``.

        Args:
            audio_results: Iterable of segment mappings providing ``"start"``,
                ``"end"`` and ``"text"`` keys (the shape returned by calling
                this object).
            start: Lower bound of the query interval.
            end: Upper bound of the query interval. Both bounds are compared
                directly against the segment ``"start"``/``"end"`` values, so
                they must use the same unit (presumably seconds -- confirm
                against the caller).

        Returns:
            The matching segment texts in input order, each followed by one
            space (so a non-empty result ends with a trailing space), or
            ``''`` when no segment overlaps.
        """
        window = (start, end)
        return ''.join(
            seg['text'] + ' '
            for seg in audio_results
            if has_intersection(window, (seg["start"], seg["end"]))
        )