# Multimodal_Demo/models/whisper_model.py
import whisper


def has_intersection(t1, t2):
    """Return True when the time intervals t1 and t2 overlap."""
    return not (t1[1] < t2[0] or t2[1] < t1[0])


class AudioTranslator:
    """Thin wrapper around Whisper for transcribing the audio track of a video."""

    def __init__(self, model='base', device='cuda'):
        self.device = device
        self.model = whisper.load_model(model).to(device)

    def __call__(self, video_path):
        """Transcribe the audio of the given video and return Whisper's segment list."""
        print("Extracting the audio transcript.")
        audio_results = self.model.transcribe(video_path, language="zh")["segments"]
        print("Finished.")
        return audio_results

    def match(self, audio_results, start, end):
        """Concatenate the text of all segments that overlap the [start, end] time range."""
        transcript = ''
        for res in audio_results:
            if has_intersection((start, end), (res["start"], res["end"])):
                transcript += res['text'] + ' '
        return transcript
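

if __name__ == "__main__":
    # Minimal usage sketch, not part of the original module: the video path below is
    # hypothetical, and this assumes the openai-whisper package (and ffmpeg) are installed;
    # pass device='cpu' if no GPU is available.
    translator = AudioTranslator(model='base', device='cuda')
    segments = translator('example_video.mp4')               # assumed example file
    print(translator.match(segments, start=0.0, end=30.0))   # transcript of the first 30 seconds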