Spaces:
Running
Running
import os | |
import gradio as gr | |
import torch | |
from transformers import pipeline | |
# Heading passed as ``title=`` to both Gradio interfaces defined below.
title = "Transcribe speech in several languages"

# Use the first CUDA GPU when torch reports one, otherwise fall back to CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# German wav2vec2 recogniser. It is given the same ``device`` as the Whisper
# pipeline so that it does not stay on the CPU when a GPU is available.
asr_pipe_audio2Text_Ge = pipeline(
    task="automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-german",
    device=device,
)

# Whisper recogniser, used for transcription and for the "translate" task.
asr_pipe_whisper = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-medium",
    device=device,
)
def transcribeFile(inputlang, audio_path: str) -> str:
    """Transcribe an audio file with the model selected by ``inputlang``.

    Args:
        inputlang: Source-language choice. ``"Auto Detect"`` runs the Whisper
            pipeline with the ``"transcribe"`` task; ``"German"`` runs the
            German wav2vec2 pipeline.
        audio_path: Filesystem path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of the selected pipeline's result.

    Raises:
        gr.Error: If no audio path was supplied, or if ``inputlang`` is not
            one of the supported choices.
    """
    # Gradio passes None when the user submits without providing audio.
    if not audio_path:
        raise gr.Error("Please provide an audio file to transcribe.")

    # Chunking settings shared by both recognisers.
    chunking = {
        "chunk_length_s": 10,
        "stride_length_s": (4, 2),
        "batch_size": 32,
    }

    if inputlang == "Auto Detect":
        transcription = asr_pipe_whisper(
            audio_path,
            generate_kwargs={"task": "transcribe"},
            **chunking,
        )
    elif inputlang == "German":
        transcription = asr_pipe_audio2Text_Ge(audio_path, **chunking)
    else:
        # Without this branch an unexpected choice would surface as an
        # UnboundLocalError on the return statement below.
        raise gr.Error(f"Unsupported source language: {inputlang!r}")

    return transcription["text"]
def translateAudio(audio_path):
    """Run the Whisper pipeline on ``audio_path`` with the ``"translate"`` task.

    Generation is capped at 256 new tokens. The pipeline's result object is
    returned as-is; the caller in this file reads its ``"text"`` entry.
    """
    return asr_pipe_whisper(
        audio_path,
        generate_kwargs={"task": "translate"},
        max_new_tokens=256,
    )
def transcribeFileMulti(inputlang, audio_path: str) -> str:
    """Transcribe a recording and append the Whisper translation of it.

    Args:
        inputlang: Source-language choice. ``"Auto Detect"`` and ``"English"``
            run the Whisper pipeline; ``"German"`` runs the German wav2vec2
            pipeline.
        audio_path: Filesystem path of the recorded audio.

    Returns:
        The transcription text immediately followed by the translation text
        (the two strings are concatenated without an added separator).

    Raises:
        gr.Error: If no audio path was supplied, or if ``inputlang`` is not
            one of the supported choices.
    """
    # Gradio passes None when the user submits without recording anything.
    if not audio_path:
        raise gr.Error("Please record some audio to transcribe.")

    # The microphone tab's radio offers "Auto Detect" and "German". The
    # original code only matched "English"/"German", so the default
    # "Auto Detect" selection left `transcription` unbound. "English" is still
    # accepted so any existing caller using that value keeps working.
    if inputlang in ("Auto Detect", "English"):
        transcription = asr_pipe_whisper(audio_path)
    elif inputlang == "German":
        transcription = asr_pipe_audio2Text_Ge(audio_path)
    else:
        raise gr.Error(f"Unsupported source language: {inputlang!r}")

    translation = translateAudio(audio_path)
    return transcription["text"] + translation["text"]
# File-upload interface: choose a source language, upload audio, and receive
# the text returned by transcribeFile.
app1 = gr.Interface(
    title=title,
    fn=transcribeFile,
    inputs=[
        gr.Radio(
            choices=["Auto Detect", "German"],
            value="Auto Detect",
            label="Source Language",
            info="Select the language of the speech you want to transcribe",
        ),
        gr.Audio(
            source="upload",
            type="filepath",
            label="Upload audio file",
        ),
    ],
    outputs="text",
)
# Microphone interface: choose a source language, record audio, and receive
# the text returned by transcribeFileMulti.
app2 = gr.Interface(
    title=title,
    fn=transcribeFileMulti,
    inputs=[
        gr.Radio(
            choices=["Auto Detect", "German"],
            value="Auto Detect",
            label="Source Language",
            info="Select the language of the speech you want to transcribe",
        ),
        gr.Audio(
            source="microphone",
            type="filepath",
        ),
    ],
    outputs="text",
)
# Combine both interfaces into one app with a tab per input method.
demo = gr.TabbedInterface(
    [app1, app2],
    ["Audio File", "Microphone"],
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()