# Source: app.py, revision a187c1a ("Update app.py", committed by DrishtiSharma), 2.19 kB.
import gradio as gr
import librosa
from transformers import AutoFeatureExtractor, AutoModelForSeq2SeqLM, AutoTokenizer, pipeline
def load_and_fix_data(input_file, model_sampling_rate):
    """Load an audio file as a mono waveform at the model's sampling rate.

    Args:
        input_file: Path of the audio file to read.
        model_sampling_rate: Sampling rate (in Hz) that the speech model
            expects.

    Returns:
        The 1-D waveform returned by librosa, resampled to
        ``model_sampling_rate`` when the file's native rate differs.
    """
    # sr=None keeps the file's native rate so the audio is resampled at most
    # once (librosa's default would first force everything to 22050 Hz).
    # mono=True down-mixes multi-channel audio by averaging the channels, so
    # no manual channel handling is needed afterwards.
    speech, sample_rate = librosa.load(input_file, sr=None, mono=True)
    if sample_rate != model_sampling_rate:
        # orig_sr/target_sr must be passed by keyword on librosa >= 0.10;
        # the keyword form is also accepted by older releases.
        speech = librosa.resample(
            speech, orig_sr=sample_rate, target_sr=model_sampling_rate
        )
    return speech
# --- Spanish speech recognition -------------------------------------------
# The feature extractor is only consulted for the sampling rate that the
# ASR checkpoint expects; the pipeline below does the actual transcription.
feature_extractor = AutoFeatureExtractor.from_pretrained(
    "jonatasgrosman/wav2vec2-large-xlsr-53-spanish"
)
sampling_rate = feature_extractor.sampling_rate
asr = pipeline(
    task="automatic-speech-recognition",
    model="jonatasgrosman/wav2vec2-large-xlsr-53-spanish",
)

# --- Spanish -> Quechua translation ----------------------------------------
model_name = "hackathon-pln-es/t5-small-finetuned-spanish-to-quechua"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Newline held in a variable so it can be interpolated into the result
# f-string built by the prediction function.
new_line = "\n"
def predict_and_ctc_lm_decode(input_file):
    """Transcribe Spanish speech and translate the transcript into Quechua.

    Args:
        input_file: Path of the recorded audio file. May be ``None`` or empty
            when the form is submitted without a recording.

    Returns:
        A single string containing the Spanish transcription followed by the
        Quechua translation, or a short notice when no audio was supplied.
    """
    # Without a recording there is nothing to load; answer with a readable
    # notice instead of failing inside the audio loader.
    if not input_file:
        return "No audio received. Please record an audio clip and try again."

    speech = load_and_fix_data(input_file, sampling_rate)
    # The ASR pipeline processes the waveform in 5 s chunks with 1 s stride
    # and returns a dict whose "text" entry holds the transcript.
    transcribed_text = asr(speech, chunk_length_s=5, stride_length_s=1)["text"]

    encoded = tokenizer(transcribed_text, return_tensors="pt")
    generated = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=40,
        num_beams=4,
        early_stopping=True,
    )
    translation = tokenizer.decode(generated[0], skip_special_tokens=True)
    return (
        f"Spanish Audio Transcription: {transcribed_text} \n"
        f" Quechua Translation :{translation}"
    )
# Gradio front end: one microphone recording in, one text box out.
audio_input = gr.inputs.Audio(
    source="microphone", type="filepath", label="Record your audio"
)
text_output = gr.outputs.Textbox()

demo = gr.Interface(
    fn=predict_and_ctc_lm_decode,
    inputs=[audio_input],
    outputs=[text_output],
    # Example clips are referenced by relative path.
    examples=[["audio_test.wav"], ["travel.wav"]],
    title="Spanish-Audio-Transcriptions-to-Quechua-Translation",
    description="This is a Gradio demo of Spanish Audio Transcriptions to Quechua Translation. To use this, simply provide an audio input (audio recording or via microphone), which will subsequently be transcribed and translated to Quechua language.",
    #article="<p><center><img src='........e'></center></p>",
    layout="horizontal",
    theme="huggingface",
)

# Start the app with request queuing and example caching enabled.
demo.launch(enable_queue=True, cache_examples=True)