# CoSTA / app.py
# bhavanishankarpullela's picture
# Updated app.py
# 20f17fc verified
import os
import tempfile

import gradio as gr
import requests
import soundfile as sf
from deep_translator import GoogleTranslator
_ASR_URL = 'https://asr.iitm.ac.in/internal/asr/decode'
_NO_AUDIO_MESSAGE = "No audio input provided!"
_ASR_ERROR_MESSAGE = "Error in ASR processing"
# Deletes the Devanagari danda and the ASCII full stop in a single pass.
_SENTENCE_MARKS = str.maketrans("", "", "।.")


def _transcribe(audio_file, language):
    """Upload ``audio_file`` to the ASR service and return its transcript.

    Args:
        audio_file: Path of the audio file to upload.
        language: Language name sent to the service in the ``language`` field.

    Returns:
        The ``transcript`` string from the JSON reply, or ``None`` when the
        HTTP request fails, the reply is not JSON, or it holds no string
        transcript.
    """
    try:
        # The handle is closed as soon as the upload finishes.
        with open(audio_file, "rb") as audio_handle:
            form_fields = {
                'file': audio_handle,
                'language': (None, language),
                'vtt': (None, 'true'),
            }
            # NOTE(review): no timeout is set, so a stalled service blocks
            # this request indefinitely; consider passing ``timeout=``.
            response = requests.post(_ASR_URL, files=form_fields)
    except requests.RequestException as error:
        print(f"ASR request failed: {error}")
        return None

    try:
        payload = response.json()
    except ValueError as error:
        # requests raises a ValueError subclass when the body is not JSON.
        print(f"ASR response was not valid JSON: {error}")
        return None

    print(payload)
    transcript = payload.get('transcript') if isinstance(payload, dict) else None
    return transcript if isinstance(transcript, str) else None


def speech_translation(audio, language):
    """Transcribe a speech recording and translate the transcript to English.

    Args:
        audio: Path of the recorded audio file, or ``None`` when nothing was
            recorded.
        language: Source language name; it is passed to both the ASR service
            and the translator.

    Returns:
        A single string: the English translation, ``"No audio input
        provided!"`` when ``audio`` is ``None``, ``"Error in ASR processing"``
        when no transcript could be obtained, or ``""`` when the transcript
        is blank after punctuation removal.
    """
    if audio is None:
        # One string only: every other path returns exactly one value.
        return _NO_AUDIO_MESSAGE

    if audio.endswith(".wav"):
        transcript = _transcribe(audio, language)
    else:
        # Convert to .wav inside a private scratch directory so concurrent
        # requests never share a file and the copy is removed afterwards.
        with tempfile.TemporaryDirectory() as scratch_dir:
            wav_path = os.path.join(scratch_dir, "temp_audio.wav")
            wav_data, samplerate = sf.read(audio)
            sf.write(wav_path, wav_data, samplerate)
            transcript = _transcribe(wav_path, language)

    if transcript is None:
        # Report the failure directly instead of translating the message.
        return _ASR_ERROR_MESSAGE

    cleaned = transcript.translate(_SENTENCE_MARKS)
    if not cleaned.strip():
        # Nothing left to translate.
        return ""

    translator = GoogleTranslator(source=language, target='en')
    return translator.translate(cleaned)
# Input widgets: the audio clip is handed to the handler as a file path,
# and the dropdown supplies the language name of the recording.
speech_input = gr.Audio(type="filepath", label="Record your speech")
language_input = gr.Dropdown(
    ["telugu", "hindi", "marathi", "bengali"],
    label="Select Language",
)

# Wire the widgets to speech_translation; its result fills one text output.
iface = gr.Interface(
    title="Speech Translation",
    description="Record your speech and get the English translation.",
    fn=speech_translation,
    inputs=[speech_input, language_input],
    outputs=["text"],
)

# Start the web UI, requesting a shareable link.
iface.launch(share=True)