Spaces:
Running
Running
import requests | |
from deep_translator import GoogleTranslator | |
import gradio as gr | |
import soundfile as sf | |
def speech_translation(audio, language):
    """Transcribe a recorded audio file and translate the transcript to English.

    Args:
        audio: Path to the recorded audio file, or ``None`` when nothing was
            recorded.
        language: Source language name. It is sent to the ASR service and
            used as the translator's source language.

    Returns:
        A single string: the English translation, or a short error message
        when no audio was supplied or the ASR step failed.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    if audio is None:
        # The interface declares exactly one text output, so return one string.
        return "No audio input provided!"

    temp_path = None
    try:
        # Convert audio to .wav format if not already
        if audio.endswith(".wav"):
            audio_file = audio
        else:
            wav_data, samplerate = sf.read(audio)
            # A unique temp file avoids concurrent requests overwriting each
            # other's converted audio.
            fd, temp_path = tempfile.mkstemp(suffix=".wav")
            os.close(fd)
            sf.write(temp_path, wav_data, samplerate)
            audio_file = temp_path

        # ASR processing
        try:
            with open(audio_file, "rb") as audio_fh:
                files = {
                    'file': audio_fh,
                    'language': (None, language),
                    'vtt': (None, 'true'),
                }
                response = requests.post(
                    'https://asr.iitm.ac.in/internal/asr/decode',
                    files=files,
                    timeout=60,  # do not hang forever on an unresponsive service
                )
            payload = response.json()
            print(payload)
            asr_output = payload['transcript']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Network failure, non-JSON reply, or a reply without a transcript.
            return "Error in ASR processing"
    finally:
        # Remove the converted copy whether or not the request succeeded.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)

    if not isinstance(asr_output, str):
        return "Error in ASR processing"

    # Strip sentence-final punctuation (danda and full stop) before translating.
    asr_output = asr_output.replace("।", "").replace(".", "")

    translator = GoogleTranslator(source=language, target='en')
    return translator.translate(asr_output)
# Gradio UI: an audio input delivered to the handler as a file path, a
# source-language dropdown, and a single text output for the translation.
iface = gr.Interface(
    title="Speech Translation",
    description="Record your speech and get the English translation.",
    fn=speech_translation,
    inputs=[
        gr.Audio(label="Record your speech", type="filepath"),
        gr.Dropdown(
            choices=["telugu", "hindi", "marathi", "bengali"],
            label="Select Language",
        ),
    ],
    outputs=["text"],
)

# Start the app; share=True asks Gradio for a public share link.
iface.launch(share=True)