# textTOspeech / app.py
import os
import streamlit as st
from dotenv import load_dotenv
import base64
import requests
import time

# Load environment variables from the .env file
load_dotenv()

# Set your Hugging Face API token from the environment variable
HUGGINGFACE_API_TOKEN = os.getenv("HUGGINGFACE_API_TOKEN")
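# The .env file is expected to define this token, e.g. (placeholder value):
#   HUGGINGFACE_API_TOKEN=hf_xxxxxxxxxxxxxxxx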

# Translation API URL (using LibreTranslate as an example)
TRANSLATION_API_URL = "https://libretranslate.com/translate"
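# Note: the hosted libretranslate.com API may require an "api_key" field in the
# request payload; a self-hosted LibreTranslate instance could be pointed to here instead.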

# Function to get user input
def get_text():
    input_text = st.text_input("Enter text for speech generation:", key="input")
    return input_text

# Function to select the language of the input text (used as the translation source)
def select_language():
    language_options = {
        "English": "en",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "Italian": "it",
    }
    selected_language = st.selectbox("Select Language", list(language_options.keys()))
    return language_options[selected_language]

# Function to translate text to English (using the LibreTranslate API as an example)
def translate_text(text, source_language):
    if source_language != "en":
        payload = {
            "q": text,
            "source": source_language,
            "target": "en",
        }
        response = requests.post(TRANSLATION_API_URL, json=payload)
        if response.status_code == 200:
            return response.json().get("translatedText", "")
        else:
            st.error(f"Error: {response.status_code} - Could not translate text.")
            return text
    return text  # If the text is already in English, return it unchanged

# Function to send a request to the Hugging Face API with retry on 503 and error handling for 500
def generate_speech(model_id, text):
    url = f"https://api-inference.huggingface.co/models/{model_id}"
    headers = {"Authorization": f"Bearer {HUGGINGFACE_API_TOKEN}"}
    payload = {"inputs": text}
    while True:
        response = requests.post(url, headers=headers, json=payload)
        if response.status_code == 200:
            return response.content  # Return the raw audio bytes if successful
        elif response.status_code == 503:
            # Parse the estimated wait time from the response and wait before retrying
            response_data = response.json()
            estimated_time = response_data.get("estimated_time", 10)
            st.info(f"Model is loading, please wait {estimated_time} seconds...")
            time.sleep(estimated_time)
        elif response.status_code == 500:
            st.error("Internal Server Error: The model encountered an issue. Please try again later.")
            return None
        else:
            st.error(f"Error: {response.status_code} - {response.text}")
            return None
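
# Note: on success the Inference API returns raw audio bytes; the exact container
# format depends on the model, and play_audio below assumes it can be embedded as WAV.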

# Function to play audio in Streamlit
def play_audio(audio_bytes):
    b64 = base64.b64encode(audio_bytes).decode()
    audio_html = f"""
    <audio controls autoplay>
        <source src="data:audio/wav;base64,{b64}" type="audio/wav">
        Your browser does not support the audio element.
    </audio>
    """
    st.markdown(audio_html, unsafe_allow_html=True)
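
# Note: st.audio(audio_bytes, format="audio/wav") is a simpler alternative to the
# raw HTML above, which is presumably used here to get autoplay behavior.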

# Initialize Streamlit UI
st.set_page_config(page_title="Multilingual Text-to-Speech", page_icon="πŸ”Š")
st.header("Multilingual Text-to-Speech Demo")

# Get user input and language selection
user_input = get_text()
selected_language = select_language()

# Create a button for generating speech
submit = st.button('Generate Speech')

# If the generate button is clicked and user input is not empty
if submit and user_input:
    with st.spinner("Translating and generating speech..."):
        # Translate the text to English if needed
        translated_text = translate_text(user_input, selected_language)

        # Display both the original and translated text
        if selected_language != "en":
            st.write(f"Original Text: {user_input}")
            st.write(f"Translated Text: {translated_text}")

        # Generate speech with retry if the model is loading and handle 500 errors
        audio_data = generate_speech("myshell-ai/MeloTTS-English", translated_text)
        if audio_data:
            play_audio(audio_data)
elif submit:
    st.warning("Please enter some text.")  # Warning for empty input
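
# To run the app locally (assuming Streamlit and the other dependencies are installed):
#   streamlit run app.py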