|
import gradio as gr |
|
import tempfile |
|
import os |
|
from gtts import gTTS |
|
from deep_translator import GoogleTranslator |
|
from groq import Groq |
|
import logging |
|
from sentence_transformers import SentenceTransformer |
|
import numpy as np |
|
|
|
# App-wide logging: timestamped INFO-level lines to stderr.
logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')

# Groq chat-completion client. The key is read from the environment; if
# GROQ_API_KEY is unset this is None and requests fail at call time, not here.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Embedding model backing the lightweight in-memory retrieval index.
# NOTE(review): all-MiniLM-L6-v2 output is NOT normalized by default.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

# In-memory "vector store": two parallel lists, chunk i of indexed_texts
# corresponds to embedding i of indexed_embeddings. Mutated by index_text()
# and clear_index().
indexed_texts = []

indexed_embeddings = []

# Display name -> language code for the translation dropdown
# (codes consumed by deep_translator's GoogleTranslator).
translation_languages = {

    "English": "en",

    "Arabic": "ar",

    "Hindi": "hi",

    "Kannada": "kn",

    "Marathi": "mr",

    "Telugu": "te",

    "Tamil": "ta",

    "Gujarati": "gu",

    "Malayalam": "ml"

}

# Display name -> gTTS language code for the audio dropdown. Kept as nested
# dicts (only "code" today) so per-language options can be added later.
audio_language_dict = {

    "English": {"code": "en"},

    "Arabic": {"code": "ar"},

    "Hindi": {"code": "hi"},

    "Kannada": {"code": "kn"},

    "Marathi": {"code": "mr"},

    "Telugu": {"code": "te"},

    "Tamil": {"code": "ta"},

    "Gujarati": {"code": "gu"},

    "Malayalam": {"code": "ml"}

}
|
|
|
def index_text(text: str) -> str:
    """Split *text* into sentence chunks, embed them, and add them to the index.

    Appends to the module-level parallel lists ``indexed_texts`` /
    ``indexed_embeddings``; the index is cumulative until clear_index().

    Returns a human-readable status string (also used on error — the UI
    displays whatever comes back in the status textbox).
    """
    global indexed_texts, indexed_embeddings
    try:
        # Strip each fragment so whitespace-only chunks (e.g. from "a.  b")
        # are not indexed; the original truthiness test let " " through.
        chunks = [chunk.strip() for chunk in text.split('. ') if chunk.strip()]
        if chunks:
            # Batch-encode: one model forward pass instead of one per chunk.
            embeddings = sentence_model.encode(chunks)
            indexed_texts.extend(chunks)
            indexed_embeddings.extend(embeddings)
        return f"Text indexed successfully. Total indexed chunks: {len(indexed_texts)}"
    except Exception as e:
        return f"Error indexing text: {str(e)}"
|
|
|
def clear_index() -> str:
    """Empty the in-memory index so a fresh corpus can be loaded."""
    global indexed_texts, indexed_embeddings
    # In-place deletion keeps the same list objects other references may hold.
    del indexed_texts[:]
    del indexed_embeddings[:]
    return "Index cleared successfully. Ready for new indexing."
|
|
|
def find_most_similar(query: str, top_k: int = 3) -> list:
    """Return up to *top_k* indexed chunks most similar to *query*.

    Uses cosine similarity. The previous raw dot product skewed rankings
    toward chunks with larger embedding magnitudes, because
    all-MiniLM-L6-v2 embeddings are not normalized by default.

    Returns a single-element placeholder list when nothing is indexed yet.
    """
    if not indexed_texts:
        return ["No indexed text available."]
    query_embedding = sentence_model.encode([query])[0]
    doc_matrix = np.asarray(indexed_embeddings)
    # Cosine similarity = dot product / (norms); epsilon guards divide-by-zero.
    doc_norms = np.linalg.norm(doc_matrix, axis=1)
    query_norm = np.linalg.norm(query_embedding)
    similarities = doc_matrix @ query_embedding / (doc_norms * query_norm + 1e-10)
    # argsort ascending -> take the last top_k and reverse for best-first order.
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return [indexed_texts[i] for i in top_indices]
|
|
|
def chat_with_context(question: str, model: str) -> str:
    """Answer *question* via the Groq chat API, grounded in the indexed text.

    Retrieves the three most relevant indexed chunks, stuffs them into a
    Context/Question prompt, and returns the model's answer (or an error
    string — the UI shows whatever comes back).
    """
    if not indexed_texts:
        return "Please index some text first."

    # Retrieval step: concatenate the best-matching chunks into one context.
    context = " ".join(find_most_similar(question, top_k=3))
    prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"

    try:
        completion = groq_client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
            max_tokens=500,
        )
        return completion.choices[0].message.content
    except Exception as e:
        logging.error(f"Error in chat: {str(e)}")
        return f"Error in chat: {str(e)}"
|
|
|
|
|
def translate_text(text, target_lang_code):
    """Translate *text* into *target_lang_code*, auto-detecting the source.

    Returns the translated string, or an error message on failure.
    """
    try:
        return GoogleTranslator(source='auto', target=target_lang_code).translate(text)
    except Exception as e:
        return f"Translation Error: {str(e)}"
|
|
|
|
|
def google_tts(text, lang):
    """Synthesize *text* to an MP3 with Google TTS.

    Returns ``(path, message)`` on success or ``(None, error_message)`` on
    failure — the two-tuple feeds the (audio, textbox) Gradio outputs.

    NOTE: the temp file is created with delete=False and is never removed
    here; Gradio needs it to exist after this function returns.
    """
    try:
        tts = gTTS(text=text, lang=lang, slow=False)
        # Create the temp path, then CLOSE the handle before gTTS writes to
        # it by name: saving into a still-open NamedTemporaryFile fails on
        # Windows, where the open handle locks the file.
        temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        temp_audio.close()
        tts.save(temp_audio.name)
        return temp_audio.name, f"Speech generated with Google TTS using {lang} language"
    except Exception as e:
        return None, f"Error in Google TTS: {str(e)}"
|
|
|
# Gradio UI: layout first, then callback defs, then event wiring.
# Component creation order fixes on-page order; .click/.change wiring below.
with gr.Blocks() as iface:

    gr.Markdown("# Free Text-to-Speech Tool with Language Translation and Chat")

    # --- Input + translation controls ---
    with gr.Row():
        text_input = gr.Textbox(label="Enter text for translation and speech generation", lines=3)

    with gr.Row():
        translation_lang_dropdown = gr.Dropdown(list(translation_languages.keys()), label="Select Translation Language", value="English")
        convert_button = gr.Button("Convert")

    translated_text = gr.Textbox(label="Translated Text")

    # --- Indexing controls (feed the RAG chat) ---
    with gr.Row():
        index_button = gr.Button("Index")
        clear_index_button = gr.Button("Clear Index")

    index_status = gr.Textbox(label="Indexing Status")

    # When checked, the chat panel appears and its answer is used as TTS input.
    use_chat = gr.Checkbox(label="Use Chat for TTS input", value=False)

    # Hidden until use_chat is ticked (see update_chat_visibility below).
    chat_group = gr.Group(visible=False)
    with chat_group:
        chat_input = gr.Textbox(label="Ask a question about the indexed text")
        chat_model = gr.Dropdown(
            choices=["llama3-70b-8192", "mixtral-8x7b-32768", "gemma-7b-it"],
            label="Select Chat Model",
            value="llama3-70b-8192"
        )
        chat_button = gr.Button("Ask")
        chat_output = gr.Textbox(label="Answer", interactive=False)

    # --- Speech generation controls ---
    with gr.Group() as tts_options:
        audio_lang_dropdown = gr.Dropdown(list(audio_language_dict.keys()), label="Select Audio Language", value="English")

    generate_button = gr.Button("Generate Speech")
    audio_output = gr.Audio(label="Generated Speech")
    message_output = gr.Textbox(label="Message")

    def update_chat_visibility(use_chat):
        # Toggle the chat panel to mirror the checkbox state.
        return gr.update(visible=use_chat)

    def convert_text(text, translation_lang):
        # Map the dropdown's display name to its language code and translate.
        target_code = translation_languages[translation_lang]
        translated = translate_text(text, target_code)
        return translated

    def generate_speech(text, audio_lang, use_chat, chat_output):
        # Prefer the chat answer as TTS input when the checkbox is on and an
        # answer exists; otherwise speak the translated text.
        if use_chat and chat_output:
            text = chat_output
        logging.info(f"Generating speech: lang={audio_lang}")
        try:
            # google_tts already returns the (audio_path, message) pair the
            # two outputs below expect.
            return google_tts(text, audio_language_dict[audio_lang]["code"])
        except Exception as e:
            logging.error(f"Error generating speech: {str(e)}")
            return None, f"Error generating speech: {str(e)}"

    # --- Event wiring ---
    convert_button.click(convert_text, inputs=[text_input, translation_lang_dropdown], outputs=translated_text)
    # Indexing operates on the TRANSLATED text, not the raw input.
    index_button.click(index_text, inputs=[translated_text], outputs=[index_status])
    clear_index_button.click(clear_index, outputs=[index_status])
    use_chat.change(update_chat_visibility, inputs=[use_chat], outputs=[chat_group])
    chat_button.click(chat_with_context, inputs=[chat_input, chat_model], outputs=[chat_output])

    generate_button.click(
        generate_speech,
        inputs=[translated_text, audio_lang_dropdown, use_chat, chat_output],
        outputs=[audio_output, message_output]
    )

# Blocking call: starts the local Gradio server.
iface.launch()