import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Load the open-source Whisper model
model = whisper.load_model("base")  # Options: "tiny", "base", "small", "medium", "large"

# Read the Groq API key from the environment; never hardcode secrets in source code
client = Groq(api_key=os.environ["GROQ_API_KEY"])

# Function to transcribe the audio, generate a response, and convert it to speech
def chat_with_bot(audio_input):
    try:
        # Step 1: Transcribe the audio input using open-source Whisper
        try:
            result = model.transcribe(audio_input)
            user_input = result["text"]
        except Exception as e:
            return "Error during transcription: " + str(e), "", None

        # Step 2: Generate a response using the Groq API with the Llama 3 8B model
        try:
            chat_completion = client.chat.completions.create(
                messages=[
                    {
                        "role": "user",
                        "content": user_input,
                    }
                ],
                model="llama3-8b-8192",
            )
            response_text = chat_completion.choices[0].message.content
        except Exception as e:
            return "Error during Groq API call: " + str(e), "", None

        # Step 3: Convert the response text to speech using gTTS
        try:
            tts = gTTS(text=response_text, lang="en")
            # Save the TTS output to a temporary file
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
                tts.save(f.name)
                output_audio = f.name
        except Exception as e:
            return "Error during text-to-speech conversion: " + str(e), "", None

        # Step 4: Return the transcription, response, and audio file for display in the Gradio UI
        return user_input, response_text, output_audio

    except Exception as e:
        return "An unexpected error occurred: " + str(e), "", None

# Gradio interface
iface = gr.Interface(
    fn=chat_with_bot,
    inputs=gr.Audio(type="filepath"),  # 'type="filepath"' passes the recording to the function as a file path
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Response"),
        gr.Audio(label="Generated Speech"),  # Replay the generated speech
    ],
    live=True,
    title="Real-Time Voice-to-Voice Chatbot",
    description="Speak into the microphone to chat with the Llama 3 8B model via the Groq API.",
)

# Launch the Gradio interface
iface.launch()