# Spaces: Sleeping
# File size: 2,500 Bytes
# Revision: 86c0140
import os
import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
import tempfile
# Load the open-source Whisper speech-to-text model once, at import time, so
# the callback below can reuse it instead of reloading it on every request.
# Options: "tiny", "base", "small", "medium", "large"
model = whisper.load_model("base")

# Create the Groq client. The API key is read from the GROQ_API_KEY
# environment variable rather than being embedded in the source: a key
# committed to the repository is readable by anyone who can view the file.
# NOTE(review): the key that used to be hard-coded on this line must be
# treated as compromised and revoked in the Groq console.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Function to transcribe, generate response, and convert to speech
def _save_speech(text):
    """Synthesize *text* with gTTS and return the path of the saved MP3."""
    tts = gTTS(text=text, lang='en')
    # Reserve a unique file name, then close the handle *before* gTTS writes
    # to it: reopening a still-open NamedTemporaryFile by name fails on
    # Windows. delete=False keeps the file so the caller can serve it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
        audio_path = f.name
    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty placeholder file behind when synthesis fails.
        try:
            os.remove(audio_path)
        except OSError:
            pass
        raise
    return audio_path


def chat_with_bot(audio_input):
    """Transcribe a recording, get a chat reply for it, and speak the reply.

    Args:
        audio_input: Path of the recorded audio file, or ``None`` / an empty
            string when no recording is available.

    Returns:
        A 3-tuple ``(transcription, response_text, audio_path)``. When a step
        fails or there is nothing to process, the first element carries a
        human-readable message, the second is ``""`` and the third is
        ``None``. This function reports errors through its return value and
        does not let step failures propagate.
    """
    # Guard: no recording to process, so report that plainly instead of
    # letting the transcription step fail with an internal type error.
    if audio_input is None or (
        isinstance(audio_input, str) and not audio_input.strip()
    ):
        return "No audio input received.", "", None

    # Step 1: Transcribe audio input using open-source Whisper
    try:
        result = model.transcribe(audio_input)
        # Strip the surrounding whitespace from the transcript.
        user_input = result['text'].strip()
    except Exception as e:
        return "Error during transcription: " + str(e), "", None

    # Silence or noise yields an empty transcript; do not send an empty
    # prompt to the LLM or an empty string to the speech synthesizer.
    if not user_input:
        return "No speech detected in the audio.", "", None

    # Step 2: Generate response using Groq API with Llama 8B model
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": user_input,
                }
            ],
            model="llama3-8b-8192",
        )
        response_text = chat_completion.choices[0].message.content
    except Exception as e:
        return "Error during Groq API call: " + str(e), "", None

    # Step 3: Convert the response text to speech using gTTS
    try:
        output_audio = _save_speech(response_text)
    except Exception as e:
        return "Error during text-to-speech conversion: " + str(e), "", None

    # Step 4: Return the transcription, response, and audio file for display
    # in the Gradio UI
    return user_input, response_text, output_audio
# Gradio Interface: one audio input, handed to the callback as a file path,
# and three outputs that line up with the (transcription, response, audio)
# tuple returned by chat_with_bot.
iface = gr.Interface(
    fn=chat_with_bot,
    inputs=gr.Audio(type="filepath"),  # Use 'type="filepath"' for audio input
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Response"),
        gr.Audio(label="Generated Speech"),  # Output to replay the generated speech
    ],
    # NOTE(review): live=True is expected to re-run the callback whenever the
    # input changes, without a Submit click — confirm against the Gradio docs.
    live=True,
    title="Real-Time Voice-to-Voice Chatbot",
    description="Speak into the microphone to chat with the Llama 8B model via Groq API.",
)

# Launch the Gradio Interface
iface.launch()