File size: 2,500 Bytes
86c0140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
import tempfile

# Load the open-source Whisper model once at import time so every request reuses it.
model = whisper.load_model("base")  # Options: "tiny", "base", "small", "medium", "large"

# Read the Groq API key from the environment instead of committing it to source
# control. Set GROQ_API_KEY before starting the app (for example as a host/Space
# secret). NOTE(review): the key that used to be hard-coded here has been exposed
# in the repository history and should be revoked and rotated.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Function to transcribe, generate response, and convert to speech
def chat_with_bot(audio_input):
    """Run one voice-chat turn: speech -> text -> LLM reply -> speech.

    Args:
        audio_input: Path to the recorded audio file (the Gradio input is
            declared with ``type="filepath"``), or ``None`` when no audio
            was supplied.

    Returns:
        A 3-tuple ``(transcription, response_text, audio_path)`` matching the
        three Gradio outputs. On failure the first element carries a
        human-readable error message, the second is ``""`` and the third is
        ``None``; errors are reported this way rather than raised.
    """
    # Nothing was recorded: answer right away instead of handing None to
    # Whisper and surfacing its internal error text to the user.
    if audio_input is None:
        return "No audio input received.", "", None

    # Step 1: Transcribe audio input using open-source Whisper
    try:
        user_input = model.transcribe(audio_input)['text']
    except Exception as e:
        return "Error during transcription: " + str(e), "", None

    # A blank transcription (silence/noise) gives the LLM nothing to answer
    # and gTTS nothing to speak, so stop here.
    if not user_input.strip():
        return "No speech detected in the audio input.", "", None

    # Step 2: Generate response using Groq API with Llama 8B model
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": user_input,
                }
            ],
            model="llama3-8b-8192",
        )
        response_text = chat_completion.choices[0].message.content
    except Exception as e:
        return "Error during Groq API call: " + str(e), "", None

    # Step 3: Convert the response text to speech using gTTS
    try:
        tts = gTTS(text=response_text, lang='en')
        # Reserve a unique .mp3 path, then close the handle BEFORE gTTS writes
        # to it: a NamedTemporaryFile cannot be reopened by name on Windows
        # while it is still open. delete=False keeps the file for Gradio.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as f:
            output_audio = f.name
        try:
            tts.save(output_audio)
        except Exception:
            # Do not leave an empty orphan file behind when synthesis fails.
            if os.path.exists(output_audio):
                os.remove(output_audio)
            raise
    except Exception as e:
        return "Error during text-to-speech conversion: " + str(e), "", None

    # Step 4: Return the transcription, response, and audio file for display in Gradio UI
    return user_input, response_text, output_audio

# Gradio UI: a single audio input wired to chat_with_bot, whose three return
# values feed the three output components in order.
iface = gr.Interface(
    title="Real-Time Voice-to-Voice Chatbot",
    description="Speak into the microphone to chat with the Llama 8B model via Groq API.",
    fn=chat_with_bot,
    # The callback receives the recording as a path on disk.
    inputs=gr.Audio(type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Response"),
        # Plays back the synthesized reply.
        gr.Audio(label="Generated Speech"),
    ],
    live=True,
)

# Start the web app.
iface.launch()