File size: 2,241 Bytes
55b4bbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import os
import torch
from transformers import pipeline
from gtts import gTTS
import gradio as gr
from groq import Groq

# Load the Whisper speech-to-text pipeline once at startup.
# Fail fast (re-raise) if the model cannot be loaded — the app is useless without it.
try:
    _asr_device = "cuda" if torch.cuda.is_available() else "cpu"
    pipe = pipeline(model="openai/whisper-small", device=_asr_device)
except Exception as e:
    print(f"Error loading Whisper model: {e}")
    raise

# Groq API credentials.
# SECURITY: the original source hard-coded a live API key here. A key committed
# in plain text is compromised and should be revoked immediately; read it from
# the environment instead.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    raise RuntimeError("Set the GROQ_API_KEY environment variable before running.")

# BUGFIX: must be named `client` (lowercase) — get_llm_response() below refers
# to `client`; the original `Client = Groq(...)` caused a NameError on the
# first request.
client = Groq(api_key=GROQ_API_KEY)

def get_llm_response(transcribed_text):
    """Send the transcribed user text to the Groq LLM and return its reply.

    Returns a fixed apology string if the API call (or response parsing) fails,
    so the caller always receives displayable text.
    """
    messages = [{"role": "user", "content": transcribed_text}]
    try:
        completion = client.chat.completions.create(
            messages=messages,
            model="llama3-8b-8192",
        )
        return completion.choices[0].message.content
    except Exception as e:
        print(f"Error getting response from LLM: {e}")
        return "Sorry, I couldn't process your request."

def text_to_speech(response_text):
    """Render *response_text* as spoken English via gTTS.

    Writes the audio to ``response_audio.mp3`` in the working directory and
    returns that path; on failure returns an apology string instead.
    """
    output_path = "response_audio.mp3"
    try:
        gTTS(response_text, lang='en').save(output_path)
        return output_path
    except Exception as e:
        print(f"Error converting text to speech: {e}")
        return "Sorry, I couldn't convert the response to audio."

def voice_chat(audio_input):
    """Run the full voice pipeline: audio file -> transcript -> LLM -> spoken mp3.

    ``audio_input`` is a filesystem path supplied by Gradio. Returns the path
    of the synthesized reply audio, or an apology string on any failure.
    """
    try:
        # Speech-to-text via the Whisper pipeline loaded at startup.
        transcribed_text = pipe(audio_input)["text"]
        print(f"Transcribed Text: {transcribed_text}")

        # Ask the LLM, then synthesize its answer to an mp3.
        response_text = get_llm_response(transcribed_text)
        print(f"LLM Response: {response_text}")

        return text_to_speech(response_text)
    except Exception as e:
        print(f"Error in voice chat process: {e}")
        return "Sorry, there was an error processing your audio."

# Wire voice_chat into a minimal Gradio app: the user records or uploads
# audio, and the app replies with synthesized speech.
iface = gr.Interface(
    fn=voice_chat,
    inputs=gr.Audio(type="filepath"),  # Gradio hands voice_chat a temp-file path
    outputs="audio",
)

iface.launch()