Spaces:
Running
Running
import os | |
import gradio as gr | |
from groq import Groq | |
import whisper | |
from gtts import gTTS | |
import tempfile | |
# Configure the Groq client.
# Only fall back to the placeholder when no key is configured: assigning it
# unconditionally would clobber a real key supplied through the environment.
os.environ.setdefault('GROQ_API_KEY', 'GROQ_API-KEY')
groq_client = Groq(api_key=os.environ.get('GROQ_API_KEY'))

# Load the Whisper "base" model once at import time; process_audio reuses it
# for every transcription.
whisper_model = whisper.load_model("base")
def process_audio(audio_file):
    """Answer a spoken prompt with text and synthesized speech.

    The recording is transcribed with Whisper, the transcript is sent as a
    single user message to the Groq chat-completions API, and the reply is
    rendered to an MP3 file with gTTS.

    Args:
        audio_file: Path of the audio clip to transcribe. May be ``None`` or
            empty when no clip has been provided.

    Returns:
        A ``(response_text, audio_path)`` tuple. ``audio_path`` is the path
        of the generated MP3, or ``None`` when there is nothing to speak
        (no input audio, no recognised speech, or an empty model reply).
    """
    # Guard first: without a clip there is nothing to transcribe, and passing
    # None on to Whisper would raise.
    if not audio_file:
        return "No audio received. Please record or upload a clip.", None

    # Transcribe audio using Whisper
    transcription = whisper_model.transcribe(audio_file)
    user_text = transcription['text']
    if not user_text or not user_text.strip():
        # Avoid sending an empty prompt to the model.
        return "No speech was detected in the audio.", None

    # Generate response using Llama 8b model with Groq API
    chat_completion = groq_client.chat.completions.create(
        messages=[
            {
                "role": "user",
                "content": user_text,
            }
        ],
        model="llama3-8b-8192",
    )
    response_text = chat_completion.choices[0].message.content
    if not response_text or not response_text.strip():
        # gTTS rejects empty text, so return the (empty) reply without audio.
        return response_text or "", None

    # Convert response text to speech using gTTS
    tts = gTTS(text=response_text, lang='en')
    # Reserve a unique .mp3 path and close the handle before gTTS writes to
    # it by name; delete=False keeps the file so the caller can serve it.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as speech_file:
        speech_path = speech_file.name
    tts.save(speech_path)

    return response_text, speech_path
# Build the Gradio UI: one audio input (delivered to process_audio as a file
# path) and two outputs, the reply text and the spoken reply.
audio_input = gr.Audio(type="filepath")
response_outputs = [
    gr.Textbox(label="Response"),
    gr.Audio(label="Response Audio"),
]

iface = gr.Interface(
    fn=process_audio,
    inputs=audio_input,
    outputs=response_outputs,
    live=True,
)

# Start the web app.
iface.launch()