Spaces:

Ammar-khan
/

Vocalinteraction

Running

Vocalinteraction / app.py

Update app.py

a64b6c9 verified 4 months ago

1.23 kB

	import os
	import gradio as gr
	from groq import Groq
	import whisper
	from gtts import gTTS
	import tempfile

	# Groq client: the API key is read from the GROQ_API_KEY environment
	# variable (None is passed through if the variable is not set).
	groq_api_key = os.environ.get("GROQ_API_KEY")
	groq_client = Groq(api_key=groq_api_key)

	# Speech-to-text: load the Whisper "base" model once at import time so
	# every request reuses the same model instance.
	whisper_model = whisper.load_model("base")

	def process_audio(audio_file):
	    """Answer a spoken prompt with text and synthesized speech.

	    The recording is transcribed with the module-level Whisper model, the
	    transcript is sent as a single user message to the Groq chat API, and
	    the reply is converted to an MP3 with gTTS.

	    Args:
	        audio_file: Path of the recorded audio file. May be None or empty
	            when no recording is available (e.g. the input was cleared).

	    Returns:
	        A ``(response_text, audio_path)`` tuple. ``audio_path`` is the
	        name of a temporary ``.mp3`` file holding the spoken reply, or
	        None when there is nothing to speak (no input, empty
	        transcription, or empty model reply).
	    """
	    # Nothing recorded: return empty outputs instead of crashing inside
	    # Whisper on a missing path.
	    if not audio_file:
	        return "", None

	    # Transcribe audio using Whisper
	    result = whisper_model.transcribe(audio_file)
	    user_text = result['text']

	    # Silence or noise can transcribe to nothing; skip the API round trip.
	    if not user_text or not user_text.strip():
	        return "", None

	    # Generate response using Llama 8b model with Groq API
	    chat_completion = groq_client.chat.completions.create(
	        messages=[
	            {
	                "role": "user",
	                "content": user_text,
	            }
	        ],
	        model="llama3-8b-8192",
	    )
	    response_text = chat_completion.choices[0].message.content

	    # gTTS refuses empty text, so return the (empty) reply with no audio.
	    if not response_text or not response_text.strip():
	        return response_text or "", None

	    # Convert response text to speech using gTTS
	    tts = gTTS(text=response_text, lang='en')

	    # Reserve a temp file name and close the handle right away: this
	    # avoids leaking the descriptor and lets gTTS reopen the path on
	    # platforms that forbid opening an already-open temp file.
	    # delete=False keeps the file on disk so the caller can serve it.
	    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as tmp:
	        speech_path = tmp.name
	    tts.save(speech_path)

	    return response_text, speech_path

	# Build the Gradio UI: one audio input (delivered to process_audio as a
	# file path) and two outputs, the reply as text and as playable audio.
	_audio_input = gr.Audio(type="filepath")
	_reply_outputs = [
	    gr.Textbox(label="Response"),
	    gr.Audio(label="Response Audio"),
	]
	iface = gr.Interface(
	    fn=process_audio,
	    inputs=_audio_input,
	    outputs=_reply_outputs,
	    live=True,  # rerun automatically when the input changes
	)

	# Start serving the app.
	iface.launch()