LiveVoice / app.py
forbiddensoul90's picture
Update app.py
c63e100 verified
import subprocess
import sys
# Install apt packages listed in BLAH.txt (one package name per line)
def install_apt_dependencies():
    """Install the system packages listed in ``BLAH.txt`` with ``apt-get``.

    The file is read as one package name per line. Blank lines and lines
    starting with ``#`` are skipped so they are not handed to ``apt-get``
    as (empty or bogus) package names.

    On any failure (unreadable file, non-zero ``apt-get`` exit status, ...)
    the error is printed and the process exits with status 1.
    """
    try:
        # The context manager closes the file handle deterministically.
        with open('BLAH.txt') as package_file:
            packages = [
                entry.strip()
                for entry in package_file
                if entry.strip() and not entry.lstrip().startswith('#')
            ]
        subprocess.check_call(['sudo', 'apt-get', 'install', '-y'] + packages)
        print("Apt dependencies installed successfully.")
    except Exception as e:
        # Top-level boundary: report the problem and abort start-up.
        print("Failed to install apt dependencies:", str(e))
        sys.exit(1)
# Install pip packages listed in BLAHBLAH.txt
def install_pip_dependencies():
    """Install the Python packages listed in ``BLAHBLAH.txt`` using pip.

    pip is invoked through the running interpreter (``sys.executable -m pip``).
    If anything goes wrong the error is printed and the process exits with
    status 1.
    """
    pip_command = [sys.executable, '-m', 'pip', 'install', '-r', 'BLAHBLAH.txt']
    try:
        subprocess.check_call(pip_command)
        print("Pip dependencies installed successfully.")
    except Exception as error:
        print("Failed to install pip dependencies:", str(error))
        sys.exit(1)
# Install dependencies
# These calls execute as soon as the module is loaded and sit before the
# third-party imports that follow (presumably so those packages exist by the
# time they are imported). Each installer exits the process with status 1 on
# failure, so nothing after this point runs if an installation step fails.
install_apt_dependencies()
install_pip_dependencies()
import streamlit as st
import os
import time
import pvporcupine
import pyaudio
import struct
import wave
from elevenlabs import stream
from groq import Groq
import threading
# Initialize Groq client
# Used below for both speech-to-text (transcribe_audio) and chat completions
# (get_ai_response). The key is read from the GROQ_API_KEY environment
# variable; os.environ.get yields None when the variable is unset.
# NOTE(review): confirm how the SDK behaves when api_key is None.
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# Initialize ElevenLabs client
# Used by text_to_speech; the key comes from ELEVEN_API_KEY (None if unset).
from elevenlabs.client import ElevenLabs
eleven_client = ElevenLabs(api_key=os.environ.get("ELEVEN_API_KEY"))
# Porcupine wake word detector
# Configured with the single keyword "grapefruit"; listen_for_wake_word feeds
# it microphone frames and treats a process() result >= 0 as a detection.
# The access key comes from PORCUPINE_ACCESS_KEY (None if unset).
porcupine = pvporcupine.create(
access_key=os.environ.get("PORCUPINE_ACCESS_KEY"),
keywords=["grapefruit"]
)
# Audio recording parameters
# Shared by record_audio (stream configuration) and save_audio (WAV header).
CHUNK = 1024  # frames per buffer, and frames requested per stream.read() call
FORMAT = pyaudio.paInt16  # PyAudio sample format constant used for capture
CHANNELS = 1  # number of input channels
RATE = 16000  # sampling rate passed to PyAudio and written to the WAV file
def record_audio(duration):
    """Record audio from the default input device and return the raw bytes.

    Args:
        duration: Recording length in seconds; ``int(RATE / CHUNK * duration)``
            buffers of ``CHUNK`` frames are read.

    Returns:
        The captured buffers concatenated into a single ``bytes`` object
        (empty when no buffers are read).

    The input stream and the PyAudio instance are always released, even if
    opening or reading the stream raises.
    """
    p = pyaudio.PyAudio()
    try:
        # Named ``mic`` rather than ``stream`` so it does not shadow the
        # ``stream`` function imported from elevenlabs.
        mic = p.open(
            format=FORMAT,
            channels=CHANNELS,
            rate=RATE,
            input=True,
            frames_per_buffer=CHUNK,
        )
        try:
            buffer_count = int(RATE / CHUNK * duration)
            frames = [mic.read(CHUNK) for _ in range(buffer_count)]
        finally:
            mic.stop_stream()
            mic.close()
    finally:
        p.terminate()
    return b''.join(frames)
def save_audio(audio_data, filename):
    """Write raw audio bytes to ``filename`` as a WAV file.

    Args:
        audio_data: Raw sample bytes, as returned by ``record_audio``.
        filename: Path of the WAV file to create or overwrite.

    The header uses the module-level ``CHANNELS``, ``FORMAT`` and ``RATE``
    settings.
    """
    # The module-level helper gives the sample width without creating a
    # PyAudio instance that would never be terminated.
    sample_width = pyaudio.get_sample_size(FORMAT)
    # The context manager closes the file even if a write fails.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(audio_data)
def transcribe_audio(filename):
    """Send the audio file at ``filename`` to Groq and return the transcript.

    The file contents are uploaded together with the file name to the
    ``whisper-large-v3`` model; the ``text`` attribute of the response is
    returned.
    """
    with open(filename, "rb") as audio_file:
        audio_bytes = audio_file.read()
        result = groq_client.audio.transcriptions.create(
            file=(filename, audio_bytes),
            model="whisper-large-v3",
            response_format="verbose_json",
        )
    return result.text
def get_ai_response(message):
    """Ask the Groq chat model to answer ``message`` and return its reply.

    ``message`` is sent as a single user turn (no earlier conversation
    history is included); the content of the first returned choice is the
    result.
    """
    user_turn = {"role": "user", "content": message}
    completion = groq_client.chat.completions.create(
        messages=[user_turn],
        model="llama-3.1-70b-versatile",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def text_to_speech(text):
    """Generate speech for ``text`` with ElevenLabs and play it.

    The audio is requested in streaming mode and handed directly to the
    ``stream`` helper imported from ``elevenlabs``.
    """
    stream(eleven_client.generate(text=text, stream=True))
def listen_for_wake_word():
    """Block until Porcupine detects the wake word on the microphone.

    Audio is read in frames of ``porcupine.frame_length`` samples at
    ``porcupine.sample_rate`` and passed to ``porcupine.process``; a result
    of 0 or greater counts as a detection.

    Returns:
        ``True`` once the wake word has been detected. The function does not
        return otherwise.

    The microphone stream and the PyAudio instance are released before
    returning (and when an error occurs), so the input device is free again
    for the recording that follows.
    """
    pa = pyaudio.PyAudio()
    try:
        audio_stream = pa.open(
            rate=porcupine.sample_rate,
            channels=1,
            format=pyaudio.paInt16,
            input=True,
            frames_per_buffer=porcupine.frame_length,
        )
        try:
            frame_length = porcupine.frame_length
            # One signed 16-bit value ("h") per sample in a frame.
            frame_format = "h" * frame_length
            while True:
                raw_frame = audio_stream.read(frame_length)
                pcm = struct.unpack_from(frame_format, raw_frame)
                keyword_index = porcupine.process(pcm)
                if keyword_index >= 0:
                    print("Wake word detected!")
                    return True
        finally:
            audio_stream.stop_stream()
            audio_stream.close()
    finally:
        pa.terminate()
def main():
    """Render the Streamlit page and run the voice conversation loop.

    After the "Start Conversation" button is pressed, the app waits for the
    wake word and then repeats: record 5 seconds of audio, save it to
    ``user_message.wav``, transcribe it, fetch an AI reply, display both and
    speak the reply. The loop has no exit condition of its own.
    """
    st.title("Voice Conversation App with Wake Word")
    if st.button("Start Conversation"):
        # The detector is configured with the keyword "grapefruit", so the
        # prompt must name that word (it previously said 'computer').
        st.write("Listening for wake word 'grapefruit'...")
        wake_word_detected = listen_for_wake_word()
        if wake_word_detected:
            st.write("Wake word detected! Starting conversation...")
            while True:
                st.write("Listening for your message...")
                audio_data = record_audio(5)  # Record for 5 seconds
                save_audio(audio_data, "user_message.wav")
                user_message = transcribe_audio("user_message.wav")
                st.write(f"You said: {user_message}")
                ai_response = get_ai_response(user_message)
                st.write(f"AI response: {ai_response}")
                text_to_speech(ai_response)
                time.sleep(2)  # Short pause before next turn
# Script entry point: start the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()