hey-llama-code-editor

Running on CPU Upgrade

App Files Files Community

freddyaboulton HF staff committed on Dec 12, 2024

Commit

d52ab93

verified ·

1 Parent(s): 6660df8

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -43

app.py CHANGED Viewed

@@ -1,43 +1,45 @@
 import gradio as gr
-from gradio_webrtc import WebRTC, ReplyOnPause, AdditionalOutputs
 import numpy as np
 import os
-from twilio.rest import Client
 import base64
 import openai
 import re
-from huggingface_hub import InferenceClient
-from pydub import AudioSegment
-import io
 from dotenv import load_dotenv
 load_dotenv()
-hf_client = InferenceClient()
 spinner_html = open("spinner.html").read()
-account_sid = os.environ.get("TWILIO_ACCOUNT_SID")
-auth_token = os.environ.get("TWILIO_AUTH_TOKEN")
-if account_sid and auth_token:
-    client = Client(account_sid, auth_token)
-    token = client.tokens.create()
-    rtc_configuration = {
-        "iceServers": token.ice_servers,
-        "iceTransportPolicy": "relay",
-    }
-else:
-    rtc_configuration = None
 client = openai.OpenAI(
     api_key=os.environ.get("SAMBANOVA_API_KEY"),
     base_url="https://api.sambanova.ai/v1",
 )
 system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response."
 user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"
@@ -50,24 +52,6 @@ def extract_html_content(text):
     return match.group(0) if match else None
-def audio_to_bytes(audio: tuple[int, np.ndarray]):
-    audio_segment = AudioSegment(
-        audio[1].squeeze().tobytes(),
-        frame_rate=audio[0],
-        sample_width=audio[1].dtype.itemsize,
-        channels=1
-    )
-    # Export the audio segment to MP3 bytes - use a high bitrate to maximise quality
-    mp3_io = io.BytesIO()
-    audio_segment.export(mp3_io, format="mp3", bitrate="320k")
-    # Get the MP3 bytes
-    mp3_bytes = mp3_io.getvalue()
-    mp3_io.close()
-    return mp3_bytes
 def display_in_sandbox(code):
     encoded_html = base64.b64encode(code.encode('utf-8')).decode('utf-8')
     data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
@@ -79,7 +63,14 @@ def generate(user_message: tuple[int, np.ndarray],
              code: str):
     yield AdditionalOutputs(history, spinner_html)
-    text = hf_client.automatic_speech_recognition(audio_to_bytes(user_message)).text
     user_msg_formatted = user_prompt.format(user_message=text, code=code)
     history.append({"role": "user", "content": user_msg_formatted})
@@ -104,13 +95,10 @@ with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
             gr.HTML(
                 """
                 <h1 style='text-align: center'>
-                Llama Code Editor
                 </h1>
-                <h2 style='text-align: center'>
-                Powered by SambaNova and Gradio-WebRTC ⚡️
-                </h2>
                 <p style='text-align: center'>
-                Create and edit single-file HTML applications with just your voice!
                 </p>
                 <p style='text-align: center'>
                 Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
@@ -128,7 +116,9 @@ with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
                 with gr.Tab("Chat"):
                     cb = gr.Chatbot(type="messages")
-    webrtc.stream(ReplyOnPause(generate),
                   inputs=[webrtc, history, code],
                   outputs=[webrtc], time_limit=90,
                   concurrency_limit=10)

 import gradio as gr
+from gradio_webrtc import WebRTC, ReplyOnStopWords, AdditionalOutputs, audio_to_bytes
 import numpy as np
 import os
 import base64
 import openai
 import re
+from groq import Groq
 from dotenv import load_dotenv
 load_dotenv()
 spinner_html = open("spinner.html").read()
+rtc_configuration = None
+print("rtc_configuration", rtc_configuration)
+import logging
+# Configure the root logger to WARNING to suppress debug messages from other libraries
+logging.basicConfig(level=logging.WARNING)
+# Create a file handler that writes log records to gradio_webrtc.log
+console_handler = logging.FileHandler("gradio_webrtc.log")
+console_handler.setLevel(logging.DEBUG)
+# Create a formatter
+formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+console_handler.setFormatter(formatter)
+# Configure the logger for your specific library
+logger = logging.getLogger("gradio_webrtc")
+logger.setLevel(logging.DEBUG)
+logger.addHandler(console_handler)
 client = openai.OpenAI(
     api_key=os.environ.get("SAMBANOVA_API_KEY"),
     base_url="https://api.sambanova.ai/v1",
 )
+groq_client = Groq()
 system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response."
 user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"
     return match.group(0) if match else None
 def display_in_sandbox(code):
     encoded_html = base64.b64encode(code.encode('utf-8')).decode('utf-8')
     data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
              code: str):
     yield AdditionalOutputs(history, spinner_html)
+    sr, audio = user_message
+    audio = audio.squeeze()
+    text = groq_client.audio.transcriptions.create(
+        file=("audio-file.mp3", audio_to_bytes((sr, audio))),
+        model="whisper-large-v3-turbo",
+        response_format="verbose_json",
+    ).text
     user_msg_formatted = user_prompt.format(user_message=text, code=code)
     history.append({"role": "user", "content": user_msg_formatted})
             gr.HTML(
                 """
                 <h1 style='text-align: center'>
+                Hello Llama! 🦙
                 </h1>
                 <p style='text-align: center'>
+                Create and edit single-file HTML applications with just your voice! After recording, say "Hello Llama" and wait for confirmation, before asking your question.
                 </p>
                 <p style='text-align: center'>
                 Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
                 with gr.Tab("Chat"):
                     cb = gr.Chatbot(type="messages")
+    webrtc.stream(ReplyOnStopWords(generate,
+                                   input_sample_rate=16000,
+                                   stop_words=["hello llama", "hello lama", "hello lamma", "hello llamma"]),
                   inputs=[webrtc, history, code],
                   outputs=[webrtc], time_limit=90,
                   concurrency_limit=10)