"""Voice-driven single-file HTML app generator ("Hello Llama").

The user speaks over a WebRTC audio stream. Once a stop word is detected the
recorded audio is transcribed with Groq's Whisper endpoint, the transcript
(plus the code generated so far) is sent to a Groq chat model, and the HTML
document found in the reply is rendered inside a sandboxed iframe.
"""
import base64
import logging
import re
from pathlib import Path

import gradio as gr
import numpy as np
from dotenv import load_dotenv
from gradio_webrtc import WebRTC, ReplyOnStopWords, AdditionalOutputs, audio_to_bytes
from groq import Groq

load_dotenv()


def _read_text(filename: str) -> str:
    """Return the contents of *filename* as text, closing the file afterwards."""
    return Path(filename).read_text(encoding="utf-8")


# Markup shown in the sandbox while a response is being generated.
spinner_html = _read_text("spinner.html")
# Markup shown in the sandbox before any code exists (and when code is empty).
default_sandbox_html = _read_text("sandbox.html")

rtc_configuration = None
print("rtc_configuration", rtc_configuration)

# Configure the root logger to WARNING to suppress debug messages from other libraries
logging.basicConfig(level=logging.WARNING)

# Create a file handler (the name is historical: it writes to a log file,
# not to the console).
console_handler = logging.FileHandler("gradio_webrtc.log")
console_handler.setLevel(logging.DEBUG)

# Create a formatter
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
console_handler.setFormatter(formatter)

# Configure the logger for the gradio_webrtc library
logger = logging.getLogger("gradio_webrtc")
logger.setLevel(logging.DEBUG)
logger.addHandler(console_handler)

groq_client = Groq()

system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response."
user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"

# Matches one complete HTML document, with an optional leading doctype.
# DOTALL lets the lazy `.*?` span newlines; IGNORECASE tolerates <HTML>/<!doctype>.
_HTML_DOCUMENT_RE = re.compile(
    r"(?:<!DOCTYPE\s+html[^>]*>\s*)?<html\b.*?</html\s*>",
    re.DOTALL | re.IGNORECASE,
)


def extract_html_content(text):
    """Extract the first complete HTML document from *text*, tags included.

    Args:
        text: Arbitrary model output, possibly wrapping the document in prose
            or a Markdown code fence.

    Returns:
        The matched ``<html>…</html>`` span (with its doctype when present),
        or ``None`` when *text* contains no complete document -- for example
        because the closing ``</html>`` tag is missing.
    """
    match = _HTML_DOCUMENT_RE.search(text)
    return match.group(0) if match else None


def display_in_sandbox(code):
    """Return iframe markup that renders *code* as a standalone page.

    The document is embedded as a base64 ``data:`` URI so that no quoting of
    the generated HTML is needed inside the ``src`` attribute.

    Args:
        code: HTML source to display. May be ``None`` or empty.

    Returns:
        An ``<iframe>`` element as a string, or the default sandbox page when
        there is no code to show.
    """
    if not code:
        return default_sandbox_html
    encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8")
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    return f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'


def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str):
    """Transcribe the spoken request and generate or edit the HTML application.

    Yields twice: first the unchanged history together with the spinner markup
    (so the UI shows progress straight away), then the extended history
    together with the HTML extracted from the model's reply.

    Args:
        user_message: ``(sample_rate, samples)`` pair for the recorded audio.
        history: Chat messages so far, starting with the system prompt. It is
            not modified; an extended copy is yielded instead, so a failed API
            call cannot leave a dangling user turn in the session state.
        code: The HTML currently shown in the code view; may be ``None`` or
            empty before the first generation.

    Yields:
        ``AdditionalOutputs(history, html)`` objects as described above.
    """
    yield AdditionalOutputs(history, spinner_html)

    sr, audio = user_message
    # Drop singleton dimensions before encoding (presumably a (1, n) buffer).
    audio = audio.squeeze()

    text = groq_client.audio.transcriptions.create(
        file=("audio-file.mp3", audio_to_bytes((sr, audio))),
        model="whisper-large-v3-turbo",
        response_format="verbose_json",
    ).text

    # Avoid formatting the literal string "None" into the prompt.
    current_code = code or ""
    user_msg_formatted = user_prompt.format(user_message=text, code=current_code)
    messages = [*history, {"role": "user", "content": user_msg_formatted}]

    print("generating response")
    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=messages,
        temperature=1,
        max_tokens=1024,
        top_p=1,
        stream=False,
    )
    print("finished generating response")

    output = response.choices[0].message.content
    html_code = extract_html_content(output)
    messages.append({"role": "assistant", "content": output})

    # If the reply holds no complete document, restore the previous code so
    # the spinner does not remain on screen.
    yield AdditionalOutputs(messages, html_code if html_code is not None else current_code)


with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
    history = gr.State([{"role": "system", "content": system_prompt}])
    with gr.Row():
        with gr.Column(scale=1):
            # The instruction names the phrase that is actually configured in
            # `stop_words` below.
            gr.HTML(
                """
<h1>
Hello Llama! 🦙
</h1>
<p>
Create and edit single-file HTML applications with just your voice! After recording, say "Hello Llama" and wait for confirmation, before asking your question.
</p>
<p>
Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
</p>
"""
            )
            webrtc = WebRTC(rtc_configuration=rtc_configuration, mode="send", modality="audio")
        with gr.Column(scale=10):
            with gr.Tabs():
                with gr.Tab("Sandbox"):
                    sandbox = gr.HTML(value=default_sandbox_html)
                with gr.Tab("Code"):
                    code = gr.Code(language="html", max_lines=50, interactive=False, elem_classes="code-component")
                with gr.Tab("Chat"):
                    cb = gr.Chatbot(type="messages")

    webrtc.stream(
        ReplyOnStopWords(
            generate,
            input_sample_rate=16000,
            stop_words=["hello llama", "hello lama", "hello lamma", "hello llamma"],
        ),
        inputs=[webrtc, history, code],
        outputs=[webrtc],
        time_limit=90,
        concurrency_limit=10,
    )
    # Fan the handler's additional outputs out to the state, code view and chat.
    webrtc.on_additional_outputs(
        lambda history, code: (history, code, history),
        outputs=[history, code, cb],
    )
    # Re-render the sandbox whenever the code (or the spinner markup) changes.
    code.change(display_in_sandbox, code, sandbox, queue=False)

if __name__ == "__main__":
    demo.launch()