"""Voice-driven HTML app builder.

Audio streamed over WebRTC is transcribed with Groq's Whisper endpoint, the
transcript (plus the current code) is sent to a Groq-hosted Llama chat model,
and the HTML document found in the reply is shown in a code viewer and
rendered inside a sandboxed iframe.
"""

import base64
import logging
import re
from typing import Optional

import gradio as gr
import numpy as np
from dotenv import load_dotenv
from gradio_webrtc import (
    AdditionalOutputs,
    ReplyOnStopWords,
    WebRTC,
    audio_to_bytes,
    get_twilio_turn_credentials,
)
from groq import Groq

# Load environment variables from a .env file before any client below is
# constructed.
load_dotenv()


def _read_text(path: str) -> str:
    """Return the text content of *path*, closing the file afterwards."""
    # NOTE(review): the HTML assets are assumed to be UTF-8 encoded, matching
    # the charset declared by display_in_sandbox — confirm.
    with open(path, encoding="utf-8") as handle:
        return handle.read()


spinner_html = _read_text("spinner.html")
sandbox_html = _read_text("sandbox.html")
something_happened_html = _read_text("something_happened.html")

rtc_configuration = get_twilio_turn_credentials()

# Configure the root logger to WARNING to suppress debug messages from other
# libraries.
logging.basicConfig(level=logging.WARNING)

# Create a file handler (the name `console_handler` is kept for backward
# compatibility; the records go to gradio_webrtc.log, not the console).
console_handler = logging.FileHandler("gradio_webrtc.log")
console_handler.setLevel(logging.DEBUG)

# Create a formatter
formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
console_handler.setFormatter(formatter)

# Configure the logger for the gradio_webrtc library specifically.
logger = logging.getLogger("gradio_webrtc")
logger.setLevel(logging.DEBUG)
logger.addHandler(console_handler)

groq_client = Groq()

system_prompt = "You are an AI coding assistant. Your task is to write single-file HTML applications based on a user's request. Only return the necessary code. Include all necessary imports and styles. You may also be asked to edit your original response."
user_prompt = "Please write a single-file HTML application to fulfill the following request.\nThe message:{user_message}\nCurrent code you have written:{code}"

# Non-greedy match of one whole document, from the doctype to the first
# closing </html>.  DOTALL lets "." span newlines; IGNORECASE accepts the
# equally valid lowercase "<!doctype html>" spelling.
_HTML_DOCUMENT_RE = re.compile(
    r"<!DOCTYPE html>.*?</html>", re.DOTALL | re.IGNORECASE
)


def extract_html_content(text: str) -> Optional[str]:
    """Extract the first complete HTML document from *text*, tags included.

    Args:
        text: Arbitrary text, e.g. a chat-model reply that may wrap the
            document in prose or code fences.

    Returns:
        The substring from ``<!DOCTYPE html>`` through the first ``</html>``,
        or ``None`` when no such span exists.
    """
    match = _HTML_DOCUMENT_RE.search(text)
    return match.group(0) if match else None


def display_in_sandbox(code):
    """Wrap *code* in an iframe that loads it from a base64 ``data:`` URI.

    Args:
        code: HTML source to render.  ``None`` is treated as an empty
            document so a cleared code component does not raise.

    Returns:
        An ``<iframe>`` HTML snippet whose ``src`` embeds the encoded page.
    """
    encoded_html = base64.b64encode((code or "").encode("utf-8")).decode("utf-8")
    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
    return f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'


def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str):
    """Transcribe a spoken request and ask the chat model for updated HTML.

    Args:
        user_message: ``(sample_rate, audio)`` pair for the recorded request.
        history: Chat messages so far; the new user and assistant messages
            are appended to this list in place.
        code: The HTML currently shown in the code component; it is embedded
            in the prompt so the model can edit it.

    Yields:
        ``AdditionalOutputs(history, html)`` twice: first with the spinner
        page while work is in progress, then with the generated document, or
        with the "something happened" page when the reply contains no
        complete HTML document.
    """
    # Show the spinner immediately; transcription and generation take a while.
    yield AdditionalOutputs(history, spinner_html)

    sr, audio = user_message
    audio = audio.squeeze()

    text = groq_client.audio.transcriptions.create(
        file=("audio-file.mp3", audio_to_bytes((sr, audio))),
        model="whisper-large-v3-turbo",
        response_format="verbose_json",
    ).text

    user_msg_formatted = user_prompt.format(user_message=text, code=code)
    history.append({"role": "user", "content": user_msg_formatted})

    print("generating response")
    response = groq_client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=history,
        temperature=1,
        max_tokens=2048,
        top_p=1,
        stream=False,
    )
    print("finished generating response")

    # NOTE(review): content is presumably nullable in the SDK's response
    # type; fall back to an empty string so the regex search and the stored
    # assistant message both stay well-formed — confirm against the SDK.
    output = response.choices[0].message.content or ""

    # extract_html_content signals "nothing found" by returning None rather
    # than raising, so the fallback page has to be selected explicitly.
    html_code = extract_html_content(output)
    if html_code is None:
        print("no complete HTML document found in the model response")
        html_code = something_happened_html

    history.append({"role": "assistant", "content": output})
    yield AdditionalOutputs(history, html_code)


with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
    history = gr.State([{"role": "system", "content": system_prompt}])
    with gr.Row():
        with gr.Column(scale=1):
            # The wake phrase in this text must match the stop words passed
            # to ReplyOnStopWords below ("hello llama" and its misspellings).
            gr.HTML(
                """
                <h1 style='text-align: center'>
                Hello Llama! 🦙
                </h1>
                <p style='text-align: center'>
                Create and edit single-file HTML applications with just your voice! After recording, say "Hello Llama" and wait for confirmation, before asking your question.
                </p>
                <p style='text-align: center'>
                Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
                </p>
                """
            )
            webrtc = WebRTC(
                rtc_configuration=rtc_configuration, mode="send", modality="audio"
            )
        with gr.Column(scale=10):
            with gr.Tabs():
                with gr.Tab("Sandbox"):
                    sandbox = gr.HTML(value=sandbox_html)
                with gr.Tab("Code"):
                    code = gr.Code(
                        language="html",
                        max_lines=50,
                        interactive=False,
                        elem_classes="code-component",
                    )
                with gr.Tab("Chat"):
                    cb = gr.Chatbot(type="messages")

    webrtc.stream(
        ReplyOnStopWords(
            generate,
            input_sample_rate=16000,
            stop_words=["hello llama", "hello lama", "hello lamma", "hello llamma"],
        ),
        inputs=[webrtc, history, code],
        outputs=[webrtc],
        time_limit=90,
        concurrency_limit=10,
    )
    # Fan the values yielded by generate out to the state, the code viewer
    # and the chat tab.
    webrtc.on_additional_outputs(
        lambda history, code: (history, code, history), outputs=[history, code, cb]
    )
    # Re-render the sandbox iframe whenever the code component changes.
    code.change(display_in_sandbox, code, sandbox, queue=False)


if __name__ == "__main__":
    demo.launch()