hey-llama-code-editor

Running on CPU Upgrade

App Files Files Community

hey-llama-code-editor / app.py

freddyaboulton HF staff

Fix errors

aca1d5b 6 days ago

raw

history blame contribute delete

5.11 kB

	import gradio as gr
	from gradio_webrtc import (
	WebRTC,
	ReplyOnStopWords,
	AdditionalOutputs,
	audio_to_bytes,
	get_twilio_turn_credentials,
	)
	import numpy as np
	import base64
	import re
	from groq import Groq

	from dotenv import load_dotenv

	# Load variables from a local .env file (if one exists) into the environment.
	# NOTE(review): presumably this supplies the credentials consumed by Groq()
	# and get_twilio_turn_credentials() -- confirm against the deployment setup.
	load_dotenv()


	def _read_html(path):
	    """Return the full text of the file at *path*.

	    The file is opened in a ``with`` block so the handle is closed right
	    away, and decoded explicitly as UTF-8 so the result does not depend on
	    the platform's default locale encoding.
	    """
	    with open(path, encoding="utf-8") as html_file:
	        return html_file.read()


	# Static HTML fragments, read once at import time:
	#   spinner_html            - yielded by generate() while a request is running
	#   sandbox_html            - initial value of the "Sandbox" HTML component
	#   something_happened_html - fallback used by generate() when no HTML is found
	spinner_html = _read_html("spinner.html")
	sandbox_html = _read_html("sandbox.html")
	something_happened_html = _read_html("something_happened.html")

	# Passed to the WebRTC component as its rtc_configuration.
	rtc_configuration = get_twilio_turn_credentials()


	import logging

	# Keep the root logger at WARNING so debug chatter from other libraries is
	# not emitted.
	logging.basicConfig(level=logging.WARNING)

	# Record layout shared by the handler below: time, logger name, level, message.
	formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")

	# Despite its name, this handler writes to the file "gradio_webrtc.log",
	# not to the console.
	console_handler = logging.FileHandler("gradio_webrtc.log")
	console_handler.setFormatter(formatter)
	console_handler.setLevel(logging.DEBUG)

	# Record everything from DEBUG upwards for the "gradio_webrtc" logger.
	logger = logging.getLogger("gradio_webrtc")
	logger.addHandler(console_handler)
	logger.setLevel(logging.DEBUG)


	# Module-wide Groq API client; generate() uses it for both the audio
	# transcription call and the chat completion call.
	# NOTE(review): constructed with no arguments, so it presumably reads its API
	# key from the environment -- confirm.
	groq_client = Groq()

	# System message that seeds every conversation (see the gr.State initial value).
	system_prompt = (
	    "You are an AI coding assistant. "
	    "Your task is to write single-file HTML applications based on a user's request. "
	    "Only return the necessary code. "
	    "Include all necessary imports and styles. "
	    "You may also be asked to edit your original response."
	)

	# Template for each user turn; generate() fills {user_message} with the
	# transcribed speech and {code} with the code received from the UI.
	user_prompt = (
	    "Please write a single-file HTML application to fulfill the following request.\n"
	    "The message:{user_message}\n"
	    "Current code you have written:{code}"
	)


	def extract_html_content(text):
	    """Return the first complete HTML document found in *text*.

	    The document is the shortest span that starts at ``<!DOCTYPE html>`` and
	    ends at the next ``</html>``, tags included. Matching is case-insensitive
	    because HTML tag names and the doctype keyword may be written in any
	    case (``<!doctype html>`` is common in model output).

	    Args:
	        text: Raw model output, possibly wrapped in prose or code fences.
	            ``None`` and the empty string are accepted.

	    Returns:
	        The matched HTML document as a string, or ``None`` when *text* is
	        empty or contains no complete document.
	    """
	    # Guard against a missing/empty response instead of letting re raise.
	    if not text:
	        return None
	    # DOTALL lets "." span newlines so multi-line documents match.
	    match = re.search(
	        r"<!DOCTYPE html>.*?</html>", text, re.DOTALL | re.IGNORECASE
	    )
	    return match.group(0) if match else None


	def display_in_sandbox(code):
	    """Wrap *code* in an ``<iframe>`` that loads it from a base64 data URI.

	    Args:
	        code: HTML source to render. ``None`` is treated as an empty
	            document so a cleared code component does not raise.

	    Returns:
	        An ``<iframe>`` element (as a string) whose ``src`` is a
	        ``data:text/html;charset=utf-8;base64,...`` URI holding *code*.
	    """
	    if code is None:
	        code = ""
	    # Base64 keeps quotes and angle brackets in the document from breaking
	    # the surrounding src="..." attribute.
	    encoded_html = base64.b64encode(code.encode("utf-8")).decode("utf-8")
	    data_uri = f"data:text/html;charset=utf-8;base64,{encoded_html}"
	    return f'<iframe src="{data_uri}" width="100%" height="600px"></iframe>'


	def generate(user_message: tuple[int, np.ndarray], history: list[dict], code: str):
	    """Turn a spoken request into HTML via transcription and chat completion.

	    Args:
	        user_message: ``(sample_rate, samples)`` pair holding the recorded audio.
	        history: Chat messages so far as ``{"role", "content"}`` dicts. The
	            list is extended in place with the new user and assistant turns.
	        code: The code received from the UI; substituted into the prompt.

	    Yields:
	        ``AdditionalOutputs(history, html)`` twice: first with the spinner
	        markup, then with the HTML document extracted from the model's reply.
	        When no document can be extracted, ``something_happened_html`` is
	        yielded instead.
	    """
	    # First yield carries the spinner markup while the API calls below run.
	    yield AdditionalOutputs(history, spinner_html)

	    sr, audio = user_message
	    audio = audio.squeeze()

	    text = groq_client.audio.transcriptions.create(
	        file=("audio-file.mp3", audio_to_bytes((sr, audio))),
	        model="whisper-large-v3-turbo",
	        response_format="verbose_json",
	    ).text

	    user_msg_formatted = user_prompt.format(user_message=text, code=code)
	    history.append({"role": "user", "content": user_msg_formatted})

	    print("generating response")
	    response = groq_client.chat.completions.create(
	        model="llama-3.3-70b-versatile",
	        messages=history,
	        temperature=1,
	        max_tokens=2048,
	        top_p=1,
	        stream=False,
	    )
	    print("finished generating response")

	    output = response.choices[0].message.content
	    try:
	        html_code = extract_html_content(output)
	    except Exception as e:
	        print(e)
	        html_code = None
	    # extract_html_content signals "no document found" by returning None
	    # rather than raising, so the fallback must cover that case as well.
	    if html_code is None:
	        html_code = something_happened_html
	    history.append({"role": "assistant", "content": output})
	    yield AdditionalOutputs(history, html_code)


	# UI layout: a narrow left column with the instructions and the audio stream,
	# and a wide right column with Sandbox / Code / Chat tabs.
	with gr.Blocks(css=".code-component {max-height: 500px !important}") as demo:
	    # Conversation state, seeded with the system prompt.
	    history = gr.State([{"role": "system", "content": system_prompt}])
	    with gr.Row():
	        with gr.Column(scale=1):
	            # The phrase users are told to say must match the stop words
	            # passed to ReplyOnStopWords below ("hello llama" variants).
	            gr.HTML(
	                """
	<h1 style='text-align: center'>
	Hello Llama! 🦙
	</h1>
	<p style='text-align: center'>
	Create and edit single-file HTML applications with just your voice! After recording, say "Hello Llama" and wait for confirmation, before asking your question.
	</p>
	<p style='text-align: center'>
	Each conversation is limited to 90 seconds. Once the time limit is up you can rejoin the conversation.
	</p>
	"""
	            )
	            webrtc = WebRTC(
	                rtc_configuration=rtc_configuration, mode="send", modality="audio"
	            )
	        with gr.Column(scale=10):
	            with gr.Tabs():
	                with gr.Tab("Sandbox"):
	                    sandbox = gr.HTML(value=sandbox_html)
	                with gr.Tab("Code"):
	                    code = gr.Code(
	                        language="html",
	                        max_lines=50,
	                        interactive=False,
	                        elem_classes="code-component",
	                    )
	                with gr.Tab("Chat"):
	                    cb = gr.Chatbot(type="messages")

	    # generate() is wrapped in ReplyOnStopWords with the stop words listed
	    # here; several spellings of "llama" are included.
	    webrtc.stream(
	        ReplyOnStopWords(
	            generate,
	            input_sample_rate=16000,
	            stop_words=["hello llama", "hello lama", "hello lamma", "hello llamma"],
	        ),
	        inputs=[webrtc, history, code],
	        outputs=[webrtc],
	        time_limit=90,
	        concurrency_limit=10,
	    )
	    # Each AdditionalOutputs(history, html) from generate() is fanned out to
	    # the conversation state, the code view and the chat view.
	    webrtc.on_additional_outputs(
	        lambda history, code: (history, code, history), outputs=[history, code, cb]
	    )
	    # Re-render the sandbox iframe whenever the code view changes.
	    code.change(display_in_sandbox, code, sandbox, queue=False)

	# Launch the Gradio app only when this file is run as a script, not on import.
	if __name__ == "__main__":
	    demo.launch()