<!doctype html>
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<title>Hey, Buddy!</title> | |
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.19.0/dist/ort.min.js"></script> | |
<script src="dist/hey-buddy.min.js"></script> | |
<style>
    /* Full-viewport flex column that centers the demo card. */
    body {
        display: flex;
        flex-flow: column nowrap;
        justify-content: center;
        align-items: center;
        height: 100vh;
        width: 100vw;
        padding: 0;
        margin: 0;
        font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
        background-color: rgb(11,15,25);
        color: white
    }
    h1 {
        font-size: 16px;
        margin-top: 0;
    }
    p {
        font-size: 15px;
        margin-bottom: 10px;
        margin-top: 5px;
    }
    /* Accent color for emphasized text. */
    strong, em {
        color: #16c8ce;
    }
    /* The single centered content card. */
    .card {
        max-width: 640px;
        margin: 0 auto;
        padding: 16px;
        border: 1px solid rgb(107, 114, 128);
        border-radius: 16px;
        background-color: rgb(16, 22, 35);
    }
    .card p:last-child {
        margin-bottom: 0;
    }
    .card img {
        width: 100%;
        max-width: 420px;
        margin: 0 auto;
    }
    /* Logo row and badge-link row. */
    #logo, #links {
        display: flex;
        flex-flow: row wrap;
        justify-content: center;
    }
    #links {
        gap: 1em;
        margin: 1em;
    }
    #links img {
        height: 20px;
    }
    /* Stacked probability graphs (canvases are added by the script below). */
    #graphs {
        display: flex;
        flex-flow: column nowrap;
        justify-content: center;
        align-items: center;
        gap: 1em;
    }
    label {
        display: block;
    }
    #graphs div {
        position: relative; /* anchor for the absolutely-positioned label */
    }
    /* Graph name overlaid in the top-right corner of each canvas. */
    #graphs label {
        position: absolute;
        right: 0;
        top: 0;
        max-width: 120px;
        text-transform: uppercase;
        font-family: monospace;
        text-align: right;
        padding: 0 4px;
        line-height: 20px;
        background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
        border: 1px solid rgba(255,255,255,0.1);
        border-top: none;
        border-right: none;
    }
    /* Per-wake-word color key nested under the wake-word graph's label. */
    #graphs .legend {
        display: flex;
        flex-flow: row wrap;
        justify-content: flex-end;
        gap: 1px 5px;
        text-transform: uppercase;
        font-family: monospace;
        font-size: 10px;
        line-height: 11px;
    }
    /* Graph canvas with a faint horizontal grid line every 10px. */
    canvas.graph {
        border: 1px solid rgba(255,255,255,0.1);
        border-bottom: none;
        background-image:
            repeating-linear-gradient(to top, rgba(255,255,255,0.05), rgba(255,255,255,0.05) 1px, transparent 1px, transparent 10px),
            linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
    }
    /* Recording status / playback strip at the bottom of the card. */
    #recording {
        margin-top: 1em;
        position: relative;
        display: block;
        height: 100px;
        line-height: 100px;
        text-align: center;
        font-size: 11px;
        background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
        border: 1px solid rgba(255,255,255,0.1);
        border-bottom-left-radius: 10px;
        border-bottom-right-radius: 10px;
    }
    #recording #audio {
        display: flex;
        flex-flow: row nowrap;
        align-items: center;
        justify-content: center;
        height: 100%;
    }
    /* "Recording" tag in the top-right corner of the strip. */
    #recording label {
        position: absolute;
        right: 0;
        top: 0;
        max-width: 120px;
        text-transform: uppercase;
        font-family: monospace;
        font-size: 12px;
        text-align: right;
        padding: 0 4px;
        line-height: 20px;
        background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
        border: 1px solid rgba(255,255,255,0.1);
        border-top: none;
        border-right: none;
    }
</style>
</head> | |
<body> | |
<div class="card"> | |
<section id="logo"> | |
<img src="logo.png" alt="Hey Buddy!">
</section> | |
<section id="headline"> | |
<p><strong><em>Hey Buddy!</em></strong> is a library for training wake word models (a.k.a audio keyword spotters) and deploying them to the browser for real-time use on CPU or GPU.</p> | |
<p>Using a wake-word as a gating mechanism for voice-enabled web applications carries numerous benefits, including reduced power consumption, improved privacy, and enhanced performance in noisy environments over speech-to-text systems.</p> | |
<p>This space serves as a demonstration of the JavaScript library for front-end applications. Say something like, <em>“Hey buddy, how are you?”</em> to see the wake word and voice activity detection in action. Your voice command will be isolated as an audio clip, which is then ready to be sent to your application's backend for further processing.</p> | |
</section> | |
<section id="links"> | |
<a href="https://github.com/painebenjamin/hey-buddy" target="_blank" rel="noopener">
<img src="https://img.shields.io/static/v1?label=painebenjamin&amp;message=hey-buddy&amp;logo=github&amp;color=0b1830" alt="painebenjamin - hey-buddy">
</a>
<a href="https://huggingface.co/benjamin-paine/hey-buddy" target="_blank" rel="noopener">
<img src="https://img.shields.io/static/v1?label=benjamin-paine&amp;message=hey-buddy&amp;logo=huggingface&amp;color=0b1830" alt="benjamin-paine - hey-buddy">
</a>
</section> | |
<section id="graphs"></section> | |
<section id="recording"> | |
<label>Recording</label> | |
<div id="audio">No recording yet</div> | |
</section> | |
</div> | |
</body> | |
<script> | |
/** Configuration */ | |
// Graph line colors (RGB triplets), keyed by display name.
const colors = {
    "buddy": [0,119,187],
    "hey buddy": [51,187,238],
    "hi buddy": [0,153,136],
    "sup buddy": [238,119,51],
    "yo buddy": [204,51,17],
    "okay buddy": [238,51,119],
    "speech": [22,200,206],
    "frame budget": [25,255,25]
};
// Wake-word phrases; each maps to a hyphenated ONNX model file below.
const wakeWords = ["buddy", "hey buddy", "hi buddy", "sup buddy", "yo buddy", "okay buddy"];
// Pixel dimensions of every graph canvas (also the history buffer length).
const canvasSize = { width: 640, height: 100 };
const graphLineWidth = 1;
const options = {
    debug: true,
    // Replace EVERY space (the original single-string replace() only
    // swapped the first one), so phrases of three or more words still
    // resolve to a valid model path.
    modelPath: wakeWords.map((word) => `/models/${word.replace(/ /g, '-')}.onnx`)
};
/** Helper method for conversion */ | |
/**
 * Encode Float32 PCM samples in [-1, 1] as a 16-bit PCM WAV Blob.
 *
 * @param {Float32Array} audioData - All samples; for multi-channel audio the
 *     samples are assumed interleaved, so `audioData.length` already counts
 *     every channel (TODO confirm against callers if stereo is ever used).
 * @param {number} sampleRate - Sample rate in Hz (e.g. 16000).
 * @param {number} [numChannels=1] - Channel count written to the header.
 * @returns {Blob} WAV bytes with MIME type "audio/wav".
 */
const float32ToWavBlob = (audioData, sampleRate, numChannels = 1) => {
    // Write an ASCII string into the DataView at the given byte offset.
    const writeString = (view, offset, string) => {
        for (let i = 0; i < string.length; i++) {
            view.setUint8(offset + i, string.charCodeAt(i));
        }
    };
    // Clamp each float to [-1, 1] and store it as little-endian int16.
    const floatTo16BitPCM = (output, offset, input) => {
        for (let i = 0; i < input.length; i++, offset += 2) {
            let s = Math.max(-1, Math.min(1, input[i]));
            output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        }
    };
    const bytesPerSample = 2;                       // 16-bit PCM
    const blockAlign = numChannels * bytesPerSample;
    const byteRate = sampleRate * blockAlign;
    const wavHeaderSize = 44;
    // floatTo16BitPCM writes exactly audioData.length samples, so the payload
    // size is length * 2 bytes. The original multiplied by numChannels again,
    // over-allocating the buffer and writing mismatched size fields whenever
    // numChannels > 1 (harmless for the mono default used in this demo).
    const dataLength = audioData.length * bytesPerSample;
    const buffer = new ArrayBuffer(wavHeaderSize + dataLength);
    const view = new DataView(buffer);
    // RIFF container header.
    writeString(view, 0, 'RIFF');                   // ChunkID
    view.setUint32(4, 36 + dataLength, true);       // ChunkSize
    writeString(view, 8, 'WAVE');                   // Format
    // "fmt " sub-chunk describing the PCM encoding.
    writeString(view, 12, 'fmt ');                  // Subchunk1ID
    view.setUint32(16, 16, true);                   // Subchunk1Size (PCM = 16)
    view.setUint16(20, 1, true);                    // AudioFormat (PCM = 1)
    view.setUint16(22, numChannels, true);          // NumChannels
    view.setUint32(24, sampleRate, true);           // SampleRate
    view.setUint32(28, byteRate, true);             // ByteRate
    view.setUint16(32, blockAlign, true);           // BlockAlign
    view.setUint16(34, 16, true);                   // BitsPerSample
    // "data" sub-chunk holding the samples themselves.
    writeString(view, 36, 'data');                  // Subchunk2ID
    view.setUint32(40, dataLength, true);           // Subchunk2Size
    floatTo16BitPCM(view, wavHeaderSize, audioData);
    return new Blob([view], { type: 'audio/wav' });
};
/** Helper method for turning the audio samples into an audio element */ | |
const saveRecording = (audioContainer, audioSamples, sampleRate = 16000) => {
    // Encode to WAV, expose it through an object URL, and swap in a player.
    const wavBlob = float32ToWavBlob(audioSamples, sampleRate);
    const objectUrl = URL.createObjectURL(wavBlob);
    audioContainer.innerHTML = `<audio controls src="${objectUrl}"></audio>`;
};
/** DOM elements */
const graphsContainer = document.getElementById("graphs"); // filled with one <div> per graph below
const audioContainer = document.getElementById("audio"); // recording status text / playback element
/** Memory for drawing */
const graphs = {}; // graph name -> <canvas>
const history = {}; // series name -> array of values, one per processed frame
const current = {}; // series name -> most recent value
const active = {}; // series name -> whether that detection is currently active
/** Instantiate */
// HeyBuddy is provided by dist/hey-buddy.min.js loaded in <head>.
const heyBuddy = new HeyBuddy(options);
/** Add callbacks */
// After each processed frame, copy probabilities and activation flags into
// the shared state the draw loop reads.
heyBuddy.onProcessed((result) => {
    current["frame budget"] = heyBuddy.frameTimeEma;
    current["speech"] = result.speech.probability || 0.0;
    active["speech"] = result.speech.active;
    for (const [wakeWord, detection] of Object.entries(result.wakeWords)) {
        const label = wakeWord.replace('-', ' ');
        current[label] = detection.probability || 0.0;
        active[label] = detection.active;
    }
    if (result.recording) {
        audioContainer.innerHTML = "Recording…";
    }
});
// Once a command recording finishes, render it as a playable clip.
heyBuddy.onRecording((audioSamples) => {
    saveRecording(audioContainer, audioSamples);
});
/** Add graphs */
for (const graphName of ["wake words", "speech", "frame budget"]) {
    // Each graph is a <div> wrapping a <canvas> plus an overlaid <label>.
    const container = document.createElement("div");
    const label = document.createElement("label");
    label.textContent = graphName;
    const canvas = document.createElement("canvas");
    canvas.className = "graph";
    canvas.width = canvasSize.width;
    canvas.height = canvasSize.height;
    graphs[graphName] = canvas;
    container.appendChild(canvas);
    container.appendChild(label);
    graphsContainer.appendChild(container);
    // Only the wake-word graph overlays several series, so only it
    // gets a color legend (nested inside its label).
    if (graphName !== "wake words") {
        continue;
    }
    const legend = document.createElement("div");
    legend.className = "legend";
    for (const wakeWord of wakeWords) {
        const item = document.createElement("div");
        const [r, g, b] = colors[wakeWord];
        item.style.color = `rgb(${r},${g},${b})`;
        item.textContent = wakeWord;
        legend.appendChild(item);
    }
    label.appendChild(legend);
}
/** Define draw loop */
// Repaints every graph from its history buffer, then reschedules itself.
const draw = () => {
    for (const graphName in graphs) {
        const isWakeWords = graphName === "wake words";
        const isFrameBudget = graphName === "frame budget";
        // The wake-word canvas overlays one series per wake word; the other
        // canvases hold a single series named after the graph itself.
        const subGraphs = isWakeWords ? wakeWords : [graphName];
        let isFirst = true;
        for (const name of subGraphs) {
            // Append the latest sample; frame time is normalized against the
            // 120ms processing budget so it plots on the same 0-1 scale.
            history[name] = history[name] || [];
            if (isFrameBudget) {
                history[name].push((current[name] || 0.0) / 120.0); // 120ms budget
            } else {
                history[name].push(current[name] || 0.0);
            }
            // Keep at most one sample per horizontal pixel.
            if (history[name].length > canvasSize.width) {
                history[name] = history[name].slice(history[name].length - canvasSize.width);
            }
            const canvas = graphs[graphName];
            const ctx = canvas.getContext("2d");
            const [r,g,b] = colors[name];
            // Inactive detections are drawn at half opacity.
            const opacity = isFrameBudget || active[name] ? 1.0 : 0.5;
            if (isFirst) {
                // Clear once per canvas, before the first overlaid series.
                ctx.clearRect(0, 0, canvas.width, canvas.height);
                isFirst = false;
            }
            ctx.strokeStyle = `rgba(${r},${g},${b},${opacity})`;
            ctx.fillStyle = `rgba(${r},${g},${b},${opacity/2})`;
            ctx.lineWidth = graphLineWidth;
            // Trace the series left to right (samples shift right to left).
            ctx.beginPath();
            let lastX = 0;
            for (let i = 0; i < history[name].length; i++) {
                const x = i;
                const y = canvas.height - history[name][i] * canvas.height;
                if (i === 0) {
                    // Fix: start the path at x = 0. The original used
                    // moveTo(1, y), leaving a skewed one-pixel seam on the
                    // left edge when the polygon closed back to x = 0.
                    ctx.moveTo(x, y);
                } else {
                    ctx.lineTo(x, y);
                }
                lastX = x;
            }
            // Extend down to the bottom edge to close a fillable polygon.
            ctx.lineTo(lastX, canvas.height);
            ctx.lineTo(0, canvas.height);
            ctx.closePath();
            ctx.fill();
            ctx.stroke();
        }
    }
    // Request next frame
    requestAnimationFrame(draw);
};
/** Start the loop */
requestAnimationFrame(draw);
</script> | |
</html> | |