<!--
hey-buddy / production.html
benjamin-paine's picture
Upload 3 files
4bad033 verified
raw
history blame
16 kB
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Hey, Buddy!</title>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.19.0/dist/ort.min.js"></script>
<script src="dist/hey-buddy.min.js"></script>
<style>
body {
display: flex;
flex-flow: column nowrap;
justify-content: center;
align-items: center;
height: 100vh;
width: 100vw;
padding: 0;
margin: 0;
font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
background-color: rgb(11,15,25);
color: white;
}
h1 {
font-size: 16px;
margin-top: 0;
}
p {
font-size: 15px;
margin-bottom: 10px;
margin-top: 5px;
}
strong, em {
color: #16c8ce;
}
.card {
max-width: 640px;
margin: 0 auto;
padding: 16px;
border: 1px solid rgb(107, 114, 128);
border-radius: 16px;
background-color: rgb(16, 22, 35);
}
.card p:last-child {
margin-bottom: 0;
}
.card img {
width: 100%;
max-width: 420px;
margin: 0 auto;
}
#logo, #links {
display: flex;
flex-flow: row wrap;
justify-content: center;
}
#links {
gap: 1em;
margin: 1em;
}
#links img {
height: 20px;
}
#graphs {
display: flex;
flex-flow: column nowrap;
justify-content: center;
align-items: center;
gap: 1em;
}
label {
display: block;
}
#graphs div {
position: relative;
}
#graphs label {
position: absolute;
right: 0;
top: 0;
max-width: 120px;
text-transform: uppercase;
font-family: monospace;
text-align: right;
padding: 0 4px;
line-height: 20px;
background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
border: 1px solid rgba(255,255,255,0.1);
border-top: none;
border-right: none;
}
#graphs .legend {
display: flex;
flex-flow: row wrap;
justify-content: flex-end;
gap: 1px 5px;
text-transform: uppercase;
font-family: monospace;
font-size: 10px;
line-height: 11px;
}
canvas.graph {
border: 1px solid rgba(255,255,255,0.1);
border-bottom: none;
background-image:
repeating-linear-gradient(to top, rgba(255,255,255,0.05), rgba(255,255,255,0.05) 1px, transparent 1px, transparent 10px),
linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
}
#recording {
margin-top: 1em;
position: relative;
display: block;
height: 100px;
line-height: 100px;
text-align: center;
font-size: 11px;
background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
border: 1px solid rgba(255,255,255,0.1);
border-bottom-left-radius: 10px;
border-bottom-right-radius: 10px;
}
#recording #audio {
display: flex;
flex-flow: row nowrap;
align-items: center;
justify-content: center;
height: 100%;
}
#recording label {
position: absolute;
right: 0;
top: 0;
max-width: 120px;
text-transform: uppercase;
font-family: monospace;
font-size: 12px;
text-align: right;
padding: 0 4px;
line-height: 20px;
background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
border: 1px solid rgba(255,255,255,0.1);
border-top: none;
border-right: none;
}
</style>
</head>
<body>
<div class="card">
<section id="logo">
<img src="logo.png" alt="Hey Buddy!">
</section>
<section id="headline">
<p><strong><em>Hey Buddy!</em></strong> is a library for training wake word models (a.k.a audio keyword spotters) and deploying them to the browser for real-time use on CPU or GPU.</p>
<p>Using a wake-word as a gating mechanism for voice-enabled web applications carries numerous benefits, including reduced power consumption, improved privacy, and enhanced performance in noisy environments over speech-to-text systems.</p>
<p>This space serves as a demonstration of the JavaScript library for front-end applications. Say something like, <em>&ldquo;Hey buddy, how are you?&rdquo;</em> to see the wake word and voice activity detection in action. Your voice command will be isolated as an audio clip, which is then ready to be sent to your application's backend for further processing.</p>
</section>
<section id="links">
<a href="https://github.com/painebenjamin/hey-buddy" target="_blank">
<img src="https://img.shields.io/static/v1?label=painebenjamin&message=hey-buddy&logo=github&color=0b1830" alt="painebenjamin - hey-buddy" />
</a>
<a href="https://huggingface.co/benjamin-paine/hey-buddy" target="_blank">
<img src="https://img.shields.io/static/v1?label=benjamin-paine&message=hey-buddy&logo=huggingface&color=0b1830" alt="benjamin-paine - hey-buddy" />
</a>
</section>
<section id="graphs"></section>
<section id="recording">
<label>Recording</label>
<div id="audio">No recording yet</div>
</section>
</div>
<script>
/** Configuration */
const colors = {
"buddy": [0,119,187],
"hey buddy": [51,187,238],
"hi buddy": [0,153,136],
"sup buddy": [238,119,51],
"yo buddy": [204,51,17],
"okay buddy": [238,51,119],
"speech": [22,200,206],
"frame budget": [25,255,25]
};
const wakeWords = ["buddy", "hey buddy", "hi buddy", "sup buddy", "yo buddy", "okay buddy"];
const canvasSize = { width: 640, height: 100 };
const graphLineWidth = 1;
const options = {
debug: true,
modelPath: wakeWords.map((word) => `/models/${word.replace(' ', '-')}.onnx`)
};
/** Helper method for conversion */
/**
 * Encode raw audio samples as a 16-bit PCM WAV file.
 *
 * @param {Float32Array} audioData - Samples in [-1, 1]; if numChannels > 1
 *     the samples are assumed to already be interleaved.
 * @param {number} sampleRate - Sample rate in Hz (e.g. 16000).
 * @param {number} [numChannels=1] - Channel count written to the header.
 * @returns {Blob} Blob of type "audio/wav" (44-byte header + PCM data).
 */
const float32ToWavBlob = (audioData, sampleRate, numChannels = 1) => {
    // Write an ASCII string into the DataView at the given offset.
    const writeString = (view, offset, string) => {
        for (let i = 0; i < string.length; i++) {
            view.setUint8(offset + i, string.charCodeAt(i));
        }
    };
    // Convert Float32 samples to little-endian 16-bit PCM.
    const floatTo16BitPCM = (output, offset, input) => {
        for (let i = 0; i < input.length; i++, offset += 2) {
            const s = Math.max(-1, Math.min(1, input[i])); // clamp to [-1, 1]
            output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        }
    };
    const bytesPerSample = 2; // 16-bit PCM
    const blockAlign = numChannels * bytesPerSample;
    const byteRate = sampleRate * blockAlign;
    const wavHeaderSize = 44;
    // BUG FIX: audioData.length already counts every (interleaved) sample,
    // so the data-chunk size must not be multiplied by numChannels again;
    // the old computation over-stated the size whenever numChannels > 1.
    const dataLength = audioData.length * bytesPerSample;
    const buffer = new ArrayBuffer(wavHeaderSize + dataLength);
    const view = new DataView(buffer);
    // RIFF/WAVE header (all multi-byte fields little-endian).
    writeString(view, 0, 'RIFF');                 // ChunkID
    view.setUint32(4, 36 + dataLength, true);     // ChunkSize
    writeString(view, 8, 'WAVE');                 // Format
    writeString(view, 12, 'fmt ');                // Subchunk1ID
    view.setUint32(16, 16, true);                 // Subchunk1Size (PCM = 16)
    view.setUint16(20, 1, true);                  // AudioFormat (PCM = 1)
    view.setUint16(22, numChannels, true);        // NumChannels
    view.setUint32(24, sampleRate, true);         // SampleRate
    view.setUint32(28, byteRate, true);           // ByteRate
    view.setUint16(32, blockAlign, true);         // BlockAlign
    view.setUint16(34, 16, true);                 // BitsPerSample
    writeString(view, 36, 'data');                // Subchunk2ID
    view.setUint32(40, dataLength, true);         // Subchunk2Size
    // Sample payload immediately follows the 44-byte header.
    floatTo16BitPCM(view, wavHeaderSize, audioData);
    return new Blob([view], { type: 'audio/wav' });
};
/** Helper method for turning the audio samples into an audio element */
// Renders the captured samples as a playable <audio> element inside
// audioContainer.
// BUG FIX: revoke the previous recording's object URL before replacing
// it; otherwise every new recording leaks a Blob for the page lifetime.
const saveRecording = (audioContainer, audioSamples, sampleRate = 16000) => {
    const previous = audioContainer.querySelector("audio");
    if (previous && previous.src && previous.src.startsWith("blob:")) {
        URL.revokeObjectURL(previous.src);
    }
    const blob = float32ToWavBlob(audioSamples, sampleRate);
    const url = URL.createObjectURL(blob);
    audioContainer.innerHTML = `<audio controls src="${url}"></audio>`;
};
/** DOM elements */
const graphsContainer = document.getElementById("graphs");
const audioContainer = document.getElementById("audio");
/** Memory for drawing */
// All four maps are keyed by series name ("speech", "hey buddy", ...);
// they are written by the onProcessed callback and read by the draw loop.
// NOTE(review): `history` shadows window.history within this script.
const graphs = {};   // graph name -> <canvas> element
const history = {};  // series name -> array of recent sample values
const current = {};  // series name -> latest probability / frame time
const active = {};   // series name -> whether the detection is active
/** Instantiate */
const heyBuddy = new HeyBuddy(options);
/** Add callbacks */
// After every processed frame, mirror the results into the shared
// drawing state (latest probabilities + active flags per series).
heyBuddy.onProcessed((result) => {
    current["frame budget"] = heyBuddy.frameTimeEma;
    current["speech"] = result.speech.probability || 0.0;
    active["speech"] = result.speech.active;
    // Model keys are hyphenated ("hey-buddy"); the graphs use spaces.
    for (const [wakeWord, detection] of Object.entries(result.wakeWords)) {
        const label = wakeWord.replace('-', ' ');
        current[label] = detection.probability || 0.0;
        active[label] = detection.active;
    }
    // Show a placeholder while a voice command is being captured.
    if (result.recording) {
        audioContainer.innerHTML = "Recording&hellip;";
    }
});
// When a recording completes, render it as a playable audio element.
heyBuddy.onRecording((audioSamples) => saveRecording(audioContainer, audioSamples));
/** Add graphs */
// Build one labeled canvas per graph and register it in `graphs`.
for (const graphName of ["wake words", "speech", "frame budget"]) {
    const container = document.createElement("div");
    const label = document.createElement("label");
    label.textContent = graphName;
    // The canvas the draw loop renders this graph onto.
    const canvas = document.createElement("canvas");
    canvas.className = "graph";
    canvas.width = canvasSize.width;
    canvas.height = canvasSize.height;
    graphs[graphName] = canvas;
    container.appendChild(canvas);
    container.appendChild(label);
    graphsContainer.appendChild(container);
    // The wake-word graph overlays several series, so it gets a
    // color-coded legend inside its label.
    if (graphName === "wake words") {
        const legend = document.createElement("div");
        legend.className = "legend";
        wakeWords.forEach((word) => {
            const item = document.createElement("div");
            const [r, g, b] = colors[word];
            item.style.color = `rgb(${r},${g},${b})`;
            item.textContent = word;
            legend.appendChild(item);
        });
        label.appendChild(legend);
    }
}
/** Define draw loop */
// Renders every graph once per animation frame: appends the newest
// sample for each series to its history, then redraws the filled
// line graph for each canvas.
const draw = () => {
    for (let graphName in graphs) {
        const isWakeWords = graphName === "wake words";
        const isFrameBudget = graphName === "frame budget";
        // The wake-word canvas overlays one series per wake word; the
        // other canvases plot the single series named after the graph.
        const subGraphs = isWakeWords ? wakeWords : [graphName];
        // Canvas/context are per-graph invariants; hoisted out of the
        // per-series loop.
        const canvas = graphs[graphName];
        const ctx = canvas.getContext("2d");
        let isFirst = true;
        for (let name of subGraphs) {
            // Append the newest sample for this series.
            history[name] = history[name] || [];
            if (isFrameBudget) {
                // Normalize the EMA frame time against the 120ms budget.
                history[name].push((current[name] || 0.0) / 120.0);
            } else {
                history[name].push(current[name] || 0.0);
            }
            // Keep at most one sample per horizontal pixel.
            if (history[name].length > canvasSize.width) {
                history[name] = history[name].slice(history[name].length - canvasSize.width);
            }
            const [r, g, b] = colors[name];
            // Inactive detections draw at half opacity; the frame-budget
            // series has no active flag and is always fully opaque.
            const opacity = isFrameBudget || active[name] ? 1.0 : 0.5;
            if (isFirst) {
                // Clear the canvas before the first series is drawn.
                ctx.clearRect(0, 0, canvas.width, canvas.height);
                isFirst = false;
            }
            ctx.strokeStyle = `rgba(${r},${g},${b},${opacity})`;
            ctx.fillStyle = `rgba(${r},${g},${b},${opacity / 2})`;
            ctx.lineWidth = graphLineWidth;
            // Trace the series left to right, then close the path along
            // the bottom edge to fill the area under the curve.
            ctx.beginPath();
            const count = history[name].length;
            for (let i = 0; i < count; i++) {
                const y = canvas.height - history[name][i] * canvas.height;
                if (i === 0) {
                    // BUG FIX: start the path at x=0 (was x=1) so the
                    // first point lines up with the bottom-left corner
                    // the polygon closes back to.
                    ctx.moveTo(0, y);
                } else {
                    ctx.lineTo(i, y);
                }
            }
            ctx.lineTo(count - 1, canvas.height);
            ctx.lineTo(0, canvas.height);
            ctx.closePath();
            ctx.fill();
            ctx.stroke();
        }
    }
    // Schedule the next frame.
    requestAnimationFrame(draw);
};
/** Start the loop */
requestAnimationFrame(draw);
</script>
</html>