<!doctype html>
<html lang="en"> | |
<head> | |
<meta charset="UTF-8"> | |
<meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
<title>Hey, Buddy!</title> | |
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web@1.19.0/dist/ort.min.js"></script> | |
<script src="dist/hey-buddy.min.js"></script> | |
<style>
    /* Full-viewport flex column that centers the demo card. */
    body {
        display: flex;
        flex-flow: column nowrap;
        justify-content: center;
        align-items: center;
        height: 100vh;
        width: 100vw;
        padding: 0;
        margin: 0;
        font-family: -apple-system, BlinkMacSystemFont, "Arial", sans-serif;
        background-color: rgb(11,15,25);
        color: white
    }
    h1 {
        font-size: 16px;
        margin-top: 0;
    }
    p {
        font-size: 15px;
        margin-bottom: 10px;
        margin-top: 5px;
    }
    /* Accent color for emphasized text. */
    strong, em {
        color: #16c8ce;
    }
    /* The single centered content card. */
    .card {
        max-width: 640px;
        margin: 0 auto;
        padding: 16px;
        border: 1px solid rgb(107, 114, 128);
        border-radius: 16px;
        background-color: rgb(16, 22, 35);
    }
    .card p:last-child {
        margin-bottom: 0;
    }
    .card img {
        width: 100%;
        max-width: 420px;
        margin: 0 auto;
    }
    /* Logo row and badge-link row. */
    #logo, #links {
        display: flex;
        flex-flow: row wrap;
        justify-content: center;
    }
    #links {
        gap: 1em;
        margin: 1em;
    }
    #links img {
        height: 20px;
    }
    /* Stacked probability graphs (canvases are added by the script below). */
    #graphs {
        display: flex;
        flex-flow: column nowrap;
        justify-content: center;
        align-items: center;
        gap: 1em;
    }
    label {
        display: block;
    }
    #graphs div {
        position: relative; /* anchor for the absolutely-positioned label */
    }
    /* Graph name overlaid in the top-right corner of each canvas. */
    #graphs label {
        position: absolute;
        right: 0;
        top: 0;
        max-width: 120px;
        text-transform: uppercase;
        font-family: monospace;
        text-align: right;
        padding: 0 4px;
        line-height: 20px;
        background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
        border: 1px solid rgba(255,255,255,0.1);
        border-top: none;
        border-right: none;
    }
    /* Per-wake-word color key nested under the wake-word graph's label. */
    #graphs .legend {
        display: flex;
        flex-flow: row wrap;
        justify-content: flex-end;
        gap: 1px 5px;
        text-transform: uppercase;
        font-family: monospace;
        font-size: 10px;
        line-height: 11px;
    }
    /* Graph canvas with a faint horizontal grid line every 10px. */
    canvas.graph {
        border: 1px solid rgba(255,255,255,0.1);
        border-bottom: none;
        background-image:
            repeating-linear-gradient(to top, rgba(255,255,255,0.05), rgba(255,255,255,0.05) 1px, transparent 1px, transparent 10px),
            linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
    }
    /* Recording status / playback strip at the bottom of the card. */
    #recording {
        margin-top: 1em;
        position: relative;
        display: block;
        height: 100px;
        line-height: 100px;
        text-align: center;
        font-size: 11px;
        background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
        border: 1px solid rgba(255,255,255,0.1);
        border-bottom-left-radius: 10px;
        border-bottom-right-radius: 10px;
    }
    #recording #audio {
        display: flex;
        flex-flow: row nowrap;
        align-items: center;
        justify-content: center;
        height: 100%;
    }
    /* "Recording" tag in the top-right corner of the strip. */
    #recording label {
        position: absolute;
        right: 0;
        top: 0;
        max-width: 120px;
        text-transform: uppercase;
        font-family: monospace;
        font-size: 12px;
        text-align: right;
        padding: 0 4px;
        line-height: 20px;
        background-image: linear-gradient(to top, rgba(255,255,255,0.1), rgba(255,255,255,0.0));
        border: 1px solid rgba(255,255,255,0.1);
        border-top: none;
        border-right: none;
    }
</style>
</head> | |
<body> | |
<div class="card"> | |
<section id="logo"> | |
<img src="logo.png" alt="Hey Buddy!">
</section> | |
<section id="headline"> | |
<p><strong><em>Hey Buddy!</em></strong> is a library for training wake word models (a.k.a audio keyword spotters) and deploying them to the browser for real-time use on CPU or GPU.</p> | |
<p>Using a wake-word as a gating mechanism for voice-enabled web applications carries numerous benefits, including reduced power consumption, improved privacy, and enhanced performance in noisy environments over speech-to-text systems.</p> | |
<p>This space serves as a demonstration of the JavaScript library for front-end applications. Say something like, <em>“Hey buddy, how are you?”</em> to see the wake word and voice activity detection in action. Your voice command will be isolated as an audio clip, which is then ready to be sent to your application's backend for further processing.</p> | |
</section> | |
<section id="links"> | |
<a href="https://github.com/painebenjamin/hey-buddy" target="_blank" rel="noopener">
<img src="https://img.shields.io/static/v1?label=painebenjamin&amp;message=hey-buddy&amp;logo=github&amp;color=0b1830" alt="painebenjamin - hey-buddy">
</a>
<a href="https://huggingface.co/benjamin-paine/hey-buddy" target="_blank" rel="noopener">
<img src="https://img.shields.io/static/v1?label=benjamin-paine&amp;message=hey-buddy&amp;logo=huggingface&amp;color=0b1830" alt="benjamin-paine - hey-buddy">
</a>
</section> | |
<section id="graphs"></section> | |
<section id="recording"> | |
<label>Recording</label> | |
<div id="audio">No recording yet</div> | |
</section> | |
</div> | |
</body> | |
<script> | |
/** Configuration */ | |
// Graph line colors (RGB triplets), keyed by display name.
const colors = {
    "buddy": [0,119,187],
    "hey buddy": [51,187,238],
    "hi buddy": [0,153,136],
    "sup buddy": [238,119,51],
    "yo buddy": [204,51,17],
    "okay buddy": [238,51,119],
    "speech": [22,200,206],
    "frame budget": [25,255,25]
};
// Wake-word phrases; each maps to a hyphenated ONNX model file below.
const wakeWords = ["buddy", "hey buddy", "hi buddy", "sup buddy", "yo buddy", "okay buddy"];
// Pixel dimensions of every graph canvas (also the history buffer length).
const canvasSize = { width: 640, height: 100 };
const graphLineWidth = 1;
const options = {
    debug: true,
    // Replace EVERY space (the original single-string replace() only
    // swapped the first one), so phrases of three or more words still
    // resolve to a valid model path.
    modelPath: wakeWords.map((word) => `/models/${word.replace(/ /g, '-')}.onnx`)
};
/** Helper method for conversion */ | |
/**
 * Encode Float32 PCM samples in [-1, 1] as a 16-bit PCM WAV Blob.
 *
 * @param {Float32Array} audioData - All samples; for multi-channel audio the
 *     samples are assumed interleaved, so `audioData.length` already counts
 *     every channel (TODO confirm against callers if stereo is ever used).
 * @param {number} sampleRate - Sample rate in Hz (e.g. 16000).
 * @param {number} [numChannels=1] - Channel count written to the header.
 * @returns {Blob} WAV bytes with MIME type "audio/wav".
 */
const float32ToWavBlob = (audioData, sampleRate, numChannels = 1) => {
    // Write an ASCII string into the DataView at the given byte offset.
    const writeString = (view, offset, string) => {
        for (let i = 0; i < string.length; i++) {
            view.setUint8(offset + i, string.charCodeAt(i));
        }
    };
    // Clamp each float to [-1, 1] and store it as little-endian int16.
    const floatTo16BitPCM = (output, offset, input) => {
        for (let i = 0; i < input.length; i++, offset += 2) {
            let s = Math.max(-1, Math.min(1, input[i]));
            output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
        }
    };
    const bytesPerSample = 2;                       // 16-bit PCM
    const blockAlign = numChannels * bytesPerSample;
    const byteRate = sampleRate * blockAlign;
    const wavHeaderSize = 44;
    // floatTo16BitPCM writes exactly audioData.length samples, so the payload
    // size is length * 2 bytes. The original multiplied by numChannels again,
    // over-allocating the buffer and writing mismatched size fields whenever
    // numChannels > 1 (harmless for the mono default used in this demo).
    const dataLength = audioData.length * bytesPerSample;
    const buffer = new ArrayBuffer(wavHeaderSize + dataLength);
    const view = new DataView(buffer);
    // RIFF container header.
    writeString(view, 0, 'RIFF');                   // ChunkID
    view.setUint32(4, 36 + dataLength, true);       // ChunkSize
    writeString(view, 8, 'WAVE');                   // Format
    // "fmt " sub-chunk describing the PCM encoding.
    writeString(view, 12, 'fmt ');                  // Subchunk1ID
    view.setUint32(16, 16, true);                   // Subchunk1Size (PCM = 16)
    view.setUint16(20, 1, true);                    // AudioFormat (PCM = 1)
    view.setUint16(22, numChannels, true);          // NumChannels
    view.setUint32(24, sampleRate, true);           // SampleRate
    view.setUint32(28, byteRate, true);             // ByteRate
    view.setUint16(32, blockAlign, true);           // BlockAlign
    view.setUint16(34, 16, true);                   // BitsPerSample
    // "data" sub-chunk holding the samples themselves.
    writeString(view, 36, 'data');                  // Subchunk2ID
    view.setUint32(40, dataLength, true);           // Subchunk2Size
    floatTo16BitPCM(view, wavHeaderSize, audioData);
    return new Blob([view], { type: 'audio/wav' });
};
/** Helper method for turning the audio samples into an audio element */ | |
const saveRecording = (audioContainer, audioSamples, sampleRate = 16000) => {
    // Encode to WAV, expose it through an object URL, and swap in a player.
    const wavBlob = float32ToWavBlob(audioSamples, sampleRate);
    const objectUrl = URL.createObjectURL(wavBlob);
    audioContainer.innerHTML = `<audio controls src="${objectUrl}"></audio>`;
};
/** DOM elements */
const graphsContainer = document.getElementById("graphs"); // filled with one <div> per graph below
const audioContainer = document.getElementById("audio"); // recording status text / playback element
/** Memory for drawing */
const graphs = {}; // graph name -> <canvas>
const history = {}; // series name -> array of values, one per processed frame
const current = {}; // series name -> most recent value
const active = {}; // series name -> whether that detection is currently active
/** Instantiate */
// HeyBuddy is provided by dist/hey-buddy.min.js loaded in <head>.
const heyBuddy = new HeyBuddy(options);
/** Add callbacks */
// After each processed frame, copy probabilities and activation flags into
// the shared state the draw loop reads.
heyBuddy.onProcessed((result) => {
    current["frame budget"] = heyBuddy.frameTimeEma;
    current["speech"] = result.speech.probability || 0.0;
    active["speech"] = result.speech.active;
    for (const [wakeWord, detection] of Object.entries(result.wakeWords)) {
        const label = wakeWord.replace('-', ' ');
        current[label] = detection.probability || 0.0;
        active[label] = detection.active;
    }
    if (result.recording) {
        audioContainer.innerHTML = "Recording…";
    }
});
// Once a command recording finishes, render it as a playable clip.
heyBuddy.onRecording((audioSamples) => {
    saveRecording(audioContainer, audioSamples);
});
/** Add graphs */
for (const graphName of ["wake words", "speech", "frame budget"]) {
    // Each graph is a <div> wrapping a <canvas> plus an overlaid <label>.
    const container = document.createElement("div");
    const label = document.createElement("label");
    label.textContent = graphName;
    const canvas = document.createElement("canvas");
    canvas.className = "graph";
    canvas.width = canvasSize.width;
    canvas.height = canvasSize.height;
    graphs[graphName] = canvas;
    container.appendChild(canvas);
    container.appendChild(label);
    graphsContainer.appendChild(container);
    // Only the wake-word graph overlays several series, so only it
    // gets a color legend (nested inside its label).
    if (graphName !== "wake words") {
        continue;
    }
    const legend = document.createElement("div");
    legend.className = "legend";
    for (const wakeWord of wakeWords) {
        const item = document.createElement("div");
        const [r, g, b] = colors[wakeWord];
        item.style.color = `rgb(${r},${g},${b})`;
        item.textContent = wakeWord;
        legend.appendChild(item);
    }
    label.appendChild(legend);
}
/** Define draw loop */
// Repaints every graph from its history buffer, then reschedules itself.
const draw = () => {
    for (const graphName in graphs) {
        const isWakeWords = graphName === "wake words";
        const isFrameBudget = graphName === "frame budget";
        // The wake-word canvas overlays one series per wake word; the other
        // canvases hold a single series named after the graph itself.
        const subGraphs = isWakeWords ? wakeWords : [graphName];
        let isFirst = true;
        for (const name of subGraphs) {
            // Append the latest sample; frame time is normalized against the
            // 120ms processing budget so it plots on the same 0-1 scale.
            history[name] = history[name] || [];
            if (isFrameBudget) {
                history[name].push((current[name] || 0.0) / 120.0); // 120ms budget
            } else {
                history[name].push(current[name] || 0.0);
            }
            // Keep at most one sample per horizontal pixel.
            if (history[name].length > canvasSize.width) {
                history[name] = history[name].slice(history[name].length - canvasSize.width);
            }
            const canvas = graphs[graphName];
            const ctx = canvas.getContext("2d");
            const [r,g,b] = colors[name];
            // Inactive detections are drawn at half opacity.
            const opacity = isFrameBudget || active[name] ? 1.0 : 0.5;
            if (isFirst) {
                // Clear once per canvas, before the first overlaid series.
                ctx.clearRect(0, 0, canvas.width, canvas.height);
                isFirst = false;
            }
            ctx.strokeStyle = `rgba(${r},${g},${b},${opacity})`;
            ctx.fillStyle = `rgba(${r},${g},${b},${opacity/2})`;
            ctx.lineWidth = graphLineWidth;
            // Trace the series left to right (samples shift right to left).
            ctx.beginPath();
            let lastX = 0;
            for (let i = 0; i < history[name].length; i++) {
                const x = i;
                const y = canvas.height - history[name][i] * canvas.height;
                if (i === 0) {
                    // Fix: start the path at x = 0. The original used
                    // moveTo(1, y), leaving a skewed one-pixel seam on the
                    // left edge when the polygon closed back to x = 0.
                    ctx.moveTo(x, y);
                } else {
                    ctx.lineTo(x, y);
                }
                lastX = x;
            }
            // Extend down to the bottom edge to close a fillable polygon.
            ctx.lineTo(lastX, canvas.height);
            ctx.lineTo(0, canvas.height);
            ctx.closePath();
            ctx.fill();
            ctx.stroke();
        }
    }
    // Request next frame
    requestAnimationFrame(draw);
};
/** Start the loop */
requestAnimationFrame(draw);
</script> | |
</html> | |