web-assembly-vad-sherpa-onnx / sherpa-onnx-vad.js
csukuangfj's picture
update model
404fa6a
function freeConfig(config, Module) {
if ('buffer' in config) {
Module._free(config.buffer);
}
if ('sileroVad' in config) {
freeConfig(config.sileroVad, Module)
}
Module._free(config.ptr);
}
// The user should free the returned pointers
function initSherpaOnnxSileroVadModelConfig(config, Module) {
const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;
const n = modelLen;
const buffer = Module._malloc(n);
const len = 6 * 4;
const ptr = Module._malloc(len);
Module.stringToUTF8(config.model || '', buffer, modelLen);
offset = 0;
Module.setValue(ptr, buffer, 'i8*');
offset += 4;
Module.setValue(ptr + offset, config.threshold || 0.5, 'float');
offset += 4;
Module.setValue(ptr + offset, config.minSilenceDuration || 0.5, 'float');
offset += 4;
Module.setValue(ptr + offset, config.minSpeechDuration || 0.25, 'float');
offset += 4;
Module.setValue(ptr + offset, config.windowSize || 512, 'i32');
offset += 4;
Module.setValue(ptr + offset, config.maxSpeechDuration || 20, 'float');
offset += 4;
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxVadModelConfig(config, Module) {
if (!('sileroVad' in config)) {
config.sileroVad = {
model: '',
threshold: 0.50,
minSilenceDuration: 0.50,
minSpeechDuration: 0.25,
windowSize: 512,
maxSpeechDuration: 20,
};
}
const sileroVad =
initSherpaOnnxSileroVadModelConfig(config.sileroVad, Module);
const len = sileroVad.len + 4 * 4;
const ptr = Module._malloc(len);
const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
const buffer = Module._malloc(providerLen);
Module.stringToUTF8(config.provider || 'cpu', buffer, providerLen);
let offset = 0;
Module._CopyHeap(sileroVad.ptr, sileroVad.len, ptr + offset);
offset += sileroVad.len;
Module.setValue(ptr + offset, config.sampleRate || 16000, 'i32');
offset += 4;
Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
offset += 4;
Module.setValue(ptr + offset, buffer, 'i8*'); // provider
offset += 4;
Module.setValue(ptr + offset, config.debug || 0, 'i32');
offset += 4;
return {
buffer: buffer, ptr: ptr, len: len, sileroVad: sileroVad,
}
}
function createVad(Module, myConfig) {
const sileroVad = {
model: './silero_vad.onnx',
threshold: 0.50,
minSilenceDuration: 0.50,
minSpeechDuration: 0.25,
maxSpeechDuration: 20,
windowSize: 512,
};
let config = {
sileroVad: sileroVad,
sampleRate: 16000,
numThreads: 1,
provider: 'cpu',
debug: 1,
bufferSizeInSeconds: 30,
};
if (myConfig) {
config = myConfig;
}
return new Vad(config, Module);
}
class CircularBuffer {
constructor(capacity, Module) {
this.handle = Module._SherpaOnnxCreateCircularBuffer(capacity);
this.Module = Module;
}
free() {
this.Module._SherpaOnnxDestroyCircularBuffer(this.handle);
this.handle = 0
}
/**
* @param samples {Float32Array}
*/
push(samples) {
const pointer =
this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
this.Module._SherpaOnnxCircularBufferPush(
this.handle, pointer, samples.length);
this.Module._free(pointer);
}
get(startIndex, n) {
const p =
this.Module._SherpaOnnxCircularBufferGet(this.handle, startIndex, n);
const samplesPtr = p / 4;
const samples = new Float32Array(n);
for (let i = 0; i < n; i++) {
samples[i] = this.Module.HEAPF32[samplesPtr + i];
}
this.Module._SherpaOnnxCircularBufferFree(p);
return samples;
}
pop(n) {
this.Module._SherpaOnnxCircularBufferPop(this.handle, n);
}
size() {
return this.Module._SherpaOnnxCircularBufferSize(this.handle);
}
head() {
return this.Module._SherpaOnnxCircularBufferHead(this.handle);
}
reset() {
this.Module._SherpaOnnxCircularBufferReset(this.handle);
}
}
class Vad {
constructor(configObj, Module) {
this.config = configObj;
const config = initSherpaOnnxVadModelConfig(configObj, Module);
const handle = Module._SherpaOnnxCreateVoiceActivityDetector(
config.ptr, configObj.bufferSizeInSeconds || 30);
freeConfig(config, Module);
this.handle = handle;
this.Module = Module;
}
free() {
this.Module._SherpaOnnxDestroyVoiceActivityDetector(this.handle);
this.handle = 0
}
// samples is a float32 array
acceptWaveform(samples) {
const pointer =
this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
this.Module._SherpaOnnxVoiceActivityDetectorAcceptWaveform(
this.handle, pointer, samples.length);
this.Module._free(pointer);
}
isEmpty() {
return this.Module._SherpaOnnxVoiceActivityDetectorEmpty(this.handle) == 1;
}
isDetected() {
return this.Module._SherpaOnnxVoiceActivityDetectorDetected(this.handle) ==
1;
}
pop() {
this.Module._SherpaOnnxVoiceActivityDetectorPop(this.handle);
}
clear() {
this.Module._SherpaOnnxVoiceActivityDetectorClear(this.handle);
}
/*
{
samples: a 1-d float32 array,
start: an int32
}
*/
front() {
const h = this.Module._SherpaOnnxVoiceActivityDetectorFront(this.handle);
const start = this.Module.HEAP32[h / 4];
const samplesPtr = this.Module.HEAP32[h / 4 + 1] / 4;
const numSamples = this.Module.HEAP32[h / 4 + 2];
const samples = new Float32Array(numSamples);
for (let i = 0; i < numSamples; i++) {
samples[i] = this.Module.HEAPF32[samplesPtr + i];
}
this.Module._SherpaOnnxDestroySpeechSegment(h);
return {samples: samples, start: start};
}
reset() {
this.Module._SherpaOnnxVoiceActivityDetectorReset(this.handle);
}
flush() {
this.Module._SherpaOnnxVoiceActivityDetectorFlush(this.handle);
}
};
if (typeof process == 'object' && typeof process.versions == 'object' &&
typeof process.versions.node == 'string') {
module.exports = {
createVad,
CircularBuffer,
};
}