diff --git a/nodejs-addon-examples/.gitignore b/nodejs-addon-examples/.gitignore
new file mode 100644
index 000000000..c80438466
--- /dev/null
+++ b/nodejs-addon-examples/.gitignore
@@ -0,0 +1 @@
+crash.log
diff --git a/nodejs-addon-examples/README.md b/nodejs-addon-examples/README.md
index 45975393a..81eea42eb 100644
--- a/nodejs-addon-examples/README.md
+++ b/nodejs-addon-examples/README.md
@@ -38,3 +38,12 @@ node ./test_asr_streaming_transducer.js
 node ./test_asr_streaming_transducer_microphone.js
 ```
+
+# VAD
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+
+node ./test_vad_microphone.js
+```
+
 
diff --git a/nodejs-addon-examples/test_asr_streaming_transducer.js b/nodejs-addon-examples/test_asr_streaming_transducer.js
index a92c9aca3..4ff5b966a 100644
--- a/nodejs-addon-examples/test_asr_streaming_transducer.js
+++ b/nodejs-addon-examples/test_asr_streaming_transducer.js
@@ -36,10 +36,10 @@ console.log('Started')
 let start = performance.now();
 const stream = recognizer.createStream();
 const wave = sherpa_onnx.readWave(waveFilename);
-stream.acceptWaveform(wave.samples, wave.sampleRate);
+stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
 
 const tailPadding = new Float32Array(wave.sampleRate * 0.4);
-stream.acceptWaveform(tailPadding, wave.sampleRate);
+stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
 
 while (recognizer.isReady(stream)) {
   recognizer.decode(stream);
diff --git a/nodejs-addon-examples/test_asr_streaming_transducer_microphone.js b/nodejs-addon-examples/test_asr_streaming_transducer_microphone.js
index c85231910..f8c15f8f5 100644
--- a/nodejs-addon-examples/test_asr_streaming_transducer_microphone.js
+++ b/nodejs-addon-examples/test_asr_streaming_transducer_microphone.js
@@ -60,7 +60,8 @@ const display = new sherpa_onnx.Display(50);
 
 ai.on('data', data => {
   const samples = new Float32Array(data.buffer);
-  stream.acceptWaveform(samples, recognizer.config.featConfig.sampleRate);
+  stream.acceptWaveform(
+      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});
 
   while (recognizer.isReady(stream)) {
     recognizer.decode(stream);
diff --git a/nodejs-addon-examples/test_vad_microphone.js b/nodejs-addon-examples/test_vad_microphone.js
new file mode 100644
index 000000000..63cffa6e3
--- /dev/null
+++ b/nodejs-addon-examples/test_vad_microphone.js
@@ -0,0 +1,88 @@
+// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
+
+const portAudio = require('naudiodon2');
+// console.log(portAudio.getDevices());
+
+const sherpa_onnx = require('sherpa-onnx-node');
+
+function createVad() {
+  // please download silero_vad.onnx from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+  const config = {
+    sileroVad: {
+      model: './silero_vad.onnx',
+      threshold: 0.5,
+      minSpeechDuration: 0.25,
+      minSilenceDuration: 0.5,
+      windowSize: 512,
+    },
+    sampleRate: 16000,
+    debug: true,
+    numThreads: 1,
+  };
+
+  const bufferSizeInSeconds = 60;
+
+  return new sherpa_onnx.Vad(config, bufferSizeInSeconds);
+}
+
+const vad = createVad();
+
+const bufferSizeInSeconds = 30;
+const buffer =
+    new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
+
+
+const ai = new portAudio.AudioIO({
+  inOptions: {
+    channelCount: 1,
+    closeOnError: true,  // Close the stream if an audio error is detected, if
+                         // set false then just log the error
+    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
+    sampleFormat: portAudio.SampleFormatFloat32,
+    sampleRate: vad.config.sampleRate,
+  }
+});
+
+let printed = false;
+let index = 0;
+ai.on('data', data => {
+  const windowSize = vad.config.sileroVad.windowSize;
+  buffer.push(new Float32Array(data.buffer));
+  while (buffer.size() > windowSize) {
+    const samples = buffer.get(buffer.head(), windowSize);
+    buffer.pop(windowSize);
+    vad.acceptWaveform(samples);
+    if (vad.isDetected() && !printed) {
+      console.log(`${index}: Detected speech`);
+      printed = true;
+    }
+
+    if (!vad.isDetected()) {
+      printed = false;
+    }
+
+    while (!vad.isEmpty()) {
+      const segment = vad.front();
+      vad.pop();
+      const filename = `${index}-${
+          new Date()
+              .toLocaleTimeString('en-US', {hour12: false})
+              .split(' ')[0]}.wav`;
+      sherpa_onnx.writeWave(
+          filename,
+          {samples: segment.samples, sampleRate: vad.config.sampleRate});
+      const duration = segment.samples.length / vad.config.sampleRate;
+      console.log(`${index} End of speech. Duration: ${duration} seconds`);
+      console.log(`Saved to ${filename}`);
+      index += 1;
+    }
+  }
+});
+
+ai.on('close', () => {
+  console.log('Free resources');
+});
+
+ai.start();
+console.log('Started! Please speak');
diff --git a/python-api-examples/offline-tts-play.py b/python-api-examples/offline-tts-play.py
index 4c5f9db62..9e526c368 100755
--- a/python-api-examples/offline-tts-play.py
+++ b/python-api-examples/offline-tts-play.py
@@ -47,7 +47,7 @@
  --vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
  --vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
  --vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
- --tts-rule-fsts='./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst' \
+ --tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
  --vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \
  --sid=2 \
  --output-filename=./test-2.wav \
diff --git a/python-api-examples/offline-tts.py b/python-api-examples/offline-tts.py
index 59b69328f..18ea638e8 100755
--- a/python-api-examples/offline-tts.py
+++ b/python-api-examples/offline-tts.py
@@ -48,7 +48,7 @@
  --vits-model=./sherpa-onnx-vits-zh-ll/model.onnx \
  --vits-lexicon=./sherpa-onnx-vits-zh-ll/lexicon.txt \
  --vits-tokens=./sherpa-onnx-vits-zh-ll/tokens.txt \
- --tts-rule-fsts='./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst' \
+ --tts-rule-fsts=./sherpa-onnx-vits-zh-ll/phone.fst,./sherpa-onnx-vits-zh-ll/date.fst,./sherpa-onnx-vits-zh-ll/number.fst \
  --vits-dict-dir=./sherpa-onnx-vits-zh-ll/dict \
  --sid=2 \
  --output-filename=./test-2.wav \
diff --git a/scripts/node-addon-api/CMakeLists.txt b/scripts/node-addon-api/CMakeLists.txt
index 6c488beca..d8656081a 100644
--- a/scripts/node-addon-api/CMakeLists.txt
+++ b/scripts/node-addon-api/CMakeLists.txt
@@ -20,7 +20,9 @@ include_directories(${CMAKE_JS_INC})
 set(srcs
   src/sherpa-onnx-node-addon-api.cc
   src/streaming-asr.cc
+  src/vad.cc
   src/wave-reader.cc
+  src/wave-writer.cc
 )
 
 if(NOT DEFINED ENV{SHERPA_ONNX_INSTALL_DIR})
diff --git a/scripts/node-addon-api/lib/sherpa-onnx.js b/scripts/node-addon-api/lib/sherpa-onnx.js
index b9b343f78..7619b26ff 100644
--- a/scripts/node-addon-api/lib/sherpa-onnx.js
+++ b/scripts/node-addon-api/lib/sherpa-onnx.js
@@ -1,8 +1,12 @@
 const addon = require('./addon.js')
 const streaming_asr = require('./streaming-asr.js');
+const vad = require('./vad.js');
 
 module.exports = {
   OnlineRecognizer: streaming_asr.OnlineRecognizer,
   readWave: addon.readWave,
+  writeWave: addon.writeWave,
   Display: streaming_asr.Display,
+  Vad: vad.Vad,
+  CircularBuffer: vad.CircularBuffer,
 }
diff --git a/scripts/node-addon-api/lib/streaming-asr.js b/scripts/node-addon-api/lib/streaming-asr.js
index 21c8632ea..ad598cffa 100644
--- a/scripts/node-addon-api/lib/streaming-asr.js
+++ b/scripts/node-addon-api/lib/streaming-asr.js
@@ -15,10 +15,11 @@ class OnlineStream {
     this.handle = handle;
   }
 
+  // obj is {samples: samples, sampleRate: sampleRate}
   // samples is a float32 array containing samples in the range [-1, 1]
-  acceptWaveform(samples, sampleRate) {
-    addon.acceptWaveformOnline(
-        this.handle, {samples: samples, sampleRate: sampleRate})
+  // sampleRate is a number
+  acceptWaveform(obj) {
+    addon.acceptWaveformOnline(this.handle, obj)
   }
 
   inputFinished() {
diff --git a/scripts/node-addon-api/lib/vad.js b/scripts/node-addon-api/lib/vad.js
new file mode 100644
index 000000000..30ecc5273
--- /dev/null
+++ b/scripts/node-addon-api/lib/vad.js
@@ -0,0 +1,88 @@
+const addon = require('./addon.js');
+
+class CircularBuffer {
+  constructor(capacity) {
+    this.handle = addon.createCircularBuffer(capacity);
+  }
+
+  // samples is a float32 array
+  push(samples) {
+    addon.circularBufferPush(this.handle, samples);
+  }
+
+  // return a float32 array
+  get(startIndex, n) {
+    return addon.circularBufferGet(this.handle, startIndex, n);
+  }
+
+  pop(n) {
+    return addon.circularBufferPop(this.handle, n);
+  }
+
+  size() {
+    return addon.circularBufferSize(this.handle);
+  }
+
+  head() {
+    return addon.circularBufferHead(this.handle);
+  }
+
+  reset() {
+    return addon.circularBufferReset(this.handle);
+  }
+}
+
+class Vad {
+  /*
+config = {
+  sileroVad: {
+    model: "./silero_vad.onnx",
+    threshold: 0.5,
+  }
+}
+ */
+  constructor(config, bufferSizeInSeconds) {
+    this.handle =
+        addon.createVoiceActivityDetector(config, bufferSizeInSeconds);
+    this.config = config;
+  }
+
+  acceptWaveform(samples) {
+    addon.voiceActivityDetectorAcceptWaveform(this.handle, samples);
+  }
+
+  isEmpty() {
+    return addon.voiceActivityDetectorIsEmpty(this.handle);
+  }
+
+  isDetected() {
+    return addon.voiceActivityDetectorIsDetected(this.handle);
+  }
+
+  pop() {
+    addon.voiceActivityDetectorPop(this.handle);
+  }
+
+  clear() {
+    addon.voiceActivityDetectorClear(this.handle);
+  }
+
+  /*
+{
+  samples: a 1-d float32 array,
+  start: an int32
+}
+ */
+  front() {
+    return addon.voiceActivityDetectorFront(this.handle);
+  }
+
+  reset() {
+    return addon.voiceActivityDetectorReset(this.handle);
+  }
+}
+
+module.exports = {
+  Vad,
+  CircularBuffer,
+}
diff --git a/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc b/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc
index ab7d35d55..97910fcd6 100644
--- a/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc
+++ b/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc
@@ -5,10 +5,14 @@
 void InitStreamingAsr(Napi::Env env, Napi::Object exports);
 
 void InitWaveReader(Napi::Env env, Napi::Object exports);
+void InitWaveWriter(Napi::Env env, Napi::Object exports);
+void InitVad(Napi::Env env, Napi::Object exports);
 
 Napi::Object Init(Napi::Env env, Napi::Object exports) {
   InitStreamingAsr(env, exports);
   InitWaveReader(env, exports);
+  InitWaveWriter(env, exports);
+  InitVad(env, exports);
 
   return exports;
 }
diff --git a/scripts/node-addon-api/src/streaming-asr.cc b/scripts/node-addon-api/src/streaming-asr.cc
index f1aee13b0..53a997c52 100644
--- a/scripts/node-addon-api/src/streaming-asr.cc
+++ b/scripts/node-addon-api/src/streaming-asr.cc
@@ -125,8 +125,13 @@ static
SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { config.provider = p; } - if (o.Has("debug") && o.Get("debug").IsNumber()) { - config.debug = o.Get("debug").As().Int32Value(); + if (o.Has("debug") && + (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) { + if (o.Get("debug").IsBoolean()) { + config.debug = o.Get("debug").As().Value(); + } else { + config.debug = o.Get("debug").As().Int32Value(); + } } if (o.Has("modelType") && o.Get("modelType").IsString()) { diff --git a/scripts/node-addon-api/src/vad.cc b/scripts/node-addon-api/src/vad.cc new file mode 100644 index 000000000..2c61c29fe --- /dev/null +++ b/scripts/node-addon-api/src/vad.cc @@ -0,0 +1,620 @@ +// scripts/node-addon-api/src/vad.cc +// +// Copyright (c) 2024 Xiaomi Corporation + +#include + +#include "napi.h" // NOLINT +#include "sherpa-onnx/c-api/c-api.h" + +static Napi::External CreateCircularBufferWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsNumber()) { + Napi::TypeError::New(env, "You should pass an integer as the argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxCircularBuffer *buf = + SherpaOnnxCreateCircularBuffer(info[0].As().Int32Value()); + + return Napi::External::New( + env, buf, [](Napi::Env env, SherpaOnnxCircularBuffer *p) { + SherpaOnnxDestroyCircularBuffer(p); + }); +} + +static void CircularBufferPushWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.") + .ThrowAsJavaScriptException(); + + return; + } + + SherpaOnnxCircularBuffer *buf = + info[0].As>().Data(); + + if (!info[1].IsTypedArray()) { + Napi::TypeError::New(env, "Argument 1 should be a Float32Array.") + .ThrowAsJavaScriptException(); + + return; + } + + Napi::Float32Array data = info[1].As(); + SherpaOnnxCircularBufferPush(buf, data.Data(), data.ElementLength()); +} + +// see https://github.com/nodejs/node-addon-api/blob/main/doc/typed_array.md +// https://github.com/nodejs/node-addon-examples/blob/main/src/2-js-to-native-conversion/typed_array_to_native/node-addon-api/typed_array_to_native.cc +static Napi::Float32Array CircularBufferGetWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 3) { + std::ostringstream os; + os << "Expect only 3 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxCircularBuffer *buf = + info[0].As>().Data(); + + if (!info[1].IsNumber()) { + Napi::TypeError::New(env, "Argument 1 should be an integer (startIndex).") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[2].IsNumber()) { + Napi::TypeError::New(env, "Argument 2 should be an integer (n).") + .ThrowAsJavaScriptException(); + + return {}; + } + + int32_t start_index = info[1].As().Int32Value(); + int32_t n = info[2].As().Int32Value(); + + const float *data = SherpaOnnxCircularBufferGet(buf, start_index, n); + + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( + env, const_cast(data), sizeof(float) * n, + [](Napi::Env /*env*/, void *p) { + SherpaOnnxCircularBufferFree(reinterpret_cast(p)); + }); + + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, n, arrayBuffer, 0); + + return float32Array; +} + +static void CircularBufferPopWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.") + .ThrowAsJavaScriptException(); + + return; + } + + SherpaOnnxCircularBuffer *buf = + info[0].As>().Data(); + + if (!info[1].IsNumber()) { + Napi::TypeError::New(env, "Argument 1 should be an integer (n).") + .ThrowAsJavaScriptException(); + + return; + } + + int32_t n = info[1].As().Int32Value(); + + SherpaOnnxCircularBufferPop(buf, n); +} + +static Napi::Number CircularBufferSizeWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxCircularBuffer *buf = + info[0].As>().Data(); + + int32_t size = SherpaOnnxCircularBufferSize(buf); + + return Napi::Number::New(env, size); +} + +static Napi::Number CircularBufferHeadWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxCircularBuffer *buf = + info[0].As>().Data(); + + int32_t size = SherpaOnnxCircularBufferHead(buf); + + return Napi::Number::New(env, size); +} + +static void CircularBufferResetWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be an CircularBuffer pointer.") + .ThrowAsJavaScriptException(); + + return; + } + + SherpaOnnxCircularBuffer *buf = + info[0].As>().Data(); + + SherpaOnnxCircularBufferReset(buf); +} + +static SherpaOnnxSileroVadModelConfig GetSileroVadConfig( + const Napi::Object &obj) { + SherpaOnnxSileroVadModelConfig c; + memset(&c, 0, sizeof(c)); + + if (!obj.Has("sileroVad") || !obj.Get("sileroVad").IsObject()) { + return c; + } + + Napi::Object o = obj.Get("sileroVad").As(); + + if (o.Has("model") && o.Get("model").IsString()) { + Napi::String model = o.Get("model").As(); + std::string s = model.Utf8Value(); + char *p = new char[s.size() + 1]; + std::copy(s.begin(), s.end(), p); + p[s.size()] = 0; + + c.model = p; + } + + if (o.Has("threshold") && o.Get("threshold").IsNumber()) { + c.threshold = o.Get("threshold").As().FloatValue(); + } + + if (o.Has("minSilenceDuration") && o.Get("minSilenceDuration").IsNumber()) { + c.min_silence_duration = + o.Get("minSilenceDuration").As().FloatValue(); + } + + if (o.Has("minSpeechDuration") && o.Get("minSpeechDuration").IsNumber()) { + c.min_speech_duration = + o.Get("minSpeechDuration").As().FloatValue(); + } + + if (o.Has("windowSize") && o.Get("windowSize").IsNumber()) { + c.window_size = o.Get("windowSize").As().Int32Value(); + } + + return c; +} + +static Napi::External +CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsObject()) { + Napi::TypeError::New(env, + "You should pass an object as the first argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsNumber()) { + Napi::TypeError::New(env, + "You should pass an integer as the second argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Object o = info[0].As(); + + SherpaOnnxVadModelConfig c; + memset(&c, 0, sizeof(c)); + c.silero_vad = GetSileroVadConfig(o); + + if (o.Has("sampleRate") && o.Get("sampleRate").IsNumber()) { + c.sample_rate = o.Get("sampleRate").As().Int32Value(); + } + + if (o.Has("numThreads") && o.Get("numThreads").IsNumber()) { + c.num_threads = o.Get("numThreads").As().Int32Value(); + } + + if (o.Has("provider") && o.Get("provider").IsString()) { + Napi::String provider = o.Get("provider").As(); + std::string s = provider.Utf8Value(); + char *p = new char[s.size() + 1]; + std::copy(s.begin(), s.end(), p); + p[s.size()] = 0; + + c.provider = p; + } + + if (o.Has("debug") && + (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) { + if (o.Get("debug").IsBoolean()) { + c.debug = o.Get("debug").As().Value(); + } else { + c.debug = o.Get("debug").As().Int32Value(); + } + } + + float buffer_size_in_seconds = info[1].As().FloatValue(); + + SherpaOnnxVoiceActivityDetector *vad = + SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds); + + if (c.silero_vad.model) { + delete[] c.silero_vad.model; + } + + if (c.provider) { + delete[] c.provider; + } + + return Napi::External::New( + env, vad, [](Napi::Env env, SherpaOnnxVoiceActivityDetector *p) { + SherpaOnnxDestroyVoiceActivityDetector(p); + }); +} + +static void VoiceActivityDetectorAcceptWaveformWrapper( + const 
Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.") + .ThrowAsJavaScriptException(); + + return; + } + + SherpaOnnxVoiceActivityDetector *vad = + info[0].As>().Data(); + + if (!info[1].IsTypedArray()) { + Napi::TypeError::New( + env, "Argument 1 should be a Float32Array containing samples") + .ThrowAsJavaScriptException(); + + return; + } + + Napi::Float32Array samples = info[1].As(); + + SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, samples.Data(), + samples.ElementLength()); +} + +static Napi::Boolean VoiceActivityDetectorEmptyWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxVoiceActivityDetector *vad = + info[0].As>().Data(); + + int32_t is_empty = SherpaOnnxVoiceActivityDetectorEmpty(vad); + + return Napi::Boolean::New(env, is_empty); +} + +static Napi::Boolean VoiceActivityDetectorDetectedWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxVoiceActivityDetector *vad = + info[0].As>().Data(); + + int32_t is_detected = SherpaOnnxVoiceActivityDetectorDetected(vad); + + return Napi::Boolean::New(env, is_detected); +} + +static void VoiceActivityDetectorPopWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.") + .ThrowAsJavaScriptException(); + + return; + } + + SherpaOnnxVoiceActivityDetector *vad = + info[0].As>().Data(); + + SherpaOnnxVoiceActivityDetectorPop(vad); +} + +static void VoiceActivityDetectorClearWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.") + .ThrowAsJavaScriptException(); + + return; + } + + SherpaOnnxVoiceActivityDetector *vad = + info[0].As>().Data(); + + SherpaOnnxVoiceActivityDetectorClear(vad); +} + +static Napi::Object VoiceActivityDetectorFrontWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxVoiceActivityDetector *vad = + info[0].As>().Data(); + + const SherpaOnnxSpeechSegment *segment = + SherpaOnnxVoiceActivityDetectorFront(vad); + + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( + env, const_cast(segment->samples), sizeof(float) * segment->n, + [](Napi::Env /*env*/, void * /*data*/, + const SherpaOnnxSpeechSegment *hint) { + SherpaOnnxDestroySpeechSegment(hint); + }, + segment); + + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, segment->n, arrayBuffer, 0); + + Napi::Object obj = Napi::Object::New(env); + obj.Set(Napi::String::New(env, "start"), segment->start); + obj.Set(Napi::String::New(env, "samples"), float32Array); + + return obj; +} + +static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be a VAD pointer.") + .ThrowAsJavaScriptException(); + + return; + } + + SherpaOnnxVoiceActivityDetector *vad = + info[0].As>().Data(); + + SherpaOnnxVoiceActivityDetectorReset(vad); +} + +void InitVad(Napi::Env env, Napi::Object exports) { + exports.Set(Napi::String::New(env, "createCircularBuffer"), + Napi::Function::New(env, CreateCircularBufferWrapper)); + + exports.Set(Napi::String::New(env, "circularBufferPush"), + Napi::Function::New(env, CircularBufferPushWrapper)); + + exports.Set(Napi::String::New(env, "circularBufferGet"), + Napi::Function::New(env, CircularBufferGetWrapper)); + + exports.Set(Napi::String::New(env, "circularBufferPop"), + Napi::Function::New(env, CircularBufferPopWrapper)); + + exports.Set(Napi::String::New(env, "circularBufferSize"), + Napi::Function::New(env, CircularBufferSizeWrapper)); + + exports.Set(Napi::String::New(env, "circularBufferHead"), + Napi::Function::New(env, CircularBufferHeadWrapper)); + + exports.Set(Napi::String::New(env, "circularBufferReset"), + Napi::Function::New(env, CircularBufferResetWrapper)); + + exports.Set(Napi::String::New(env, "createVoiceActivityDetector"), + Napi::Function::New(env, CreateVoiceActivityDetectorWrapper)); + + exports.Set( + Napi::String::New(env, "voiceActivityDetectorAcceptWaveform"), + Napi::Function::New(env, VoiceActivityDetectorAcceptWaveformWrapper)); + + exports.Set(Napi::String::New(env, "voiceActivityDetectorIsEmpty"), + Napi::Function::New(env, VoiceActivityDetectorEmptyWrapper)); + + exports.Set(Napi::String::New(env, "voiceActivityDetectorIsDetected"), + Napi::Function::New(env, VoiceActivityDetectorDetectedWrapper)); + + exports.Set(Napi::String::New(env, "voiceActivityDetectorPop"), + Napi::Function::New(env, VoiceActivityDetectorPopWrapper)); + + exports.Set(Napi::String::New(env, "voiceActivityDetectorClear"), + Napi::Function::New(env, VoiceActivityDetectorClearWrapper)); + + exports.Set(Napi::String::New(env, "voiceActivityDetectorFront"), + Napi::Function::New(env, VoiceActivityDetectorFrontWrapper)); + + exports.Set(Napi::String::New(env, "voiceActivityDetectorReset"), + Napi::Function::New(env, VoiceActivityDetectorResetWrapper)); +} diff --git 
a/scripts/node-addon-api/src/wave-writer.cc b/scripts/node-addon-api/src/wave-writer.cc
new file mode 100644
index 000000000..28ee71a27
--- /dev/null
+++ b/scripts/node-addon-api/src/wave-writer.cc
@@ -0,0 +1,81 @@
+// scripts/node-addon-api/src/wave-writer.cc
+//
+// Copyright (c) 2024 Xiaomi Corporation
+
+#include <sstream>
+
+#include "napi.h"  // NOLINT
+#include "sherpa-onnx/c-api/c-api.h"
+
+// (filename, {samples: samples, sampleRate: sampleRate})
+static Napi::Boolean WriteWaveWrapper(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+
+  if (info.Length() != 2) {
+    std::ostringstream os;
+    os << "Expect only 2 arguments. Given: " << info.Length();
+
+    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
+
+    return {};
+  }
+
+  if (!info[0].IsString()) {
+    Napi::TypeError::New(env, "Argument 0 should be a string")
+        .ThrowAsJavaScriptException();
+
+    return {};
+  }
+
+  if (!info[1].IsObject()) {
+    Napi::TypeError::New(env, "Argument 1 should be an object")
+        .ThrowAsJavaScriptException();
+
+    return {};
+  }
+
+  Napi::Object obj = info[1].As<Napi::Object>();
+
+  if (!obj.Has("samples")) {
+    Napi::TypeError::New(env, "The argument object should have a field samples")
+        .ThrowAsJavaScriptException();
+
+    return {};
+  }
+
+  if (!obj.Get("samples").IsTypedArray()) {
+    Napi::TypeError::New(env, "The object['samples'] should be a typed array")
+        .ThrowAsJavaScriptException();
+
+    return {};
+  }
+
+  if (!obj.Has("sampleRate")) {
+    Napi::TypeError::New(env,
+                         "The argument object should have a field sampleRate")
+        .ThrowAsJavaScriptException();
+
+    return {};
+  }
+
+  if (!obj.Get("sampleRate").IsNumber()) {
+    Napi::TypeError::New(env, "The object['sampleRate'] should be a number")
+        .ThrowAsJavaScriptException();
+
+    return {};
+  }
+
+  Napi::Float32Array samples = obj.Get("samples").As<Napi::Float32Array>();
+  int32_t sample_rate = obj.Get("sampleRate").As<Napi::Number>().Int32Value();
+
+  int32_t ok =
+      SherpaOnnxWriteWave(samples.Data(), samples.ElementLength(), sample_rate,
+                          info[0].As<Napi::String>().Utf8Value().c_str());
+
+  return Napi::Boolean::New(env, ok);
+}
+
+void InitWaveWriter(Napi::Env env, Napi::Object exports) {
+  exports.Set(Napi::String::New(env, "writeWave"),
+              Napi::Function::New(env, WriteWaveWrapper));
+}
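
Not part of the patch: the sketch below shows how the `Vad`, `readWave`, and `writeWave` exports added above might be used offline on a wave file instead of a microphone. The input name `./test.wav`, the `segment-N.wav` output names, and the 0.5 s tail of silence used to close the final segment are assumptions; the VAD configuration mirrors `test_vad_microphone.js` and presumes a 16 kHz mono recording.

```js
// Hypothetical offline VAD example; assumes ./silero_vad.onnx and a 16 kHz
// mono ./test.wav are next to this script.
const sherpa_onnx = require('sherpa-onnx-node');

const vad = new sherpa_onnx.Vad({
  sileroVad: {
    model: './silero_vad.onnx',
    threshold: 0.5,
    minSpeechDuration: 0.25,
    minSilenceDuration: 0.5,
    windowSize: 512,
  },
  sampleRate: 16000,
  debug: false,
  numThreads: 1,
}, /*bufferSizeInSeconds*/ 60);

const wave = sherpa_onnx.readWave('./test.wav');
const windowSize = vad.config.sileroVad.windowSize;

// Append about 0.5 s of silence so the last speech segment is closed
// (this patch does not expose a flush API).
const samples =
    new Float32Array(wave.samples.length + Math.floor(wave.sampleRate * 0.5));
samples.set(wave.samples);

let index = 0;
for (let i = 0; i + windowSize <= samples.length; i += windowSize) {
  vad.acceptWaveform(samples.slice(i, i + windowSize));

  while (!vad.isEmpty()) {
    // front() returns {samples: Float32Array, start: sample index}.
    const segment = vad.front();
    vad.pop();

    const start = segment.start / wave.sampleRate;
    const duration = segment.samples.length / wave.sampleRate;
    console.log(`segment ${index}: start ${start.toFixed(2)}s, ` +
                `duration ${duration.toFixed(2)}s`);

    sherpa_onnx.writeWave(
        `segment-${index}.wav`,
        {samples: segment.samples, sampleRate: wave.sampleRate});
    index += 1;
  }
}
```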