Skip to content

Commit

Permalink
Add JNI binding for speaker embedding extractor.
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Jan 22, 2024
1 parent c8ab603 commit ed89ada
Show file tree
Hide file tree
Showing 8 changed files with 321 additions and 67 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class MainActivity : ComponentActivity() {
}

ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)

}

@Deprecated("Deprecated in Java")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package com.k2fsa.sherpa.onnx

import android.content.res.AssetManager


/**
 * Settings handed to the native layer when a [SpeakerEmbeddingExtractor] is constructed.
 *
 * The values are passed through unchanged to the JNI constructor; how the native
 * side interprets them is not visible from this file.
 *
 * @property model Model to load (presumably a path to an `.onnx` file — confirm against the native side).
 * @property numThreads Thread count forwarded to the native layer; defaults to 1.
 * @property debug Debug flag forwarded to the native layer; defaults to `false`.
 * @property provider Provider name forwarded to the native layer; defaults to `"cpu"`.
 */
data class SpeakerEmbeddingExtractorConfig(
val model: String,
var numThreads: Int = 1,
var debug: Boolean = false,
var provider: String = "cpu",
)

/**
 * Kotlin-side wrapper around a native speaker-embedding stream.
 *
 * The wrapper owns the native object identified by [ptr] and deletes it either
 * when [release] is called explicitly or when the instance is finalized.
 *
 * @property ptr Handle of the native stream object; set to 0 once the native
 * object has been deleted.
 */
class SpeakerEmbeddingExtractorStream(var ptr: Long) {
    /** Forwards [samples] together with their [sampleRate] to the native stream. */
    fun acceptWaveform(samples: FloatArray, sampleRate: Int) = acceptWaveform(ptr, samples, sampleRate)

    /** Invokes the native `inputFinished` for this stream. */
    fun inputFinished() = inputFinished(ptr)

    /**
     * Deletes the native stream if it has not been deleted yet.
     *
     * Guarded by the handle value so that an explicit [release] followed by the
     * garbage collector's finalization does not call the native `delete` twice.
     */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    /** Frees the native stream immediately; calling it more than once is harmless. */
    fun release() = finalize()

    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)

    private external fun inputFinished(ptr: Long)

    private external fun delete(ptr: Long)

    companion object {
        init {
            // The native methods above are resolved from this library.
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}

/**
 * Kotlin-side wrapper around a native speaker-embedding extractor.
 *
 * @param assetManager When non-null, the native object is created through the
 * asset-manager based constructor; when null, the file based constructor is used.
 * @param config Settings forwarded to the native constructor.
 */
class SpeakerEmbeddingExtractor(
    assetManager: AssetManager? = null,
    config: SpeakerEmbeddingExtractorConfig,
) {
    // Handle of the native extractor; 0 once the native object has been deleted.
    private var ptr: Long =
        if (assetManager != null) new(assetManager, config) else newFromFile(config)

    /**
     * Deletes the native extractor if it has not been deleted yet.
     *
     * Guarded by the handle value so that an explicit [release] followed by the
     * garbage collector's finalization does not call the native `delete` twice.
     */
    protected fun finalize() {
        if (ptr != 0L) {
            delete(ptr)
            ptr = 0
        }
    }

    /** Frees the native extractor immediately; calling it more than once is harmless. */
    fun release() = finalize()

    /** Creates a new native stream for this extractor and wraps it. */
    fun createStream(): SpeakerEmbeddingExtractorStream =
        SpeakerEmbeddingExtractorStream(createStream(ptr))

    /** Returns the native `isReady` result for [stream]. */
    fun isReady(stream: SpeakerEmbeddingExtractorStream) = isReady(ptr, stream.ptr)

    /** Returns the array produced by the native `compute` call for [stream]. */
    fun compute(stream: SpeakerEmbeddingExtractorStream) = compute(ptr, stream.ptr)

    private external fun new(
        assetManager: AssetManager,
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun newFromFile(
        config: SpeakerEmbeddingExtractorConfig,
    ): Long

    private external fun delete(ptr: Long)

    private external fun createStream(ptr: Long): Long

    private external fun isReady(ptr: Long, streamPtr: Long): Boolean

    private external fun compute(ptr: Long, streamPtr: Long): FloatArray

    companion object {
        init {
            // The native methods above are resolved from this library.
            System.loadLibrary("sherpa-onnx-jni")
        }
    }
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.k2fsa.sherpa.onnx.speaker.identification.screens

import android.Manifest
import android.annotation.SuppressLint
import android.app.Activity
import android.content.pm.PackageManager
import android.media.AudioAttributes
Expand All @@ -20,6 +21,7 @@ import androidx.compose.foundation.layout.fillMaxWidth
import androidx.compose.foundation.layout.padding
import androidx.compose.foundation.layout.width
import androidx.compose.material3.Button
import androidx.compose.material3.MaterialTheme
import androidx.compose.material3.OutlinedTextField
import androidx.compose.material3.Text
import androidx.compose.runtime.Composable
Expand All @@ -31,6 +33,7 @@ import androidx.compose.ui.Alignment
import androidx.compose.ui.Modifier
import androidx.compose.ui.platform.LocalContext
import androidx.compose.ui.res.stringResource
import androidx.compose.ui.text.font.FontWeight
import androidx.compose.ui.tooling.preview.Preview
import androidx.compose.ui.unit.dp
import androidx.core.app.ActivityCompat
Expand All @@ -42,11 +45,26 @@ private var audioRecord: AudioRecord? = null

// Chunks of recorded samples; its element count is logged when recording stops.
private var sampleList: MutableList<FloatArray>? = null

// NOTE(review): presumably collects one sample list per recording — not used in
// the visible part of this file; confirm against the rest of the screen code.
private var allSampleList: MutableList<MutableList<FloatArray>>? = null

// Counter incremented each time a recording is stopped; shown on the register
// screen as "Number of recordings".
private var number = 0

@SuppressLint("UnrememberedMutableState")
@Preview
@Composable
fun RegisterScreen(modifier: Modifier = Modifier) {
val activity = LocalContext.current as Activity

var firstTime by remember { mutableStateOf(true) }
if (firstTime) {
firstTime = false
// clear states

number = 0
}

var numberAudio by mutableStateOf(number)

Box(
modifier = Modifier.fillMaxSize(),
contentAlignment = Alignment.TopCenter
Expand Down Expand Up @@ -108,6 +126,8 @@ fun RegisterScreen(modifier: Modifier = Modifier) {
}

Log.i(TAG, "Recording is stopped. ${sampleList?.count()}")

++number
}
}
} else {
Expand Down Expand Up @@ -155,6 +175,12 @@ fun RegisterScreen(modifier: Modifier = Modifier) {

Column(horizontalAlignment = Alignment.CenterHorizontally) {
SpeakerNameRow(speakerName = speakerName, onValueChange = onSpeakerNameChange)
Text(
"Number of recordings: ${numberAudio}",
modifier = modifier.padding(24.dp),
style = MaterialTheme.typography.headlineMedium,
fontWeight = FontWeight.Bold,
)
RegisterSpeakerButtonRow(
modifier,
isStarted = isStarted,
Expand All @@ -177,7 +203,9 @@ fun SpeakerNameRow(
Text("Please input the speaker name")
},
singleLine = true,
modifier = modifier.fillMaxWidth().padding(8.dp)
modifier = modifier
.fillMaxWidth()
.padding(8.dp)
)
}

Expand Down
32 changes: 32 additions & 0 deletions kotlin-api-examples/Main.kt
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,43 @@ fun callback(samples: FloatArray): Unit {
}

/** Runs the speaker-recognition, TTS and ASR examples, in that order. */
fun main() {
    testSpeakerRecognition()
    testTts()
    for (type in listOf("transducer", "zipformer2-ctc")) {
        testAsr(type)
    }
}

/**
 * Reads the wave file [filename] and computes its embedding with [extractor].
 *
 * The stream created for the computation is always released before returning,
 * including when the readiness check or the computation itself throws.
 *
 * @param extractor Extractor used to create the stream and compute the embedding.
 * @param filename Wave file handed to `WaveReader.readWaveFromFile`.
 * @return The array returned by [SpeakerEmbeddingExtractor.compute].
 * @throws IllegalStateException if the extractor reports the stream as not ready
 * after all samples have been supplied.
 */
fun computeEmbedding(extractor: SpeakerEmbeddingExtractor, filename: String): FloatArray {
    // The reader returns the samples at index 0 and the sample rate at index 1.
    val objArray = WaveReader.readWaveFromFile(
        filename = filename,
    )
    val samples: FloatArray = objArray[0] as FloatArray
    val sampleRate: Int = objArray[1] as Int

    val stream = extractor.createStream()
    try {
        stream.acceptWaveform(sampleRate = sampleRate, samples = samples)
        stream.inputFinished()
        check(extractor.isReady(stream)) { "Extractor is not ready for $filename" }

        return extractor.compute(stream)
    } finally {
        // Free the native stream even if one of the calls above throws.
        stream.release()
    }
}

/**
 * Example: builds an extractor from a local model file, runs three wave files
 * through it and prints the length of the first embedding.
 *
 * The extractor's native object is released before returning, including when
 * one of the computations throws.
 */
fun testSpeakerRecognition() {
    val config = SpeakerEmbeddingExtractorConfig(
        model = "./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx",
    )
    val extractor = SpeakerEmbeddingExtractor(config = config)

    try {
        val embedding1a = computeEmbedding(extractor, "./speaker1_a_cn_16k.wav")
        // The other two files are still processed, in the same order as before,
        // but their embeddings are not inspected.
        computeEmbedding(extractor, "./speaker2_a_cn_16k.wav")
        computeEmbedding(extractor, "./speaker1_b_cn_16k.wav")
        println(embedding1a.size)
    } finally {
        extractor.release()
    }
}

fun testTts() {
// see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
Expand Down
1 change: 1 addition & 0 deletions kotlin-api-examples/Speaker.kt
18 changes: 17 additions & 1 deletion kotlin-api-examples/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,22 @@ export LD_LIBRARY_PATH=$PWD/build/lib:$LD_LIBRARY_PATH

cd ../kotlin-api-examples

# Download the speaker-embedding model unless it is already present.
speaker_model=3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx
if [ ! -f "./$speaker_model" ]; then
  wget -q "https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/$speaker_model"
fi

# Download each test wave file that is not already present.
for speaker_wav in speaker1_a_cn_16k.wav speaker1_b_cn_16k.wav speaker2_a_cn_16k.wav; do
  if [ ! -f "./$speaker_wav" ]; then
    wget -q "https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/$speaker_wav"
  fi
done

if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then
git lfs install
git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21
Expand All @@ -46,7 +62,7 @@ if [ ! -f ./vits-piper-en_US-amy-low/en_US-amy-low.onnx ]; then
rm vits-piper-en_US-amy-low.tar.bz2
fi

kotlinc-jvm -include-runtime -d main.jar Main.kt WaveReader.kt SherpaOnnx.kt faked-asset-manager.kt Tts.kt
kotlinc-jvm -include-runtime -d main.jar Main.kt WaveReader.kt SherpaOnnx.kt faked-asset-manager.kt Tts.kt Speaker.kt

ls -lh main.jar

Expand Down
Loading

0 comments on commit ed89ada

Please sign in to comment.