From 7ae73e75ba771a0fa11dbbf2082dd09d038c49dd Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 26 Jan 2024 22:28:21 +0800 Subject: [PATCH] Run TTS engine service without starting the app. (#553) --- CMakeLists.txt | 2 +- .../app/src/main/AndroidManifest.xml | 3 +- .../sherpa/onnx/tts/engine/MainActivity.kt | 23 +++++- .../k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt | 53 +++++++------ .../sherpa/onnx/tts/engine/TtsService.kt | 13 +++- .../sherpa/onnx/tts/engine/TtsViewModel.kt | 74 +++++++++++++++++++ 6 files changed, 134 insertions(+), 34 deletions(-) create mode 100644 android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsViewModel.kt diff --git a/CMakeLists.txt b/CMakeLists.txt index a4446e8817..19a96ea640 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(sherpa-onnx) -set(SHERPA_ONNX_VERSION "1.9.8") +set(SHERPA_ONNX_VERSION "1.9.9") # Disable warning about # diff --git a/android/SherpaOnnxTtsEngine/app/src/main/AndroidManifest.xml b/android/SherpaOnnxTtsEngine/app/src/main/AndroidManifest.xml index 64d7faafe1..46a9b08a5c 100644 --- a/android/SherpaOnnxTtsEngine/app/src/main/AndroidManifest.xml +++ b/android/SherpaOnnxTtsEngine/app/src/main/AndroidManifest.xml @@ -1,6 +1,7 @@ + xmlns:tools="http://schemas.android.com/tools" + package="com.k2fsa.sherpa.onnx.tts.engine"> 0 && audio.save(filename) if (ok) { - val mediaPlayer = MediaPlayer.create( + stopMediaPlayer() + mediaPlayer = MediaPlayer.create( applicationContext, Uri.fromFile(File(filename)) ) - mediaPlayer.start() + mediaPlayer?.start() } else { Log.i(TAG, "Failed to generate or save audio") } @@ -162,4 +168,15 @@ class MainActivity : ComponentActivity() { } } } + + override fun onDestroy() { + stopMediaPlayer() + super.onDestroy() + } + + private fun stopMediaPlayer() { + mediaPlayer?.stop() + mediaPlayer?.release() + mediaPlayer = null + } } \ No newline at end of file diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt index 1ab09caa25..e4e4c9b79b 100644 --- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt +++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt @@ -1,6 +1,6 @@ package com.k2fsa.sherpa.onnx.tts.engine -import android.app.Application +import android.content.Context import android.content.res.AssetManager import android.util.Log import androidx.compose.runtime.MutableState @@ -21,7 +21,6 @@ object TtsEngine { var lang: String? = null - val speedState: MutableState = mutableStateOf(1.0F) val speakerIdState: MutableState = mutableStateOf(0) @@ -44,19 +43,7 @@ object TtsEngine { private var dataDir: String? = null private var assets: AssetManager? = null - private var application: Application? = null - - fun createTts(application: Application) { - Log.i(TAG, "Init Next-gen Kaldi TTS") - if (tts == null) { - this.application = application - initTts() - } - } - - private fun initTts() { - assets = application?.assets - + init { // The purpose of such a design is to make the CI test easier // Please see // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py @@ -89,9 +76,21 @@ object TtsEngine { // ruleFsts = "vits-zh-aishell3/rule.fst" // lexicon = "lexicon.txt" // lang = "zho" + } + + + fun createTts(context: Context) { + Log.i(TAG, "Init Next-gen Kaldi TTS") + if (tts == null) { + initTts(context) + } + } + + private fun initTts(context: Context) { + assets = context.assets if (dataDir != null) { - val newDir = copyDataDir(modelDir!!) + val newDir = copyDataDir(context, modelDir!!) modelDir = newDir + "/" + modelDir dataDir = newDir + "/" + dataDir assets = null @@ -107,28 +106,28 @@ object TtsEngine { } - private fun copyDataDir(dataDir: String): String { + private fun copyDataDir(context: Context, dataDir: String): String { println("data dir is $dataDir") - copyAssets(dataDir) + copyAssets(context, dataDir) - val newDataDir = application!!.getExternalFilesDir(null)!!.absolutePath + val newDataDir = context.getExternalFilesDir(null)!!.absolutePath println("newDataDir: $newDataDir") return newDataDir } - private fun copyAssets(path: String) { + private fun copyAssets(context: Context, path: String) { val assets: Array? try { - assets = application!!.assets.list(path) + assets = context.assets.list(path) if (assets!!.isEmpty()) { - copyFile(path) + copyFile(context, path) } else { - val fullPath = "${application!!.getExternalFilesDir(null)}/$path" + val fullPath = "${context.getExternalFilesDir(null)}/$path" val dir = File(fullPath) dir.mkdirs() for (asset in assets.iterator()) { val p: String = if (path == "") "" else path + "/" - copyAssets(p + asset) + copyAssets(context, p + asset) } } } catch (ex: IOException) { @@ -136,10 +135,10 @@ object TtsEngine { } } - private fun copyFile(filename: String) { + private fun copyFile(context: Context, filename: String) { try { - val istream = application!!.assets.open(filename) - val newFilename = application!!.getExternalFilesDir(null).toString() + "/" + filename + val istream = context.assets.open(filename) + val newFilename = context.getExternalFilesDir(null).toString() + "/" + filename val ostream = FileOutputStream(newFilename) // Log.i(TAG, "Copying $filename to $newFilename") val buffer = ByteArray(1024) diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt index 29731f24e9..c89f29cc48 100644 --- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt +++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt @@ -56,12 +56,18 @@ Failed to get default language from engine com.k2fsa.sherpa.chapter5 class TtsService : TextToSpeechService() { override fun onCreate() { + Log.i(TAG, "onCreate tts service") super.onCreate() // see https://github.com/Miserlou/Android-SDK-Samples/blob/master/TtsEngine/src/com/example/android/ttsengine/RobotSpeakTtsService.java#L68 onLoadLanguage(TtsEngine.lang, "", "") } + override fun onDestroy() { + Log.i(TAG, "onDestroy tts service") + super.onDestroy() + } + // https://developer.android.com/reference/kotlin/android/speech/tts/TextToSpeechService#onislanguageavailable override fun onIsLanguageAvailable(_lang: String?, _country: String?, _variant: String?): Int { val lang = _lang ?: "" @@ -79,12 +85,15 @@ class TtsService : TextToSpeechService() { // https://developer.android.com/reference/kotlin/android/speech/tts/TextToSpeechService#onLoadLanguage(kotlin.String,%20kotlin.String,%20kotlin.String) override fun onLoadLanguage(_lang: String?, _country: String?, _variant: String?): Int { + Log.i(TAG, "onLoadLanguage: $_lang, $_country") val lang = _lang ?: "" return if (lang == TtsEngine.lang) { + Log.i(TAG, "creating tts, lang :$lang") TtsEngine.createTts(application) TextToSpeech.LANG_AVAILABLE } else { + Log.i(TAG, "lang $lang not supported, tts engine lang: ${TtsEngine.lang}") TextToSpeech.LANG_NOT_SUPPORTED } } @@ -118,7 +127,7 @@ class TtsService : TextToSpeechService() { return } - val ttsCallback = {floatSamples: FloatArray -> + val ttsCallback = { floatSamples: FloatArray -> // convert FloatArray to ByteArray val samples = floatArrayToByteArray(floatSamples) val maxBufferSize: Int = callback.maxBufferSize @@ -136,7 +145,7 @@ class TtsService : TextToSpeechService() { text = text, sid = TtsEngine.speakerId, speed = TtsEngine.speed, - callback=ttsCallback, + callback = ttsCallback, ) callback.done() diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsViewModel.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsViewModel.kt new file mode 100644 index 0000000000..2226c6b93e --- /dev/null +++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsViewModel.kt @@ -0,0 +1,74 @@ +package com.k2fsa.sherpa.onnx.tts.engine + +import android.app.Application +import android.os.FileUtils.ProgressListener +import android.speech.tts.TextToSpeech +import android.speech.tts.TextToSpeech.OnInitListener +import android.speech.tts.UtteranceProgressListener +import android.util.Log +import androidx.lifecycle.ViewModel +import java.util.Locale + +class TtsApp : Application() { + companion object { + lateinit var instance: TtsApp + } + + override fun onCreate() { + super.onCreate() + instance = this + } + +} + +class TtsViewModel : ViewModel() { + + // https://developer.android.com/reference/kotlin/android/speech/tts/TextToSpeech.OnInitListener + private val onInitListener = object : OnInitListener { + override fun onInit(status: Int) { + when (status) { + TextToSpeech.SUCCESS -> Log.i(TAG, "Init tts succeded") + TextToSpeech.ERROR -> Log.i(TAG, "Init tts failed") + else -> Log.i(TAG, "Unknown status $status") + } + } + } + + // https://developer.android.com/reference/kotlin/android/speech/tts/UtteranceProgressListener + private val utteranceProgressListener = object : UtteranceProgressListener() { + override fun onStart(utteranceId: String?) { + Log.i(TAG, "onStart: $utteranceId") + } + + override fun onStop(utteranceId: String?, interrupted: Boolean) { + Log.i(TAG, "onStop: $utteranceId, $interrupted") + super.onStop(utteranceId, interrupted) + } + + override fun onError(utteranceId: String?, errorCode: Int) { + Log.i(TAG, "onError: $utteranceId, $errorCode") + super.onError(utteranceId, errorCode) + } + + override fun onDone(utteranceId: String?) { + Log.i(TAG, "onDone: $utteranceId") + } + + @Deprecated("Deprecated in Java") + override fun onError(utteranceId: String?) { + Log.i(TAG, "onError: $utteranceId") + } + } + + val tts = TextToSpeech(TtsApp.instance, onInitListener, "com.k2fsa.sherpa.onnx.tts.engine") + + init { + tts.setLanguage(Locale(TtsEngine.lang!!)) + tts.setOnUtteranceProgressListener(utteranceProgressListener) + } + + override fun onCleared() { + super.onCleared() + tts.shutdown() + } +} \ No newline at end of file