Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Android Demo for MatchaTTS models. #1683

Merged
merged 2 commits into from
Jan 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/apk-tts-engine.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ jobs:
total: ["40"]
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]


steps:
- uses: actions/checkout@v4
with:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,8 @@ class MainActivity : AppCompatActivity() {
private fun initTts() {
var modelDir: String?
var modelName: String?
var acousticModelName: String?
var vocoder: String?
var ruleFsts: String?
var ruleFars: String?
var lexicon: String?
Expand All @@ -193,8 +195,18 @@ class MainActivity : AppCompatActivity() {
// The purpose of such a design is to make the CI test easier
// Please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
modelDir = null

// VITS -- begin
modelName = null
// VITS -- end

// Matcha -- begin
acousticModelName = null
vocoder = null
// Matcha -- end


modelDir = null
ruleFsts = null
ruleFars = null
lexicon = null
Expand All @@ -217,7 +229,6 @@ class MainActivity : AppCompatActivity() {
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"

Expand All @@ -233,24 +244,47 @@ class MainActivity : AppCompatActivity() {
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"

// Example 6
// vits-melo-tts-zh_en
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
// modelDir = "vits-melo-tts-zh_en"
// modelName = "model.onnx"
// lexicon = "lexicon.txt"
// dictDir = "vits-melo-tts-zh_en/dict"

// Example 7
// matcha-icefall-zh-baker
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
// modelDir = "matcha-icefall-zh-baker"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// lexicon = "lexicon.txt"
// dictDir = "matcha-icefall-zh-baker/dict"

// Example 8
// matcha-icefall-en_US-ljspeech
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
// modelDir = "matcha-icefall-en_US-ljspeech"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"

if (dataDir != null) {
val newDir = copyDataDir(modelDir!!)
modelDir = newDir + "/" + modelDir
dataDir = newDir + "/" + dataDir
assets = null
val newDir = copyDataDir(dataDir!!)
dataDir = "$newDir/$dataDir"
}

if (dictDir != null) {
val newDir = copyDataDir(modelDir!!)
modelDir = newDir + "/" + modelDir
dictDir = modelDir + "/" + "dict"
val newDir = copyDataDir(dictDir!!)
dictDir = "$newDir/$dictDir"
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
assets = null
}

val config = getOfflineTtsConfig(
modelDir = modelDir!!,
modelName = modelName!!,
modelName = modelName ?: "",
acousticModelName = acousticModelName ?: "",
vocoder = vocoder ?: "",
lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,16 @@ class MainActivity : ComponentActivity() {
color = MaterialTheme.colorScheme.background
) {
Scaffold(topBar = {
TopAppBar(title = { Text("Next-gen Kaldi: TTS") })
TopAppBar(title = { Text("Next-gen Kaldi: TTS Engine") })
}) {
Box(modifier = Modifier.padding(it)) {
Column(modifier = Modifier.padding(16.dp)) {
Column {
Text("Speed " + String.format("%.1f", TtsEngine.speed))
Slider(
value = TtsEngine.speedState.value,
onValueChange = {
TtsEngine.speed = it
onValueChange = {
TtsEngine.speed = it
preferenceHelper.setSpeed(it)
},
valueRange = 0.2F..3.0F,
Expand Down Expand Up @@ -138,7 +138,9 @@ class MainActivity : ComponentActivity() {
val filename =
application.filesDir.absolutePath + "/generated.wav"
val ok =
audio.samples.isNotEmpty() && audio.save(filename)
audio.samples.isNotEmpty() && audio.save(
filename
)

if (ok) {
stopMediaPlayer()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.k2fsa.sherpa.onnx.tts.engine

import PreferenceHelper
import android.content.Context
import android.content.res.AssetManager
import android.util.Log
Expand All @@ -11,7 +12,6 @@ import com.k2fsa.sherpa.onnx.getOfflineTtsConfig
import java.io.File
import java.io.FileOutputStream
import java.io.IOException
import PreferenceHelper

object TtsEngine {
var tts: OfflineTts? = null
Expand Down Expand Up @@ -41,6 +41,8 @@ object TtsEngine {

private var modelDir: String? = null
private var modelName: String? = null
private var acousticModelName: String? = null
private var vocoder: String? = null
private var ruleFsts: String? = null
private var ruleFars: String? = null
private var lexicon: String? = null
Expand All @@ -52,8 +54,17 @@ object TtsEngine {
// The purpose of such a design is to make the CI test easier
// Please see
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
modelDir = null
//
// For VITS -- begin
modelName = null
// For VITS -- end

// For Matcha -- begin
acousticModelName = null
vocoder = null
// For Matcha -- end

modelDir = null
ruleFsts = null
ruleFars = null
lexicon = null
Expand Down Expand Up @@ -82,7 +93,6 @@ object TtsEngine {
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
// modelDir = "vits-icefall-zh-aishell3"
// modelName = "model.onnx"
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
// lexicon = "lexicon.txt"
// lang = "zho"
Expand All @@ -101,8 +111,35 @@ object TtsEngine {
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"
// lang = "deu"
}

// Example 6
// vits-melo-tts-zh_en
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
// modelDir = "vits-melo-tts-zh_en"
// modelName = "model.onnx"
// lexicon = "lexicon.txt"
// dictDir = "vits-melo-tts-zh_en/dict"
// lang = "zho"

// Example 7
// matcha-icefall-zh-baker
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
// modelDir = "matcha-icefall-zh-baker"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// lexicon = "lexicon.txt"
// dictDir = "matcha-icefall-zh-baker/dict"
// lang = "zho"

// Example 8
// matcha-icefall-en_US-ljspeech
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
// modelDir = "matcha-icefall-en_US-ljspeech"
// acousticModelName = "model-steps-3.onnx"
// vocoder = "hifigan_v2.onnx"
// dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
// lang = "eng"
}

fun createTts(context: Context) {
Log.i(TAG, "Init Next-gen Kaldi TTS")
Expand All @@ -115,22 +152,22 @@ object TtsEngine {
assets = context.assets

if (dataDir != null) {
val newDir = copyDataDir(context, modelDir!!)
modelDir = "$newDir/$modelDir"
val newDir = copyDataDir(context, dataDir!!)
dataDir = "$newDir/$dataDir"
assets = null
}

if (dictDir != null) {
val newDir = copyDataDir(context, modelDir!!)
modelDir = "$newDir/$modelDir"
dictDir = "$modelDir/dict"
val newDir = copyDataDir(context, dictDir!!)
dictDir = "$newDir/$dictDir"
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
assets = null
}

val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
modelDir = modelDir!!,
modelName = modelName ?: "",
acousticModelName = acousticModelName ?: "",
vocoder = vocoder ?: "",
lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
ruleFsts = ruleFsts ?: "",
Expand Down
20 changes: 19 additions & 1 deletion scripts/apk/build-apk-tts-engine.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -37,22 +37,39 @@ mkdir -p apks
pushd ./android/SherpaOnnxTtsEngine/app/src/main/assets/
model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
acoustic_model_name={{ tts_model.acoustic_model_name }}
vocoder={{ tts_model.vocoder }}
lang={{ tts_model.lang }}
lang_iso_639_3={{ tts_model.lang_iso_639_3 }}

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
tar xf $model_dir.tar.bz2
rm $model_dir.tar.bz2

{% if tts_model.vocoder %}
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/$vocoder
{% endif %}

popd
# Now we are at the project root directory

git checkout .
pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt

# Patch the model-file placeholders in TtsEngine.kt. Each substitution is
# emitted only when the current model defines the corresponding field, so
# placeholders that do not apply to this model stay null.

# VITS: single model file.
{% if tts_model.model_name %}
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
{% endif %}

# Matcha: acoustic model. The guard must test acoustic_model_name (not
# model_name); otherwise a model without model_name never gets its acoustic
# model patched in.
{% if tts_model.acoustic_model_name %}
sed -i.bak s/"acousticModelName = null"/"acousticModelName = \"$acoustic_model_name\""/ ./TtsEngine.kt
{% endif %}

# Matcha: vocoder.
{% if tts_model.vocoder %}
sed -i.bak s/"vocoder = null"/"vocoder = \"$vocoder\""/ ./TtsEngine.kt
{% endif %}

{% if tts_model.rule_fsts %}
rule_fsts={{ tts_model.rule_fsts }}
sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt
Expand Down Expand Up @@ -109,6 +126,7 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
done

rm -rf ./android/SherpaOnnxTtsEngine/app/src/main/assets/$model_dir
rm -fv ./android/SherpaOnnxTtsEngine/app/src/main/assets/*.onnx
{% endfor %}

git checkout .
Expand Down
23 changes: 22 additions & 1 deletion scripts/apk/build-apk-tts.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,38 @@ mkdir -p apks
pushd ./android/SherpaOnnxTts/app/src/main/assets/
model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
acoustic_model_name={{ tts_model.acoustic_model_name }}
vocoder={{ tts_model.vocoder }}
lang={{ tts_model.lang }}

wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
tar xf $model_dir.tar.bz2
rm $model_dir.tar.bz2

{% if tts_model.vocoder %}
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/$vocoder
{% endif %}

popd
# Now we are at the project root directory

git checkout .
pushd android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt


{% if tts_model.model_name %}
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
{% endif %}

{% if tts_model.acoustic_model_name %}
sed -i.bak s/"acousticModelName = null"/"acousticModelName = \"$acoustic_model_name\""/ ./MainActivity.kt
{% endif %}

{% if tts_model.vocoder %}
sed -i.bak s/"vocoder = null"/"vocoder = \"$vocoder\""/ ./MainActivity.kt
{% endif %}


{% if tts_model.rule_fsts %}
rule_fsts={{ tts_model.rule_fsts }}
Expand Down Expand Up @@ -107,6 +126,8 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
done

rm -rf ./android/SherpaOnnxTts/app/src/main/assets/$model_dir
rm -fv ./android/SherpaOnnxTts/app/src/main/assets/*.onnx

{% endfor %}

git checkout .
Expand Down
Loading
Loading