From e1a88a799f0407605a38013d8cd774d7790da72c Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 7 Feb 2025 15:18:02 +0800 Subject: [PATCH] Add Go API for Kokoro TTS 1.0 (#1804) --- .github/workflows/test-go-package.yaml | 5 ++++ .github/workflows/test-go.yaml | 5 ++++ go-api-examples/non-streaming-tts/main.go | 2 ++ .../non-streaming-tts/run-kokoro-zh-en.sh | 23 +++++++++++++++++++ .../non-streaming-tts/run-kokoro-zh-en.sh | 1 + scripts/go/sherpa_onnx.go | 8 +++++++ 6 files changed, 44 insertions(+) create mode 100755 go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh create mode 120000 scripts/go/_internal/non-streaming-tts/run-kokoro-zh-en.sh diff --git a/.github/workflows/test-go-package.yaml b/.github/workflows/test-go-package.yaml index f13d7eb74..f2e4cb1bc 100644 --- a/.github/workflows/test-go-package.yaml +++ b/.github/workflows/test-go-package.yaml @@ -209,6 +209,11 @@ jobs: go build ls -lh + echo "Test kokoro zh+en" + ./run-kokoro-zh-en.sh + rm -rf kokoro-multi-* + ls -lh + echo "Test kokoro en" ./run-kokoro-en.sh rm -rf kokoro-en-* diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml index c3df3d75e..8d68076d7 100644 --- a/.github/workflows/test-go.yaml +++ b/.github/workflows/test-go.yaml @@ -224,6 +224,11 @@ jobs: go build ls -lh + echo "Test kokoro zh+en" + ./run-kokoro-zh-en.sh + rm -rf kokoro-multi-* + ls -lh + echo "Test kokoro en" ./run-kokoro-en.sh rm -rf kokoro-en-* diff --git a/go-api-examples/non-streaming-tts/main.go b/go-api-examples/non-streaming-tts/main.go index f3df7f105..8a5d03a30 100644 --- a/go-api-examples/non-streaming-tts/main.go +++ b/go-api-examples/non-streaming-tts/main.go @@ -37,6 +37,8 @@ func main() { flag.StringVar(&config.Model.Kokoro.Voices, "kokoro-voices", "", "Path to voices.bin for Kokoro") flag.StringVar(&config.Model.Kokoro.Tokens, "kokoro-tokens", "", "Path to tokens.txt for Kokoro") flag.StringVar(&config.Model.Kokoro.DataDir, "kokoro-data-dir", "", "Path to espeak-ng-data for Kokoro") + flag.StringVar(&config.Model.Kokoro.DictDir, "kokoro-dict-dir", "", "Path to dict for Kokoro") + flag.StringVar(&config.Model.Kokoro.Lexicon, "kokoro-lexicon", "", "Path to lexicon files for Kokoro") flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower") flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") diff --git a/go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh b/go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh new file mode 100755 index 000000000..4ed74f90d --- /dev/null +++ b/go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 + tar xf kokoro-multi-lang-v1_0.tar.bz2 + rm kokoro-multi-lang-v1_0.tar.bz2 +fi + +go mod tidy +go build + +./non-streaming-tts \ + --kokoro-model=./kokoro-multi-lang-v1_0/model.onnx \ + --kokoro-voices=./kokoro-multi-lang-v1_0/voices.bin \ + --kokoro-tokens=./kokoro-multi-lang-v1_0/tokens.txt \ + --kokoro-data-dir=./kokoro-multi-lang-v1_0/espeak-ng-data \ + --kokoro-dict-dir=./kokoro-multi-lang-v1_0/dict \ + --kokoro-lexicon=./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt \ + --debug=1 \ + --output-filename=./test-kokoro-zh-en.wav \ + "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?" diff --git a/scripts/go/_internal/non-streaming-tts/run-kokoro-zh-en.sh b/scripts/go/_internal/non-streaming-tts/run-kokoro-zh-en.sh new file mode 120000 index 000000000..9710b21ac --- /dev/null +++ b/scripts/go/_internal/non-streaming-tts/run-kokoro-zh-en.sh @@ -0,0 +1 @@ +../../../../go-api-examples/non-streaming-tts/run-kokoro-zh-en.sh \ No newline at end of file diff --git a/scripts/go/sherpa_onnx.go b/scripts/go/sherpa_onnx.go index 37f596627..d374f519a 100644 --- a/scripts/go/sherpa_onnx.go +++ b/scripts/go/sherpa_onnx.go @@ -687,6 +687,8 @@ type OfflineTtsKokoroModelConfig struct { Voices string // Path to the voices.bin for kokoro Tokens string // Path to tokens.txt DataDir string // Path to espeak-ng-data directory + DictDir string // Path to dict directory + Lexicon string // Path to lexicon files LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed } @@ -798,6 +800,12 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { c.model.kokoro.data_dir = C.CString(config.Model.Kokoro.DataDir) defer C.free(unsafe.Pointer(c.model.kokoro.data_dir)) + c.model.kokoro.dict_dir = C.CString(config.Model.Kokoro.DictDir) + defer C.free(unsafe.Pointer(c.model.kokoro.dict_dir)) + + c.model.kokoro.lexicon = C.CString(config.Model.Kokoro.Lexicon) + defer C.free(unsafe.Pointer(c.model.kokoro.lexicon)) + c.model.kokoro.length_scale = C.float(config.Model.Kokoro.LengthScale) c.model.num_threads = C.int(config.Model.NumThreads)