Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add C API for Kokoro TTS 1.0 #1801

Merged
merged 1 commit into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions .github/workflows/c-api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,27 @@ jobs:
rm ./kws-c-api
rm -rf sherpa-onnx-kws-*

- name: Test Kokoro TTS (zh+en)
  shell: bash
  run: |
    # Build the example against the installed C API.
    gcc -o kokoro-tts-zh-en-c-api ./c-api-examples/kokoro-tts-zh-en-c-api.c \
      -I ./build/install/include \
      -L ./build/install/lib/ \
      -l sherpa-onnx-c-api \
      -l onnxruntime

    # Fetch the multi-lingual Kokoro model used by the example.
    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
    tar xf kokoro-multi-lang-v1_0.tar.bz2
    rm kokoro-multi-lang-v1_0.tar.bz2

    export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
    export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

    ./kokoro-tts-zh-en-c-api

    # Clean up. The archive extracts to kokoro-multi-lang-v1_0, so that is
    # the directory that has to be removed.
    rm ./kokoro-tts-zh-en-c-api
    rm -rf kokoro-multi-lang-*

- name: Test Kokoro TTS (en)
shell: bash
run: |
Expand Down
3 changes: 3 additions & 0 deletions c-api-examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ if(SHERPA_ONNX_ENABLE_TTS)

add_executable(kokoro-tts-en-c-api kokoro-tts-en-c-api.c)
target_link_libraries(kokoro-tts-en-c-api sherpa-onnx-c-api)

add_executable(kokoro-tts-zh-en-c-api kokoro-tts-zh-en-c-api.c)
target_link_libraries(kokoro-tts-zh-en-c-api sherpa-onnx-c-api)
endif()

if(SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION)
Expand Down
82 changes: 82 additions & 0 deletions c-api-examples/kokoro-tts-zh-en-c-api.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// c-api-examples/kokoro-tts-zh-en-c-api.c
//
// Copyright (c) 2025 Xiaomi Corporation

// This file shows how to use sherpa-onnx C API
// for English + Chinese TTS with Kokoro.
//
// clang-format off
/*
Usage


wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
tar xf kokoro-multi-lang-v1_0.tar.bz2
rm kokoro-multi-lang-v1_0.tar.bz2

./kokoro-tts-zh-en-c-api

*/
// clang-format on

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "sherpa-onnx/c-api/c-api.h"

static int32_t ProgressCallback(const float *samples, int32_t num_samples,
float progress) {
fprintf(stderr, "Progress: %.3f%%\n", progress * 100);
// return 1 to continue generating
// return 0 to stop generating
return 1;
}

// Synthesizes one mixed Chinese + English sentence with the Kokoro model
// found in ./kokoro-multi-lang-v1_0 and writes the result to
// ./generated-kokoro-zh-en.wav.
//
// Command-line arguments are ignored; all paths are hard-coded below.
//
// Returns 0 on success and EXIT_FAILURE if the TTS engine cannot be created,
// no audio is produced, or the wave file cannot be written.
int32_t main(int32_t argc, char *argv[]) {
  (void)argc;
  (void)argv;

  // Zero the whole config so that every field not set below is 0/NULL.
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.kokoro.model = "./kokoro-multi-lang-v1_0/model.onnx";
  config.model.kokoro.voices = "./kokoro-multi-lang-v1_0/voices.bin";
  config.model.kokoro.tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
  config.model.kokoro.data_dir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
  config.model.kokoro.dict_dir = "./kokoro-multi-lang-v1_0/dict";
  // Two lexicon files, passed as a single comma-separated string.
  config.model.kokoro.lexicon =
      "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/"
      "lexicon-zh.txt";

  config.model.num_threads = 2;

  // If you don't want to see debug messages, please set it to 0
  config.model.debug = 1;

  const char *filename = "./generated-kokoro-zh-en.wav";
  const char *text =
      "中英文语音合成测试。This is generated by next generation Kaldi using "
      "Kokoro without Misaki. 你觉得中英文说的如何呢?";

  const SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
  if (!tts) {
    // Without this check the NULL handle would be passed to the generate
    // call below.
    fprintf(stderr, "Failed to create the TTS engine. Please check the model files.\n");
    return EXIT_FAILURE;
  }

  int32_t sid = 0;    // there are 53 speakers
  float speed = 1.0;  // larger -> faster in speech speed

#if 0
  // If you don't want to use a callback, then please enable this branch
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerate(tts, text, sid, speed);
#else
  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerateWithProgressCallback(tts, text, sid, speed,
                                                       ProgressCallback);
#endif

  if (!audio) {
    // Defensive: audio is dereferenced right below.
    fprintf(stderr, "Failed to generate audio.\n");
    SherpaOnnxDestroyOfflineTts(tts);
    return EXIT_FAILURE;
  }

  // SherpaOnnxWriteWave reports failure with a zero return value.
  int32_t ok = SherpaOnnxWriteWave(audio->samples, audio->n,
                                   audio->sample_rate, filename);

  // Release resources on both the success and the failure path.
  SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  SherpaOnnxDestroyOfflineTts(tts);

  if (!ok) {
    fprintf(stderr, "Failed to write: %s\n", filename);
    return EXIT_FAILURE;
  }

  fprintf(stderr, "Input text is: %s\n", text);
  fprintf(stderr, "Speaker ID is: %d\n", sid);
  fprintf(stderr, "Saved to: %s\n", filename);

  return 0;
}
4 changes: 4 additions & 0 deletions sherpa-onnx/c-api/c-api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,10 @@ static sherpa_onnx::OfflineTtsConfig GetOfflineTtsConfig(
SHERPA_ONNX_OR(config->model.kokoro.data_dir, "");
tts_config.model.kokoro.length_scale =
SHERPA_ONNX_OR(config->model.kokoro.length_scale, 1.0);
tts_config.model.kokoro.dict_dir =
SHERPA_ONNX_OR(config->model.kokoro.dict_dir, "");
tts_config.model.kokoro.lexicon =
SHERPA_ONNX_OR(config->model.kokoro.lexicon, "");

tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
tts_config.model.debug = config->model.debug;
Expand Down
2 changes: 2 additions & 0 deletions sherpa-onnx/c-api/c-api.h
Original file line number Diff line number Diff line change
Expand Up @@ -926,6 +926,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsKokoroModelConfig {
const char *data_dir;

float length_scale; // < 1, faster in speech speed; > 1, slower in speed
const char *dict_dir;
const char *lexicon;
} SherpaOnnxOfflineTtsKokoroModelConfig;

SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
Expand Down
16 changes: 16 additions & 0 deletions sherpa-onnx/csrc/offline-tts-kokoro-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_IMPL_H_
#define SHERPA_ONNX_CSRC_OFFLINE_TTS_KOKORO_IMPL_H_

#include <iomanip>
#include <ios>
#include <memory>
#include <string>
#include <strstream>
Expand Down Expand Up @@ -188,6 +190,20 @@ class OfflineTtsKokoroImpl : public OfflineTtsImpl {
SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str());
#else
SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
#endif
std::ostringstream os;
os << "In bytes (hex):\n";
const auto p = reinterpret_cast<const uint8_t *>(text.c_str());
for (int32_t i = 0; i != text.size(); ++i) {
os << std::setw(2) << std::setfill('0') << std::hex
<< static_cast<uint32_t>(p[i]) << " ";
}
os << "\n";

#if __OHOS__
SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
#else
SHERPA_ONNX_LOGE("%s", os.str().c_str());
#endif
}

Expand Down
Loading