Skip to content

Commit

Permalink
Support specifying the espeak-ng voice for Kokoro TTS models.
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj committed Feb 10, 2025
1 parent d5da943 commit ceefa1f
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 5 deletions.
9 changes: 5 additions & 4 deletions sherpa-onnx/csrc/kokoro-multi-lang-lexicon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ class KokoroMultiLangLexicon::Impl {
// https://en.cppreference.com/w/cpp/regex
// https://stackoverflow.com/questions/37989081/how-to-use-unicode-range-in-c-regex
std::string expr =
"([;:,.?!'\"\\(\\)“”])|([\\u4e00-\\u9fff]+)|([\\u0000-\\u007f]+)";
"([;:,.?!'\"\\(\\)“”])|([\\u4e00-\\u9fff]+)|([äöüßÄÖÜ\\u0000-\\u007f]+"
")";

auto ws = ToWideString(text);
std::wstring wexpr = ToWideString(expr);
Expand All @@ -127,7 +128,7 @@ class KokoroMultiLangLexicon::Impl {
if (debug_) {
SHERPA_ONNX_LOGE("Non-Chinese: %s", ms.c_str());
}
ids_vec = ConvertEnglishToTokenIDs(ms);
ids_vec = ConvertEnglishToTokenIDs(ms, meta_data_.voice);
} else {
if (debug_) {
SHERPA_ONNX_LOGE("Chinese: %s", ms.c_str());
Expand Down Expand Up @@ -257,7 +258,7 @@ class KokoroMultiLangLexicon::Impl {
}

std::vector<std::vector<int32_t>> ConvertEnglishToTokenIDs(
const std::string &text) const {
const std::string &text, const std::string &voice) const {
std::vector<std::string> words = SplitUtf8(text);
if (debug_) {
std::ostringstream os;
Expand Down Expand Up @@ -315,7 +316,7 @@ class KokoroMultiLangLexicon::Impl {

piper::eSpeakPhonemeConfig config;

config.voice = "en-us";
config.voice = voice;

std::vector<std::vector<piper::Phoneme>> phonemes;

Expand Down
2 changes: 1 addition & 1 deletion sherpa-onnx/csrc/offline-tts-kokoro-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ class OfflineTtsKokoroImpl : public OfflineTtsImpl {
}

std::vector<TokenIDs> token_ids =
frontend_->ConvertTextToTokenIds(text, "en-us");
frontend_->ConvertTextToTokenIds(text, meta_data.voice);

if (token_ids.empty() ||
(token_ids.size() == 1 && token_ids[0].tokens.empty())) {
Expand Down
2 changes: 2 additions & 0 deletions sherpa-onnx/csrc/offline-tts-kokoro-model-meta-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ struct OfflineTtsKokoroModelMetaData {
int32_t version = 1;
int32_t has_espeak = 1;
int32_t max_token_len = 0;

std::string voice;
};

} // namespace sherpa_onnx
Expand Down
2 changes: 2 additions & 0 deletions sherpa-onnx/csrc/offline-tts-kokoro-model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ class OfflineTtsKokoroModel::Impl {
SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.version, "version", 1);
SHERPA_ONNX_READ_META_DATA(meta_data_.num_speakers, "n_speakers");
SHERPA_ONNX_READ_META_DATA(meta_data_.has_espeak, "has_espeak");
SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.voice, "voice",
"en-us");

if (config_.debug) {
std::vector<std::string> speaker_names;
Expand Down

0 comments on commit ceefa1f

Please sign in to comment.