From e6b0e55c086bc4e3d1f0fcf248cf46b67dd0aef7 Mon Sep 17 00:00:00 2001 From: Michael Hansen Date: Thu, 27 Jul 2023 15:51:40 -0500 Subject: [PATCH] Add sentence splitting tests --- src/python_test.py | 11 ++++++++++- src/test.cpp | 16 +++++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/python_test.py b/src/python_test.py index 38385d0..25f0038 100644 --- a/src/python_test.py +++ b/src/python_test.py @@ -22,7 +22,7 @@ de_phonemes = phonemize_espeak("licht!", "de") # "lˈɪçt!" where "ç" is decomposed into two codepoints -assert de_phonemes == [["l", "ˈ", "ɪ", "c", "̧", "t", "!"]] +assert de_phonemes == [["l", "ˈ", "ɪ", "c", "̧", "t", "!"]], de_phonemes # phoneme -> [id, ...] espeak_map = get_espeak_map() @@ -44,6 +44,15 @@ # ----------------------------------------------------------------------------- +# Capitalization is required to get espeak to split the sentences. +en_phonemes = phonemize_espeak("Test 1. Test2.", "en-us") +assert en_phonemes == [ + ["t", "ˈ", "ɛ", "s", "t", " ", "w", "ˈ", "ʌ", "n", "."], + ["t", "ˈ", "ɛ", "s", "t", " ", "t", "ˈ", "u", "ː", "."], +], en_phonemes + +# ----------------------------------------------------------------------------- + codepoints_map = get_codepoints_map() assert "uk" in codepoints_map, "uk not supported" uk_phonemes = phonemize_codepoints("ВЕСЕ́ЛКА") diff --git a/src/test.cpp b/src/test.cpp index 55177e6..f580b60 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -37,6 +37,8 @@ phonemeString(const std::vector> &phonemes) { phonemeStr << una::utf32to8(phonemeU32Str); } + + phonemeStr << "\n"; } return phonemeStr.str(); @@ -82,11 +84,23 @@ int main(int argc, char *argv[]) { piper::phonemize_eSpeak("this, is: a; test.", phonemeConfig, phonemes); std::string phonemeStr = phonemeString(phonemes); - if (phonemeStr != "ðˈɪs, ɪz: ˈeɪ; tˈɛst.") { + if (phonemeStr != "ðˈɪs, ɪz: ˈeɪ; tˈɛst.\n") { std::cerr << "punctuation test: " << phonemeStr << std::endl; return 1; } + // Check sentence splitting. + phonemes.clear(); + + // Capitalization is required to get espeak to split the sentences. + piper::phonemize_eSpeak("Test 1. Test 2.", phonemeConfig, phonemes); + + phonemeStr = phonemeString(phonemes); + if (phonemeStr != "tˈɛst wˈʌn.\ntˈɛst tˈuː.\n") { + std::cerr << "sentence split: " << phonemeStr << std::endl; + return 1; + } + // Check "ВЕСЕ́ЛКА" in Ukrainian piper::CodepointsPhonemeConfig codepointsConfig; phonemes.clear();