Skip to content

Commit

Permalink
Add version 3.0.0
Browse files Browse the repository at this point in the history
  • Loading branch information
robinhad committed Sep 14, 2022
1 parent 7af6095 commit a163565
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 20 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ Link to source code and models -> [https://github.com/robinhad/ukrainian-tts](ht

Code is licensed under `MIT License`, models are under `GNU GPL v3 License`.
# Support
If you like my work, please support -> [https://send.monobank.ua/jar/48iHq4xAXm](https://send.monobank.ua/jar/48iHq4xAXm)
# Example
If you like my work, please support -> [https://send.monobank.ua/jar/48iHq4xAXm](https://send.monobank.ua/jar/48iHq4xAXm)
For collaboration and questions, please contact me here: [Telegram https://t.me/robinhad](https://t.me/robinhad) [Twitter https://twitter.com/robinhad](https://twitter.com/robinhad)
You're welcome to join the UA Speech Recognition and Synthesis community: [Telegram https://t.me/speech_recognition_uk](https://t.me/speech_recognition_uk)
# Examples

`Mykyta (male)`:

Expand Down Expand Up @@ -53,7 +55,8 @@ tts-server --model_path path/to/model.pth \
# Attribution 🤝

- Model training - [Yurii Paniv @robinhad](https://github.com/robinhad)
- Mykyta, Olena and Lada dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv)
- Mykyta, Olena, Lada, Dmytro, Olha dataset - [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv)
- Dmytro voice - [Dmytro Chaplynskyi @dchaplinsky](https://github.com/dchaplinsky)
- Silence cutting using [HMM-GMM](https://github.com/proger/uk) - [Volodymyr Kyrylov @proger](https://github.com/proger)
- Autostress (with dictionary) using [ukrainian-word-stress](https://github.com/lang-uk/ukrainian-word-stress) - [Oleksiy Syvokon @asivokon](https://github.com/asivokon)
- Autostress (with model) using [ukrainian-accentor](https://github.com/egorsmkv/ukrainian-accentor) - [Bohdan Mykhailenko @NeonBohdan](https://github.com/NeonBohdan) + [Yehor Smoliakov @egorsmkv](https://github.com/egorsmkv)
36 changes: 21 additions & 15 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ class VoiceOption(Enum):
# User-facing voice labels: "<name> (<gender>) <emoji>".
# "жіночий" = female (👩), "чоловічий" = male (👨); the emoji must agree with
# the gender word, so Dmytro carries the same male emoji as Mykyta.
Olena = "Олена (жіночий) 👩"
Mykyta = "Микита (чоловічий) 👨"
Lada = "Лада (жіночий) 👩"
Dmytro = "Дмитро (чоловічий) 👨"
Olga = "Ольга (жіночий) 👩"


def download(url, file_name):
Expand All @@ -33,7 +35,7 @@ def download(url, file_name):


print("downloading uk/mykyta/vits-tts")
release_number = "v3.0.0-alpha"
release_number = "v3.0.0"
model_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/model-inference.pth"
config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/config.json"
speakers_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/speakers.pth"
Expand Down Expand Up @@ -71,11 +73,14 @@ def tts(text: str, voice: str, stress: str):
autostress_with_model = (
True if stress == StressOption.AutomaticStressWithModel.value else False
)
speaker_name = "mykyta"
if voice == VoiceOption.Olena.value:
speaker_name = "olena"
elif voice == VoiceOption.Lada.value:
speaker_name = "lada"
voice_mapping = {
VoiceOption.Olena.value: "olena",
VoiceOption.Mykyta.value: "mykyta",
VoiceOption.Lada.value: "lada",
VoiceOption.Dmytro.value: "dmytro",
VoiceOption.Olga.value: "olga",
}
speaker_name = voice_mapping[voice]
text = preprocess_text(text, autostress_with_model)
text_limit = 7200
text = (
Expand All @@ -98,23 +103,24 @@ def tts(text: str, voice: str, stress: str):
iface = gr.Interface(
fn=tts,
inputs=[
gr.inputs.Textbox(
gr.components.Textbox(
label="Input",
default="Введіть, будь ласка, своє р+ечення.",
value="Введіть, будь ласка, своє р+ечення.",
),
gr.inputs.Radio(
gr.components.Radio(
label="Голос",
choices=[option.value for option in VoiceOption],
default=VoiceOption.Olena.value,
value=VoiceOption.Olena.value,
),
gr.inputs.Radio(
gr.components.Radio(
label="Наголоси",
choices=[option.value for option in StressOption],
value=StressOption.AutomaticStress.value
),
],
outputs=[
gr.outputs.Audio(label="Output"),
gr.outputs.Textbox(label="Наголошений текст"),
gr.components.Audio(label="Output"),
gr.components.Textbox(label="Наголошений текст"),
],
title="🐸💬🇺🇦 - Coqui TTS",
description="Україномовний🇺🇦 TTS за допомогою Coqui TTS (щоб вручну поставити наголос, використовуйте + перед голосною)",
Expand All @@ -132,12 +138,12 @@ def tts(text: str, voice: str, stress: str):
],
[
"Вв+едіть, будь ласка, св+оє реч+ення.",
VoiceOption.Mykyta.value,
VoiceOption.Dmytro.value,
StressOption.AutomaticStress.value,
],
[
"Привіт, як тебе звати?",
VoiceOption.Olena.value,
VoiceOption.Olga.value,
StressOption.AutomaticStress.value,
],
[
Expand Down
22 changes: 20 additions & 2 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@
"griffin_lim_iters": 60,
"num_mels": 80,
"mel_fmin": 0,
"mel_fmax": 8000,
"mel_fmax": null,
"spec_gain": 6.0,
"do_amp_to_db_linear": true,
"do_amp_to_db_mel": true,
Expand Down Expand Up @@ -158,11 +158,29 @@
null,
null
],
[
"\u0425\u0442+\u043e \u0442+\u0438 \u0442\u0430\u043a+\u0438\u0439 +\u0456 +\u044f\u043a \u0442\u0435\u0431+\u0435 \u0437\u0432+\u0430\u0442\u0438?",
"dmytro",
null,
null
],
[
"\u041a\u0430\u043c'\u044f\u043d+\u0435\u0446\u044c-\u041f\u043e\u0434+\u0456\u043b\u044c\u0441\u044c\u043a\u0438\u0439 - \u043c+\u0456\u0441\u0442\u043e \u0432 \u0425\u043c\u0435\u043b\u044c\u043d+\u0438\u0446\u044c\u043a\u0456\u0439 +\u043e\u0431\u043b\u0430\u0441\u0442\u0456 \u0423\u043a\u0440\u0430+\u0457\u043d\u0438, \u0446+\u0435\u043d\u0442\u0440 \u041a\u0430\u043c'\u044f\u043d+\u0435\u0446\u044c-\u041f\u043e\u0434+\u0456\u043b\u044c\u0441\u044c\u043a\u043e\u0457 \u043c\u0456\u0441\u044c\u043a+\u043e\u0457 \u043e\u0431'+\u0454\u0434\u043d\u0430\u043d\u043e\u0457 \u0442\u0435\u0440\u0438\u0442\u043e\u0440\u0456+\u0430\u043b\u044c\u043d\u043e\u0457 \u0433\u0440\u043e\u043c+\u0430\u0434\u0438 +\u0456 \u041a\u0430\u043c'\u044f\u043d+\u0435\u0446\u044c-\u041f\u043e\u0434+\u0456\u043b\u044c\u0441\u044c\u043a\u043e\u0433\u043e \u0440\u0430\u0439+\u043e\u043d\u0443.",
"lada",
null,
null
],
[
"\u041a\u0430\u043c'\u044f\u043d+\u0435\u0446\u044c-\u041f\u043e\u0434+\u0456\u043b\u044c\u0441\u044c\u043a\u0438\u0439 - \u043c+\u0456\u0441\u0442\u043e \u0432 \u0425\u043c\u0435\u043b\u044c\u043d+\u0438\u0446\u044c\u043a\u0456\u0439 +\u043e\u0431\u043b\u0430\u0441\u0442\u0456 \u0423\u043a\u0440\u0430+\u0457\u043d\u0438, \u0446+\u0435\u043d\u0442\u0440 \u041a\u0430\u043c'\u044f\u043d+\u0435\u0446\u044c-\u041f\u043e\u0434+\u0456\u043b\u044c\u0441\u044c\u043a\u043e\u0457 \u043c\u0456\u0441\u044c\u043a+\u043e\u0457 \u043e\u0431'+\u0454\u0434\u043d\u0430\u043d\u043e\u0457 \u0442\u0435\u0440\u0438\u0442\u043e\u0440\u0456+\u0430\u043b\u044c\u043d\u043e\u0457 \u0433\u0440\u043e\u043c+\u0430\u0434\u0438 +\u0456 \u041a\u0430\u043c'\u044f\u043d+\u0435\u0446\u044c-\u041f\u043e\u0434+\u0456\u043b\u044c\u0441\u044c\u043a\u043e\u0433\u043e \u0440\u0430\u0439+\u043e\u043d\u0443.",
"dmytro",
null,
null
],
[
"\u041a\u0430\u043c'\u044f\u043d+\u0435\u0446\u044c-\u041f\u043e\u0434+\u0456\u043b\u044c\u0441\u044c\u043a\u0438\u0439 - \u043c+\u0456\u0441\u0442\u043e \u0432 \u0425\u043c\u0435\u043b\u044c\u043d+\u0438\u0446\u044c\u043a\u0456\u0439 +\u043e\u0431\u043b\u0430\u0441\u0442\u0456 \u0423\u043a\u0440\u0430+\u0457\u043d\u0438, \u0446+\u0435\u043d\u0442\u0440 \u041a\u0430\u043c'\u044f\u043d+\u0435\u0446\u044c-\u041f\u043e\u0434+\u0456\u043b\u044c\u0441\u044c\u043a\u043e\u0457 \u043c\u0456\u0441\u044c\u043a+\u043e\u0457 \u043e\u0431'+\u0454\u0434\u043d\u0430\u043d\u043e\u0457 \u0442\u0435\u0440\u0438\u0442\u043e\u0440\u0456+\u0430\u043b\u044c\u043d\u043e\u0457 \u0433\u0440\u043e\u043c+\u0430\u0434\u0438 +\u0456 \u041a\u0430\u043c'\u044f\u043d+\u0435\u0446\u044c-\u041f\u043e\u0434+\u0456\u043b\u044c\u0441\u044c\u043a\u043e\u0433\u043e \u0440\u0430\u0439+\u043e\u043d\u0443.",
"olga",
null,
null
]
],
"eval_split_max_size": null,
Expand Down Expand Up @@ -243,7 +261,7 @@
"init_discriminator": true,
"use_spectral_norm_disriminator": false,
"use_speaker_embedding": true,
"num_speakers": 3,
"num_speakers": 5,
"speakers_file": "speakers.pth",
"d_vector_file": null,
"speaker_embedding_channels": 256,
Expand Down

0 comments on commit a163565

Please sign in to comment.