From b3d58db15df26b313a3d77ce331c4566787f3366 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 26 Sep 2023 20:06:18 +0300 Subject: [PATCH 001/255] Remove abandoned .cardboardlint.yml --- .cardboardlint.yml | 5 ----- 1 file changed, 5 deletions(-) delete mode 100644 .cardboardlint.yml diff --git a/.cardboardlint.yml b/.cardboardlint.yml deleted file mode 100644 index 4a115a37cd..0000000000 --- a/.cardboardlint.yml +++ /dev/null @@ -1,5 +0,0 @@ -linters: -- pylint: - # pylintrc: pylintrc - filefilter: ['- test_*.py', '+ *.py', '- *.npy'] - # exclude: \ No newline at end of file From 81160b089cbd51ed0e819385e252942c134e3126 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 25 Sep 2023 15:06:02 +0300 Subject: [PATCH 002/255] Update pre-commit tools to more compatible versions --- .pre-commit-config.yaml | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 911f2a838e..18eb034a98 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,17 +1,19 @@ repos: - - repo: 'https://github.com/pre-commit/pre-commit-hooks' - rev: v2.3.0 + - repo: "https://github.com/pre-commit/pre-commit-hooks" + rev: v4.5.0 hooks: - id: check-yaml - - id: end-of-file-fixer - - id: trailing-whitespace - - repo: 'https://github.com/psf/black' - rev: 22.3.0 + # TODO: enable these later; there are plenty of violating + # files that need to be fixed first + # - id: end-of-file-fixer + # - id: trailing-whitespace + - repo: "https://github.com/psf/black" + rev: 23.12.0 hooks: - id: black language_version: python3 - repo: https://github.com/pycqa/isort - rev: 5.8.0 + rev: 5.13.1 hooks: - id: isort name: isort (python) @@ -24,4 +26,4 @@ repos: - repo: https://github.com/pycqa/pylint rev: v2.8.2 hooks: - - id: pylint + - id: pylint From 859283a73420fb0ff44cf61297f0324befa49aae Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 26 Sep 2023 20:09:09 +0300 Subject: [PATCH 003/255] Remove isort for pyi (there are no pyi files here) --- .pre-commit-config.yaml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 18eb034a98..b011613ad2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -20,9 +20,6 @@ repos: - id: isort name: isort (cython) types: [cython] - - id: isort - name: isort (pyi) - types: [pyi] - repo: https://github.com/pycqa/pylint rev: v2.8.2 hooks: From 72ac2bfa09fbfd1775ba017aa13875e9e7bcaa55 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 26 Sep 2023 23:08:45 +0300 Subject: [PATCH 004/255] Get rid of some star imports --- TTS/vc/modules/freevc/speaker_encoder/audio.py | 8 +++++++- .../modules/freevc/speaker_encoder/speaker_encoder.py | 10 +++++++++- recipes/bel-alex73/train_hifigan.py | 2 +- tests/data_tests/test_loader.py | 3 ++- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/TTS/vc/modules/freevc/speaker_encoder/audio.py b/TTS/vc/modules/freevc/speaker_encoder/audio.py index 52f6fd0893..30a77adc5d 100644 --- a/TTS/vc/modules/freevc/speaker_encoder/audio.py +++ b/TTS/vc/modules/freevc/speaker_encoder/audio.py @@ -7,7 +7,13 @@ import numpy as np from scipy.ndimage.morphology import binary_dilation -from TTS.vc.modules.freevc.speaker_encoder.hparams import * +from TTS.vc.modules.freevc.speaker_encoder.hparams import ( + audio_norm_target_dBFS, + mel_n_channels, + mel_window_length, + mel_window_step, + sampling_rate, +) int16_max = (2**15) - 1 diff --git 
a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py index 2e21a14fd8..d18c098f38 100644 --- a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py +++ b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py @@ -8,7 +8,15 @@ from TTS.utils.io import load_fsspec from TTS.vc.modules.freevc.speaker_encoder import audio -from TTS.vc.modules.freevc.speaker_encoder.hparams import * +from TTS.vc.modules.freevc.speaker_encoder.hparams import ( + mel_n_channels, + mel_window_step, + model_embedding_size, + model_hidden_size, + model_num_layers, + partials_n_frames, + sampling_rate, +) class SpeakerEncoder(nn.Module): diff --git a/recipes/bel-alex73/train_hifigan.py b/recipes/bel-alex73/train_hifigan.py index 3e740b2ff4..1ddf48c21e 100644 --- a/recipes/bel-alex73/train_hifigan.py +++ b/recipes/bel-alex73/train_hifigan.py @@ -5,7 +5,7 @@ from TTS.tts.configs.shared_configs import BaseAudioConfig from TTS.utils.audio import AudioProcessor -from TTS.vocoder.configs.hifigan_config import * +from TTS.vocoder.configs.hifigan_config import HifiganConfig from TTS.vocoder.datasets.preprocess import load_wav_data from TTS.vocoder.models.gan import GAN diff --git a/tests/data_tests/test_loader.py b/tests/data_tests/test_loader.py index cbd98fc0c5..adb272e19f 100644 --- a/tests/data_tests/test_loader.py +++ b/tests/data_tests/test_loader.py @@ -6,9 +6,10 @@ import torch from torch.utils.data import DataLoader +from TTS.tts.datasets.dataset import TTSDataset from tests import get_tests_data_path, get_tests_output_path from TTS.tts.configs.shared_configs import BaseDatasetConfig, BaseTTSConfig -from TTS.tts.datasets import TTSDataset, load_tts_samples +from TTS.tts.datasets import load_tts_samples from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.utils.audio import AudioProcessor From 8e95c3e436b452729c2933e879eeeeb8a2a71707 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 27 Sep 2023 00:32:04 +0300 Subject: [PATCH 005/255] Convert pylint configuration to ruff With https://github.com/akx/pylint-to-ruff --- .pre-commit-config.yaml | 9 +- .pylintrc | 599 ---------------------------------------- Makefile | 4 +- pyproject.toml | 55 +++- 4 files changed, 59 insertions(+), 608 deletions(-) delete mode 100644 .pylintrc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b011613ad2..af408ed551 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,12 +15,11 @@ repos: - repo: https://github.com/pycqa/isort rev: 5.13.1 hooks: - - id: isort - name: isort (python) - id: isort name: isort (cython) types: [cython] - - repo: https://github.com/pycqa/pylint - rev: v2.8.2 + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.7 hooks: - - id: pylint + - id: ruff + args: [--fix, --exit-non-zero-on-fix] diff --git a/.pylintrc b/.pylintrc deleted file mode 100644 index 49a9dbdd2c..0000000000 --- a/.pylintrc +++ /dev/null @@ -1,599 +0,0 @@ -[MASTER] - -# A comma-separated list of package or module names from where C extensions may -# be loaded. Extensions are loading into the active Python interpreter and may -# run arbitrary code. -extension-pkg-whitelist= - -# Add files or directories to the blacklist. They should be base names, not -# paths. -ignore=CVS - -# Add files or directories matching the regex patterns to the blacklist. The -# regex matches against base names, not paths. -ignore-patterns= - -# Python code to execute, usually for sys.path manipulation such as -# pygtk.require(). 
-#init-hook= - -# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the -# number of processors available to use. -jobs=1 - -# Control the amount of potential inferred values when inferring a single -# object. This can help the performance when dealing with large functions or -# complex, nested conditions. -limit-inference-results=100 - -# List of plugins (as comma separated values of python modules names) to load, -# usually to register additional checkers. -load-plugins= - -# Pickle collected data for later comparisons. -persistent=yes - -# Specify a configuration file. -#rcfile= - -# When enabled, pylint would attempt to guess common misconfiguration and emit -# user-friendly hints instead of false-positive error messages. -suggestion-mode=yes - -# Allow loading of arbitrary C extensions. Extensions are imported into the -# active Python interpreter and may run arbitrary code. -unsafe-load-any-extension=no - - -[MESSAGES CONTROL] - -# Only show warnings with the listed confidence levels. Leave empty to show -# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. -confidence= - -# Disable the message, report, category or checker with the given id(s). You -# can either give multiple identifiers separated by comma (,) or put this -# option multiple times (only on the command line, not in the configuration -# file where it should appear only once). You can also use "--disable=all" to -# disable everything first and then reenable specific checks. For example, if -# you want to run only the similarities checker, you can use "--disable=all -# --enable=similarities". If you want to run only the classes checker, but have -# no Warning level messages displayed, use "--disable=all --enable=classes -# --disable=W". -disable=missing-docstring, - too-many-public-methods, - too-many-lines, - bare-except, - ## for avoiding weird p3.6 CI linter error - ## TODO: see later if we can remove this - assigning-non-slot, - unsupported-assignment-operation, - ## end - line-too-long, - fixme, - wrong-import-order, - ungrouped-imports, - wrong-import-position, - import-error, - invalid-name, - too-many-instance-attributes, - arguments-differ, - arguments-renamed, - no-name-in-module, - no-member, - unsubscriptable-object, - print-statement, - parameter-unpacking, - unpacking-in-except, - old-raise-syntax, - backtick, - long-suffix, - old-ne-operator, - old-octal-literal, - import-star-module-level, - non-ascii-bytes-literal, - raw-checker-failed, - bad-inline-option, - locally-disabled, - file-ignored, - suppressed-message, - useless-suppression, - deprecated-pragma, - use-symbolic-message-instead, - useless-object-inheritance, - too-few-public-methods, - too-many-branches, - too-many-arguments, - too-many-locals, - too-many-statements, - apply-builtin, - basestring-builtin, - buffer-builtin, - cmp-builtin, - coerce-builtin, - execfile-builtin, - file-builtin, - long-builtin, - raw_input-builtin, - reduce-builtin, - standarderror-builtin, - unicode-builtin, - xrange-builtin, - coerce-method, - delslice-method, - getslice-method, - setslice-method, - no-absolute-import, - old-division, - dict-iter-method, - dict-view-method, - next-method-called, - metaclass-assignment, - indexing-exception, - raising-string, - reload-builtin, - oct-method, - hex-method, - nonzero-method, - cmp-method, - input-builtin, - round-builtin, - intern-builtin, - unichr-builtin, - map-builtin-not-iterating, - zip-builtin-not-iterating, - range-builtin-not-iterating, - filter-builtin-not-iterating, - 
using-cmp-argument, - eq-without-hash, - div-method, - idiv-method, - rdiv-method, - exception-message-attribute, - invalid-str-codec, - sys-max-int, - bad-python3-import, - deprecated-string-function, - deprecated-str-translate-call, - deprecated-itertools-function, - deprecated-types-field, - next-method-defined, - dict-items-not-iterating, - dict-keys-not-iterating, - dict-values-not-iterating, - deprecated-operator-function, - deprecated-urllib-function, - xreadlines-attribute, - deprecated-sys-function, - exception-escape, - comprehension-escape, - duplicate-code, - not-callable, - import-outside-toplevel, - logging-fstring-interpolation, - logging-not-lazy - -# Enable the message, report, category or checker with the given id(s). You can -# either give multiple identifier separated by comma (,) or put this option -# multiple time (only on the command line, not in the configuration file where -# it should appear only once). See also the "--disable" option for examples. -enable=c-extension-no-member - - -[REPORTS] - -# Python expression which should return a note less than 10 (10 is the highest -# note). You have access to the variables errors warning, statement which -# respectively contain the number of errors / warnings messages and the total -# number of statements analyzed. This is used by the global evaluation report -# (RP0004). -evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) - -# Template used to display messages. This is a python new-style format string -# used to format the message information. See doc for all details. -#msg-template= - -# Set the output format. Available formats are text, parseable, colorized, json -# and msvs (visual studio). You can also give a reporter class, e.g. -# mypackage.mymodule.MyReporterClass. -output-format=text - -# Tells whether to display a full report or only the messages. -reports=no - -# Activate the evaluation score. -score=yes - - -[REFACTORING] - -# Maximum number of nested blocks for function / method body -max-nested-blocks=5 - -# Complete name of functions that never returns. When checking for -# inconsistent-return-statements if a never returning function is called then -# it will be considered as an explicit return statement and no message will be -# printed. -never-returning-functions=sys.exit - - -[LOGGING] - -# Format style used to check logging format string. `old` means using % -# formatting, while `new` is for `{}` formatting. -logging-format-style=old - -# Logging modules to check that the string format arguments are in logging -# function parameter format. -logging-modules=logging - - -[SPELLING] - -# Limits count of emitted suggestions for spelling mistakes. -max-spelling-suggestions=4 - -# Spelling dictionary name. Available dictionaries: none. To make it working -# install python-enchant package.. -spelling-dict= - -# List of comma separated words that should not be checked. -spelling-ignore-words= - -# A path to a file that contains private dictionary; one word per line. -spelling-private-dict-file= - -# Tells whether to store unknown words to indicated private dictionary in -# --spelling-private-dict-file option instead of raising a message. -spelling-store-unknown-words=no - - -[MISCELLANEOUS] - -# List of note tags to take in consideration, separated by a comma. -notes=FIXME, - XXX, - TODO - - -[TYPECHECK] - -# List of decorators that produce context managers, such as -# contextlib.contextmanager. 
Add to this list to register other decorators that -# produce valid context managers. -contextmanager-decorators=contextlib.contextmanager - -# List of members which are set dynamically and missed by pylint inference -# system, and so shouldn't trigger E1101 when accessed. Python regular -# expressions are accepted. -generated-members=numpy.*,torch.* - -# Tells whether missing members accessed in mixin class should be ignored. A -# mixin class is detected if its name ends with "mixin" (case insensitive). -ignore-mixin-members=yes - -# Tells whether to warn about missing members when the owner of the attribute -# is inferred to be None. -ignore-none=yes - -# This flag controls whether pylint should warn about no-member and similar -# checks whenever an opaque object is returned when inferring. The inference -# can return multiple potential results while evaluating a Python object, but -# some branches might not be evaluated, which results in partial inference. In -# that case, it might be useful to still emit no-member and other checks for -# the rest of the inferred objects. -ignore-on-opaque-inference=yes - -# List of class names for which member attributes should not be checked (useful -# for classes with dynamically set attributes). This supports the use of -# qualified names. -ignored-classes=optparse.Values,thread._local,_thread._local - -# List of module names for which member attributes should not be checked -# (useful for modules/projects where namespaces are manipulated during runtime -# and thus existing member attributes cannot be deduced by static analysis. It -# supports qualified module names, as well as Unix pattern matching. -ignored-modules= - -# Show a hint with possible names when a member name was not found. The aspect -# of finding the hint is based on edit distance. -missing-member-hint=yes - -# The minimum edit distance a name should have in order to be considered a -# similar match for a missing member name. -missing-member-hint-distance=1 - -# The total number of similar names that should be taken in consideration when -# showing a hint for a missing member. -missing-member-max-choices=1 - - -[VARIABLES] - -# List of additional names supposed to be defined in builtins. Remember that -# you should avoid defining new builtins when possible. -additional-builtins= - -# Tells whether unused global variables should be treated as a violation. -allow-global-unused-variables=yes - -# List of strings which can identify a callback function by name. A callback -# name must start or end with one of those strings. -callbacks=cb_, - _cb - -# A regular expression matching the name of dummy variables (i.e. expected to -# not be used). -dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ - -# Argument names that match this expression will be ignored. Default to name -# with leading underscore. -ignored-argument-names=_.*|^ignored_|^unused_ - -# Tells whether we should check for unused import in __init__ files. -init-import=no - -# List of qualified module names which can have objects that can redefine -# builtins. -redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io - - -[FORMAT] - -# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. -expected-line-ending-format= - -# Regexp for a line that is allowed to be longer than the limit. -ignore-long-lines=^\s*(# )??$ - -# Number of spaces of indent required inside a hanging or continued line. -indent-after-paren=4 - -# String used as indentation unit. 
This is usually " " (4 spaces) or "\t" (1 -# tab). -indent-string=' ' - -# Maximum number of characters on a single line. -max-line-length=120 - -# Maximum number of lines in a module. -max-module-lines=1000 - -# List of optional constructs for which whitespace checking is disabled. `dict- -# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. -# `trailing-comma` allows a space between comma and closing bracket: (a, ). -# `empty-line` allows space-only lines. -no-space-check=trailing-comma, - dict-separator - -# Allow the body of a class to be on the same line as the declaration if body -# contains single statement. -single-line-class-stmt=no - -# Allow the body of an if to be on the same line as the test if there is no -# else. -single-line-if-stmt=no - - -[SIMILARITIES] - -# Ignore comments when computing similarities. -ignore-comments=yes - -# Ignore docstrings when computing similarities. -ignore-docstrings=yes - -# Ignore imports when computing similarities. -ignore-imports=no - -# Minimum lines number of a similarity. -min-similarity-lines=4 - - -[BASIC] - -# Naming style matching correct argument names. -argument-naming-style=snake_case - -# Regular expression matching correct argument names. Overrides argument- -# naming-style. -argument-rgx=[a-z_][a-z0-9_]{0,30}$ - -# Naming style matching correct attribute names. -attr-naming-style=snake_case - -# Regular expression matching correct attribute names. Overrides attr-naming- -# style. -#attr-rgx= - -# Bad variable names which should always be refused, separated by a comma. -bad-names= - -# Naming style matching correct class attribute names. -class-attribute-naming-style=any - -# Regular expression matching correct class attribute names. Overrides class- -# attribute-naming-style. -#class-attribute-rgx= - -# Naming style matching correct class names. -class-naming-style=PascalCase - -# Regular expression matching correct class names. Overrides class-naming- -# style. -#class-rgx= - -# Naming style matching correct constant names. -const-naming-style=UPPER_CASE - -# Regular expression matching correct constant names. Overrides const-naming- -# style. -#const-rgx= - -# Minimum line length for functions/classes that require docstrings, shorter -# ones are exempt. -docstring-min-length=-1 - -# Naming style matching correct function names. -function-naming-style=snake_case - -# Regular expression matching correct function names. Overrides function- -# naming-style. -#function-rgx= - -# Good variable names which should always be accepted, separated by a comma. -good-names=i, - j, - k, - x, - ex, - Run, - _ - -# Include a hint for the correct naming format with invalid-name. -include-naming-hint=no - -# Naming style matching correct inline iteration names. -inlinevar-naming-style=any - -# Regular expression matching correct inline iteration names. Overrides -# inlinevar-naming-style. -#inlinevar-rgx= - -# Naming style matching correct method names. -method-naming-style=snake_case - -# Regular expression matching correct method names. Overrides method-naming- -# style. -#method-rgx= - -# Naming style matching correct module names. -module-naming-style=snake_case - -# Regular expression matching correct module names. Overrides module-naming- -# style. -#module-rgx= - -# Colon-delimited sets of names that determine each other's naming style when -# the name regexes allow several styles. -name-group= - -# Regular expression which should only match function or class names that do -# not require a docstring. 
-no-docstring-rgx=^_ - -# List of decorators that produce properties, such as abc.abstractproperty. Add -# to this list to register other decorators that produce valid properties. -# These decorators are taken in consideration only for invalid-name. -property-classes=abc.abstractproperty - -# Naming style matching correct variable names. -variable-naming-style=snake_case - -# Regular expression matching correct variable names. Overrides variable- -# naming-style. -variable-rgx=[a-z_][a-z0-9_]{0,30}$ - - -[STRING] - -# This flag controls whether the implicit-str-concat-in-sequence should -# generate a warning on implicit string concatenation in sequences defined over -# several lines. -check-str-concat-over-line-jumps=no - - -[IMPORTS] - -# Allow wildcard imports from modules that define __all__. -allow-wildcard-with-all=no - -# Analyse import fallback blocks. This can be used to support both Python 2 and -# 3 compatible code, which means that the block might have code that exists -# only in one or another interpreter, leading to false positives when analysed. -analyse-fallback-blocks=no - -# Deprecated modules which should not be used, separated by a comma. -deprecated-modules=optparse,tkinter.tix - -# Create a graph of external dependencies in the given file (report RP0402 must -# not be disabled). -ext-import-graph= - -# Create a graph of every (i.e. internal and external) dependencies in the -# given file (report RP0402 must not be disabled). -import-graph= - -# Create a graph of internal dependencies in the given file (report RP0402 must -# not be disabled). -int-import-graph= - -# Force import order to recognize a module as part of the standard -# compatibility libraries. -known-standard-library= - -# Force import order to recognize a module as part of a third party library. -known-third-party=enchant - - -[CLASSES] - -# List of method names used to declare (i.e. assign) instance attributes. -defining-attr-methods=__init__, - __new__, - setUp - -# List of member names, which should be excluded from the protected access -# warning. -exclude-protected=_asdict, - _fields, - _replace, - _source, - _make - -# List of valid names for the first argument in a class method. -valid-classmethod-first-arg=cls - -# List of valid names for the first argument in a metaclass class method. -valid-metaclass-classmethod-first-arg=cls - - -[DESIGN] - -# Maximum number of arguments for function / method. -max-args=5 - -# Maximum number of attributes for a class (see R0902). -max-attributes=7 - -# Maximum number of boolean expressions in an if statement. -max-bool-expr=5 - -# Maximum number of branch for function / method body. -max-branches=12 - -# Maximum number of locals for function / method body. -max-locals=15 - -# Maximum number of parents for a class (see R0901). -max-parents=15 - -# Maximum number of public methods for a class (see R0904). -max-public-methods=20 - -# Maximum number of return / yield for function / method body. -max-returns=6 - -# Maximum number of statements in function / method body. -max-statements=50 - -# Minimum number of public methods for a class (see R0903). -min-public-methods=2 - - -[EXCEPTIONS] - -# Exceptions that will emit a warning when being caught. Defaults to -# "BaseException, Exception". -overgeneral-exceptions=BaseException, - Exception diff --git a/Makefile b/Makefile index 7446848f46..d1a1db8a75 100644 --- a/Makefile +++ b/Makefile @@ -48,8 +48,8 @@ style: ## update code style. black ${target_dirs} isort ${target_dirs} -lint: ## run pylint linter. 
- pylint ${target_dirs} +lint: ## run linters. + ruff ${target_dirs} black ${target_dirs} --check isort ${target_dirs} --check-only diff --git a/pyproject.toml b/pyproject.toml index 922575305c..934e0c2ebd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,8 +7,59 @@ requires = [ "packaging", ] -[flake8] -max-line-length=120 +[tool.ruff] +line-length = 120 +extend-select = [ + "B033", # duplicate-value + "C416", # unnecessary-comprehension + "D419", # empty-docstring + "E999", # syntax-error + "F401", # unused-import + "F704", # yield-outside-function + "F706", # return-outside-function + "F841", # unused-variable + "I", # import sorting + "PIE790", # unnecessary-pass + "PLC", + "PLE", + "PLR0124", # comparison-with-itself + "PLR0206", # property-with-parameters + "PLR0911", # too-many-return-statements + "PLR1711", # useless-return + "PLW", + "W291", # trailing-whitespace +] + +ignore = [ + "E501", # line too long + "E722", # bare except (TODO: fix these) + "E731", # don't use lambdas + "E741", # ambiguous variable name + "PLR0912", # too-many-branches + "PLR0913", # too-many-arguments + "PLR0915", # too-many-statements + "UP004", # useless-object-inheritance + "F821", # TODO: enable + "F841", # TODO: enable + "PLW0602", # TODO: enable + "PLW2901", # TODO: enable + "PLW0127", # TODO: enable + "PLW0603", # TODO: enable +] + +[tool.ruff.pylint] +max-args = 5 +max-public-methods = 20 +max-returns = 7 + +[tool.ruff.per-file-ignores] +"**/__init__.py" = [ + "F401", # init files may have "unused" imports for now + "F403", # init files may have star imports for now +] +"hubconf.py" = [ + "E402", # module level import not at top of file +] [tool.black] line-length = 120 From 4f859824acf31bcaa7333d64a0835ce39a5617fa Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 27 Sep 2023 00:38:43 +0300 Subject: [PATCH 006/255] Document ruff --- CONTRIBUTING.md | 2 +- requirements.dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ae0ce46048..5fbed84397 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -88,7 +88,7 @@ The following steps are tested on an Ubuntu system. $ make style ``` -10. Run the linter and correct the issues raised. We use ```pylint``` for linting. It helps to enforce a coding standard, offers simple refactoring suggestions. +10. Run the linter and correct the issues raised. We use ```ruff``` for linting. It helps to enforce a coding standard, offers simple refactoring suggestions. 
```bash $ make lint diff --git a/requirements.dev.txt b/requirements.dev.txt index 8c674727d3..21c4c3d21e 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -2,4 +2,4 @@ black coverage isort nose2 -pylint==2.10.2 +ruff==0.1.3 From 90991e89b4202c4eebb85b1df853f802e383b9f1 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 27 Sep 2023 00:42:14 +0300 Subject: [PATCH 007/255] Ruff autofix unused imports and import order --- TTS/api.py | 2 +- TTS/bin/collect_env_info.py | 6 +++--- TTS/demos/xtts_ft_demo/utils/formatter.py | 14 ++++---------- TTS/demos/xtts_ft_demo/utils/gpt_train.py | 2 +- TTS/demos/xtts_ft_demo/xtts_demo.py | 11 ++++------- TTS/encoder/configs/emotion_encoder_config.py | 2 +- TTS/encoder/configs/speaker_encoder_config.py | 2 +- TTS/tts/layers/bark/hubert/kmeans_hubert.py | 2 -- TTS/tts/layers/tortoise/arch_utils.py | 1 - TTS/tts/layers/vits/discriminator.py | 2 +- TTS/tts/layers/xtts/gpt.py | 1 - TTS/tts/layers/xtts/gpt_inference.py | 2 -- TTS/tts/layers/xtts/trainer/dataset.py | 1 - TTS/tts/layers/xtts/trainer/gpt_trainer.py | 1 - TTS/tts/layers/xtts/zh_num2words.py | 2 -- TTS/tts/models/base_tts.py | 2 +- TTS/vc/configs/shared_configs.py | 6 ++---- TTS/vc/modules/freevc/commons.py | 2 -- TTS/vc/modules/freevc/speaker_encoder/audio.py | 2 -- .../freevc/speaker_encoder/speaker_encoder.py | 1 - recipes/bel-alex73/train_hifigan.py | 3 --- recipes/multilingual/cml_yourtts/train_yourtts.py | 1 - setup.py | 2 +- tests/data_tests/test_loader.py | 2 +- tests/vc_tests/test_freevc.py | 3 +-- 25 files changed, 22 insertions(+), 53 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 7abc188e74..fa6165b9d2 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -6,10 +6,10 @@ import numpy as np from torch import nn +from TTS.config import load_config from TTS.utils.audio.numpy_transforms import save_wav from TTS.utils.manage import ModelManager from TTS.utils.synthesizer import Synthesizer -from TTS.config import load_config class TTS(nn.Module): diff --git a/TTS/bin/collect_env_info.py b/TTS/bin/collect_env_info.py index 662fcd02ec..e76f6a757b 100644 --- a/TTS/bin/collect_env_info.py +++ b/TTS/bin/collect_env_info.py @@ -1,4 +1,5 @@ """Get detailed info about the working environment.""" +import json import os import platform import sys @@ -6,11 +7,10 @@ import numpy import torch -sys.path += [os.path.abspath(".."), os.path.abspath(".")] -import json - import TTS +sys.path += [os.path.abspath(".."), os.path.abspath(".")] + def system_info(): return { diff --git a/TTS/demos/xtts_ft_demo/utils/formatter.py b/TTS/demos/xtts_ft_demo/utils/formatter.py index 536faa0108..937ee4dd53 100644 --- a/TTS/demos/xtts_ft_demo/utils/formatter.py +++ b/TTS/demos/xtts_ft_demo/utils/formatter.py @@ -1,23 +1,17 @@ -import os import gc -import torchaudio +import os + import pandas +import torch +import torchaudio from faster_whisper import WhisperModel -from glob import glob - from tqdm import tqdm -import torch -import torchaudio # torch.set_num_threads(1) - from TTS.tts.layers.xtts.tokenizer import multilingual_cleaners torch.set_num_threads(16) - -import os - audio_types = (".wav", ".mp3", ".flac") diff --git a/TTS/demos/xtts_ft_demo/utils/gpt_train.py b/TTS/demos/xtts_ft_demo/utils/gpt_train.py index a98765c3e7..80be4fab40 100644 --- a/TTS/demos/xtts_ft_demo/utils/gpt_train.py +++ b/TTS/demos/xtts_ft_demo/utils/gpt_train.py @@ -1,5 +1,5 @@ -import os import gc +import os from trainer import Trainer, TrainerArgs diff --git a/TTS/demos/xtts_ft_demo/xtts_demo.py 
b/TTS/demos/xtts_ft_demo/xtts_demo.py index ebb11f29d1..a7fbc0e821 100644 --- a/TTS/demos/xtts_ft_demo/xtts_demo.py +++ b/TTS/demos/xtts_ft_demo/xtts_demo.py @@ -1,19 +1,16 @@ import argparse +import logging import os import sys import tempfile +import traceback import gradio as gr -import librosa.display -import numpy as np - -import os import torch import torchaudio -import traceback + from TTS.demos.xtts_ft_demo.utils.formatter import format_audio_list from TTS.demos.xtts_ft_demo.utils.gpt_train import train_gpt - from TTS.tts.configs.xtts_config import XttsConfig from TTS.tts.models.xtts import Xtts @@ -91,7 +88,7 @@ def isatty(self): # logging.basicConfig(stream=sys.stdout, level=logging.INFO) -import logging + logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", diff --git a/TTS/encoder/configs/emotion_encoder_config.py b/TTS/encoder/configs/emotion_encoder_config.py index 5eda2671be..1d12325cf2 100644 --- a/TTS/encoder/configs/emotion_encoder_config.py +++ b/TTS/encoder/configs/emotion_encoder_config.py @@ -1,4 +1,4 @@ -from dataclasses import asdict, dataclass +from dataclasses import dataclass from TTS.encoder.configs.base_encoder_config import BaseEncoderConfig diff --git a/TTS/encoder/configs/speaker_encoder_config.py b/TTS/encoder/configs/speaker_encoder_config.py index 6dceb00277..0588527a68 100644 --- a/TTS/encoder/configs/speaker_encoder_config.py +++ b/TTS/encoder/configs/speaker_encoder_config.py @@ -1,4 +1,4 @@ -from dataclasses import asdict, dataclass +from dataclasses import dataclass from TTS.encoder.configs.base_encoder_config import BaseEncoderConfig diff --git a/TTS/tts/layers/bark/hubert/kmeans_hubert.py b/TTS/tts/layers/bark/hubert/kmeans_hubert.py index a6a3b9aeb1..9e487b1e9d 100644 --- a/TTS/tts/layers/bark/hubert/kmeans_hubert.py +++ b/TTS/tts/layers/bark/hubert/kmeans_hubert.py @@ -7,8 +7,6 @@ # Modified code from https://github.com/lucidrains/audiolm-pytorch/blob/main/audiolm_pytorch/hubert_kmeans.py -import logging -from pathlib import Path import torch from einops import pack, unpack diff --git a/TTS/tts/layers/tortoise/arch_utils.py b/TTS/tts/layers/tortoise/arch_utils.py index dad1814369..c79ef31b0c 100644 --- a/TTS/tts/layers/tortoise/arch_utils.py +++ b/TTS/tts/layers/tortoise/arch_utils.py @@ -1,6 +1,5 @@ import functools import math -import os import fsspec import torch diff --git a/TTS/tts/layers/vits/discriminator.py b/TTS/tts/layers/vits/discriminator.py index c27d11bef6..3449739fdc 100644 --- a/TTS/tts/layers/vits/discriminator.py +++ b/TTS/tts/layers/vits/discriminator.py @@ -2,7 +2,7 @@ from torch import nn from torch.nn.modules.conv import Conv1d -from TTS.vocoder.models.hifigan_discriminator import DiscriminatorP, MultiPeriodDiscriminator +from TTS.vocoder.models.hifigan_discriminator import DiscriminatorP class DiscriminatorS(torch.nn.Module): diff --git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py index e7b186b858..ca0dc7cc74 100644 --- a/TTS/tts/layers/xtts/gpt.py +++ b/TTS/tts/layers/xtts/gpt.py @@ -1,7 +1,6 @@ # ported from: https://github.com/neonbjb/tortoise-tts import functools -import math import random import torch diff --git a/TTS/tts/layers/xtts/gpt_inference.py b/TTS/tts/layers/xtts/gpt_inference.py index d44bd3decd..4625ae1ba9 100644 --- a/TTS/tts/layers/xtts/gpt_inference.py +++ b/TTS/tts/layers/xtts/gpt_inference.py @@ -1,5 +1,3 @@ -import math - import torch from torch import nn from transformers import GPT2PreTrainedModel diff --git a/TTS/tts/layers/xtts/trainer/dataset.py 
b/TTS/tts/layers/xtts/trainer/dataset.py index 2f958cb5a5..4d6d6ede6e 100644 --- a/TTS/tts/layers/xtts/trainer/dataset.py +++ b/TTS/tts/layers/xtts/trainer/dataset.py @@ -1,4 +1,3 @@ -import os import random import sys diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index 9a7a1d7783..460fcc69f7 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -5,7 +5,6 @@ import torch.nn as nn import torchaudio from coqpit import Coqpit -from torch.nn import functional as F from torch.utils.data import DataLoader from trainer.torch import DistributedSampler from trainer.trainer_utils import get_optimizer, get_scheduler diff --git a/TTS/tts/layers/xtts/zh_num2words.py b/TTS/tts/layers/xtts/zh_num2words.py index e59ccb6630..42fd364a56 100644 --- a/TTS/tts/layers/xtts/zh_num2words.py +++ b/TTS/tts/layers/xtts/zh_num2words.py @@ -4,13 +4,11 @@ import argparse import csv -import os import re import string import sys # fmt: off - # ================================================================================ # # basic constant # ================================================================================ # diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py index 7871cc38c3..be76f6c2d3 100644 --- a/TTS/tts/models/base_tts.py +++ b/TTS/tts/models/base_tts.py @@ -14,7 +14,7 @@ from TTS.tts.datasets.dataset import TTSDataset from TTS.tts.utils.data import get_length_balancer_weights from TTS.tts.utils.languages import LanguageManager, get_language_balancer_weights -from TTS.tts.utils.speakers import SpeakerManager, get_speaker_balancer_weights, get_speaker_manager +from TTS.tts.utils.speakers import SpeakerManager, get_speaker_balancer_weights from TTS.tts.utils.synthesis import synthesis from TTS.tts.utils.visual import plot_alignment, plot_spectrogram diff --git a/TTS/vc/configs/shared_configs.py b/TTS/vc/configs/shared_configs.py index 74164a7444..b2fe63d29d 100644 --- a/TTS/vc/configs/shared_configs.py +++ b/TTS/vc/configs/shared_configs.py @@ -1,7 +1,5 @@ -from dataclasses import asdict, dataclass, field -from typing import Dict, List - -from coqpit import Coqpit, check_argument +from dataclasses import dataclass, field +from typing import List from TTS.config import BaseAudioConfig, BaseDatasetConfig, BaseTrainingConfig diff --git a/TTS/vc/modules/freevc/commons.py b/TTS/vc/modules/freevc/commons.py index e799cc2a5b..e5fb13c11c 100644 --- a/TTS/vc/modules/freevc/commons.py +++ b/TTS/vc/modules/freevc/commons.py @@ -1,8 +1,6 @@ import math -import numpy as np import torch -from torch import nn from torch.nn import functional as F diff --git a/TTS/vc/modules/freevc/speaker_encoder/audio.py b/TTS/vc/modules/freevc/speaker_encoder/audio.py index 30a77adc5d..5b23a4dbb6 100644 --- a/TTS/vc/modules/freevc/speaker_encoder/audio.py +++ b/TTS/vc/modules/freevc/speaker_encoder/audio.py @@ -1,11 +1,9 @@ -import struct from pathlib import Path from typing import Optional, Union # import webrtcvad import librosa import numpy as np -from scipy.ndimage.morphology import binary_dilation from TTS.vc.modules.freevc.speaker_encoder.hparams import ( audio_norm_target_dBFS, diff --git a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py index d18c098f38..7f811ac3ab 100644 --- a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py +++ b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py @@ -1,4 +1,3 @@ -from pathlib import 
Path from time import perf_counter as timer from typing import List, Union diff --git a/recipes/bel-alex73/train_hifigan.py b/recipes/bel-alex73/train_hifigan.py index 1ddf48c21e..78221a9f2b 100644 --- a/recipes/bel-alex73/train_hifigan.py +++ b/recipes/bel-alex73/train_hifigan.py @@ -1,6 +1,3 @@ -import os - -from coqpit import Coqpit from trainer import Trainer, TrainerArgs from TTS.tts.configs.shared_configs import BaseAudioConfig diff --git a/recipes/multilingual/cml_yourtts/train_yourtts.py b/recipes/multilingual/cml_yourtts/train_yourtts.py index 25a2fd0a4b..02f901fe73 100644 --- a/recipes/multilingual/cml_yourtts/train_yourtts.py +++ b/recipes/multilingual/cml_yourtts/train_yourtts.py @@ -4,7 +4,6 @@ from trainer import Trainer, TrainerArgs from TTS.bin.compute_embeddings import compute_embeddings -from TTS.bin.resample import resample_files from TTS.config.shared_configs import BaseDatasetConfig from TTS.tts.configs.vits_config import VitsConfig from TTS.tts.datasets import load_tts_samples diff --git a/setup.py b/setup.py index df14b41adc..b01b655877 100644 --- a/setup.py +++ b/setup.py @@ -23,12 +23,12 @@ import os import subprocess import sys -from packaging.version import Version import numpy import setuptools.command.build_py import setuptools.command.develop from Cython.Build import cythonize +from packaging.version import Version from setuptools import Extension, find_packages, setup python_version = sys.version.split()[0] diff --git a/tests/data_tests/test_loader.py b/tests/data_tests/test_loader.py index adb272e19f..172ee7cef3 100644 --- a/tests/data_tests/test_loader.py +++ b/tests/data_tests/test_loader.py @@ -6,10 +6,10 @@ import torch from torch.utils.data import DataLoader -from TTS.tts.datasets.dataset import TTSDataset from tests import get_tests_data_path, get_tests_output_path from TTS.tts.configs.shared_configs import BaseDatasetConfig, BaseTTSConfig from TTS.tts.datasets import load_tts_samples +from TTS.tts.datasets.dataset import TTSDataset from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.utils.audio import AudioProcessor diff --git a/tests/vc_tests/test_freevc.py b/tests/vc_tests/test_freevc.py index a4a4f72679..3755ab3f06 100644 --- a/tests/vc_tests/test_freevc.py +++ b/tests/vc_tests/test_freevc.py @@ -4,8 +4,7 @@ import torch from tests import get_tests_input_path -from TTS.vc.configs.freevc_config import FreeVCConfig -from TTS.vc.models.freevc import FreeVC +from TTS.vc.models.freevc import FreeVC, FreeVCConfig # pylint: disable=unused-variable # pylint: disable=no-self-use From 449820ec7d47877701125b61ff40951f181dc8bc Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 27 Sep 2023 00:44:08 +0300 Subject: [PATCH 008/255] Ruff autofix E71* --- TTS/api.py | 2 +- TTS/encoder/utils/generic_utils.py | 2 +- TTS/tts/models/align_tts.py | 2 +- TTS/tts/models/vits.py | 6 +++--- TTS/utils/manage.py | 2 +- TTS/vc/models/freevc.py | 6 +++--- TTS/vc/modules/freevc/wavlm/wavlm.py | 2 +- TTS/vocoder/layers/losses.py | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index fa6165b9d2..a4a47f61c5 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -231,7 +231,7 @@ def _check_arguments( raise ValueError("Model is not multi-speaker but `speaker` is provided.") if not self.is_multi_lingual and language is not None: raise ValueError("Model is not multi-lingual but `language` is provided.") - if not emotion is None and not speed is None: + if emotion is not None and speed is not None: raise ValueError("Emotion and speed can 
only be used with Coqui Studio models. Which is discontinued.") def tts( diff --git a/TTS/encoder/utils/generic_utils.py b/TTS/encoder/utils/generic_utils.py index 236d6fe937..88ed71d3f4 100644 --- a/TTS/encoder/utils/generic_utils.py +++ b/TTS/encoder/utils/generic_utils.py @@ -34,7 +34,7 @@ def __init__(self, ap, augmentation_config): # ignore not listed directories if noise_dir not in self.additive_noise_types: continue - if not noise_dir in self.noise_list: + if noise_dir not in self.noise_list: self.noise_list[noise_dir] = [] self.noise_list[noise_dir].append(wav_file) diff --git a/TTS/tts/models/align_tts.py b/TTS/tts/models/align_tts.py index b2e51de7d6..18b9cde385 100644 --- a/TTS/tts/models/align_tts.py +++ b/TTS/tts/models/align_tts.py @@ -415,7 +415,7 @@ def _set_phase(config, global_step): """Decide AlignTTS training phase""" if isinstance(config.phase_start_steps, list): vals = [i < global_step for i in config.phase_start_steps] - if not True in vals: + if True not in vals: phase = 0 else: phase = ( diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index d9b1f59618..fc1896ee07 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -1880,7 +1880,7 @@ def onnx_inference(text, text_lengths, scales, sid=None, langid=None): self.forward = _forward if training: self.train() - if not disc is None: + if disc is not None: self.disc = disc def load_onnx(self, model_path: str, cuda=False): @@ -1914,9 +1914,9 @@ def inference_onnx(self, x, x_lengths=None, speaker_id=None, language_id=None): dtype=np.float32, ) input_params = {"input": x, "input_lengths": x_lengths, "scales": scales} - if not speaker_id is None: + if speaker_id is not None: input_params["sid"] = torch.tensor([speaker_id]).cpu().numpy() - if not language_id is None: + if language_id is not None: input_params["langid"] = torch.tensor([language_id]).cpu().numpy() audio = self.onnx_sess.run( diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index 3a527f4609..d724cc87ec 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -516,7 +516,7 @@ def _update_path(field_name, new_path, config_path): sub_conf[field_names[-1]] = new_path else: # field name points to a top-level field - if not field_name in config: + if field_name not in config: return if isinstance(config[field_name], list): config[field_name] = [new_path] diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index 8bb9989224..a5a340f2aa 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -164,7 +164,7 @@ def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False): super(DiscriminatorP, self).__init__() self.period = period self.use_spectral_norm = use_spectral_norm - norm_f = weight_norm if use_spectral_norm == False else spectral_norm + norm_f = weight_norm if use_spectral_norm is False else spectral_norm self.convs = nn.ModuleList( [ norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), @@ -201,7 +201,7 @@ def forward(self, x): class DiscriminatorS(torch.nn.Module): def __init__(self, use_spectral_norm=False): super(DiscriminatorS, self).__init__() - norm_f = weight_norm if use_spectral_norm == False else spectral_norm + norm_f = weight_norm if use_spectral_norm is False else spectral_norm self.convs = nn.ModuleList( [ norm_f(Conv1d(1, 16, 15, 1, padding=7)), @@ -468,7 +468,7 @@ def inference(self, c, g=None, mel=None, c_lengths=None): Returns: torch.Tensor: Output tensor. 
""" - if c_lengths == None: + if c_lengths is None: c_lengths = (torch.ones(c.size(0)) * c.size(-1)).to(c.device) if not self.use_spk: g = self.enc_spk.embed_utterance(mel) diff --git a/TTS/vc/modules/freevc/wavlm/wavlm.py b/TTS/vc/modules/freevc/wavlm/wavlm.py index fc93bd4f50..d2f28d19c2 100644 --- a/TTS/vc/modules/freevc/wavlm/wavlm.py +++ b/TTS/vc/modules/freevc/wavlm/wavlm.py @@ -387,7 +387,7 @@ def make_conv(): nn.init.kaiming_normal_(conv.weight) return conv - assert (is_layer_norm and is_group_norm) == False, "layer norm and group norm are exclusive" + assert (is_layer_norm and is_group_norm) is False, "layer norm and group norm are exclusive" if is_layer_norm: return nn.Sequential( diff --git a/TTS/vocoder/layers/losses.py b/TTS/vocoder/layers/losses.py index 74cfc7262b..1f977755cc 100644 --- a/TTS/vocoder/layers/losses.py +++ b/TTS/vocoder/layers/losses.py @@ -298,7 +298,7 @@ def forward( adv_loss = adv_loss + self.hinge_gan_loss_weight * hinge_fake_loss # Feature Matching Loss - if self.use_feat_match_loss and not feats_fake is None: + if self.use_feat_match_loss and feats_fake is not None: feat_match_loss = self.feat_match_loss(feats_fake, feats_real) return_dict["G_feat_match_loss"] = feat_match_loss adv_loss = adv_loss + self.feat_match_loss_weight * feat_match_loss From 64bb41f4fa90a8adef7a08b3f5d6806a633066f6 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 27 Sep 2023 00:45:13 +0300 Subject: [PATCH 009/255] Ruff autofix C41 --- TTS/bin/find_unique_phonemes.py | 2 +- TTS/tts/layers/overflow/plotting_utils.py | 2 +- TTS/tts/layers/tortoise/clvp.py | 2 +- TTS/tts/layers/tortoise/diffusion.py | 2 +- TTS/tts/layers/tortoise/transformer.py | 4 ++-- TTS/tts/layers/tortoise/xtransformers.py | 24 +++++++++++------------ TTS/tts/layers/xtts/dvae.py | 6 +++--- TTS/tts/models/vits.py | 5 ++--- TTS/tts/utils/languages.py | 2 +- TTS/tts/utils/managers.py | 2 +- TTS/tts/utils/text/characters.py | 8 ++------ TTS/tts/utils/text/japanese/phonemizer.py | 4 ++-- tests/tts_tests/test_tacotron2_model.py | 2 +- tests/tts_tests/test_tacotron_model.py | 2 +- 14 files changed, 30 insertions(+), 37 deletions(-) diff --git a/TTS/bin/find_unique_phonemes.py b/TTS/bin/find_unique_phonemes.py index 4bd7a78eef..2df0700676 100644 --- a/TTS/bin/find_unique_phonemes.py +++ b/TTS/bin/find_unique_phonemes.py @@ -13,7 +13,7 @@ def compute_phonemes(item): text = item["text"] ph = phonemizer.phonemize(text).replace("|", "") - return set(list(ph)) + return set(ph) def main(): diff --git a/TTS/tts/layers/overflow/plotting_utils.py b/TTS/tts/layers/overflow/plotting_utils.py index a63aeb370a..d9d3e3d141 100644 --- a/TTS/tts/layers/overflow/plotting_utils.py +++ b/TTS/tts/layers/overflow/plotting_utils.py @@ -71,7 +71,7 @@ def plot_transition_probabilities_to_numpy(states, transition_probabilities, out ax.set_title("Transition probability of state") ax.set_xlabel("hidden state") ax.set_ylabel("probability") - ax.set_xticks([i for i in range(len(transition_probabilities))]) # pylint: disable=unnecessary-comprehension + ax.set_xticks(list(range(len(transition_probabilities)))) ax.set_xticklabels([int(x) for x in states], rotation=90) plt.tight_layout() if not output_fig: diff --git a/TTS/tts/layers/tortoise/clvp.py b/TTS/tts/layers/tortoise/clvp.py index 69b8c17c3f..241dfdd4f4 100644 --- a/TTS/tts/layers/tortoise/clvp.py +++ b/TTS/tts/layers/tortoise/clvp.py @@ -126,7 +126,7 @@ def forward(self, text, speech_tokens, return_loss=False): text_latents = self.to_text_latent(text_latents) speech_latents = 
self.to_speech_latent(speech_latents) - text_latents, speech_latents = map(lambda t: F.normalize(t, p=2, dim=-1), (text_latents, speech_latents)) + text_latents, speech_latents = (F.normalize(t, p=2, dim=-1) for t in (text_latents, speech_latents)) temp = self.temperature.exp() diff --git a/TTS/tts/layers/tortoise/diffusion.py b/TTS/tts/layers/tortoise/diffusion.py index 7bea02ca08..2b29091b44 100644 --- a/TTS/tts/layers/tortoise/diffusion.py +++ b/TTS/tts/layers/tortoise/diffusion.py @@ -972,7 +972,7 @@ def autoregressive_training_losses( assert False # not currently supported for this type of diffusion. elif self.loss_type == LossType.MSE or self.loss_type == LossType.RESCALED_MSE: model_outputs = model(x_t, x_start, self._scale_timesteps(t), **model_kwargs) - terms.update({k: o for k, o in zip(model_output_keys, model_outputs)}) + terms.update(dict(zip(model_output_keys, model_outputs))) model_output = terms[gd_out_key] if self.model_var_type in [ ModelVarType.LEARNED, diff --git a/TTS/tts/layers/tortoise/transformer.py b/TTS/tts/layers/tortoise/transformer.py index 70d46aa3e0..6cb1bab96a 100644 --- a/TTS/tts/layers/tortoise/transformer.py +++ b/TTS/tts/layers/tortoise/transformer.py @@ -37,7 +37,7 @@ def route_args(router, args, depth): for key in matched_keys: val = args[key] for depth, ((f_args, g_args), routes) in enumerate(zip(routed_args, router[key])): - new_f_args, new_g_args = map(lambda route: ({key: val} if route else {}), routes) + new_f_args, new_g_args = (({key: val} if route else {}) for route in routes) routed_args[depth] = ({**f_args, **new_f_args}, {**g_args, **new_g_args}) return routed_args @@ -152,7 +152,7 @@ def forward(self, x, mask=None): softmax = torch.softmax qkv = self.to_qkv(x).chunk(3, dim=-1) - q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=h), qkv) + q, k, v = (rearrange(t, "b n (h d) -> b h n d", h=h) for t in qkv) q = q * self.scale diff --git a/TTS/tts/layers/tortoise/xtransformers.py b/TTS/tts/layers/tortoise/xtransformers.py index 1eb3f77269..9325b8c720 100644 --- a/TTS/tts/layers/tortoise/xtransformers.py +++ b/TTS/tts/layers/tortoise/xtransformers.py @@ -84,7 +84,7 @@ def init_zero_(layer): def pick_and_pop(keys, d): - values = list(map(lambda key: d.pop(key), keys)) + values = [d.pop(key) for key in keys] return dict(zip(keys, values)) @@ -107,7 +107,7 @@ def group_by_key_prefix(prefix, d): def groupby_prefix_and_trim(prefix, d): kwargs_with_prefix, kwargs = group_dict_by_key(partial(string_begins_with, prefix), d) - kwargs_without_prefix = dict(map(lambda x: (x[0][len(prefix) :], x[1]), tuple(kwargs_with_prefix.items()))) + kwargs_without_prefix = {x[0][len(prefix) :]: x[1] for x in tuple(kwargs_with_prefix.items())} return kwargs_without_prefix, kwargs @@ -428,7 +428,7 @@ def forward(self, x, **kwargs): feats_per_shift = x.shape[-1] // segments splitted = x.split(feats_per_shift, dim=-1) segments_to_shift, rest = splitted[:segments], splitted[segments:] - segments_to_shift = list(map(lambda args: shift(*args, mask=mask), zip(segments_to_shift, shifts))) + segments_to_shift = [shift(*args, mask=mask) for args in zip(segments_to_shift, shifts)] x = torch.cat((*segments_to_shift, *rest), dim=-1) return self.fn(x, **kwargs) @@ -635,7 +635,7 @@ def forward( v = self.to_v(v_input) if not collab_heads: - q, k, v = map(lambda t: rearrange(t, "b n (h d) -> b h n d", h=h), (q, k, v)) + q, k, v = (rearrange(t, "b n (h d) -> b h n d", h=h) for t in (q, k, v)) else: q = einsum("b i d, h d -> b h i d", q, self.collab_mixing) k = rearrange(k, 
"b n d -> b () n d") @@ -650,9 +650,9 @@ def forward( if exists(rotary_pos_emb) and not has_context: l = rotary_pos_emb.shape[-1] - (ql, qr), (kl, kr), (vl, vr) = map(lambda t: (t[..., :l], t[..., l:]), (q, k, v)) - ql, kl, vl = map(lambda t: apply_rotary_pos_emb(t, rotary_pos_emb), (ql, kl, vl)) - q, k, v = map(lambda t: torch.cat(t, dim=-1), ((ql, qr), (kl, kr), (vl, vr))) + (ql, qr), (kl, kr), (vl, vr) = ((t[..., :l], t[..., l:]) for t in (q, k, v)) + ql, kl, vl = (apply_rotary_pos_emb(t, rotary_pos_emb) for t in (ql, kl, vl)) + q, k, v = (torch.cat(t, dim=-1) for t in ((ql, qr), (kl, kr), (vl, vr))) input_mask = None if any(map(exists, (mask, context_mask))): @@ -664,7 +664,7 @@ def forward( input_mask = q_mask * k_mask if self.num_mem_kv > 0: - mem_k, mem_v = map(lambda t: repeat(t, "h n d -> b h n d", b=b), (self.mem_k, self.mem_v)) + mem_k, mem_v = (repeat(t, "h n d -> b h n d", b=b) for t in (self.mem_k, self.mem_v)) k = torch.cat((mem_k, k), dim=-2) v = torch.cat((mem_v, v), dim=-2) if exists(input_mask): @@ -964,9 +964,7 @@ def forward( seq_len = x.shape[1] if past_key_values is not None: seq_len += past_key_values[0][0].shape[-2] - max_rotary_emb_length = max( - list(map(lambda m: (m.shape[1] if exists(m) else 0) + seq_len, mems)) + [expected_seq_len] - ) + max_rotary_emb_length = max([(m.shape[1] if exists(m) else 0) + seq_len for m in mems] + [expected_seq_len]) rotary_pos_emb = self.rotary_pos_emb(max_rotary_emb_length, x.device) present_key_values = [] @@ -1200,7 +1198,7 @@ def forward( res = [out] if return_attn: - attn_maps = list(map(lambda t: t.post_softmax_attn, intermediates.attn_intermediates)) + attn_maps = [t.post_softmax_attn for t in intermediates.attn_intermediates] res.append(attn_maps) if use_cache: res.append(intermediates.past_key_values) @@ -1249,7 +1247,7 @@ def forward(self, x, return_embeddings=False, mask=None, return_attn=False, mems res = [out] if return_attn: - attn_maps = list(map(lambda t: t.post_softmax_attn, intermediates.attn_intermediates)) + attn_maps = [t.post_softmax_attn for t in intermediates.attn_intermediates] res.append(attn_maps) if use_cache: res.append(intermediates.past_key_values) diff --git a/TTS/tts/layers/xtts/dvae.py b/TTS/tts/layers/xtts/dvae.py index bdd7a9d09f..8598f0b47a 100644 --- a/TTS/tts/layers/xtts/dvae.py +++ b/TTS/tts/layers/xtts/dvae.py @@ -260,7 +260,7 @@ def __init__( dec_init_chan = codebook_dim if not has_resblocks else dec_chans[0] dec_chans = [dec_init_chan, *dec_chans] - enc_chans_io, dec_chans_io = map(lambda t: list(zip(t[:-1], t[1:])), (enc_chans, dec_chans)) + enc_chans_io, dec_chans_io = (list(zip(t[:-1], t[1:])) for t in (enc_chans, dec_chans)) pad = (kernel_size - 1) // 2 for (enc_in, enc_out), (dec_in, dec_out) in zip(enc_chans_io, dec_chans_io): @@ -306,9 +306,9 @@ def norm(self, images): if not self.normalization is not None: return images - means, stds = map(lambda t: torch.as_tensor(t).to(images), self.normalization) + means, stds = (torch.as_tensor(t).to(images) for t in self.normalization) arrange = "c -> () c () ()" if self.positional_dims == 2 else "c -> () c ()" - means, stds = map(lambda t: rearrange(t, arrange), (means, stds)) + means, stds = (rearrange(t, arrange) for t in (means, stds)) images = images.clone() images.sub_(means).div_(stds) return images diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index fc1896ee07..2c60ece789 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -1948,8 +1948,7 @@ def __init__( def _create_vocab(self): self._vocab = 
[self._pad] + list(self._punctuations) + list(self._characters) + [self._blank] self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} - # pylint: disable=unnecessary-comprehension - self._id_to_char = {idx: char for idx, char in enumerate(self.vocab)} + self._id_to_char = dict(enumerate(self.vocab)) @staticmethod def init_from_config(config: Coqpit): @@ -1996,4 +1995,4 @@ def vocab(self, vocab_file): self.blank = self._vocab[0] self.pad = " " self._char_to_id = {s: i for i, s in enumerate(self._vocab)} # pylint: disable=unnecessary-comprehension - self._id_to_char = {i: s for i, s in enumerate(self._vocab)} # pylint: disable=unnecessary-comprehension + self._id_to_char = dict(enumerate(self._vocab)) diff --git a/TTS/tts/utils/languages.py b/TTS/tts/utils/languages.py index 1e1836b32c..89e5e1911e 100644 --- a/TTS/tts/utils/languages.py +++ b/TTS/tts/utils/languages.py @@ -59,7 +59,7 @@ def parse_language_ids_from_config(c: Coqpit) -> Dict: languages.add(dataset["language"]) else: raise ValueError(f"Dataset {dataset['name']} has no language specified.") - return {name: i for i, name in enumerate(sorted(list(languages)))} + return {name: i for i, name in enumerate(sorted(languages))} def set_language_ids_from_config(self, c: Coqpit) -> None: """Set language IDs from config samples. diff --git a/TTS/tts/utils/managers.py b/TTS/tts/utils/managers.py index 1f94c5332d..23aa52a8a2 100644 --- a/TTS/tts/utils/managers.py +++ b/TTS/tts/utils/managers.py @@ -193,7 +193,7 @@ def read_embeddings_from_file(file_path: str): embeddings = load_file(file_path) speakers = sorted({x["name"] for x in embeddings.values()}) name_to_id = {name: i for i, name in enumerate(speakers)} - clip_ids = list(set(sorted(clip_name for clip_name in embeddings.keys()))) + clip_ids = list(set(clip_name for clip_name in embeddings.keys())) # cache embeddings_by_names for fast inference using a bigger speakers.json embeddings_by_names = {} for x in embeddings.values(): diff --git a/TTS/tts/utils/text/characters.py b/TTS/tts/utils/text/characters.py index 8fa45ed84b..37c7a7ca23 100644 --- a/TTS/tts/utils/text/characters.py +++ b/TTS/tts/utils/text/characters.py @@ -87,9 +87,7 @@ def vocab(self, vocab): if vocab is not None: self._vocab = vocab self._char_to_id = {char: idx for idx, char in enumerate(self._vocab)} - self._id_to_char = { - idx: char for idx, char in enumerate(self._vocab) # pylint: disable=unnecessary-comprehension - } + self._id_to_char = dict(enumerate(self._vocab)) @staticmethod def init_from_config(config, **kwargs): @@ -269,9 +267,7 @@ def vocab(self): def vocab(self, vocab): self._vocab = vocab self._char_to_id = {char: idx for idx, char in enumerate(self.vocab)} - self._id_to_char = { - idx: char for idx, char in enumerate(self.vocab) # pylint: disable=unnecessary-comprehension - } + self._id_to_char = dict(enumerate(self.vocab)) @property def num_chars(self): diff --git a/TTS/tts/utils/text/japanese/phonemizer.py b/TTS/tts/utils/text/japanese/phonemizer.py index c3111067e1..30072ae501 100644 --- a/TTS/tts/utils/text/japanese/phonemizer.py +++ b/TTS/tts/utils/text/japanese/phonemizer.py @@ -350,8 +350,8 @@ def hira2kata(text: str) -> str: return text.replace("う゛", "ヴ") -_SYMBOL_TOKENS = set(list("・、。?!")) -_NO_YOMI_TOKENS = set(list("「」『』―()[][] …")) +_SYMBOL_TOKENS = set("・、。?!") +_NO_YOMI_TOKENS = set("「」『』―()[][] …") _TAGGER = MeCab.Tagger() diff --git a/tests/tts_tests/test_tacotron2_model.py b/tests/tts_tests/test_tacotron2_model.py index b1bdeb9fd1..72b6bcd46b 100644 --- 
a/tests/tts_tests/test_tacotron2_model.py +++ b/tests/tts_tests/test_tacotron2_model.py @@ -278,7 +278,7 @@ def test_train_step(): }, ) - batch = dict({}) + batch = {} batch["text_input"] = torch.randint(0, 24, (8, 128)).long().to(device) batch["text_lengths"] = torch.randint(100, 129, (8,)).long().to(device) batch["text_lengths"] = torch.sort(batch["text_lengths"], descending=True)[0] diff --git a/tests/tts_tests/test_tacotron_model.py b/tests/tts_tests/test_tacotron_model.py index 906ec3d09f..2ca068f6fe 100644 --- a/tests/tts_tests/test_tacotron_model.py +++ b/tests/tts_tests/test_tacotron_model.py @@ -266,7 +266,7 @@ def test_train_step(): }, ) - batch = dict({}) + batch = {} batch["text_input"] = torch.randint(0, 24, (8, 128)).long().to(device) batch["text_lengths"] = torch.randint(100, 129, (8,)).long().to(device) batch["text_lengths"] = torch.sort(batch["text_lengths"], descending=True)[0] From bc2cf296a37e80a7077b0eca60000f9f5e0b3def Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 27 Sep 2023 00:51:17 +0300 Subject: [PATCH 010/255] Ruff autofix PLW3301 --- TTS/tts/layers/delightful_tts/acoustic_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/tts/layers/delightful_tts/acoustic_model.py b/TTS/tts/layers/delightful_tts/acoustic_model.py index c906b882e5..74ec204281 100644 --- a/TTS/tts/layers/delightful_tts/acoustic_model.py +++ b/TTS/tts/layers/delightful_tts/acoustic_model.py @@ -362,7 +362,7 @@ def forward( pos_encoding = positional_encoding( self.emb_dim, - max(token_embeddings.shape[1], max(mel_lens)), + max(token_embeddings.shape[1], *mel_lens), device=token_embeddings.device, ) encoder_outputs = self.encoder( From 00f8f4892a06eb2c3368aeb008f3080d2335a4b0 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 23 Oct 2023 12:11:08 +0300 Subject: [PATCH 011/255] Ruff autofix unnecessary passes --- TTS/tts/layers/xtts/zh_num2words.py | 2 -- TTS/tts/utils/text/phonemizers/__init__.py | 1 - 2 files changed, 3 deletions(-) diff --git a/TTS/tts/layers/xtts/zh_num2words.py b/TTS/tts/layers/xtts/zh_num2words.py index 42fd364a56..7d8f658160 100644 --- a/TTS/tts/layers/xtts/zh_num2words.py +++ b/TTS/tts/layers/xtts/zh_num2words.py @@ -489,8 +489,6 @@ class NumberSystem(object): 中文数字系统 """ - pass - class MathSymbol(object): """ diff --git a/TTS/tts/utils/text/phonemizers/__init__.py b/TTS/tts/utils/text/phonemizers/__init__.py index f9a0340c55..744ccb3e70 100644 --- a/TTS/tts/utils/text/phonemizers/__init__.py +++ b/TTS/tts/utils/text/phonemizers/__init__.py @@ -10,7 +10,6 @@ from TTS.tts.utils.text.phonemizers.ja_jp_phonemizer import JA_JP_Phonemizer except ImportError: JA_JP_Phonemizer = None - pass PHONEMIZERS = {b.name(): b for b in (ESpeak, Gruut, KO_KR_Phonemizer, BN_Phonemizer)} From 33b69c6c0930e10deebc7b4bbac09ba8cffc8b8c Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 27 Sep 2023 01:15:39 +0300 Subject: [PATCH 012/255] Add some noqa directives (for now) --- TTS/bin/compute_attention_masks.py | 2 +- TTS/tts/layers/xtts/stream_generator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py index 9ab520be7d..faadf6901d 100644 --- a/TTS/bin/compute_attention_masks.py +++ b/TTS/bin/compute_attention_masks.py @@ -70,7 +70,7 @@ # if the vocabulary was passed, replace the default if "characters" in C.keys(): - symbols, phonemes = make_symbols(**C.characters) + symbols, phonemes = make_symbols(**C.characters) # noqa: F811 # load the model num_chars = 
len(phonemes) if C.use_phonemes else len(symbols) diff --git a/TTS/tts/layers/xtts/stream_generator.py b/TTS/tts/layers/xtts/stream_generator.py index e12f8995cf..7921102cb2 100644 --- a/TTS/tts/layers/xtts/stream_generator.py +++ b/TTS/tts/layers/xtts/stream_generator.py @@ -43,7 +43,7 @@ def __init__(self, **kwargs): class NewGenerationMixin(GenerationMixin): @torch.no_grad() - def generate( + def generate( # noqa: PLR0911 self, inputs: Optional[torch.Tensor] = None, generation_config: Optional[StreamGenerationConfig] = None, From 027a7973057a7f7085993b81f1bf977c7d69daba Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 27 Sep 2023 01:16:08 +0300 Subject: [PATCH 013/255] CI: re-enable `make lint` --- .github/workflows/style_check.yml | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml index b7c6393baa..f6018a3f19 100644 --- a/.github/workflows/style_check.yml +++ b/.github/workflows/style_check.yml @@ -29,18 +29,7 @@ jobs: architecture: x64 cache: 'pip' cache-dependency-path: 'requirements*' - - name: check OS - run: cat /etc/os-release - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y git make gcc - make system-deps - - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel - - name: Install TTS - run: | - python3 -m pip install .[all] - python3 setup.py egg_info - - name: Style check - run: make style + - name: Install/upgrade dev dependencies + run: python3 -m pip install -r requirements.dev.txt + - name: Lint check + run: make lint From 08fa5d40980c706d4f92227c62db1c435f8d616f Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 31 Oct 2023 15:42:48 +0200 Subject: [PATCH 014/255] Fix implicitly concatenated docstring --- TTS/vocoder/utils/generic_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/vocoder/utils/generic_utils.py b/TTS/vocoder/utils/generic_utils.py index 63a0af4445..113240fd75 100644 --- a/TTS/vocoder/utils/generic_utils.py +++ b/TTS/vocoder/utils/generic_utils.py @@ -40,7 +40,7 @@ def plot_results(y_hat: torch.tensor, y: torch.tensor, ap: AudioProcessor, name_ Returns: Dict: output figures keyed by the name of the figures. 
- """ """Plot vocoder model results""" + """ if name_prefix is None: name_prefix = "" From 4584ef65806f3912df5e00de0a4160b4dad0c9a3 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 31 Oct 2023 17:02:03 +0200 Subject: [PATCH 015/255] Simplify branch in TTS/bin/synthesize.py --- TTS/bin/synthesize.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index b86252ab67..bed899530e 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -379,10 +379,8 @@ def main(): if model_item["model_type"] == "tts_models": tts_path = model_path tts_config_path = config_path - if "default_vocoder" in model_item: - args.vocoder_name = ( - model_item["default_vocoder"] if args.vocoder_name is None else args.vocoder_name - ) + if args.vocoder_name is None and "default_vocoder" in model_item: + args.vocoder_name = model_item["default_vocoder"] # voice conversion model if model_item["model_type"] == "voice_conversion_models": From aa549e90283405d5384c7b0ec29ff818101c9260 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Wed, 8 Nov 2023 13:14:09 +0200 Subject: [PATCH 016/255] Fix trailing whitespace --- TTS/demos/xtts_ft_demo/utils/formatter.py | 2 +- TTS/demos/xtts_ft_demo/xtts_demo.py | 4 ++-- TTS/tts/models/xtts.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/TTS/demos/xtts_ft_demo/utils/formatter.py b/TTS/demos/xtts_ft_demo/utils/formatter.py index 937ee4dd53..6d7b770ff5 100644 --- a/TTS/demos/xtts_ft_demo/utils/formatter.py +++ b/TTS/demos/xtts_ft_demo/utils/formatter.py @@ -44,7 +44,7 @@ def format_audio_list(audio_files, target_language="en", out_path=None, buffer=0 os.makedirs(out_path, exist_ok=True) # Loading Whisper - device = "cuda" if torch.cuda.is_available() else "cpu" + device = "cuda" if torch.cuda.is_available() else "cpu" print("Loading Whisper Model!") asr_model = WhisperModel("large-v2", device=device, compute_type="float16") diff --git a/TTS/demos/xtts_ft_demo/xtts_demo.py b/TTS/demos/xtts_ft_demo/xtts_demo.py index a7fbc0e821..b8ffb231dd 100644 --- a/TTS/demos/xtts_ft_demo/xtts_demo.py +++ b/TTS/demos/xtts_ft_demo/xtts_demo.py @@ -64,7 +64,7 @@ def run_tts(lang, tts_text, speaker_audio_file): -# define a logger to redirect +# define a logger to redirect class Logger: def __init__(self, filename="log.out"): self.log_file = filename @@ -109,7 +109,7 @@ def read_logs(): description="""XTTS fine-tuning demo\n\n""" """ Example runs: - python3 TTS/demos/xtts_ft_demo/xtts_demo.py --port + python3 TTS/demos/xtts_ft_demo/xtts_demo.py --port """, formatter_class=argparse.RawTextHelpFormatter, ) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 83812f377f..3065d011ef 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -274,7 +274,7 @@ def get_gpt_cond_latents(self, audio, sr, length: int = 30, chunk_length: int = for i in range(0, audio.shape[1], 22050 * chunk_length): audio_chunk = audio[:, i : i + 22050 * chunk_length] - # if the chunk is too short ignore it + # if the chunk is too short ignore it if audio_chunk.size(-1) < 22050 * 0.33: continue From 32abb1a7c47ce369a914d56d1711828ce8835b5e Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 6 Nov 2023 20:27:24 +0200 Subject: [PATCH 017/255] xtts/perceiver_encoder: Delete duplicate exists() --- TTS/tts/layers/xtts/perceiver_encoder.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/TTS/tts/layers/xtts/perceiver_encoder.py b/TTS/tts/layers/xtts/perceiver_encoder.py index 7b7ee79b50..d1aa16c456 100644 
--- a/TTS/tts/layers/xtts/perceiver_encoder.py +++ b/TTS/tts/layers/xtts/perceiver_encoder.py @@ -155,10 +155,6 @@ def Sequential(*mods): return nn.Sequential(*filter(exists, mods)) -def exists(x): - return x is not None - - def default(val, d): if exists(val): return val From bd172dabbf214e6f11ab091e8346b6e602dff918 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 23 Oct 2023 12:15:56 +0300 Subject: [PATCH 018/255] xtts/stream_generator: remove duplicate import + code --- TTS/tts/layers/xtts/stream_generator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/TTS/tts/layers/xtts/stream_generator.py b/TTS/tts/layers/xtts/stream_generator.py index 7921102cb2..b7e07589c5 100644 --- a/TTS/tts/layers/xtts/stream_generator.py +++ b/TTS/tts/layers/xtts/stream_generator.py @@ -885,10 +885,10 @@ def init_stream_support(): if __name__ == "__main__": - from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel + from transformers import AutoModelForCausalLM, AutoTokenizer + + init_stream_support() - PreTrainedModel.generate = NewGenerationMixin.generate - PreTrainedModel.sample_stream = NewGenerationMixin.sample_stream model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m", torch_dtype=torch.float16) tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m") From d6ea806469dd205ffb67882dd594344c61c66396 Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Mon, 4 Dec 2023 10:38:07 +0200 Subject: [PATCH 019/255] Run `make style` --- TTS/api.py | 4 +- TTS/bin/synthesize.py | 2 +- TTS/config/__init__.py | 5 +- TTS/demos/xtts_ft_demo/utils/formatter.py | 39 +++++--- TTS/demos/xtts_ft_demo/utils/gpt_train.py | 9 +- TTS/demos/xtts_ft_demo/xtts_demo.py | 111 ++++++++++++--------- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 2 +- TTS/tts/layers/xtts/xtts_manager.py | 15 +-- TTS/tts/models/forward_tts.py | 8 +- TTS/tts/models/xtts.py | 16 +-- TTS/utils/synthesizer.py | 2 +- 11 files changed, 121 insertions(+), 92 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index a4a47f61c5..2ef6f3a085 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -168,9 +168,7 @@ def load_tts_model_by_name(self, model_name: str, gpu: bool = False): self.synthesizer = None self.model_name = model_name - model_path, config_path, vocoder_path, vocoder_config_path, model_dir = self.download_model_by_name( - model_name - ) + model_path, config_path, vocoder_path, vocoder_config_path, model_dir = self.download_model_by_name(model_name) # init synthesizer # None values are fetch from the model diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index bed899530e..b06c93f7d1 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -224,7 +224,7 @@ def main(): const=True, default=False, ) - + # args for multi-speaker synthesis parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None) parser.add_argument("--language_ids_file_path", type=str, help="JSON file for multi-lingual model.", default=None) diff --git a/TTS/config/__init__.py b/TTS/config/__init__.py index c5a6dd68e2..5103f200b0 100644 --- a/TTS/config/__init__.py +++ b/TTS/config/__init__.py @@ -17,9 +17,12 @@ def read_json_with_comments(json_path): with fsspec.open(json_path, "r", encoding="utf-8") as f: input_str = f.read() # handle comments but not urls with // - input_str = re.sub(r"(\"(?:[^\"\\]|\\.)*\")|(/\*(?:.|[\\n\\r])*?\*/)|(//.*)", lambda m: m.group(1) or m.group(2) or "", input_str) + input_str = re.sub( + 
r"(\"(?:[^\"\\]|\\.)*\")|(/\*(?:.|[\\n\\r])*?\*/)|(//.*)", lambda m: m.group(1) or m.group(2) or "", input_str + ) return json.loads(input_str) + def register_config(model_name: str) -> Coqpit: """Find the right config for the given model name. diff --git a/TTS/demos/xtts_ft_demo/utils/formatter.py b/TTS/demos/xtts_ft_demo/utils/formatter.py index 6d7b770ff5..40e8b8ed32 100644 --- a/TTS/demos/xtts_ft_demo/utils/formatter.py +++ b/TTS/demos/xtts_ft_demo/utils/formatter.py @@ -19,9 +19,10 @@ def list_audios(basePath, contains=None): # return the set of files that are valid return list_files(basePath, validExts=audio_types, contains=contains) + def list_files(basePath, validExts=None, contains=None): # loop over the directory structure - for (rootDir, dirNames, filenames) in os.walk(basePath): + for rootDir, dirNames, filenames in os.walk(basePath): # loop over the filenames in the current directory for filename in filenames: # if the contains string is not none and the filename does not contain @@ -30,7 +31,7 @@ def list_files(basePath, validExts=None, contains=None): continue # determine the file extension of the current file - ext = filename[filename.rfind("."):].lower() + ext = filename[filename.rfind(".") :].lower() # check to see if the file is an audio and should be processed if validExts is None or ext.endswith(validExts): @@ -38,7 +39,16 @@ def list_files(basePath, validExts=None, contains=None): audioPath = os.path.join(rootDir, filename) yield audioPath -def format_audio_list(audio_files, target_language="en", out_path=None, buffer=0.2, eval_percentage=0.15, speaker_name="coqui", gradio_progress=None): + +def format_audio_list( + audio_files, + target_language="en", + out_path=None, + buffer=0.2, + eval_percentage=0.15, + speaker_name="coqui", + gradio_progress=None, +): audio_total_size = 0 # make sure that ooutput file exists os.makedirs(out_path, exist_ok=True) @@ -63,7 +73,7 @@ def format_audio_list(audio_files, target_language="en", out_path=None, buffer=0 wav = torch.mean(wav, dim=0, keepdim=True) wav = wav.squeeze() - audio_total_size += (wav.size(-1) / sr) + audio_total_size += wav.size(-1) / sr segments, _ = asr_model.transcribe(audio_path, word_timestamps=True, language=target_language) segments = list(segments) @@ -88,7 +98,7 @@ def format_audio_list(audio_files, target_language="en", out_path=None, buffer=0 # get previous sentence end previous_word_end = words_list[word_idx - 1].end # add buffer or get the silence midle between the previous sentence and the current one - sentence_start = max(sentence_start - buffer, (previous_word_end + sentence_start)/2) + sentence_start = max(sentence_start - buffer, (previous_word_end + sentence_start) / 2) sentence = word.word first_word = False @@ -112,19 +122,16 @@ def format_audio_list(audio_files, target_language="en", out_path=None, buffer=0 # Average the current word end and next word start word_end = min((word.end + next_word_start) / 2, word.end + buffer) - + absoulte_path = os.path.join(out_path, audio_file) os.makedirs(os.path.dirname(absoulte_path), exist_ok=True) i += 1 first_word = True - audio = wav[int(sr*sentence_start):int(sr*word_end)].unsqueeze(0) + audio = wav[int(sr * sentence_start) : int(sr * word_end)].unsqueeze(0) # if the audio is too short ignore it (i.e < 0.33 seconds) - if audio.size(-1) >= sr/3: - torchaudio.save(absoulte_path, - audio, - sr - ) + if audio.size(-1) >= sr / 3: + torchaudio.save(absoulte_path, audio, sr) else: continue @@ -134,21 +141,21 @@ def format_audio_list(audio_files, 
target_language="en", out_path=None, buffer=0 df = pandas.DataFrame(metadata) df = df.sample(frac=1) - num_val_samples = int(len(df)*eval_percentage) + num_val_samples = int(len(df) * eval_percentage) df_eval = df[:num_val_samples] df_train = df[num_val_samples:] - df_train = df_train.sort_values('audio_file') + df_train = df_train.sort_values("audio_file") train_metadata_path = os.path.join(out_path, "metadata_train.csv") df_train.to_csv(train_metadata_path, sep="|", index=False) eval_metadata_path = os.path.join(out_path, "metadata_eval.csv") - df_eval = df_eval.sort_values('audio_file') + df_eval = df_eval.sort_values("audio_file") df_eval.to_csv(eval_metadata_path, sep="|", index=False) # deallocate VRAM and RAM del asr_model, df_train, df_eval, df, metadata gc.collect() - return train_metadata_path, eval_metadata_path, audio_total_size \ No newline at end of file + return train_metadata_path, eval_metadata_path, audio_total_size diff --git a/TTS/demos/xtts_ft_demo/utils/gpt_train.py b/TTS/demos/xtts_ft_demo/utils/gpt_train.py index 80be4fab40..7b41966b8f 100644 --- a/TTS/demos/xtts_ft_demo/utils/gpt_train.py +++ b/TTS/demos/xtts_ft_demo/utils/gpt_train.py @@ -25,7 +25,6 @@ def train_gpt(language, num_epochs, batch_size, grad_acumm, train_csv, eval_csv, BATCH_SIZE = batch_size # set here the batch size GRAD_ACUMM_STEPS = grad_acumm # set here the grad accumulation steps - # Define here the dataset that you want to use for the fine-tuning on. config_dataset = BaseDatasetConfig( formatter="coqui", @@ -43,7 +42,6 @@ def train_gpt(language, num_epochs, batch_size, grad_acumm, train_csv, eval_csv, CHECKPOINTS_OUT_PATH = os.path.join(OUT_PATH, "XTTS_v2.0_original_model_files/") os.makedirs(CHECKPOINTS_OUT_PATH, exist_ok=True) - # DVAE files DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/dvae.pth" MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/mel_stats.pth" @@ -55,8 +53,9 @@ def train_gpt(language, num_epochs, batch_size, grad_acumm, train_csv, eval_csv, # download DVAE files if needed if not os.path.isfile(DVAE_CHECKPOINT) or not os.path.isfile(MEL_NORM_FILE): print(" > Downloading DVAE files!") - ModelManager._download_model_files([MEL_NORM_LINK, DVAE_CHECKPOINT_LINK], CHECKPOINTS_OUT_PATH, progress_bar=True) - + ModelManager._download_model_files( + [MEL_NORM_LINK, DVAE_CHECKPOINT_LINK], CHECKPOINTS_OUT_PATH, progress_bar=True + ) # Download XTTS v2.0 checkpoint if needed TOKENIZER_FILE_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/vocab.json" @@ -160,7 +159,7 @@ def train_gpt(language, num_epochs, batch_size, grad_acumm, train_csv, eval_csv, # get the longest text audio file to use as speaker reference samples_len = [len(item["text"].split(" ")) for item in train_samples] - longest_text_idx = samples_len.index(max(samples_len)) + longest_text_idx = samples_len.index(max(samples_len)) speaker_ref = train_samples[longest_text_idx]["audio_file"] trainer_out_path = trainer.output_path diff --git a/TTS/demos/xtts_ft_demo/xtts_demo.py b/TTS/demos/xtts_ft_demo/xtts_demo.py index b8ffb231dd..85168c641d 100644 --- a/TTS/demos/xtts_ft_demo/xtts_demo.py +++ b/TTS/demos/xtts_ft_demo/xtts_demo.py @@ -20,7 +20,10 @@ def clear_gpu_cache(): if torch.cuda.is_available(): torch.cuda.empty_cache() + XTTS_MODEL = None + + def load_model(xtts_checkpoint, xtts_config, xtts_vocab): global XTTS_MODEL clear_gpu_cache() @@ -37,17 +40,23 @@ def load_model(xtts_checkpoint, xtts_config, xtts_vocab): print("Model Loaded!") return "Model Loaded!" 
+ def run_tts(lang, tts_text, speaker_audio_file): if XTTS_MODEL is None or not speaker_audio_file: return "You need to run the previous step to load the model !!", None, None - gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents(audio_path=speaker_audio_file, gpt_cond_len=XTTS_MODEL.config.gpt_cond_len, max_ref_length=XTTS_MODEL.config.max_ref_len, sound_norm_refs=XTTS_MODEL.config.sound_norm_refs) + gpt_cond_latent, speaker_embedding = XTTS_MODEL.get_conditioning_latents( + audio_path=speaker_audio_file, + gpt_cond_len=XTTS_MODEL.config.gpt_cond_len, + max_ref_length=XTTS_MODEL.config.max_ref_len, + sound_norm_refs=XTTS_MODEL.config.sound_norm_refs, + ) out = XTTS_MODEL.inference( text=tts_text, language=lang, gpt_cond_latent=gpt_cond_latent, speaker_embedding=speaker_embedding, - temperature=XTTS_MODEL.config.temperature, # Add custom parameters here + temperature=XTTS_MODEL.config.temperature, # Add custom parameters here length_penalty=XTTS_MODEL.config.length_penalty, repetition_penalty=XTTS_MODEL.config.repetition_penalty, top_k=XTTS_MODEL.config.top_k, @@ -62,8 +71,6 @@ def run_tts(lang, tts_text, speaker_audio_file): return "Speech generated !", out_path, speaker_audio_file - - # define a logger to redirect class Logger: def __init__(self, filename="log.out"): @@ -82,6 +89,7 @@ def flush(self): def isatty(self): return False + # redirect stdout and stderr to a file sys.stdout = Logger() sys.stderr = sys.stdout @@ -90,13 +98,10 @@ def isatty(self): # logging.basicConfig(stream=sys.stdout, level=logging.INFO) logging.basicConfig( - level=logging.INFO, - format="%(asctime)s [%(levelname)s] %(message)s", - handlers=[ - logging.StreamHandler(sys.stdout) - ] + level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s", handlers=[logging.StreamHandler(sys.stdout)] ) + def read_logs(): sys.stdout.flush() with open(sys.stdout.log_file, "r") as f: @@ -104,7 +109,6 @@ def read_logs(): if __name__ == "__main__": - parser = argparse.ArgumentParser( description="""XTTS fine-tuning demo\n\n""" """ @@ -187,12 +191,10 @@ def read_logs(): "zh", "hu", "ko", - "ja" + "ja", ], ) - progress_data = gr.Label( - label="Progress:" - ) + progress_data = gr.Label(label="Progress:") logs = gr.Textbox( label="Logs:", interactive=False, @@ -200,20 +202,30 @@ def read_logs(): demo.load(read_logs, None, logs, every=1) prompt_compute_btn = gr.Button(value="Step 1 - Create dataset") - + def preprocess_dataset(audio_path, language, out_path, progress=gr.Progress(track_tqdm=True)): clear_gpu_cache() out_path = os.path.join(out_path, "dataset") os.makedirs(out_path, exist_ok=True) if audio_path is None: - return "You should provide one or multiple audio files! If you provided it, probably the upload of the files is not finished yet!", "", "" + return ( + "You should provide one or multiple audio files! If you provided it, probably the upload of the files is not finished yet!", + "", + "", + ) else: try: - train_meta, eval_meta, audio_total_size = format_audio_list(audio_path, target_language=language, out_path=out_path, gradio_progress=progress) + train_meta, eval_meta, audio_total_size = format_audio_list( + audio_path, target_language=language, out_path=out_path, gradio_progress=progress + ) except: traceback.print_exc() error = traceback.format_exc() - return f"The data processing was interrupted due an error !! Please check the console to verify the full error message! \n Error summary: {error}", "", "" + return ( + f"The data processing was interrupted due an error !! 
Please check the console to verify the full error message! \n Error summary: {error}", + "", + "", + ) clear_gpu_cache() @@ -233,7 +245,7 @@ def preprocess_dataset(audio_path, language, out_path, progress=gr.Progress(trac eval_csv = gr.Textbox( label="Eval CSV:", ) - num_epochs = gr.Slider( + num_epochs = gr.Slider( label="Number of epochs:", minimum=1, maximum=100, @@ -261,9 +273,7 @@ def preprocess_dataset(audio_path, language, out_path, progress=gr.Progress(trac step=1, value=args.max_audio_length, ) - progress_train = gr.Label( - label="Progress:" - ) + progress_train = gr.Label(label="Progress:") logs_tts_train = gr.Textbox( label="Logs:", interactive=False, @@ -271,18 +281,41 @@ def preprocess_dataset(audio_path, language, out_path, progress=gr.Progress(trac demo.load(read_logs, None, logs_tts_train, every=1) train_btn = gr.Button(value="Step 2 - Run the training") - def train_model(language, train_csv, eval_csv, num_epochs, batch_size, grad_acumm, output_path, max_audio_length): + def train_model( + language, train_csv, eval_csv, num_epochs, batch_size, grad_acumm, output_path, max_audio_length + ): clear_gpu_cache() if not train_csv or not eval_csv: - return "You need to run the data processing step or manually set `Train CSV` and `Eval CSV` fields !", "", "", "", "" + return ( + "You need to run the data processing step or manually set `Train CSV` and `Eval CSV` fields !", + "", + "", + "", + "", + ) try: # convert seconds to waveform frames max_audio_length = int(max_audio_length * 22050) - config_path, original_xtts_checkpoint, vocab_file, exp_path, speaker_wav = train_gpt(language, num_epochs, batch_size, grad_acumm, train_csv, eval_csv, output_path=output_path, max_audio_length=max_audio_length) + config_path, original_xtts_checkpoint, vocab_file, exp_path, speaker_wav = train_gpt( + language, + num_epochs, + batch_size, + grad_acumm, + train_csv, + eval_csv, + output_path=output_path, + max_audio_length=max_audio_length, + ) except: traceback.print_exc() error = traceback.format_exc() - return f"The training was interrupted due an error !! Please check the console to check the full error message! \n Error summary: {error}", "", "", "", "" + return ( + f"The training was interrupted due an error !! Please check the console to check the full error message! 
\n Error summary: {error}", + "", + "", + "", + "", + ) # copy original files to avoid parameters changes issues os.system(f"cp {config_path} {exp_path}") @@ -309,9 +342,7 @@ def train_model(language, train_csv, eval_csv, num_epochs, batch_size, grad_acum label="XTTS vocab path:", value="", ) - progress_load = gr.Label( - label="Progress:" - ) + progress_load = gr.Label(label="Progress:") load_btn = gr.Button(value="Step 3 - Load Fine-tuned XTTS model") with gr.Column() as col2: @@ -339,7 +370,7 @@ def train_model(language, train_csv, eval_csv, num_epochs, batch_size, grad_acum "hu", "ko", "ja", - ] + ], ) tts_text = gr.Textbox( label="Input Text.", @@ -348,9 +379,7 @@ def train_model(language, train_csv, eval_csv, num_epochs, batch_size, grad_acum tts_btn = gr.Button(value="Step 4 - Inference") with gr.Column() as col3: - progress_gen = gr.Label( - label="Progress:" - ) + progress_gen = gr.Label(label="Progress:") tts_output_audio = gr.Audio(label="Generated Audio.") reference_audio = gr.Audio(label="Reference audio used.") @@ -368,7 +397,6 @@ def train_model(language, train_csv, eval_csv, num_epochs, batch_size, grad_acum ], ) - train_btn.click( fn=train_model, inputs=[ @@ -383,14 +411,10 @@ def train_model(language, train_csv, eval_csv, num_epochs, batch_size, grad_acum ], outputs=[progress_train, xtts_config, xtts_vocab, xtts_checkpoint, speaker_reference_audio], ) - + load_btn.click( fn=load_model, - inputs=[ - xtts_checkpoint, - xtts_config, - xtts_vocab - ], + inputs=[xtts_checkpoint, xtts_config, xtts_vocab], outputs=[progress_load], ) @@ -404,9 +428,4 @@ def train_model(language, train_csv, eval_csv, num_epochs, batch_size, grad_acum outputs=[progress_gen, tts_output_audio, reference_audio], ) - demo.launch( - share=True, - debug=False, - server_port=args.port, - server_name="0.0.0.0" - ) + demo.launch(share=True, debug=False, server_port=args.port, server_name="0.0.0.0") diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index 460fcc69f7..daf9fc7e4f 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -390,7 +390,7 @@ def get_data_loader( loader = DataLoader( dataset, sampler=sampler, - batch_size = config.eval_batch_size if is_eval else config.batch_size, + batch_size=config.eval_batch_size if is_eval else config.batch_size, collate_fn=dataset.collate_fn, num_workers=config.num_eval_loader_workers if is_eval else config.num_loader_workers, pin_memory=False, diff --git a/TTS/tts/layers/xtts/xtts_manager.py b/TTS/tts/layers/xtts/xtts_manager.py index 3e7d0f6c91..5a28d2a8a6 100644 --- a/TTS/tts/layers/xtts/xtts_manager.py +++ b/TTS/tts/layers/xtts/xtts_manager.py @@ -1,34 +1,35 @@ import torch -class SpeakerManager(): + +class SpeakerManager: def __init__(self, speaker_file_path=None): self.speakers = torch.load(speaker_file_path) @property def name_to_id(self): return self.speakers.keys() - + @property def num_speakers(self): return len(self.name_to_id) - + @property def speaker_names(self): return list(self.name_to_id.keys()) - -class LanguageManager(): + +class LanguageManager: def __init__(self, config): self.langs = config["languages"] @property def name_to_id(self): return self.langs - + @property def num_languages(self): return len(self.name_to_id) - + @property def language_names(self): return list(self.name_to_id) diff --git a/TTS/tts/models/forward_tts.py b/TTS/tts/models/forward_tts.py index b6e9ac8a14..1d3a13d433 100644 --- a/TTS/tts/models/forward_tts.py +++ 
b/TTS/tts/models/forward_tts.py @@ -299,7 +299,7 @@ def init_multispeaker(self, config: Coqpit): if config.use_d_vector_file: self.embedded_speaker_dim = config.d_vector_dim if self.args.d_vector_dim != self.args.hidden_channels: - #self.proj_g = nn.Conv1d(self.args.d_vector_dim, self.args.hidden_channels, 1) + # self.proj_g = nn.Conv1d(self.args.d_vector_dim, self.args.hidden_channels, 1) self.proj_g = nn.Linear(in_features=self.args.d_vector_dim, out_features=self.args.hidden_channels) # init speaker embedding layer if config.use_speaker_embedding and not config.use_d_vector_file: @@ -404,13 +404,13 @@ def _forward_encoder( # [B, T, C] x_emb = self.emb(x) # encoder pass - #o_en = self.encoder(torch.transpose(x_emb, 1, -1), x_mask) + # o_en = self.encoder(torch.transpose(x_emb, 1, -1), x_mask) o_en = self.encoder(torch.transpose(x_emb, 1, -1), x_mask, g) # speaker conditioning # TODO: try different ways of conditioning - if g is not None: + if g is not None: if hasattr(self, "proj_g"): - g = self.proj_g(g.view(g.shape[0], -1)).unsqueeze(-1) + g = self.proj_g(g.view(g.shape[0], -1)).unsqueeze(-1) o_en = o_en + g return o_en, x_mask, g, x_emb diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 3065d011ef..a6e9aefa5d 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -11,7 +11,7 @@ from TTS.tts.layers.xtts.hifigan_decoder import HifiDecoder from TTS.tts.layers.xtts.stream_generator import init_stream_support from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer, split_sentence -from TTS.tts.layers.xtts.xtts_manager import SpeakerManager, LanguageManager +from TTS.tts.layers.xtts.xtts_manager import LanguageManager, SpeakerManager from TTS.tts.models.base_tts import BaseTTS from TTS.utils.io import load_fsspec @@ -410,12 +410,14 @@ def synthesize(self, text, config, speaker_wav, language, speaker_id=None, **kwa if speaker_id is not None: gpt_cond_latent, speaker_embedding = self.speaker_manager.speakers[speaker_id].values() return self.inference(text, language, gpt_cond_latent, speaker_embedding, **settings) - settings.update({ - "gpt_cond_len": config.gpt_cond_len, - "gpt_cond_chunk_len": config.gpt_cond_chunk_len, - "max_ref_len": config.max_ref_len, - "sound_norm_refs": config.sound_norm_refs, - }) + settings.update( + { + "gpt_cond_len": config.gpt_cond_len, + "gpt_cond_chunk_len": config.gpt_cond_chunk_len, + "max_ref_len": config.max_ref_len, + "sound_norm_refs": config.sound_norm_refs, + } + ) return self.full_inference(text, speaker_wav, language, **settings) @torch.inference_mode() diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index b98647c30c..6165fb5e8a 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -335,7 +335,7 @@ def tts( # handle multi-lingual language_id = None if self.tts_languages_file or ( - hasattr(self.tts_model, "language_manager") + hasattr(self.tts_model, "language_manager") and self.tts_model.language_manager is not None and not self.tts_config.model == "xtts" ): From 08e00e4b499f72db7f7317cd1c876b17f7d9d0bf Mon Sep 17 00:00:00 2001 From: Ivan Peevski <133036+ipeevski@users.noreply.github.com> Date: Mon, 8 Jan 2024 14:45:04 +1030 Subject: [PATCH 020/255] Fix bark model --- TTS/.models.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/.models.json b/TTS/.models.json index b349e7397b..a77ebea1cf 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -46,7 +46,7 @@ "hf_url": [ "https://coqui.gateway.scarf.sh/hf/bark/coarse_2.pt", 
"https://coqui.gateway.scarf.sh/hf/bark/fine_2.pt", - "https://coqui.gateway.scarf.sh/hf/text_2.pt", + "https://coqui.gateway.scarf.sh/hf/bark/text_2.pt", "https://coqui.gateway.scarf.sh/hf/bark/config.json", "https://coqui.gateway.scarf.sh/hf/bark/hubert.pt", "https://coqui.gateway.scarf.sh/hf/bark/tokenizer.pth" From b184e9f0fe5b03f17df284d431982cdd040e99f6 Mon Sep 17 00:00:00 2001 From: wangjie Date: Fri, 12 Jan 2024 09:11:56 +0800 Subject: [PATCH 021/255] fix chinese pinyin phonemes --- .../text/chinese_mandarin/pinyinToPhonemes.py | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/TTS/tts/utils/text/chinese_mandarin/pinyinToPhonemes.py b/TTS/tts/utils/text/chinese_mandarin/pinyinToPhonemes.py index 4e25c3a4c9..89dd654ab1 100644 --- a/TTS/tts/utils/text/chinese_mandarin/pinyinToPhonemes.py +++ b/TTS/tts/utils/text/chinese_mandarin/pinyinToPhonemes.py @@ -94,25 +94,25 @@ "fo": ["fo"], "fou": ["fou"], "fu": ["fu"], - "ga": ["ga"], - "gai": ["gai"], - "gan": ["gan"], - "gang": ["gɑŋ"], - "gao": ["gaʌ"], - "ge": ["gø"], - "gei": ["gei"], - "gen": ["gœn"], - "geng": ["gɵŋ"], - "gong": ["goŋ"], - "gou": ["gou"], - "gu": ["gu"], - "gua": ["gua"], - "guai": ["guai"], - "guan": ["guan"], - "guang": ["guɑŋ"], - "gui": ["guei"], - "gun": ["gun"], - "guo": ["guo"], + "ga": ["ɡa"], + "gai": ["ɡai"], + "gan": ["ɡan"], + "gang": ["ɡɑŋ"], + "gao": ["ɡaʌ"], + "ge": ["ɡø"], + "gei": ["ɡei"], + "gen": ["ɡœn"], + "geng": ["ɡɵŋ"], + "gong": ["ɡoŋ"], + "gou": ["ɡou"], + "gu": ["ɡu"], + "gua": ["ɡua"], + "guai": ["ɡuai"], + "guan": ["ɡuan"], + "guang": ["ɡuɑŋ"], + "gui": ["ɡuei"], + "gun": ["ɡun"], + "guo": ["ɡuo"], "ha": ["xa"], "hai": ["xai"], "han": ["xan"], From 04d8d4b09a8cdab087a46c54b139d55538ce93eb Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 6 Mar 2024 13:27:43 +0100 Subject: [PATCH 022/255] chore: remove unused imports --- TTS/api.py | 2 -- TTS/utils/manage.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 2ef6f3a085..f741f65fb7 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -1,9 +1,7 @@ import tempfile import warnings from pathlib import Path -from typing import Union -import numpy as np from torch import nn from TTS.config import load_config diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index d724cc87ec..32f1779c64 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -5,7 +5,7 @@ import zipfile from pathlib import Path from shutil import copyfile, rmtree -from typing import Dict, List, Tuple +from typing import Dict, Tuple import fsspec import requests From 39149ef564efc3b2abe28ed333c2cc1c1f784e5c Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 6 Mar 2024 13:36:25 +0100 Subject: [PATCH 023/255] build: remove isort Import sorting now handled by ruff. 
--- .pre-commit-config.yaml | 6 ------ CONTRIBUTING.md | 2 +- Makefile | 2 -- pyproject.toml | 5 ----- requirements.dev.txt | 1 - 5 files changed, 1 insertion(+), 15 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index af408ed551..76bcfa829e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,12 +12,6 @@ repos: hooks: - id: black language_version: python3 - - repo: https://github.com/pycqa/isort - rev: 5.13.1 - hooks: - - id: isort - name: isort (cython) - types: [cython] - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.1.7 hooks: diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 5fbed84397..a83b8c8296 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -82,7 +82,7 @@ The following steps are tested on an Ubuntu system. $ make test_all # run all the tests, report all the errors ``` -9. Format your code. We use ```black``` for code and ```isort``` for ```import``` formatting. +9. Format your code. We use ```black``` for code formatting. ```bash $ make style diff --git a/Makefile b/Makefile index d1a1db8a75..2204bca84e 100644 --- a/Makefile +++ b/Makefile @@ -46,12 +46,10 @@ test_failed: ## only run tests failed the last time. style: ## update code style. black ${target_dirs} - isort ${target_dirs} lint: ## run linters. ruff ${target_dirs} black ${target_dirs} --check - isort ${target_dirs} --check-only system-deps: ## install linux system deps sudo apt-get install -y libsndfile1-dev diff --git a/pyproject.toml b/pyproject.toml index 934e0c2ebd..1cdcc0cab7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,8 +64,3 @@ max-returns = 7 [tool.black] line-length = 120 target-version = ['py39'] - -[tool.isort] -line_length = 120 -profile = "black" -multi_line_output = 3 diff --git a/requirements.dev.txt b/requirements.dev.txt index 21c4c3d21e..4b3c3cfdcc 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,5 +1,4 @@ black coverage -isort nose2 ruff==0.1.3 From 1961687a18c3deea751434026232a8eb3b697ade Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 6 Mar 2024 13:40:56 +0100 Subject: [PATCH 024/255] build: update to ruff 0.3.0 --- .pre-commit-config.yaml | 2 +- Makefile | 2 +- pyproject.toml | 8 ++++---- requirements.dev.txt | 2 +- tests/tts_tests/test_vits.py | 1 - 5 files changed, 7 insertions(+), 8 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 76bcfa829e..4853cb0740 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -13,7 +13,7 @@ repos: - id: black language_version: python3 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.7 + rev: v0.3.0 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] diff --git a/Makefile b/Makefile index 2204bca84e..ac0f793b13 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ style: ## update code style. black ${target_dirs} lint: ## run linters. 
- ruff ${target_dirs} + ruff check ${target_dirs} black ${target_dirs} --check system-deps: ## install linux system deps diff --git a/pyproject.toml b/pyproject.toml index 1cdcc0cab7..b9902fc372 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ requires = [ [tool.ruff] line-length = 120 -extend-select = [ +lint.extend-select = [ "B033", # duplicate-value "C416", # unnecessary-comprehension "D419", # empty-docstring @@ -30,7 +30,7 @@ extend-select = [ "W291", # trailing-whitespace ] -ignore = [ +lint.ignore = [ "E501", # line too long "E722", # bare except (TODO: fix these) "E731", # don't use lambdas @@ -47,12 +47,12 @@ ignore = [ "PLW0603", # TODO: enable ] -[tool.ruff.pylint] +[tool.ruff.lint.pylint] max-args = 5 max-public-methods = 20 max-returns = 7 -[tool.ruff.per-file-ignores] +[tool.ruff.lint.per-file-ignores] "**/__init__.py" = [ "F401", # init files may have "unused" imports for now "F403", # init files may have star imports for now diff --git a/requirements.dev.txt b/requirements.dev.txt index 4b3c3cfdcc..1c23a1814f 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,4 +1,4 @@ black coverage nose2 -ruff==0.1.3 +ruff==0.3.0 diff --git a/tests/tts_tests/test_vits.py b/tests/tts_tests/test_vits.py index fca9955619..e76e29283e 100644 --- a/tests/tts_tests/test_vits.py +++ b/tests/tts_tests/test_vits.py @@ -64,7 +64,6 @@ def test_load_audio(self): def test_dataset(self): """TODO:""" - ... def test_init_multispeaker(self): num_speakers = 10 From d7633e4106dfdcf6cb0cb86e2a156a3379a5d4cd Mon Sep 17 00:00:00 2001 From: Aarni Koskela Date: Tue, 31 Oct 2023 17:57:17 +0200 Subject: [PATCH 025/255] CI: remove unused check_skip steps There is nothing in the repo that would refer to them [ci skip] --- .github/workflows/aux_tests.yml | 6 ------ .github/workflows/data_tests.yml | 6 ------ .github/workflows/inference_tests.yml | 6 ------ .github/workflows/style_check.yml | 6 ------ .github/workflows/text_tests.yml | 6 ------ .github/workflows/tts_tests.yml | 6 ------ .github/workflows/tts_tests2.yml | 6 ------ .github/workflows/vocoder_tests.yml | 6 ------ .github/workflows/xtts_tests.yml | 6 ------ .github/workflows/zoo_tests0.yml | 6 ------ .github/workflows/zoo_tests1.yml | 6 ------ .github/workflows/zoo_tests2.yml | 6 ------ 12 files changed, 72 deletions(-) diff --git a/.github/workflows/aux_tests.yml b/.github/workflows/aux_tests.yml index f4cb3ecfe1..ceb0c64016 100644 --- a/.github/workflows/aux_tests.yml +++ b/.github/workflows/aux_tests.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: diff --git a/.github/workflows/data_tests.yml b/.github/workflows/data_tests.yml index 3d1e3f8c4d..d5bfef3f77 100644 --- a/.github/workflows/data_tests.yml +++ b/.github/workflows/data_tests.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! 
contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: diff --git a/.github/workflows/inference_tests.yml b/.github/workflows/inference_tests.yml index d2159027b6..d7a01adf2c 100644 --- a/.github/workflows/inference_tests.yml +++ b/.github/workflows/inference_tests.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml index b7c6393baa..23ff643e47 100644 --- a/.github/workflows/style_check.yml +++ b/.github/workflows/style_check.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: diff --git a/.github/workflows/text_tests.yml b/.github/workflows/text_tests.yml index 78d3026d7f..4bcb685887 100644 --- a/.github/workflows/text_tests.yml +++ b/.github/workflows/text_tests.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: diff --git a/.github/workflows/tts_tests.yml b/.github/workflows/tts_tests.yml index 5074cded6d..60940ba8aa 100644 --- a/.github/workflows/tts_tests.yml +++ b/.github/workflows/tts_tests.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: diff --git a/.github/workflows/tts_tests2.yml b/.github/workflows/tts_tests2.yml index f64433f8df..17cb3f2f56 100644 --- a/.github/workflows/tts_tests2.yml +++ b/.github/workflows/tts_tests2.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: diff --git a/.github/workflows/vocoder_tests.yml b/.github/workflows/vocoder_tests.yml index 6519ee3fef..778529b298 100644 --- a/.github/workflows/vocoder_tests.yml +++ b/.github/workflows/vocoder_tests.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: diff --git a/.github/workflows/xtts_tests.yml b/.github/workflows/xtts_tests.yml index be367f3547..99cec287eb 100644 --- a/.github/workflows/xtts_tests.yml +++ b/.github/workflows/xtts_tests.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! 
contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: diff --git a/.github/workflows/zoo_tests0.yml b/.github/workflows/zoo_tests0.yml index 13f47a938b..69deb884ef 100644 --- a/.github/workflows/zoo_tests0.yml +++ b/.github/workflows/zoo_tests0.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: diff --git a/.github/workflows/zoo_tests1.yml b/.github/workflows/zoo_tests1.yml index 00f13397fa..7a10c6af70 100644 --- a/.github/workflows/zoo_tests1.yml +++ b/.github/workflows/zoo_tests1.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: diff --git a/.github/workflows/zoo_tests2.yml b/.github/workflows/zoo_tests2.yml index 310a831a8b..0e0392f70a 100644 --- a/.github/workflows/zoo_tests2.yml +++ b/.github/workflows/zoo_tests2.yml @@ -7,12 +7,6 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - check_skip: - runs-on: ubuntu-latest - if: "! contains(github.event.head_commit.message, '[ci skip]')" - steps: - - run: echo "${{ github.event.head_commit.message }}" - test: runs-on: ubuntu-latest strategy: From 4eec70680eb50d2fb65a2937eb090e3a9df6678a Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 31 Oct 2023 13:34:21 +0100 Subject: [PATCH 026/255] chore: remove obsolete numba requirement for python<3.9 The repository requires Python>=3.9, so this is obsolete. --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 2944e6face..a5e0be7f5e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,7 +8,6 @@ torchaudio soundfile>=0.12.0 librosa>=0.10.0 scikit-learn>=1.3.0 -numba==0.55.1;python_version<"3.9" numba>=0.57.0;python_version>="3.9" inflect>=5.6.0 tqdm>=4.64.1 @@ -54,4 +53,4 @@ encodec>=0.1.1 # deps for XTTS unidecode>=1.3.2 num2words -spacy[ja]>=3 \ No newline at end of file +spacy[ja]>=3 From e05243c4c873a68af4684128b973596d6a91c342 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 31 Oct 2023 14:16:46 +0100 Subject: [PATCH 027/255] refactor: read/write csv files with standard library --- TTS/encoder/utils/prepare_voxceleb.py | 9 ++++-- TTS/tts/datasets/formatters.py | 46 +++++++++++++++------------ 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/TTS/encoder/utils/prepare_voxceleb.py b/TTS/encoder/utils/prepare_voxceleb.py index b93baf9e60..5a68c3075a 100644 --- a/TTS/encoder/utils/prepare_voxceleb.py +++ b/TTS/encoder/utils/prepare_voxceleb.py @@ -19,13 +19,13 @@ # pylint: disable=too-many-locals, too-many-statements, too-many-arguments, too-many-instance-attributes """ voxceleb 1 & 2 """ +import csv import hashlib import os import subprocess import sys import zipfile -import pandas import soundfile as sf from absl import logging @@ -185,8 +185,11 @@ def convert_audio_and_make_label(input_dir, subset, output_dir, output_file): # Write to CSV file which contains four columns: # "wav_filename", "wav_length_ms", "speaker_id", "speaker_name". 
csv_file_path = os.path.join(output_dir, output_file) - df = pandas.DataFrame(data=files, columns=["wav_filename", "wav_length_ms", "speaker_id", "speaker_name"]) - df.to_csv(csv_file_path, index=False, sep="\t") + with open(csv_file_path, "w", newline="", encoding="utf-8") as f: + writer = csv.writer(f, delimiter="\t") + writer.writerow(["wav_filename", "wav_length_ms", "speaker_id", "speaker_name"]) + for wav_file in files: + writer.writerow(wav_file) logging.info("Successfully generated csv file {}".format(csv_file_path)) diff --git a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py index 053444b0c1..09fbd094e8 100644 --- a/TTS/tts/datasets/formatters.py +++ b/TTS/tts/datasets/formatters.py @@ -1,3 +1,4 @@ +import csv import os import re import xml.etree.ElementTree as ET @@ -5,7 +6,6 @@ from pathlib import Path from typing import List -import pandas as pd from tqdm import tqdm ######################## @@ -25,25 +25,27 @@ def cml_tts(root_path, meta_file, ignored_speakers=None): if len(line.split("|")) != num_cols: print(f" > Missing column in line {idx + 1} -> {line.strip()}") # load metadata - metadata = pd.read_csv(os.path.join(root_path, meta_file), sep="|") - assert all(x in metadata.columns for x in ["wav_filename", "transcript"]) - client_id = None if "client_id" in metadata.columns else "default" - emotion_name = None if "emotion_name" in metadata.columns else "neutral" + with open(Path(root_path) / meta_file, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f, delimiter="|") + metadata = list(reader) + assert all(x in metadata[0] for x in ["wav_filename", "transcript"]) + client_id = None if "client_id" in metadata[0] else "default" + emotion_name = None if "emotion_name" in metadata[0] else "neutral" items = [] not_found_counter = 0 - for row in metadata.itertuples(): - if client_id is None and ignored_speakers is not None and row.client_id in ignored_speakers: + for row in metadata: + if client_id is None and ignored_speakers is not None and row["client_id"] in ignored_speakers: continue - audio_path = os.path.join(root_path, row.wav_filename) + audio_path = os.path.join(root_path, row["wav_filename"]) if not os.path.exists(audio_path): not_found_counter += 1 continue items.append( { - "text": row.transcript, + "text": row["transcript"], "audio_file": audio_path, - "speaker_name": client_id if client_id is not None else row.client_id, - "emotion_name": emotion_name if emotion_name is not None else row.emotion_name, + "speaker_name": client_id if client_id is not None else row["client_id"], + "emotion_name": emotion_name if emotion_name is not None else row["emotion_name"], "root_path": root_path, } ) @@ -63,25 +65,27 @@ def coqui(root_path, meta_file, ignored_speakers=None): if len(line.split("|")) != num_cols: print(f" > Missing column in line {idx + 1} -> {line.strip()}") # load metadata - metadata = pd.read_csv(os.path.join(root_path, meta_file), sep="|") - assert all(x in metadata.columns for x in ["audio_file", "text"]) - speaker_name = None if "speaker_name" in metadata.columns else "coqui" - emotion_name = None if "emotion_name" in metadata.columns else "neutral" + with open(Path(root_path) / meta_file, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f, delimiter="|") + metadata = list(reader) + assert all(x in metadata[0] for x in ["audio_file", "text"]) + speaker_name = None if "speaker_name" in metadata[0] else "coqui" + emotion_name = None if "emotion_name" in metadata[0] else "neutral" items = [] not_found_counter = 0 - 
for row in metadata.itertuples(): - if speaker_name is None and ignored_speakers is not None and row.speaker_name in ignored_speakers: + for row in metadata: + if speaker_name is None and ignored_speakers is not None and row["speaker_name"] in ignored_speakers: continue - audio_path = os.path.join(root_path, row.audio_file) + audio_path = os.path.join(root_path, row["audio_file"]) if not os.path.exists(audio_path): not_found_counter += 1 continue items.append( { - "text": row.text, + "text": row["text"], "audio_file": audio_path, - "speaker_name": speaker_name if speaker_name is not None else row.speaker_name, - "emotion_name": emotion_name if emotion_name is not None else row.emotion_name, + "speaker_name": speaker_name if speaker_name is not None else row["speaker_name"], + "emotion_name": emotion_name if emotion_name is not None else row["emotion_name"], "root_path": root_path, } ) From e4b1b0f73ec471874a1bfa00d8748f206db48069 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 31 Oct 2023 14:18:25 +0100 Subject: [PATCH 028/255] build: move pandas to notebook requirements --- requirements.notebooks.txt | 3 ++- requirements.txt | 1 - 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.notebooks.txt b/requirements.notebooks.txt index 65d3f642c9..6b7e6e8956 100644 --- a/requirements.notebooks.txt +++ b/requirements.notebooks.txt @@ -1 +1,2 @@ -bokeh==1.4.0 \ No newline at end of file +bokeh==1.4.0 +pandas>=1.4,<2.0 diff --git a/requirements.txt b/requirements.txt index a5e0be7f5e..5735cd3bfd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,7 +23,6 @@ flask>=2.0.1 pysbd>=0.3.4 # deps for notebooks umap-learn>=0.5.1 -pandas>=1.4,<2.0 # deps for training matplotlib>=3.7.0 # coqui stack From 4e183c61df791bbac594aabdf40875b502e84249 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 20 Dec 2023 15:18:01 +0100 Subject: [PATCH 029/255] fix(api): handle missing attribute in is_multilingual --- TTS/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/api.py b/TTS/api.py index f741f65fb7..abe9c56b25 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -97,7 +97,7 @@ def is_multi_lingual(self): isinstance(self.model_name, str) and "xtts" in self.model_name or self.config - and ("xtts" in self.config.model or len(self.config.languages) > 1) + and ("xtts" in self.config.model or "languages" in self.config and len(self.config.languages) > 1) ): return True if hasattr(self.synthesizer.tts_model, "language_manager") and self.synthesizer.tts_model.language_manager: From 017c84d005011157a3910236a16cf6e22d3e9f85 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 6 Mar 2024 22:45:35 +0100 Subject: [PATCH 030/255] style: make style && make lint --- TTS/tts/datasets/dataset.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py index 19fb25bef8..d592894072 100644 --- a/TTS/tts/datasets/dataset.py +++ b/TTS/tts/datasets/dataset.py @@ -4,6 +4,7 @@ import random from typing import Dict, List, Union +import mutagen import numpy as np import torch import tqdm @@ -13,8 +14,6 @@ from TTS.utils.audio import AudioProcessor from TTS.utils.audio.numpy_transforms import compute_energy as calculate_energy -import mutagen - # to prevent too many open files error as suggested here # https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936 torch.multiprocessing.set_sharing_strategy("file_system") @@ -47,7 +46,9 @@ def string2filename(string): def get_audio_size(audiopath): 
extension = audiopath.rpartition(".")[-1].lower() if extension not in {"mp3", "wav", "flac"}: - raise RuntimeError(f"The audio format {extension} is not supported, please convert the audio files to mp3, flac, or wav format!") + raise RuntimeError( + f"The audio format {extension} is not supported, please convert the audio files to mp3, flac, or wav format!" + ) audio_info = mutagen.File(audiopath).info return int(audio_info.length * audio_info.sample_rate) From 02d88b5dec85c2c0c9f0ece787618cdcf1f243b0 Mon Sep 17 00:00:00 2001 From: Greer Date: Wed, 13 Dec 2023 00:21:39 -0500 Subject: [PATCH 031/255] Fix TTS().list_models() --- TTS/api.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 7abc188e74..ab3efd64f0 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -122,8 +122,9 @@ def languages(self): def get_models_file_path(): return Path(__file__).parent / ".models.json" - def list_models(self): - return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False, verbose=False) + @staticmethod + def list_models(): + return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False, verbose=False).list_models() def download_model_by_name(self, model_name: str): model_path, config_path, model_item = self.manager.download_model(model_name) From c86cf9b2ef2c3be0c2e381e21445ae6460685786 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 7 Mar 2024 11:46:07 +0100 Subject: [PATCH 032/255] ci: pin black for consistent output --- .pre-commit-config.yaml | 2 +- requirements.dev.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4853cb0740..eeb02fde88 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,7 +8,7 @@ repos: # - id: end-of-file-fixer # - id: trailing-whitespace - repo: "https://github.com/psf/black" - rev: 23.12.0 + rev: 24.2.0 hooks: - id: black language_version: python3 diff --git a/requirements.dev.txt b/requirements.dev.txt index 1c23a1814f..68450fcad2 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,4 +1,4 @@ -black +black==24.2.0 coverage nose2 ruff==0.3.0 From efdafd5a7f80cb374433d3cd1c8016f3529883f3 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 7 Mar 2024 11:46:51 +0100 Subject: [PATCH 033/255] style: run black --- TTS/bin/collect_env_info.py | 1 + TTS/bin/find_unique_chars.py | 1 + TTS/bin/find_unique_phonemes.py | 1 + TTS/bin/tune_wavegrad.py | 1 + TTS/tts/datasets/dataset.py | 8 +++--- TTS/tts/layers/bark/model.py | 1 + TTS/tts/layers/bark/model_fine.py | 1 + TTS/tts/layers/xtts/gpt.py | 6 ++--- TTS/tts/layers/xtts/trainer/dataset.py | 6 ++--- TTS/tts/models/bark.py | 9 +++---- TTS/tts/models/base_tts.py | 8 +++--- TTS/tts/models/tacotron.py | 34 ++++++++++++++------------ TTS/tts/models/tacotron2.py | 34 ++++++++++++++------------ TTS/tts/models/tortoise.py | 10 +++++--- TTS/tts/models/vits.py | 8 +++--- TTS/tts/utils/ssim.py | 1 + TTS/tts/utils/text/cleaners.py | 1 + TTS/utils/download.py | 17 +++++++------ TTS/vc/models/base_vc.py | 8 +++--- TTS/vc/models/freevc.py | 6 ++--- TTS/vc/modules/freevc/wavlm/wavlm.py | 8 ++++-- tests/text_tests/test_phonemizer.py | 8 ++++-- tests/vc_tests/test_freevc.py | 18 +++++--------- 23 files changed, 111 insertions(+), 85 deletions(-) diff --git a/TTS/bin/collect_env_info.py b/TTS/bin/collect_env_info.py index e76f6a757b..32aa303e6e 100644 --- a/TTS/bin/collect_env_info.py +++ b/TTS/bin/collect_env_info.py @@ -1,4 +1,5 @@ """Get detailed info 
about the working environment.""" + import json import os import platform diff --git a/TTS/bin/find_unique_chars.py b/TTS/bin/find_unique_chars.py index ea16974839..81f2f4465f 100644 --- a/TTS/bin/find_unique_chars.py +++ b/TTS/bin/find_unique_chars.py @@ -1,4 +1,5 @@ """Find all the unique characters in a dataset""" + import argparse from argparse import RawTextHelpFormatter diff --git a/TTS/bin/find_unique_phonemes.py b/TTS/bin/find_unique_phonemes.py index 2df0700676..48f2e7b740 100644 --- a/TTS/bin/find_unique_phonemes.py +++ b/TTS/bin/find_unique_phonemes.py @@ -1,4 +1,5 @@ """Find all the unique characters in a dataset""" + import argparse import multiprocessing from argparse import RawTextHelpFormatter diff --git a/TTS/bin/tune_wavegrad.py b/TTS/bin/tune_wavegrad.py index 09582cea7c..a4b10009d7 100644 --- a/TTS/bin/tune_wavegrad.py +++ b/TTS/bin/tune_wavegrad.py @@ -1,4 +1,5 @@ """Search a good noise schedule for WaveGrad for a given number of inference iterations""" + import argparse from itertools import product as cartesian_product diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py index d592894072..9d0c45add9 100644 --- a/TTS/tts/datasets/dataset.py +++ b/TTS/tts/datasets/dataset.py @@ -457,9 +457,11 @@ def collate_fn(self, batch): # lengths adjusted by the reduction factor mel_lengths_adjusted = [ - m.shape[1] + (self.outputs_per_step - (m.shape[1] % self.outputs_per_step)) - if m.shape[1] % self.outputs_per_step - else m.shape[1] + ( + m.shape[1] + (self.outputs_per_step - (m.shape[1] % self.outputs_per_step)) + if m.shape[1] % self.outputs_per_step + else m.shape[1] + ) for m in mel ] diff --git a/TTS/tts/layers/bark/model.py b/TTS/tts/layers/bark/model.py index c84022bd08..68c50dbdbd 100644 --- a/TTS/tts/layers/bark/model.py +++ b/TTS/tts/layers/bark/model.py @@ -2,6 +2,7 @@ Much of this code is adapted from Andrej Karpathy's NanoGPT (https://github.com/karpathy/nanoGPT) """ + import math from dataclasses import dataclass diff --git a/TTS/tts/layers/bark/model_fine.py b/TTS/tts/layers/bark/model_fine.py index 09e5f4765d..29126b41ab 100644 --- a/TTS/tts/layers/bark/model_fine.py +++ b/TTS/tts/layers/bark/model_fine.py @@ -2,6 +2,7 @@ Much of this code is adapted from Andrej Karpathy's NanoGPT (https://github.com/karpathy/nanoGPT) """ + import math from dataclasses import dataclass diff --git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py index ca0dc7cc74..b55b84d90e 100644 --- a/TTS/tts/layers/xtts/gpt.py +++ b/TTS/tts/layers/xtts/gpt.py @@ -187,9 +187,9 @@ def __init__( def get_grad_norm_parameter_groups(self): return { "conditioning_encoder": list(self.conditioning_encoder.parameters()), - "conditioning_perceiver": list(self.conditioning_perceiver.parameters()) - if self.use_perceiver_resampler - else None, + "conditioning_perceiver": ( + list(self.conditioning_perceiver.parameters()) if self.use_perceiver_resampler else None + ), "gpt": list(self.gpt.parameters()), "heads": list(self.text_head.parameters()) + list(self.mel_head.parameters()), } diff --git a/TTS/tts/layers/xtts/trainer/dataset.py b/TTS/tts/layers/xtts/trainer/dataset.py index 4d6d6ede6e..0a19997a47 100644 --- a/TTS/tts/layers/xtts/trainer/dataset.py +++ b/TTS/tts/layers/xtts/trainer/dataset.py @@ -186,9 +186,9 @@ def __getitem__(self, index): "wav_lengths": torch.tensor(wav.shape[-1], dtype=torch.long), "filenames": audiopath, "conditioning": cond.unsqueeze(1), - "cond_lens": torch.tensor(cond_len, dtype=torch.long) - if cond_len is not torch.nan - else 
torch.tensor([cond_len]), + "cond_lens": ( + torch.tensor(cond_len, dtype=torch.long) if cond_len is not torch.nan else torch.tensor([cond_len]) + ), "cond_idxs": torch.tensor(cond_idxs) if cond_idxs is not torch.nan else torch.tensor([cond_idxs]), } return res diff --git a/TTS/tts/models/bark.py b/TTS/tts/models/bark.py index e5edffd4ef..833a909384 100644 --- a/TTS/tts/models/bark.py +++ b/TTS/tts/models/bark.py @@ -225,14 +225,11 @@ def synthesize( return return_dict - def eval_step(self): - ... + def eval_step(self): ... - def forward(self): - ... + def forward(self): ... - def inference(self): - ... + def inference(self): ... @staticmethod def init_from_config(config: "BarkConfig", **kwargs): # pylint: disable=unused-argument diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py index be76f6c2d3..0aa5edc647 100644 --- a/TTS/tts/models/base_tts.py +++ b/TTS/tts/models/base_tts.py @@ -369,9 +369,11 @@ def _get_test_aux_input( d_vector = (random.sample(sorted(d_vector), 1),) aux_inputs = { - "speaker_id": None - if not self.config.use_speaker_embedding - else random.sample(sorted(self.speaker_manager.name_to_id.values()), 1), + "speaker_id": ( + None + if not self.config.use_speaker_embedding + else random.sample(sorted(self.speaker_manager.name_to_id.values()), 1) + ), "d_vector": d_vector, "style_wav": None, # TODO: handle GST style input } diff --git a/TTS/tts/models/tacotron.py b/TTS/tts/models/tacotron.py index 474ec4641d..400a86d042 100644 --- a/TTS/tts/models/tacotron.py +++ b/TTS/tts/models/tacotron.py @@ -101,12 +101,16 @@ def __init__( num_mel=self.decoder_output_dim, encoder_output_dim=self.encoder_in_features, capacitron_VAE_embedding_dim=self.capacitron_vae.capacitron_VAE_embedding_dim, - speaker_embedding_dim=self.embedded_speaker_dim - if self.use_speaker_embedding and self.capacitron_vae.capacitron_use_speaker_embedding - else None, - text_summary_embedding_dim=self.capacitron_vae.capacitron_text_summary_embedding_dim - if self.capacitron_vae.capacitron_use_text_summary_embeddings - else None, + speaker_embedding_dim=( + self.embedded_speaker_dim + if self.use_speaker_embedding and self.capacitron_vae.capacitron_use_speaker_embedding + else None + ), + text_summary_embedding_dim=( + self.capacitron_vae.capacitron_text_summary_embedding_dim + if self.capacitron_vae.capacitron_use_text_summary_embeddings + else None + ), ) # backward pass decoder @@ -171,9 +175,9 @@ def forward( # pylint: disable=dangerous-default-value encoder_outputs, *capacitron_vae_outputs = self.compute_capacitron_VAE_embedding( encoder_outputs, reference_mel_info=[mel_specs, mel_lengths], - text_info=[inputs, text_lengths] - if self.capacitron_vae.capacitron_use_text_summary_embeddings - else None, + text_info=( + [inputs, text_lengths] if self.capacitron_vae.capacitron_use_text_summary_embeddings else None + ), speaker_embedding=embedded_speakers if self.capacitron_vae.capacitron_use_speaker_embedding else None, ) else: @@ -237,13 +241,13 @@ def inference(self, text_input, aux_input=None): # B x capacitron_VAE_embedding_dim encoder_outputs, *_ = self.compute_capacitron_VAE_embedding( encoder_outputs, - reference_mel_info=[aux_input["style_mel"], reference_mel_length] - if aux_input["style_mel"] is not None - else None, + reference_mel_info=( + [aux_input["style_mel"], reference_mel_length] if aux_input["style_mel"] is not None else None + ), text_info=[style_text_embedding, style_text_length] if aux_input["style_text"] is not None else None, - speaker_embedding=aux_input["d_vectors"] - 
if self.capacitron_vae.capacitron_use_speaker_embedding - else None, + speaker_embedding=( + aux_input["d_vectors"] if self.capacitron_vae.capacitron_use_speaker_embedding else None + ), ) if self.num_speakers > 1: if not self.use_d_vector_file: diff --git a/TTS/tts/models/tacotron2.py b/TTS/tts/models/tacotron2.py index 71ab1eac37..4b1317f440 100644 --- a/TTS/tts/models/tacotron2.py +++ b/TTS/tts/models/tacotron2.py @@ -113,12 +113,14 @@ def __init__( num_mel=self.decoder_output_dim, encoder_output_dim=self.encoder_in_features, capacitron_VAE_embedding_dim=self.capacitron_vae.capacitron_VAE_embedding_dim, - speaker_embedding_dim=self.embedded_speaker_dim - if self.capacitron_vae.capacitron_use_speaker_embedding - else None, - text_summary_embedding_dim=self.capacitron_vae.capacitron_text_summary_embedding_dim - if self.capacitron_vae.capacitron_use_text_summary_embeddings - else None, + speaker_embedding_dim=( + self.embedded_speaker_dim if self.capacitron_vae.capacitron_use_speaker_embedding else None + ), + text_summary_embedding_dim=( + self.capacitron_vae.capacitron_text_summary_embedding_dim + if self.capacitron_vae.capacitron_use_text_summary_embeddings + else None + ), ) # backward pass decoder @@ -191,9 +193,11 @@ def forward( # pylint: disable=dangerous-default-value encoder_outputs, *capacitron_vae_outputs = self.compute_capacitron_VAE_embedding( encoder_outputs, reference_mel_info=[mel_specs, mel_lengths], - text_info=[embedded_inputs.transpose(1, 2), text_lengths] - if self.capacitron_vae.capacitron_use_text_summary_embeddings - else None, + text_info=( + [embedded_inputs.transpose(1, 2), text_lengths] + if self.capacitron_vae.capacitron_use_text_summary_embeddings + else None + ), speaker_embedding=embedded_speakers if self.capacitron_vae.capacitron_use_speaker_embedding else None, ) else: @@ -265,13 +269,13 @@ def inference(self, text, aux_input=None): # B x capacitron_VAE_embedding_dim encoder_outputs, *_ = self.compute_capacitron_VAE_embedding( encoder_outputs, - reference_mel_info=[aux_input["style_mel"], reference_mel_length] - if aux_input["style_mel"] is not None - else None, + reference_mel_info=( + [aux_input["style_mel"], reference_mel_length] if aux_input["style_mel"] is not None else None + ), text_info=[style_text_embedding, style_text_length] if aux_input["style_text"] is not None else None, - speaker_embedding=aux_input["d_vectors"] - if self.capacitron_vae.capacitron_use_speaker_embedding - else None, + speaker_embedding=( + aux_input["d_vectors"] if self.capacitron_vae.capacitron_use_speaker_embedding else None + ), ) if self.num_speakers > 1: diff --git a/TTS/tts/models/tortoise.py b/TTS/tts/models/tortoise.py index 16644ff95e..99e0107fdf 100644 --- a/TTS/tts/models/tortoise.py +++ b/TTS/tts/models/tortoise.py @@ -715,8 +715,9 @@ def inference( self.autoregressive = self.autoregressive.to(self.device) if verbose: print("Generating autoregressive samples..") - with self.temporary_cuda(self.autoregressive) as autoregressive, torch.autocast( - device_type="cuda", dtype=torch.float16, enabled=half + with ( + self.temporary_cuda(self.autoregressive) as autoregressive, + torch.autocast(device_type="cuda", dtype=torch.float16, enabled=half), ): for b in tqdm(range(num_batches), disable=not verbose): codes = autoregressive.inference_speech( @@ -737,8 +738,9 @@ def inference( self.autoregressive_batch_size = orig_batch_size # in the case of single_sample clip_results = [] - with self.temporary_cuda(self.clvp) as clvp, torch.autocast( - device_type="cuda", 
dtype=torch.float16, enabled=half + with ( + self.temporary_cuda(self.clvp) as clvp, + torch.autocast(device_type="cuda", dtype=torch.float16, enabled=half), ): for batch in tqdm(samples, disable=not verbose): for i in range(batch.shape[0]): diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index 2c60ece789..e91d26b9ed 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -1887,9 +1887,11 @@ def load_onnx(self, model_path: str, cuda=False): import onnxruntime as ort providers = [ - "CPUExecutionProvider" - if cuda is False - else ("CUDAExecutionProvider", {"cudnn_conv_algo_search": "DEFAULT"}) + ( + "CPUExecutionProvider" + if cuda is False + else ("CUDAExecutionProvider", {"cudnn_conv_algo_search": "DEFAULT"}) + ) ] sess_options = ort.SessionOptions() self.onnx_sess = ort.InferenceSession( diff --git a/TTS/tts/utils/ssim.py b/TTS/tts/utils/ssim.py index 4bc3befc5b..eddf05db3f 100644 --- a/TTS/tts/utils/ssim.py +++ b/TTS/tts/utils/ssim.py @@ -207,6 +207,7 @@ class SSIMLoss(_Loss): https://ece.uwaterloo.ca/~z70wang/publications/ssim.pdf, DOI:`10.1109/TIP.2003.819861` """ + __constants__ = ["kernel_size", "k1", "k2", "sigma", "kernel", "reduction"] def __init__( diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py index 74d3910b51..794a87c866 100644 --- a/TTS/tts/utils/text/cleaners.py +++ b/TTS/tts/utils/text/cleaners.py @@ -1,4 +1,5 @@ """Set of default text cleaners""" + # TODO: pick the cleaner for languages dynamically import re diff --git a/TTS/utils/download.py b/TTS/utils/download.py index 3f06b57824..37e6ed3cee 100644 --- a/TTS/utils/download.py +++ b/TTS/utils/download.py @@ -36,13 +36,16 @@ def stream_url( if start_byte: req.headers["Range"] = "bytes={}-".format(start_byte) - with urllib.request.urlopen(req) as upointer, tqdm( - unit="B", - unit_scale=True, - unit_divisor=1024, - total=url_size, - disable=not progress_bar, - ) as pbar: + with ( + urllib.request.urlopen(req) as upointer, + tqdm( + unit="B", + unit_scale=True, + unit_divisor=1024, + total=url_size, + disable=not progress_bar, + ) as pbar, + ): num_bytes = 0 while True: chunk = upointer.read(block_size) diff --git a/TTS/vc/models/base_vc.py b/TTS/vc/models/base_vc.py index 19f2761bbc..78f1556b71 100644 --- a/TTS/vc/models/base_vc.py +++ b/TTS/vc/models/base_vc.py @@ -357,9 +357,11 @@ def _get_test_aux_input( d_vector = (random.sample(sorted(d_vector), 1),) aux_inputs = { - "speaker_id": None - if not self.config.use_speaker_embedding - else random.sample(sorted(self.speaker_manager.name_to_id.values()), 1), + "speaker_id": ( + None + if not self.config.use_speaker_embedding + else random.sample(sorted(self.speaker_manager.name_to_id.values()), 1) + ), "d_vector": d_vector, "style_wav": None, # TODO: handle GST style input } diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index a5a340f2aa..8f2a35d204 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -544,8 +544,7 @@ def voice_conversion(self, src, tgt): audio = audio[0][0].data.cpu().float().numpy() return audio - def eval_step(): - ... + def eval_step(): ... @staticmethod def init_from_config(config: FreeVCConfig, samples: Union[List[List], List[Dict]] = None, verbose=True): @@ -558,5 +557,4 @@ def load_checkpoint(self, config, checkpoint_path, eval=False, strict=True, cach if eval: self.eval() - def train_step(): - ... + def train_step(): ... 
diff --git a/TTS/vc/modules/freevc/wavlm/wavlm.py b/TTS/vc/modules/freevc/wavlm/wavlm.py index d2f28d19c2..10dd09ed0c 100644 --- a/TTS/vc/modules/freevc/wavlm/wavlm.py +++ b/TTS/vc/modules/freevc/wavlm/wavlm.py @@ -155,7 +155,9 @@ def arrange(s, e, length, keep_length): class WavLMConfig: def __init__(self, cfg=None): - self.extractor_mode: str = "default" # mode for feature extractor. default has a single group norm with d groups in the first conv block, whereas layer_norm has layer norms in every block (meant to use with normalize=True) + self.extractor_mode: str = ( + "default" # mode for feature extractor. default has a single group norm with d groups in the first conv block, whereas layer_norm has layer norms in every block (meant to use with normalize=True) + ) self.encoder_layers: int = 12 # num encoder layers in the transformer self.encoder_embed_dim: int = 768 # encoder embedding dimension @@ -164,7 +166,9 @@ def __init__(self, cfg=None): self.activation_fn: str = "gelu" # activation function to use self.layer_norm_first: bool = False # apply layernorm first in the transformer - self.conv_feature_layers: str = "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2" # string describing convolutional feature extraction layers in form of a python list that contains [(dim, kernel_size, stride), ...] + self.conv_feature_layers: str = ( + "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2" # string describing convolutional feature extraction layers in form of a python list that contains [(dim, kernel_size, stride), ...] + ) self.conv_bias: bool = False # include bias in conv encoder self.feature_grad_mult: float = 1.0 # multiply feature extractor var grads by this diff --git a/tests/text_tests/test_phonemizer.py b/tests/text_tests/test_phonemizer.py index 8810554421..ca25b302c5 100644 --- a/tests/text_tests/test_phonemizer.py +++ b/tests/text_tests/test_phonemizer.py @@ -234,8 +234,12 @@ def test_is_available(self): class TestBN_Phonemizer(unittest.TestCase): def setUp(self): self.phonemizer = BN_Phonemizer() - self._TEST_CASES = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে, কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয়, তখনও যেন" - self._EXPECTED = "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয় তখনও যেন।" + self._TEST_CASES = ( + "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে, কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয়, তখনও যেন" + ) + self._EXPECTED = ( + "রাসূলুল্লাহ সাল্লাল্লাহু আলাইহি ওয়া সাল্লাম শিক্ষা দিয়েছেন যে কেউ যদি কোন খারাপ কিছুর সম্মুখীন হয় তখনও যেন।" + ) def test_phonemize(self): self.assertEqual(self.phonemizer.phonemize(self._TEST_CASES, separator=""), self._EXPECTED) diff --git a/tests/vc_tests/test_freevc.py b/tests/vc_tests/test_freevc.py index 3755ab3f06..c9e6cedf11 100644 --- a/tests/vc_tests/test_freevc.py +++ b/tests/vc_tests/test_freevc.py @@ -115,20 +115,14 @@ def test_voice_conversion(self): output_wav.shape[0] + config.audio.hop_length == source_wav.shape[0] ), f"{output_wav.shape} != {source_wav.shape}" - def test_train_step(self): - ... + def test_train_step(self): ... - def test_train_eval_log(self): - ... + def test_train_eval_log(self): ... - def test_test_run(self): - ... + def test_test_run(self): ... - def test_load_checkpoint(self): - ... + def test_load_checkpoint(self): ... - def test_get_criterion(self): - ... + def test_get_criterion(self): ... - def test_init_from_config(self): - ... + def test_init_from_config(self): ... 
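The Black 24.2.0 upgrade pinned in the previous patch and applied here reformats two recurring patterns across the codebase: multi-line conditional expressions used as arguments or dict values are wrapped in explicit parentheses, and empty stub methods are collapsed onto a single line. A minimal sketch of the resulting style follows; the function and class names are hypothetical and not taken from the patches.

```python
# Style produced by the pinned Black >= 24.x (illustrative example, not repo code).
def build_aux_inputs(use_speaker_embedding, sample_speaker_id):
    return {
        # Conditional values spanning multiple lines are wrapped in parentheses.
        "speaker_id": (
            None if not use_speaker_embedding else sample_speaker_id()
        ),
        "style_wav": None,
    }


class StubModel:
    # Empty stub bodies ("...") are collapsed onto the same line as the def.
    def train_step(self): ...

    def eval_step(self): ...
```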
From 309f39a45fd615fc86a78480953bfa6a071b69c8 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 8 Mar 2024 14:47:00 +0100 Subject: [PATCH 034/255] fix(xtts_manager): name_to_id() should return dict This is how the other embedding managers work --- TTS/tts/layers/xtts/xtts_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/tts/layers/xtts/xtts_manager.py b/TTS/tts/layers/xtts/xtts_manager.py index 5a28d2a8a6..5560e87687 100644 --- a/TTS/tts/layers/xtts/xtts_manager.py +++ b/TTS/tts/layers/xtts/xtts_manager.py @@ -7,7 +7,7 @@ def __init__(self, speaker_file_path=None): @property def name_to_id(self): - return self.speakers.keys() + return self.speakers @property def num_speakers(self): From dca564a705d2881f2b3d5d0c20ae923b6de0295a Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 8 Mar 2024 17:18:56 +0100 Subject: [PATCH 035/255] test(vocoder): disable wavegrad training test in CI --- tests/vocoder_tests/test_wavegrad_train.py | 83 ++++++++++++---------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/tests/vocoder_tests/test_wavegrad_train.py b/tests/vocoder_tests/test_wavegrad_train.py index fe56ee783f..9b10759505 100644 --- a/tests/vocoder_tests/test_wavegrad_train.py +++ b/tests/vocoder_tests/test_wavegrad_train.py @@ -1,43 +1,54 @@ import glob import os import shutil +import unittest from tests import get_device_id, get_tests_output_path, run_cli from TTS.vocoder.configs import WavegradConfig -config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json") -output_path = os.path.join(get_tests_output_path(), "train_outputs") - -config = WavegradConfig( - batch_size=8, - eval_batch_size=8, - num_loader_workers=0, - num_eval_loader_workers=0, - run_eval=True, - test_delay_epochs=-1, - epochs=1, - seq_len=8192, - eval_split_size=1, - print_step=1, - print_eval=True, - data_path="tests/data/ljspeech", - output_path=output_path, - test_noise_schedule={"min_val": 1e-6, "max_val": 1e-2, "num_steps": 2}, -) -config.audio.do_trim_silence = True -config.audio.trim_db = 60 -config.save_json(config_path) - -# train the model for one epoch -command_train = f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --config_path {config_path} " -run_cli(command_train) - -# Find latest folder -continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime) - -# restore the model and continue training for one more epoch -command_train = ( - f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --continue_path {continue_path} " -) -run_cli(command_train) -shutil.rmtree(continue_path) + +class WavegradTrainingTest(unittest.TestCase): + # TODO: Reactivate after improving CI run times + # This test currently takes ~2h on CI (15min/step vs 8sec/step locally) + if os.getenv("GITHUB_ACTIONS") == "true": + __test__ = False + + def test_train(self): # pylint: disable=no-self-use + config_path = os.path.join(get_tests_output_path(), "test_vocoder_config.json") + output_path = os.path.join(get_tests_output_path(), "train_outputs") + + config = WavegradConfig( + batch_size=8, + eval_batch_size=8, + num_loader_workers=0, + num_eval_loader_workers=0, + run_eval=True, + test_delay_epochs=-1, + epochs=1, + seq_len=8192, + eval_split_size=1, + print_step=1, + print_eval=True, + data_path="tests/data/ljspeech", + output_path=output_path, + test_noise_schedule={"min_val": 1e-6, "max_val": 1e-2, "num_steps": 2}, + ) + config.audio.do_trim_silence = True + config.audio.trim_db = 60 + 
config.save_json(config_path) + + # train the model for one epoch + command_train = ( + f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --config_path {config_path} " + ) + run_cli(command_train) + + # Find latest folder + continue_path = max(glob.glob(os.path.join(output_path, "*/")), key=os.path.getmtime) + + # restore the model and continue training for one more epoch + command_train = ( + f"CUDA_VISIBLE_DEVICES='{get_device_id()}' python TTS/bin/train_vocoder.py --continue_path {continue_path} " + ) + run_cli(command_train) + shutil.rmtree(continue_path) From d80f7f4ebabc9984fbd75880f96bc48114a59471 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 9 Mar 2024 16:43:42 +0100 Subject: [PATCH 036/255] Fix fairseq (#11) * fix fairseq mode * Added line to fix fairseq model issue and made code cleaner. --------- Co-authored-by: akgupta1337 --- TTS/utils/manage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index 32f1779c64..ca16183d37 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -260,8 +260,7 @@ def set_model_url(model_item: Dict): def _set_model_item(self, model_name): # fetch model info from the dict if "fairseq" in model_name: - model_type = "tts_models" - lang = model_name.split("/")[1] + model_type, lang, dataset, model = model_name.split("/") model_item = { "model_type": "tts_models", "license": "CC BY-NC 4.0", From 26ef3200c737ed6dabc6548ff0a52ec4e0f2a051 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 9 Mar 2024 18:05:24 +0100 Subject: [PATCH 037/255] build: remove unused dependencies --- requirements.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5735cd3bfd..a543a421b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,8 +7,7 @@ torch>=2.1 torchaudio soundfile>=0.12.0 librosa>=0.10.0 -scikit-learn>=1.3.0 -numba>=0.57.0;python_version>="3.9" +numba>=0.57.0 inflect>=5.6.0 tqdm>=4.64.1 anyascii>=0.3.0 @@ -38,7 +37,6 @@ hangul_romanize gruut[de,es,fr]==2.2.3 # deps for korean jamo -nltk g2pkk>=0.1.1 # deps for bangla bangla @@ -50,6 +48,5 @@ transformers>=4.33.0 #deps for bark encodec>=0.1.1 # deps for XTTS -unidecode>=1.3.2 num2words spacy[ja]>=3 From 24cf471406c557a0e41152b76ae2c07283580095 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Mar 2024 10:29:09 +0100 Subject: [PATCH 038/255] build: install aiohttp through fsspec extra --- requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a543a421b9..0278d27e3f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,8 +12,7 @@ inflect>=5.6.0 tqdm>=4.64.1 anyascii>=0.3.0 pyyaml>=6.0 -fsspec>=2023.6.0 # <= 2023.9.1 makes aux tests fail -aiohttp>=3.8.1 +fsspec[http]>=2023.6.0 # <= 2023.9.1 makes aux tests fail packaging>=23.1 mutagen==1.47.0 # deps for examples From c59f0ca1ce5113c650a123fbe9634b3c27c42cc7 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Mar 2024 11:01:57 +0100 Subject: [PATCH 039/255] ci: switch from pip to uv --- .github/workflows/aux_tests.yml | 4 ++-- .github/workflows/data_tests.yml | 4 ++-- .github/workflows/inference_tests.yml | 4 ++-- .github/workflows/text_tests.yml | 4 ++-- .github/workflows/tts_tests.yml | 4 ++-- .github/workflows/tts_tests2.yml | 4 ++-- .github/workflows/vocoder_tests.yml | 4 ++-- .github/workflows/xtts_tests.yml | 4 ++-- .github/workflows/zoo_tests0.yml | 4 ++-- .github/workflows/zoo_tests1.yml | 4 ++-- 
.github/workflows/zoo_tests2.yml | 4 ++-- 11 files changed, 22 insertions(+), 22 deletions(-) diff --git a/.github/workflows/aux_tests.yml b/.github/workflows/aux_tests.yml index ceb0c64016..d7612d6d1d 100644 --- a/.github/workflows/aux_tests.yml +++ b/.github/workflows/aux_tests.yml @@ -33,13 +33,13 @@ jobs: sudo apt-get install -y git make gcc make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Replace scarf urls run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "TTS[all] @ ." python3 setup.py egg_info - name: Unit tests run: make test_aux diff --git a/.github/workflows/data_tests.yml b/.github/workflows/data_tests.yml index d5bfef3f77..c1b7e50d19 100644 --- a/.github/workflows/data_tests.yml +++ b/.github/workflows/data_tests.yml @@ -33,13 +33,13 @@ jobs: sudo apt-get install -y --no-install-recommends git make gcc make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Replace scarf urls run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "TTS[all] @ ." python3 setup.py egg_info - name: Unit tests run: make data_tests diff --git a/.github/workflows/inference_tests.yml b/.github/workflows/inference_tests.yml index d7a01adf2c..a396add216 100644 --- a/.github/workflows/inference_tests.yml +++ b/.github/workflows/inference_tests.yml @@ -35,13 +35,13 @@ jobs: sudo apt-get install espeak-ng make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Replace scarf urls run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "TTS[all] @ ." python3 setup.py egg_info - name: Unit tests run: make inference_tests diff --git a/.github/workflows/text_tests.yml b/.github/workflows/text_tests.yml index 4bcb685887..adfba0260c 100644 --- a/.github/workflows/text_tests.yml +++ b/.github/workflows/text_tests.yml @@ -35,10 +35,10 @@ jobs: sudo apt-get install espeak-ng make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "TTS[all] @ ." 
python3 setup.py egg_info - name: Unit tests run: make test_text diff --git a/.github/workflows/tts_tests.yml b/.github/workflows/tts_tests.yml index 60940ba8aa..72fa0a9f29 100644 --- a/.github/workflows/tts_tests.yml +++ b/.github/workflows/tts_tests.yml @@ -35,13 +35,13 @@ jobs: sudo apt-get install espeak-ng make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Replace scarf urls run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "TTS[all] @ ." python3 setup.py egg_info - name: Unit tests run: make test_tts diff --git a/.github/workflows/tts_tests2.yml b/.github/workflows/tts_tests2.yml index 17cb3f2f56..0ac42993d6 100644 --- a/.github/workflows/tts_tests2.yml +++ b/.github/workflows/tts_tests2.yml @@ -35,13 +35,13 @@ jobs: sudo apt-get install espeak-ng make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Replace scarf urls run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "TTS[all] @ ." python3 setup.py egg_info - name: Unit tests run: make test_tts2 diff --git a/.github/workflows/vocoder_tests.yml b/.github/workflows/vocoder_tests.yml index 778529b298..b877942a8f 100644 --- a/.github/workflows/vocoder_tests.yml +++ b/.github/workflows/vocoder_tests.yml @@ -33,10 +33,10 @@ jobs: sudo apt-get install -y git make gcc make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "TTS[all] @ ." python3 setup.py egg_info - name: Unit tests run: make test_vocoder diff --git a/.github/workflows/xtts_tests.yml b/.github/workflows/xtts_tests.yml index 99cec287eb..08093e578b 100644 --- a/.github/workflows/xtts_tests.yml +++ b/.github/workflows/xtts_tests.yml @@ -35,13 +35,13 @@ jobs: sudo apt-get install espeak-ng make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Replace scarf urls run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "TTS[all] @ ." 
python3 setup.py egg_info - name: Unit tests run: make test_xtts diff --git a/.github/workflows/zoo_tests0.yml b/.github/workflows/zoo_tests0.yml index 69deb884ef..3337d375a2 100644 --- a/.github/workflows/zoo_tests0.yml +++ b/.github/workflows/zoo_tests0.yml @@ -34,13 +34,13 @@ jobs: sudo apt-get install espeak espeak-ng make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Replace scarf urls run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "TTS[all] @ ." python3 setup.py egg_info - name: Unit tests run: | diff --git a/.github/workflows/zoo_tests1.yml b/.github/workflows/zoo_tests1.yml index 7a10c6af70..03147423a1 100644 --- a/.github/workflows/zoo_tests1.yml +++ b/.github/workflows/zoo_tests1.yml @@ -34,14 +34,14 @@ jobs: sudo apt-get install espeak espeak-ng make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Replace scarf urls run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\/hf\/bark\//https:\/\/huggingface.co\/erogol\/bark\/resolve\/main\//g' TTS/.models.json sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "TTS[all] @ ." python3 setup.py egg_info - name: Unit tests run: nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests.test_models.test_models_offset_1_step_3 diff --git a/.github/workflows/zoo_tests2.yml b/.github/workflows/zoo_tests2.yml index 0e0392f70a..8111860bdc 100644 --- a/.github/workflows/zoo_tests2.yml +++ b/.github/workflows/zoo_tests2.yml @@ -34,13 +34,13 @@ jobs: sudo apt-get install espeak espeak-ng make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Replace scarf urls run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "TTS[all] @ ." 
python3 setup.py egg_info - name: Unit tests run: nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests.test_models.test_models_offset_2_step_3 From 392b20597df653a3739324d66d1c48dfedaf90ec Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Mar 2024 14:55:55 +0100 Subject: [PATCH 040/255] build: update dependencies --- requirements.txt | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0278d27e3f..dd9e93da39 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,11 @@ # core deps -numpy==1.22.0;python_version<="3.10" -numpy>=1.24.3;python_version>"3.10" +numpy>=1.24.3 cython>=0.29.30 scipy>=1.11.2 torch>=2.1 torchaudio soundfile>=0.12.0 -librosa>=0.10.0 -numba>=0.57.0 +librosa>=0.10.1 inflect>=5.6.0 tqdm>=4.64.1 anyascii>=0.3.0 From 7673f282bebcf3331e3ac695f4cc4c0e46ddd3cf Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Mar 2024 20:16:00 +0100 Subject: [PATCH 041/255] build: make dependencies for server optional --- README.md | 2 +- TTS/server/README.md | 5 ++++- TTS/server/server.py | 5 ++++- docs/source/inference.md | 6 ++++-- docs/source/installation.md | 4 ++-- docs/source/tutorial_for_nervous_beginners.md | 5 +++-- requirements.txt | 2 -- setup.py | 4 +++- 8 files changed, 21 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e3205c1bd3..e11da3e788 100644 --- a/README.md +++ b/README.md @@ -156,7 +156,7 @@ If you plan to code or train models, clone 🐸TTS and install it locally. ```bash git clone https://github.com/coqui-ai/TTS -pip install -e .[all,dev,notebooks] # Select the relevant extras +pip install -e .[all,dev,notebooks,server] # Select the relevant extras ``` If you are on Ubuntu (Debian), you can also run following commands for installation. diff --git a/TTS/server/README.md b/TTS/server/README.md index 270656c4e3..9536e0d55a 100644 --- a/TTS/server/README.md +++ b/TTS/server/README.md @@ -1,5 +1,8 @@ # :frog: TTS demo server -Before you use the server, make sure you [install](https://github.com/coqui-ai/TTS/tree/dev#install-tts)) :frog: TTS properly. Then, you can follow the steps below. +Before you use the server, make sure you +[install](https://github.com/coqui-ai/TTS/tree/dev#install-tts)) :frog: TTS +properly and install the additional dependencies with `pip install +TTS[server]`. Then, you can follow the steps below. **Note:** If you install :frog:TTS using ```pip```, you can also use the ```tts-server``` end point on the terminal. diff --git a/TTS/server/server.py b/TTS/server/server.py index 6b2141a9aa..d117494060 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -9,7 +9,10 @@ from typing import Union from urllib.parse import parse_qs -from flask import Flask, render_template, render_template_string, request, send_file +try: + from flask import Flask, render_template, render_template_string, request, send_file +except ImportError as e: + raise ImportError("Server requires requires flask, use `pip install TTS[server]`.") from e from TTS.config import load_config from TTS.utils.manage import ModelManager diff --git a/docs/source/inference.md b/docs/source/inference.md index 56bccfb5b2..2c57f6182c 100644 --- a/docs/source/inference.md +++ b/docs/source/inference.md @@ -84,8 +84,10 @@ tts --model_name "voice_conversion///" ![server.gif](https://github.com/coqui-ai/TTS/raw/main/images/demo_server.gif) -You can boot up a demo 🐸TTS server to run an inference with your models. 
Note that the server is not optimized for performance -but gives you an easy way to interact with the models. +You can boot up a demo 🐸TTS server to run an inference with your models (make +sure to install the additional dependencies with `pip install TTS[server]`). +Note that the server is not optimized for performance but gives you an easy way +to interact with the models. The demo server provides pretty much the same interface as the CLI command. diff --git a/docs/source/installation.md b/docs/source/installation.md index c4d05361f4..8aaec01c9e 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -1,6 +1,6 @@ # Installation -🐸TTS supports python >=3.7 <3.11.0 and tested on Ubuntu 18.10, 19.10, 20.10. +🐸TTS supports python >=3.9 <3.12.0 and tested on Ubuntu 18.10, 19.10, 20.10. ## Using `pip` @@ -30,4 +30,4 @@ make install ``` ## On Windows -If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](https://stackoverflow.com/questions/66726331/ \ No newline at end of file +If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](https://stackoverflow.com/questions/66726331/ diff --git a/docs/source/tutorial_for_nervous_beginners.md b/docs/source/tutorial_for_nervous_beginners.md index acde3fc4c2..db753e801b 100644 --- a/docs/source/tutorial_for_nervous_beginners.md +++ b/docs/source/tutorial_for_nervous_beginners.md @@ -112,8 +112,9 @@ $ tts --list_models # list the available models. ![cli.gif](https://github.com/coqui-ai/TTS/raw/main/images/tts_cli.gif) -You can call `tts-server` to start a local demo server that you can open it on -your favorite web browser and 🗣️. +You can call `tts-server` to start a local demo server that you can open on +your favorite web browser and 🗣️ (make sure to install the additional +dependencies with `pip install TTS[server]`). 
```bash $ tts-server -h # see the help diff --git a/requirements.txt b/requirements.txt index dd9e93da39..6d5fbc245f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,8 +13,6 @@ pyyaml>=6.0 fsspec[http]>=2023.6.0 # <= 2023.9.1 makes aux tests fail packaging>=23.1 mutagen==1.47.0 -# deps for examples -flask>=2.0.1 # deps for inference pysbd>=0.3.4 # deps for notebooks diff --git a/setup.py b/setup.py index b01b655877..2465f1a6b0 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,8 @@ def pip_install(package_name): requirements_dev = f.readlines() with open(os.path.join(cwd, "requirements.ja.txt"), "r") as f: requirements_ja = f.readlines() -requirements_all = requirements_dev + requirements_notebooks + requirements_ja +requirements_server = ["flask>=2.0.1"] +requirements_all = requirements_dev + requirements_notebooks + requirements_ja + requirements_server with open("README.md", "r", encoding="utf-8") as readme_file: README = readme_file.read() @@ -115,6 +116,7 @@ def pip_install(package_name): "all": requirements_all, "dev": requirements_dev, "notebooks": requirements_notebooks, + "server": requirements_server, "ja": requirements_ja, }, python_requires=">=3.9.0, <3.12", From 31a9201639bdaff4b5c1ce5971930a632578148e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 11 Mar 2024 11:24:23 +0100 Subject: [PATCH 042/255] ci: combine coverage reports Based on https://hynek.me/articles/ditch-codecov-python/ --- .github/workflows/vocoder_tests.yml | 29 ++++++++++++++++++++++++++++- Makefile | 22 +++++++++++----------- pyproject.toml | 4 ++++ requirements.dev.txt | 2 +- 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/.github/workflows/vocoder_tests.yml b/.github/workflows/vocoder_tests.yml index b877942a8f..62dcbb5934 100644 --- a/.github/workflows/vocoder_tests.yml +++ b/.github/workflows/vocoder_tests.yml @@ -7,7 +7,7 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - test: + vocoder-tests: runs-on: ubuntu-latest strategy: fail-fast: false @@ -40,3 +40,30 @@ jobs: python3 setup.py egg_info - name: Unit tests run: make test_vocoder + - name: Upload coverage data + uses: actions/upload-artifact@v4 + with: + name: coverage-data-${{ github.job }}-${{ matrix.python-version }} + path: .coverage.* + if-no-files-found: ignore + coverage: + if: always() + needs: vocoder-tests + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - uses: actions/download-artifact@v4 + with: + pattern: coverage-data-* + merge-multiple: true + - name: Combine coverage + run: | + python -Im pip install --upgrade coverage[toml] + + python -Im coverage combine + python -Im coverage html --skip-covered --skip-empty + + python -Im coverage report --format=markdown >> $GITHUB_STEP_SUMMARY diff --git a/Makefile b/Makefile index ac0f793b13..8b704e69df 100644 --- a/Makefile +++ b/Makefile @@ -11,38 +11,38 @@ test_all: ## run tests and don't stop on an error. ./run_bash_tests.sh test: ## run tests. - nose2 -F -v -B --with-coverage --coverage TTS tests + coverage run -m nose2 -F -v -B tests test_vocoder: ## run vocoder tests. - nose2 -F -v -B --with-coverage --coverage TTS tests.vocoder_tests + coverage run -m nose2 -F -v -B tests.vocoder_tests test_tts: ## run tts tests. - nose2 -F -v -B --with-coverage --coverage TTS tests.tts_tests + coverage run -m nose2 -F -v -B tests.tts_tests test_tts2: ## run tts tests. 
- nose2 -F -v -B --with-coverage --coverage TTS tests.tts_tests2 + coverage run -m nose2 -F -v -B tests.tts_tests2 test_xtts: - nose2 -F -v -B --with-coverage --coverage TTS tests.xtts_tests + coverage run -m nose2 -F -v -B tests.xtts_tests test_aux: ## run aux tests. - nose2 -F -v -B --with-coverage --coverage TTS tests.aux_tests + coverage run -m nose2 -F -v -B tests.aux_tests ./run_bash_tests.sh test_zoo: ## run zoo tests. - nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests + coverage run -m nose2 -F -v -B tests.zoo_tests inference_tests: ## run inference tests. - nose2 -F -v -B --with-coverage --coverage TTS tests.inference_tests + coverage run -m nose2 -F -v -B tests.inference_tests data_tests: ## run data tests. - nose2 -F -v -B --with-coverage --coverage TTS tests.data_tests + coverage run -m nose2 -F -v -B tests.data_tests test_text: ## run text tests. - nose2 -F -v -B --with-coverage --coverage TTS tests.text_tests + coverage run -m nose2 -F -v -B tests.text_tests test_failed: ## only run tests failed the last time. - nose2 -F -v -B --with-coverage --coverage TTS tests + coverage run -m nose2 -F -v -B tests style: ## update code style. black ${target_dirs} diff --git a/pyproject.toml b/pyproject.toml index b9902fc372..50d67db97d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,3 +64,7 @@ max-returns = 7 [tool.black] line-length = 120 target-version = ['py39'] + +[tool.coverage.run] +parallel = true +source = ["TTS"] diff --git a/requirements.dev.txt b/requirements.dev.txt index 68450fcad2..7f76b2400a 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,4 +1,4 @@ black==24.2.0 -coverage +coverage[toml] nose2 ruff==0.3.0 From 3453bd960b99fdb2e8ca7bebf2ca3541d6a49a75 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 11 Mar 2024 14:08:27 +0100 Subject: [PATCH 043/255] ci: merge separate test workflows into one matrix --- .github/workflows/aux_tests.yml | 45 ----------------- .github/workflows/data_tests.yml | 45 ----------------- .github/workflows/inference_tests.yml | 47 ------------------ .../{vocoder_tests.yml => tests.yml} | 26 ++++++---- .github/workflows/text_tests.yml | 44 ----------------- .github/workflows/tts_tests.yml | 47 ------------------ .github/workflows/tts_tests2.yml | 47 ------------------ .github/workflows/xtts_tests.yml | 47 ------------------ .github/workflows/zoo_tests0.yml | 48 ------------------- .github/workflows/zoo_tests1.yml | 47 ------------------ .github/workflows/zoo_tests2.yml | 46 ------------------ Makefile | 9 +++- 12 files changed, 24 insertions(+), 474 deletions(-) delete mode 100644 .github/workflows/aux_tests.yml delete mode 100644 .github/workflows/data_tests.yml delete mode 100644 .github/workflows/inference_tests.yml rename .github/workflows/{vocoder_tests.yml => tests.yml} (61%) delete mode 100644 .github/workflows/text_tests.yml delete mode 100644 .github/workflows/tts_tests.yml delete mode 100644 .github/workflows/tts_tests2.yml delete mode 100644 .github/workflows/xtts_tests.yml delete mode 100644 .github/workflows/zoo_tests0.yml delete mode 100644 .github/workflows/zoo_tests1.yml delete mode 100644 .github/workflows/zoo_tests2.yml diff --git a/.github/workflows/aux_tests.yml b/.github/workflows/aux_tests.yml deleted file mode 100644 index d7612d6d1d..0000000000 --- a/.github/workflows/aux_tests.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: aux-tests - -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] -jobs: - test: - runs-on: ubuntu-latest - strategy: - fail-fast: 
false - matrix: - python-version: [3.9, "3.10", "3.11"] - experimental: [false] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - cache: 'pip' - cache-dependency-path: 'requirements*' - - name: check OS - run: cat /etc/os-release - - name: set ENV - run: export TRAINER_TELEMETRY=0 - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y git make gcc - make system-deps - - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel uv - - name: Replace scarf urls - run: | - sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - - name: Install TTS - run: | - python3 -m uv pip install --system "TTS[all] @ ." - python3 setup.py egg_info - - name: Unit tests - run: make test_aux diff --git a/.github/workflows/data_tests.yml b/.github/workflows/data_tests.yml deleted file mode 100644 index c1b7e50d19..0000000000 --- a/.github/workflows/data_tests.yml +++ /dev/null @@ -1,45 +0,0 @@ -name: data-tests - -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] -jobs: - test: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: [3.9, "3.10", "3.11"] - experimental: [false] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - cache: 'pip' - cache-dependency-path: 'requirements*' - - name: check OS - run: cat /etc/os-release - - name: set ENV - run: export TRAINER_TELEMETRY=0 - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends git make gcc - make system-deps - - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel uv - - name: Replace scarf urls - run: | - sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - - name: Install TTS - run: | - python3 -m uv pip install --system "TTS[all] @ ." 
- python3 setup.py egg_info - - name: Unit tests - run: make data_tests diff --git a/.github/workflows/inference_tests.yml b/.github/workflows/inference_tests.yml deleted file mode 100644 index a396add216..0000000000 --- a/.github/workflows/inference_tests.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: inference_tests - -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] -jobs: - test: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: [3.9, "3.10", "3.11"] - experimental: [false] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - cache: 'pip' - cache-dependency-path: 'requirements*' - - name: check OS - run: cat /etc/os-release - - name: set ENV - run: | - export TRAINER_TELEMETRY=0 - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends git make gcc - sudo apt-get install espeak-ng - make system-deps - - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel uv - - name: Replace scarf urls - run: | - sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - - name: Install TTS - run: | - python3 -m uv pip install --system "TTS[all] @ ." - python3 setup.py egg_info - - name: Unit tests - run: make inference_tests diff --git a/.github/workflows/vocoder_tests.yml b/.github/workflows/tests.yml similarity index 61% rename from .github/workflows/vocoder_tests.yml rename to .github/workflows/tests.yml index 62dcbb5934..da5352d1fc 100644 --- a/.github/workflows/vocoder_tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: vocoder-tests +name: tests on: push: @@ -7,17 +7,17 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - vocoder-tests: + test: runs-on: ubuntu-latest strategy: fail-fast: false matrix: python-version: [3.9, "3.10", "3.11"] - experimental: [false] + subset: ["data_tests", "inference_tests", "test_aux", "test_text", "test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} architecture: x64 @@ -27,28 +27,36 @@ jobs: run: cat /etc/os-release - name: set ENV run: export TRAINER_TELEMETRY=0 + - name: Install Espeak + if: contains(fromJSON('["inference_tests", "test_text", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) + run: | + sudo apt-get install espeak espeak-ng - name: Install dependencies run: | sudo apt-get update - sudo apt-get install -y git make gcc + sudo apt-get install -y --no-install-recommends git make gcc make system-deps - name: Install/upgrade Python setup deps run: python3 -m pip install --upgrade pip setuptools wheel uv + - name: Replace scarf urls + if: contains(fromJSON('["data_tests", "inference_tests", "test_aux", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) + run: | + sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | python3 -m uv pip install --system "TTS[all] @ ." 
python3 setup.py egg_info - name: Unit tests - run: make test_vocoder + run: make ${{ matrix.subset }} - name: Upload coverage data uses: actions/upload-artifact@v4 with: - name: coverage-data-${{ github.job }}-${{ matrix.python-version }} + name: coverage-data-${{ matrix.subset }}-${{ matrix.python-version }} path: .coverage.* if-no-files-found: ignore coverage: if: always() - needs: vocoder-tests + needs: test runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/text_tests.yml b/.github/workflows/text_tests.yml deleted file mode 100644 index adfba0260c..0000000000 --- a/.github/workflows/text_tests.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: text-tests - -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] -jobs: - test: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: [3.9, "3.10", "3.11"] - experimental: [false] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - cache: 'pip' - cache-dependency-path: 'requirements*' - - name: check OS - run: cat /etc/os-release - - name: set ENV - run: export TRAINER_TELEMETRY=0 - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends git make gcc - sudo apt-get install espeak - sudo apt-get install espeak-ng - make system-deps - - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel uv - - name: Install TTS - run: | - python3 -m uv pip install --system "TTS[all] @ ." - python3 setup.py egg_info - - name: Unit tests - run: make test_text diff --git a/.github/workflows/tts_tests.yml b/.github/workflows/tts_tests.yml deleted file mode 100644 index 72fa0a9f29..0000000000 --- a/.github/workflows/tts_tests.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: tts-tests - -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] -jobs: - test: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: [3.9, "3.10", "3.11"] - experimental: [false] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - cache: 'pip' - cache-dependency-path: 'requirements*' - - name: check OS - run: cat /etc/os-release - - name: set ENV - run: export TRAINER_TELEMETRY=0 - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends git make gcc - sudo apt-get install espeak - sudo apt-get install espeak-ng - make system-deps - - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel uv - - name: Replace scarf urls - run: | - sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - - name: Install TTS - run: | - python3 -m uv pip install --system "TTS[all] @ ." 
- python3 setup.py egg_info - - name: Unit tests - run: make test_tts diff --git a/.github/workflows/tts_tests2.yml b/.github/workflows/tts_tests2.yml deleted file mode 100644 index 0ac42993d6..0000000000 --- a/.github/workflows/tts_tests2.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: tts-tests2 - -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] -jobs: - test: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: [3.9, "3.10", "3.11"] - experimental: [false] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - cache: 'pip' - cache-dependency-path: 'requirements*' - - name: check OS - run: cat /etc/os-release - - name: set ENV - run: export TRAINER_TELEMETRY=0 - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends git make gcc - sudo apt-get install espeak - sudo apt-get install espeak-ng - make system-deps - - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel uv - - name: Replace scarf urls - run: | - sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - - name: Install TTS - run: | - python3 -m uv pip install --system "TTS[all] @ ." - python3 setup.py egg_info - - name: Unit tests - run: make test_tts2 diff --git a/.github/workflows/xtts_tests.yml b/.github/workflows/xtts_tests.yml deleted file mode 100644 index 08093e578b..0000000000 --- a/.github/workflows/xtts_tests.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: xtts-tests - -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] -jobs: - test: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: [3.9, "3.10", "3.11"] - experimental: [false] - steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - architecture: x64 - cache: 'pip' - cache-dependency-path: 'requirements*' - - name: check OS - run: cat /etc/os-release - - name: set ENV - run: export TRAINER_TELEMETRY=0 - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get install -y --no-install-recommends git make gcc - sudo apt-get install espeak - sudo apt-get install espeak-ng - make system-deps - - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel uv - - name: Replace scarf urls - run: | - sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - - name: Install TTS - run: | - python3 -m uv pip install --system "TTS[all] @ ." 
-          python3 setup.py egg_info
-      - name: Unit tests
-        run: make test_xtts
diff --git a/.github/workflows/zoo_tests0.yml b/.github/workflows/zoo_tests0.yml
deleted file mode 100644
index 3337d375a2..0000000000
--- a/.github/workflows/zoo_tests0.yml
+++ /dev/null
@@ -1,48 +0,0 @@
-name: zoo-tests-0
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    types: [opened, synchronize, reopened]
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: [3.9, "3.10", "3.11"]
-        experimental: [false]
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-          architecture: x64
-          cache: 'pip'
-          cache-dependency-path: 'requirements*'
-      - name: check OS
-        run: cat /etc/os-release
-      - name: set ENV
-        run: export TRAINER_TELEMETRY=0
-      - name: Install dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y git make gcc
-          sudo apt-get install espeak espeak-ng
-          make system-deps
-      - name: Install/upgrade Python setup deps
-        run: python3 -m pip install --upgrade pip setuptools wheel uv
-      - name: Replace scarf urls
-        run: |
-          sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
-      - name: Install TTS
-        run: |
-          python3 -m uv pip install --system "TTS[all] @ ."
-          python3 setup.py egg_info
-      - name: Unit tests
-        run: |
-          nose2 -F -v -B TTS tests.zoo_tests.test_models.test_models_offset_0_step_3
-          nose2 -F -v -B TTS tests.zoo_tests.test_models.test_voice_conversion
diff --git a/.github/workflows/zoo_tests1.yml b/.github/workflows/zoo_tests1.yml
deleted file mode 100644
index 03147423a1..0000000000
--- a/.github/workflows/zoo_tests1.yml
+++ /dev/null
@@ -1,47 +0,0 @@
-name: zoo-tests-1
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    types: [opened, synchronize, reopened]
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: [3.9, "3.10", "3.11"]
-        experimental: [false]
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-          architecture: x64
-          cache: 'pip'
-          cache-dependency-path: 'requirements*'
-      - name: check OS
-        run: cat /etc/os-release
-      - name: set ENV
-        run: export TRAINER_TELEMETRY=0
-      - name: Install dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y git make gcc
-          sudo apt-get install espeak espeak-ng
-          make system-deps
-      - name: Install/upgrade Python setup deps
-        run: python3 -m pip install --upgrade pip setuptools wheel uv
-      - name: Replace scarf urls
-        run: |
-          sed -i 's/https:\/\/coqui.gateway.scarf.sh\/hf\/bark\//https:\/\/huggingface.co\/erogol\/bark\/resolve\/main\//g' TTS/.models.json
-          sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
-      - name: Install TTS
-        run: |
-          python3 -m uv pip install --system "TTS[all] @ ."
-          python3 setup.py egg_info
-      - name: Unit tests
-        run: nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests.test_models.test_models_offset_1_step_3
diff --git a/.github/workflows/zoo_tests2.yml b/.github/workflows/zoo_tests2.yml
deleted file mode 100644
index 8111860bdc..0000000000
--- a/.github/workflows/zoo_tests2.yml
+++ /dev/null
@@ -1,46 +0,0 @@
-name: zoo-tests-2
-
-on:
-  push:
-    branches:
-      - main
-  pull_request:
-    types: [opened, synchronize, reopened]
-jobs:
-  test:
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        python-version: [3.9, "3.10", "3.11"]
-        experimental: [false]
-    steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ matrix.python-version }}
-          architecture: x64
-          cache: 'pip'
-          cache-dependency-path: 'requirements*'
-      - name: check OS
-        run: cat /etc/os-release
-      - name: set ENV
-        run: export TRAINER_TELEMETRY=0
-      - name: Install dependencies
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y git make gcc
-          sudo apt-get install espeak espeak-ng
-          make system-deps
-      - name: Install/upgrade Python setup deps
-        run: python3 -m pip install --upgrade pip setuptools wheel uv
-      - name: Replace scarf urls
-        run: |
-          sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
-      - name: Install TTS
-        run: |
-          python3 -m uv pip install --system "TTS[all] @ ."
-          python3 setup.py egg_info
-      - name: Unit tests
-        run: nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests.test_models.test_models_offset_2_step_3
diff --git a/Makefile b/Makefile
index 8b704e69df..a24c41fc0b 100644
--- a/Makefile
+++ b/Makefile
@@ -29,8 +29,13 @@
 test_aux: ## run aux tests.
 	coverage run -m nose2 -F -v -B tests.aux_tests
 	./run_bash_tests.sh
-test_zoo: ## run zoo tests.
-	coverage run -m nose2 -F -v -B tests.zoo_tests
+test_zoo0: ## run zoo tests.
+	coverage run -m nose2 -F -v -B tests.zoo_tests.test_models.test_models_offset_0_step_3 \
+		tests.zoo_tests.test_models.test_voice_conversion
+test_zoo1: ## run zoo tests.
+	coverage run -m nose2 -F -v -B tests.zoo_tests.test_models.test_models_offset_1_step_3
+test_zoo2: ## run zoo tests.
+	coverage run -m nose2 -F -v -B tests.zoo_tests.test_models.test_models_offset_2_step_3
 
 inference_tests: ## run inference tests.
 	coverage run -m nose2 -F -v -B tests.inference_tests

From 34e5c7dc591dd984c1c242250fd6c5340992fc50 Mon Sep 17 00:00:00 2001
From: Enno Hermann
Date: Mon, 11 Mar 2024 16:31:37 +0100
Subject: [PATCH 044/255] docs(README): remove obsolete performance results

---
 README.md                  | 6 ------
 images/TTS-performance.png | Bin 48047 -> 0 bytes
 images/tts_performance.png | Bin 197490 -> 0 bytes
 3 files changed, 6 deletions(-)
 delete mode 100644 images/TTS-performance.png
 delete mode 100644 images/tts_performance.png

diff --git a/README.md b/README.md
index e11da3e788..d6d6521d40 100644
--- a/README.md
+++ b/README.md
@@ -73,12 +73,6 @@ Please use our dedicated channels for questions and discussion. Help is much mor
 | 🚀 **Released Models** | [TTS Releases](https://github.com/coqui-ai/TTS/releases) and [Experimental Models](https://github.com/coqui-ai/TTS/wiki/Experimental-Released-Models)|
 | 📰 **Papers** | [TTS Papers](https://github.com/erogol/TTS-papers)|
-
-## 🥇 TTS Performance
-

-
-Underlined "TTS*" and "Judy*" are **internal** 🐸TTS models that are not released open-source. They are here to show the potential. Models prefixed with a dot (.Jofish .Abe and .Janice) are real human voices.
-
 ## Features
 - High-performance Deep Learning models for Text2Speech tasks.
 - Text2Spec models (Tacotron, Tacotron2, Glow-TTS, SpeedySpeech).
diff --git a/images/TTS-performance.png b/images/TTS-performance.png
deleted file mode 100644
index 68eebaf7e6dd503333f2bb8b85e0bd4115c2011f..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 48047
zXPl5h#!8=8m|rKLn0dF09_Ev*KCfv%)II>VpWuqCN6yNz9r;{NtXo|UUOte&tzLPu z!krhLvP#*_8HQlBN%dQ!ohYZJrL9;S>+)g=x$@`z_ezbB3zzq1S$_E%6F<2}f^%J4 zQrutY$0q1#o7nhxz?i_n0r86JYen!y^tzNozmnA@Zgv2_t zzc3*+YGb2(neZ!8`IJq*LN!FiW$`(-uff7U#FGF069xYf>-7= z*YN=Y7Tu-bNz6q}rLt3iqoQAK3>LEKX)m-*mu+_eED~EF`=nO^-R4iaWZAX_ESW)` zKi|Q}89fEn=J$ld*ZwB(p`-7iDPtu2tNK07cO`AYMojBT&I-w$LWEsfoF3Fq&pv=E zHO^MIXuWlLx7%eS0hp`IgWua8EV3U5$|j+Tl#7*)*`g))fIzOubYEP2;?yAqTU;fu z5EV{b(aLIy3^bDrA0a@x$NCTu_$L2$xGY%RFCgPZ}Qvi{H5bLO^(f#ZAaR+)3# z%_RVm^oMSTUoJA>Ieoh`K4}VOz=d8-qKj7lZCxV*rWPsL{nD8`#VBM@T!zg8p_K`% zbC|HAavyYGwLOnT2H5()$d{<3diaq3{v$ClWcNVNuEyf2WVBDp({ zo$74luRFwJr}2pp{;3&8=WzwP4Ug)TMzsU8JXl#OOqS)Rz7&2}?Mo^0KjZdJqwqE+ zJ&Qj}ux`GS$BYbm3cz#wK(ub&peZ(fd$V)mLPFgIQ(G3R@4L*)&L-58O!W$- zeQR2r{4O`?xyyH0aD{!0FuknjbwA70X<%p$gIn1IptV;MQof)p0j)IpYfd9UG|7=7 zj!Rw1=b~vObcN(0`yd5Ftgkf&>}$DD(M;%%Xk4EFFvFbb*SJ~$fq3g|w0Ilx#}bd; zRlcQrdJ`uRO@HGCF4jW7P?kPwDN#`J)|z*~e4^NYVRii%JBO*xqU2P9W1}ZjXc!L{ zWA3&&st=ljvKq=GTv4VgCn4@QZ4Cz8Q{R*riF#T1aiVI3t?3dmflN`nM$WGf{(kP1 zJwjD0F_b}M`6=V~|5BV`Kh7QhWWH2GiJ~bYoes%B^#n!P3v5pWLw6^fSlrKen&~Bp zy_M6xsRGR)iw8bHa32$AU;IrG$L4G5kgT+Yx_y?58vIORVl?%D;P^+fBO+lQ89tI) z_V#~R0Jc6w)w;30YK(qy^;L7X5mc$S8E}fOzXViI*fsU{Y_=*3Ro)S-4cA6Gkcq!# z#ndRvOj`6Dc|J=rS*~&$tdqam=ktaA+A3mX$2uuC{wWaki1VF}9P^5^kXfj3*tv() z7R}q*rlGp6bbg*hFWEy3b6;6+Tdkk z{PJhqsG1*hhihc=Tu+fbbfLaLOt>Y1_oZ&Xbzjq8O=$7|zY8x^as5}7A9LOua3uQ$ z06mlDk{a?y!yZs7Cj;J0@rQ(pF5VUV;V%2YCM2=uJAkiw*!7ujV{k1>z4K!lI{|)F zuU3>wOnP?QQ-yI&I{Z9s4C#O$h?7V$X*QwQZ9mbzea%7+Y##eL4pQiNu$I100|o|x zo(ygc-fS%f+WnOnz=i`Nd-~DDWAZZ6~}JMCkRhCq_&($6&mTIe~{?0na5aA;Xc1JI#P! zNfGO*5@UtV#%FGx(Pb%d*V-?@s=iw9_eJNmLg$B{#zKNN!wE`Mx({l#8NpX$a20;6}_R7NkY+&e~{x+mjX}{R?1=&%BY%n?qJiurkYP;mX@>XGwj z({JA14v!6jWzy3&4xV6xb%J{8v!RooUFuWfSKdb+Xqal0pb ziQ2@Rj57hLZR-IxjMaNi-`7jS89{M&(f7~LCoe+aMnJx~i%9885@?ld`|rW?Q=(7POj6De};_@~)q*WZmTNA7mFPyFNh z>z}W}yaEPHF5wqWk&|C#edF9Nr8#*~-SV$D57|9PCO?JOwz9 zF>>aiz!E+V#jgcGxVGnM7?2dx5T;-Q-{oVs0JBKFimK}SJKs&&h~b6oUh+i+ZUC`eJZI* znUgTuLLPBbkvLd&2p*>(QBACD;Ze^WL*7;$T_&snC)fNyFVSy$s>=z zQxkhSq!Ir?$b+(K;9d;DC#@P)2J3f@%YMdr5HE;Jm)Mz3?5+d^-V)ln8d&pdd;L|> z5^I8d*e;}XZ1W8hEc80C_cp)LtaYS!S@jXr^CLUKqHtgCYE)vfRf56W_ua4FRe2MM zvw8+ho@T$Y&BGaVUwr!6Wq#Z0Cl|YSA3c4FkdoX%`=_p*MVJp`tD(NA)Jw;**n+k< z`R|(=qf*2(JB|OW&4WQQz(V9ST@Dcusv6wrd?)YE(Kfe6Y<-JP!@~EQ<)L2r`9o_X zrXgG5WC`9^t1;f$ZSIhrwxj04>#ESJc+;$)$LbyHVHA7AG(JlEfxe2} z_vMKhme<$uSS4%AO|GKq;#tT%)?dXsu+s=m+;NONr(ZtIzICzx!4mx6Lmq+IGvMRG zX_$|4hH81Es-^|yuW+|>C;#l}^DYWbu837)F0=f_AX!&G03XJGBXHXNvlQa++)u3f zfe3?MW$5weyr~=Vn;TZIcZ|DzjekM0u+MkaKT(=LO8X6_8XS`27e?bKjlLmsr@>S94r2q`zxQVy$!$hfd_@5*-6{{m1oAP5r2v zkCy6;;{~vhYt7*TzeBS~Glis4YaX;FT1eaoVCf~{>rRp9 zq%3x#1#5}kv$(r%g&sRl@-eb9Nbzf)v8!8ai}~B=~k^W;eGlTn|^1*rvQTiIsRPHF>YFzLfjJT?^$1KN}cG-TIn^CYsS2Z z9D@Nt-R#5|QM*^#XJb>##%ibr`*{|8Ht{#UbR|Ct_U_mT-OYeRuff*gT#!u_b;=8= zQt9&vuhMggJu{f3^sgO`Pt0mv3<606)gOYS4OHZ!ikWzt9D7VgK4D8Y56oMmn=;nE zOc3$6Kino_5KVMk@;-dg*U#g)5#2J;X3R7_KEdce{xG3%-7Bn01XrNhy6wsLRrEHi!gT`Q(Aq>WWCgc>O?qz6Wr~aKFH?#VIjT6x zi9kb9nH|cRM@r!3sToAXuaKuCt*qE=)gX3u^yd9s#Hu93!Du=oW<}nYzAtm;q zAAw#44HYsGvHY0KY5l2 z3nPS0(b&MC&16A^6^)UfT}1kutmGp!lc)9tvx*KgU1shs9gQe+`)VE-HjQe+XrU^b zCMxJ{Sg4C@x?3<<0YA0-WMMz!nVS*J9&w`BWBDvt@#qDM>U1J64xLUcHKVzru)^tY zYDFzS32uw-CzTJo>OsNf>2B}6@u#XtElT@HBuV%cKLGnmJCr=|PrWC*UR{`1U^3C- zo@X1zb~Fk9ah-=weaP(^urCXUSroO%%^Jy`Q&=f=`VJ5!xU{9u(l4$|%g_4P05BH7 z9&x}i+&6u!d>Byav1N|jWm-7feu^GF_1kv|^V!MU%obnrs{g^Whn<6$@Wh*7`uxLn zf>o>2)BNd8D;GwoJdt8Q3x4%SuIA4ad|2L|0ud-rF+jO;>yeC?(@CtW`F{V0peFQ{ zy%ug*Y?3qYP~`IS7tWe49y{4ZCXeW>t?h1q1$QiHJbd>0X>@e-@4IN>=`!I%KFTi` 
zy*eacC28Pub6M*0ysl;a{kLMzT@qG5E9{zN65vb-3}Yd~g+nF?=jTd>zEAgYKfO{5 zDQvaRc(=n6RAbNACq1#~tWL}|83h1*V6orXSHg)YpYN@=xytX!p^o8lUyZ0DcRqNz3=XL#nF zs0v_VMlf)Pu4ZQR^ndRGNo4*-ljkJVJTnh*5UjEOY`q8`(5yw^XbaO$ctUIlmt#P ziF?j3pYJ%Li;=2n zk0eNeG5)sDT_1=36_NDdl*%IU#m5Zeu-U&5@4!WN4BxBhhxk~(@O)nNTh;SgSUsc0 zq|1EqIXU5H9CUr^M7r>zR=hB)q)NcG;9k6Jgrz0{wgC0RR}qOIWA??gUVi3 z{1~J-P-T;7*NXG8pN2(?#Eo(t>Xeyabq0V zNW#g#Lpuj8mY30QY?Z;&6bv4tXG1WTOnO@xd0M8pPbgE^W8+!zjf)chzgW$LbB|V~ zK{2-AsO2LzVy*v_v6vDM@|ZmO?wlT|N98rvW3u*)=6)b(9-Ao##Q+xkoLR<07p zb-vvD3ipq7kQ5@uHaRtuSJ5B(M8|!#k<9fN!vn;-(m4j;tQEQ+6f)9A-XYt3<-$8C zx6gklM&>T}j(PZ4AfRG+(S%v&sH+6cjP%vdO_xO#2s4gUiBCw#d*fBag5#1uB<&YS z^sAY|o@&w?xWZY*z+_ZTxYE$&?>01bOn&?ky7q;oG%ZXvzfuOvohbC&)+3Qnxc?p+ zwFk)Ay@Z%u{%>?vjGcYUN)Gb{@Hm=3Y`Es=ELD-h5QVhWsC*~d+M8Fc!YS?iZ5=Ask&VGprj%Tt=YxWefv$B#2C{=HgNUOWX) z9=R@6`Dmg)M?2O^RmUI8;}b5Ero&Y9btT^@lxI>)EaJ+UxjYc=C)e{{1K=l3cHgME zXP@$ADW6lNYse3lKFCr1me{edkMpzTF>V_NRoL;Q=*-#ZY+oTJj^ejz}(Nv&Hg zQz=X!c_6tDXCWy~EP);j{AAfL>Xj5`q}l@j*`FG7F#G_^yghA688CQ#)D5gx8hhJy zl7Fb;>|6~na=TLi*cBnxp@7Zc@P6x>+r-zsckUchlXxuH%l~v2(nl`eue!YF9BGRq zy_=MlCxG^XDh8RIUb&Jl`h0JG(z3`2v82gsP?n}(YZ6H?vP=C`IEsM@vE$xYnLbnX zsoiEG`an|&jFfTCSqVU|XrPy0aw>Ih#5$%6=p8bSGi=&#<{j6?en{^j-f4IM(Iq;{ zQzr0z6<_gN;j;6Q4-9PNzy!!wbVR}I_bF&tu*jm_Tm;bGqg{FUfzE3PAK>jRYRy{! zCZ|PcESal4p?}6rvXmhjrxE}pp2wucF=gcEX6I0P|JT&ckX-VXuAR}kreF5co}yva(}{p;&}f4gfTpC)wW;a zxo_Tjblho!{rGmJUG=*7I>9ZcN`qt`RDa~99#_8-t7P&usI?!7B>2V z9bkT(q0AI8FsDRpIK6F7s~0o-mz|%+WKl9VBO~Kc=a)2t7$vy;JDIw^Ux|xe8m9{2 zz;8%LwN_!Obb;_+lri}TYbHi3De-7YqfHd8;6H;X+>grOPSIFJv&$Wm6M1$QQL$ojU7=ek-UgO?J>EqBd?9FCBqSgB5p{8eO%3<_= zPXy-gKO0Gem#Jj8WG{mO$^V5CUwT=j5DXeOh(p^NkVX(!C*1Wa5&0#JZxOZ$o@E&K zNG}8D(~Gpj^qT3vg1ICWzny7ZTSwP+{w08D1~vV~97Pr-S*drub54iN%AYYj2COGq z$vkq}xqK!|q=hZ^3AJ6nd1n23u9v<9G?N#Dm}Qa=-)M&jd?OAK{Qrpi%CIQ6HEaWw z5TvC$q*WNY8M;eaK)M?Q1O(~s2I)@e7HR30mTnMf5ct;U-uvw1+2{Js_v^c^xwvM= zapry3yWX{)=YH@g9B zexAex=8(0LunASJaKW-^H7D-a1`gr{d@UOU0?9#0{yqbFb={4o;%1_WpGU{au zq!!T4`>?1QTa0%vM8#<$MRegMV05vHPfnDVl><#c^*;h1iJO^~AgtDPky38}2qXK)T$X`4fad{$f(R!ysICBDCl*cN|v%-;eEB({6T&QBt z)!@yKusZFt25zGP0$2Dya-3h;>JO4zxf|z)>%9OZ+}(@225PLR>`<8LHId-a7!z-t z*zOFr84G3mD>}?e8=U+XehU6+UCLj)?Z`5o8M%KCfNrn|rznPP@am#IwPUpmzo>nk zFDE*o+aa90!z+Jp2i0wjUJ0#2@N(hF`|u{chS)m=F`(db_~ROQF5x~H1r_hb=-s=z zx08Rah;}sbe!4}sWD@m)0#|kRC;*+=dm){c*2$Q-D2s0Ci}}~rhmOyM`RHU*BqPS* zPj3%iciVgWJANT`2(F3E2}NoPM1)+*2W|?rn`s#z-JUsq-C)#b&y|^YvQ_(*7ss}3 zH<6>aiBXa;Mwi&E-ypcRi#V)MauU>GbL7oM9R_*o zrm~0;j>;L$oofuOpjYLiOSGYAh;jOQB8_#p@hfyyRk1Xj{6A{-hnpM4SoX zjNXNyIJSRR6I#Y2={^UdPu%1gaD&!ZmE>bRGwOO$l{>xfUqQE7u+rTShQ+8KRv2LH zU{+b_!iKi5E_2}N^MHF9t>SYa&$|dzYVV@cdW^ZvT-R?77mDgGDQbA}hPCTtUM7cO z>!FA74>&uIqIWdYki2EV^uXCX6Z2xobHT5g+*ySle(uni*cF=YUN)5WYQh|ARp}w# z`PBT;SvT%C4-N!7>wpK>_*jrg%Xi>6^61Oux<1(t*9}j-uW!EEb$zT;aml&6;)LBj zh~ISC{4jVSu8C@%ZJy~;3kL@bkEhN1`T^UwKW;uq-M!jhbVkcy(3B$mL@G?*=eR@~ zoYpKaAmC{}UD@+GA?d!BmA{NPX*~xsbA{PvPY=xc2cyaV$vW_woGp6$bRtDfx4z+6 zQ|69Q(k+FPrsD|>!|B~-x5!ZMSgiwuI`Fc_4So$K}a0; zH0c%d!-$9Hh0V_o^T;QPT1X#CkyhDd=V*#p7(qwK)Y;H+5tcB5otmf8w);hdPNNm@ zq)A`rZu5%^tK z@%>Xkb{RHm^)Gp-kE-Zr_9AsWk@p8^i$gX>!&pWEC~l1ycTV+MnbvDD@-BIdFNmX# zb#^X+QTf-tC^Da!nX-08T#gJK+E1&(K3D4A5mkitF4v4^y{albT3{y{v2R2BGWDQh z-Dbw3riYgWsWW?)1o{uI-;M0`9cfT;PyYiPw zWtZPhbDWy3vkO5*wm~uJXKX%uRdJuDB7EeMFe^bCw)4Jia~ct zy`JPYUDFZtSTf!4>1&`W+pn_;TwU)m{e9(odjl6<4mYLRe71xX)dc57MQ|-=2|tyR z99uA)2!I?(z`%hk^dB#>2r~?QZW4EemMx^**EiiH`5r4wLf@@v3NpgEpA4CS?!CJG zyeb%ABEB1k`rg%|pLN(dutBMs)P>3}w{PAYc zNPOJ^dFA{OpS?i_XPklTr^G}Sy~yqJW4JnYurkXY*_`w1C7tPZ;YAGT!lMXcs zaeij3^npV>x0Y!Cn7ltJ!2&`gH)V8}-_z>8y1S>lQ9*BDem41cQz9aCp-Z5-!YO4v 
zJ<(;Wi|@Mp0qtuVy>Iut=0HkCUD|S8fAEB_2rWgd7kTjlRK$+H+;_+YdO-_G2B;nA zm7OtF!-@0wU9H!t9@oyx=_nES`J+^uB#0q!7m@t3pr-IBmztOR+kqehd)A0hZ1Z+f!5Tn5CZ{8pv$Xm;tmIu@cwzI3@upZc|Sp%8ww#ruGf zmO7gzeY)fQ7Y`Af6}!+KnEYT*^t&BdAYmR3{7F6o%k))hzViT>DS-VxmVqj$!9`7! zJ>@8=a(~CkDa|k=zx5Gb2x=(nYt)O|%LkdV#T(N-d-8nOm=6i!9THzeleJhB53B8s zO~ij-1R->N$vpg|;tO{gSZ0NmoQ}82ZL(`>;;;OZvpij=NL8#xURc`r*$vT}(HGur z&S6qKEpSLIEVxLN^uj!JW6aI~HCTw{PP5eh0|mF!PLhIOd3yKJS>SRaBexJDAZ@SKXw~;2e>sR z3@95u{R-r7qJbjEgnd!@b7@6vJ+-{Jw6$_lzHoj%(zh6?{f9)ybe$G3vG9&n=VRs67SYp{bzVB1W9^M|(7w&QwvH50 zhzssvU2^abobXBPtQ8ct$qE^m@SoY3RXE>`QxgzeA1I8WU=SRp3sPf*0%Tk$mbO{Hn-YFous64 z*%=rMaHJQQAW}7p%PdRDXn**#N-|Z3x*MXj>}i)J zhamCdCWk#?9?txWUYv+yHCM;p1KqZJ2A=coK;iId8EEA?bZY?M;R=BHGf;P#p@~w{+DYYCesbXP*=Q= z7{lLR?HH}EpkwrgiGy@?&3!kXM|2B=JLT@cO4^e}W}CYs4Sqk<6~<80VymOV;RmwP z(vXDSuieuBdBP-~%%nrPf=3C-BsH+!j=w)VTP1FheFX}o{i}niJQ^!0*_N38sv<7t zP$Fs~K1;Y|Z0RYjaJH$r&dFfu1VRlh2F2zx&7}evr=*M8Ig_<^GNOUYs_*-8Nud`IrIh=+sua1Cy zysN6jE-xoeX2Mu?EPi5I%odOwr>dc5Vxp51Vj``##YgfqRecZ#S&`X!Yy5nAVuP3> z`gmL^nwM$`M%9OW)LS?K_p2xt<^T*+)rOmamtJrKo@fbq8Wpy;T1d4nzOVN7W09<$ zfp$)_u)=4dceNx%_GaiJj*r(cRNFq(SPY+%1uJBMvKr4vh;qR zD2kNB^3jN~Q@1I1PQbdDz{R9+t*W+bFeDC|;2hs{d9O1urPE!m_uTT3YIF~zcy^^_ zyqm`PS;X8=C|%yu`8JTvloQBr`|EUF(bCdJ8ag<>?UwoH>BUcaA(dx?|ND5ecfnm> zpSJXZnmSoe;@(9`!yf0VaZHT)%ueFW%yWNNKtaaJtBu>C@S0;g$Zuv|K`T4tHio0I&&!NV*X zs!nrlE(&qSzPh2NQuzi)iN>6RX3!sP090MRk15KETUwSVnY6RO%^IaT8-AQ5v@|uf z)!t8pn9UwKQjwAhIIN)_qM7R{KXJtK)=@39troPGR6*X2?SA4ks)LoTtGmVjwsJ$J zEoJsSUk4?0a-yJC^Q$I<&A9TKk7LJjCqAe)w^Qe}-A+b5ksUZSZh3w1iiVy(8fICX zwf+*;Xe0dpHh_L`K;)iI?Sp;*iSl<~=t?Ezo2-6{OY>n{iNtrDf>P4i#kcRrieA8k zdVjuOP`18Q^&m6!h`vag+&sm17fqLNc%$GTx!mJDT6&lngYg(;=Ya%-mdoghQ(RGi z#dYSzql2kEr2MRklO1exC*XSq!*m&O@hC&KX_%Pek}T`Rd4aL!Z6@df(7*Rmkv&;9 zij5!N9)I?cm@Y9~ZzOZZ)!X8Q`yr=1z5M(0P$5v%xC4O#$qdGUzGoV`-faQKYNLkBr{DcT7R<3Ss9d-RRXa4m5kNRRaNaOR!T2E z_oWj`yvtWCwt|jlXzSvWj4olx6#oJ z_A56>P>HQhR0_J_hc{LuXrZ~$(eaO8u!urGUpEra5b|_Zw&7=#Q=89{fL5dr9=6S{ z9ERAxs?=0->?OKD7JY4X*q`|K38a&TwqcBcFe0TeJ;-4+k>6A`;280hIqoo&fP)Mf z8Tq;8Y^|MR;_Nd7d(p3bEp>Cd5kI-XKj+?GUpC#s&N6N!P#U_WOnA-1vXaC; zG=RxY_e+a8?LuyO9Zkd17PT(LWD5 zD#R?C&ke(vkxcVr@*+zqezf!MhC#%PN`P~46cbe~VtLdb)rn>&8#uDdfNr-zj!-zAI|C;vsSwJd z&uG2>h`B>O2VtPKuC5NYusatmKS19=x|18qa?~O3{P6dEh{Us0`c{TjW+qW(DWMBe z+W_mXuI@-$>8M(vh`{oAioiFou*h?3%V+BR^bLV#oeod)IoO* z9T;CA4ZC$>)5nB{!<&c`7vOSg-dGAm!ZaNi#p zo$h0-O#OCK!_p0YK9Y~uc6_x6-1h)yn3w#i`Io14+a(Pv8VeV(Xz2|Y8A*OxU!7&t z8|oN1`{9QLcKcJef1F{CsauUWIBmHnD(1v9aFY4ewi|SvSd?$BMIGX_T7=-;o{Mp& zeWKdSAS@;)T4+)OIwza~;=Y;@#29H-QrB5dJrK#ifS!grSJUYHPtoMR#nK?Y^mK>n zd|&DDQn6wX>-btWO;(|%(N{Ka$r#baYO=NKXoANnjEXRq;`9E?HmAr@kDk4^u%xwJ zl^I4zpYPK-Y;5w7kPs?LCqcnGindMl%u@VztclnJh|<$+0>{8kz`D_)|j1z?j?R z5W2Je4jkQ~KuQ+4d%-}G>@}}D3@xl~1`N@Q8Fm|$2IS}1oCD{v=h)_pG56%zf z@1Vv?_Nu$O`2&~DIR~Mb2q+Pq!IJhGjZir&PTk8JaS;(;n8khL*47pU6O-)A*NgY2 z%JuwUji5OU3u0-d@NlQ&p05f1W9j;*(ZuP7<0Mz@yrvgd%td- z2nb>XMaRVSc>L({z&SVv^wU^~*+h2$2K5`uULOkU68NWN7n%{P?*E!|I^pO#7W)D& z%g$4OY|cY|aA%Ffer3*2{yYayB819LgT!NjUFk%;21TK$cP{2(F<=mrSfx!yuS}U* zbw{w7H3bu8amuGIZ=tPfP&AshumsnNQOP7f0SY$S(oxU!G|5gu!&1Ky{IHPk!-M(c z_u^lZ(R83k974T>QNMu_|MU_L?sjn(!HYW4jy069UIfM4I1@w(E7&pb)J-WH-xR`` zjaAh6un>%5&iG|9vuh?orl)2EwaW;QZpL*(XImINyX zCcH#UEmiI8DiTCQMU!ZLUcjeh^`&Bm?Ee<4{OsBP&v&juKLUBX6_|zBx{vc0kbh^F zm&<1y+X;1YBWxMR)ZVp{bbXy%NfjCsT84w*llSITsmTJ%EajbLA*D^z0PbpWpS2zZ zz6;ul`Q|Ci`F@%xVL`H;SEnpXXRHBoXboUlSA6@3%^@QqlndIiTb+l=<5=Cm06oAd zmPIbaKa|G$q;-(X!NFlT1v(Pa1*TKJ1GAy-GTk=7h##k4$45BAx{WyOCY;#yJ7n-r z6LP}|E?KaWr_Sy21mR_PV3KctEDBZ{zTe=Bh}TJI2^I|}{!kOC7N~k8^|UapLRX@* 
z|2Waaa1JEZNir9)Q#zlS`^D6wei}uotBtZPL4;CXW9&>fyYCd_{*vFsO&(=9s=5V? zh1CmklF8=G4uD39B&%k%mu|<(YIU~R6qJ-bSEmb1t*uhg<{Wm@Y;ZE}8kcLYxe9hSIkF(3U6ON(%v z_w<@^yv7$3C>I4A+i7$PB_TshUz00EPL9YYb&YHYy;NxE=g7NJ zhq5MSkD)BA-Fb5j2F4}<-fU){5sE40xb*!Ax_jf)ihI|;1@bt)W{xI~D&RXN2s|;UF{ovtfoXQn$?i=5Hs8%5UkLy1?4%f0T7{E&+(|fU zz623%NO?KBmImEH4Ay{QZeQ3!t$buyjCe@kV2p_gy0$z`02b)K8;|?2_ni)e)+0^Q zAj$q-T3(Ub9JA7;LFtm>uEJ<(HIr6nyXh%WU0sAUI(f{t%={K zW)Kk(SAgSK3=7r^kL$ztuBAlb@2wgSM8XcM^Xo{V@6xN#Tz12~ksye5Y-B9ee1J$b z7n0y;3U#npLM4a#RaVtc`mjMH0w)|SC*U^$qtq>M$e3VagkEeMS;%ycRk|m{^}!6F zPc{D0d(#7%)YOYT_$mGM+ok%4mpY(ZL6P(V7*NFk878f8+bI)Ra~MhFsS)30g12uf zcQ!e#AsNI@SvjWR94O0Ju8}rQcdOB#0nX8?&*PWYHEn$$hn5xJ3>I4i?b!`stprLrk$3q&yk>Tks6s4~(8e2HnJ;fwEu9L-eGl4uAunWz?tl*yhi*##7G z>MB^VwRiMH?39@DZbenFi^_&u%JvDXCxpr}KzZid=$r zSsYKi&H6YdZA))s|JdW>w_FElBTOYrenIZWJDEN>^7Lvnh0T7PpSTZG^5ZYyZ+Wy;RZj!a2mV4Jmm2Tc)qQnUW? z7c2k&Hq*jCo|A*(PNj5dN=yvp3-9VULd=y&Q7%r-9WqZ;tO;>A z4tZ*)LCBQ=0Z*N6{q4gc`1XDUhVgu77jwRkyr4ZbC1JBbywPlu5#u~LDoRg|G=_yN zMi-Sp7di0+_c&dbs<<+N>wHbBD#X~>mAQx{BsP!vVNgg2Wb{|l3d@KMIfD}koBH4Riis*&$jpK)aI2|BYUI-gUMWKsc*;NC zmziV+E9KszgU&2h=6~I-v@OO(CdJ$Q=r#oxlIRy68a;a^L(;*XvG|x#(-_V)%hInq zGrCn$i>$@VT#M-{2Ku4`{)`~Ln6GQJCSg*s zzVb-29DXoQx9De`EPQ-7V}vLlDO=qf22kJnnG(LrTEE`ExwZmiW+#LGM2Ib5Sx7V> zW{Nev0R_(ZQ5(e1z!{Kh6_XS?&4fW_==X@&GNVZT5#DA~Z*T8vVQKU88G%X>txaI; zgm~frn&aPpXyMm-3iyt8CjR%?NmpN<5)ou`5Y80cY_h?FvyLy-d&tE7L?|rwbIXt3mUzrwd|U68FrD0DQQ|CpmtpKk5pNNMWq!1D8!b0%G{St7w0(eXdj^Ja zcEWafn>TuoU%pPC1-7c_L`0UnTs|9nrg!w53n$2Yy91qPhqo=_P^Z1w&MJ@piMIx$n4(FpKCGvHwo7MwUpNhUjhAg@dlU6SK4CIeE|isqFS2 zp)F1_oI8+`Lb`(oi)&zJ&}bP=;@%U45pq}oAcf6x`M$e1{_Nh((^4HE9f8l$$>rE1-T`%`T-A`WPs%sQ+Y80}B#?uwoGh|et9k^}-GfQd zD@wWoukFa>Wctg~eYS|Nwei47L-@LOE=KTccf2_{r_t`T$rfi#p4@wvT2(To4W0ro z2O*W+9ash;w9OY6ibv_1GsxnQemGI`3~d~%%@?UUXm~j%x>g~NcuS%xbQ#j#A!#5{ zKoLV6W%9l>T<@Yyz}{ZnCc@rW$x&B^F|kgH@D7!wDs^{v3y>M83Aa?NX{);>a~B+* zXMk}RPuBl^F}{WHQ05LiRG7+eOyMPHm6RprtgUMOXcBbN*BE>>+o|Nb6>_g?BFTO( zr8Rvd`FS>5QX;&u`4H|FV)YJu$g>%tWM7_xb_}X-C*@s4%(tryb>B7~C!lJ{%j+uG zX6D$vdX)ili;l|4p*jbo8Y)%a`j5bdFSidA_g~d3aJ_r>y?zF7NM?<-eSXRj235vY z!EE}nwnV0_;K$kTdz<2K%8K_5l zOe2{G5vD`Z-%Jo?MAvSi;hK zL&#es#h8HDYYq7AQMjJ$JVP6)PW^D|J^*5u65dHi(87lJGA|v_X{%Z%b81d_-PP8f z(BLQJqYX1w_YX4;BCT^^O9QRpDLvTYA-KLDwBm;b_XQ}ijmrI&fVI7<2)E_Tx zulEJ42Hu_)vQ-XvXv~)$dOUKmyWgnYl}m1MERx57 z_h~PocxzRFGAC-Odd~;TUZ;duJ%)p>_r;2d4@MV zw(rkec^Sgb)L6au=3prd^@lU*)+8DBrhZTiw#jq^c{>n!BJsxG<%a{@9mvrKbwwhfG*!Gt*$3xyo>^? ztp^kFOmAebZ_kGi^UbeAQbK&FgRW9Tnj9(~`S)l54_Pxb1PftddZ5w^Qy-`(X>NWQ z7#PUyZ4Cc;-WT2!A1y>o&C1NgBx2$Eq{eHy2e=)h`!Aqo=!#8QWSUX~o&H!tK9F$T zT|_pAebt6#skn~;{7^I0xIwz%(IU>ay3$(_#Qi<+nkUEjT{ZZJqSDdBDl!&cSNv;Z z_7zqI>-nKvuF1}q`OsOR&M*?i? zBSB-s%(!77ssm0&5;HS%f8*gUlgD`w0f#LVb@dI{#*>r>^w?)!%_EM^_YO|1+ii+} zJ>_V=Nnem2G-`8i-FkD!ns}xnwBFSQaNOONueNfv`z>XHP7Cp$UfDm}SK(`VmdjD> zX|r&4(>pIR&m{SZU+O`(U6TRFhKEt3_DSJW{+zqZ`{_R5wf3h=gBGN_W}Hv=@IACo zG&(K~#2*H4eyr7wrTqEfhNn++8nv&^d8bAiz+$cTvV1UraF_O|xiG)(R6@f(al&DM zO}>9|{2uy;Wm$!!!y6V_&c&m+oT@DHuoC9o#1X^Pk!FW(_SRktT1Qc=((t?w4(M8q zngtL__97m~+Od#^`8oRU-4Y1nN{dc&ZZ%4#2pak!1Axg_P(Z8r0o%ZbTAEW$VX|rQ zR?=pa|3+mci<8Z+L2oRse=J5u8mwpY5RL2z_$u}N03_#r`>TuN??8TbdX1{-gzx&` z(IIYrQj-NRK7n!Plk&ERufO^2jkDU~p-oPpsv%3t$W#aYDVzEqfyBQ6vY-Ei^AV0r z8QvFuE<@0|zdcD>-(GsJCDC{VwRo67>nLAay0-$5s0lKN2I)WfvAh2T7+B*V*`BuiVQ2^dn#)O_}-(gSF6_En3qxt)E}1>&1k zrH$Vu9&I={ID7||4D<9FKzW^lBKb(4-~AXs?TJ@Bz$kr<9knjvgaefuk(>V;P}uu# z`^4Ygf2eRrSNY!BXM8{Hyg+OJATJqwk%2h>&vR6GQ^YK=T81I0WmGk@M83Jy3r`>S zQu0;Soo<`ffGD`;+xv=$yTwYbL!B3ai8_dO^7HdS?MyEti>DetjraoUe?Q5}L&Gkm ztMfA`<*$Az*AtPe?hAi)2}&@Ixx<~dl}-E2mv9*ep@dxR6MzrPZSWbh4)h;{gj<|? 
zZqk)DzOsC%;5?YTiAzL`j!p*K>bWm>DqsS+6}V(o<$b=pT&Ep>Dxv;%VSj(==_#7V z{DsofcO27HeFtq13BJ(5KxPjvDv5DemCb{s#Vv~d;Zv;)d+(bFsGWs3i+l9**9%lQ_OixSU z>Q(5xZ2^%BvP|A&@ME19N=ot5ef|pfb+qqy+~o3IJH}Xct(Wm98ES`~5WR+`vQnL4jJW4Uo5hzARui3XU zY^G%u+7{_ba&DZO9_pu=W~KPlG&=yo0g0vRn}~_ph!N~0#uNckboE>p!+wF3M-v;{ zz2=I{9p&V`Y?6wfF-^uRXD4ib2^mv`V^jJjB_&DAm1yyW=YlRL4LN_u&R*?z#>uIv z?$c}V8mC?6O~xM6ACUj2=^D;|LjoEKh`WIu4UfzkKocSF+mv*CQRj+Yl~&f|p*FoR z6W(WLV%dNXitt6Vt1>)N8f|8#kRnfQt6w90RWad+GKo2l8l9wI6n{K#MTj>%`&>Uc zaONH!ht|%tUvo{tlezc<WlM26sMDSMS6l^T^t`myab_uRphh<1`rD+o`2&QIZ zN?)GkuvRqOHdONFeReFPf@q;3IZgj424nO_Yn`RXQ6SakXXy^Mk(@Ki1cf*+AhmKNNLZfvNAI>8~Q1B%ZG2B^OL%~oqdu@^*%y3gz$*LT581SLhae}7i5R?T@{FGFUigH!wA(_1QsJmiK{C}xd z{u&*iwulLCaXJN zOOmbg2h$~hQJqn@N*H}sotlsoZG=<5ktH~pDJ4B3x6+Bxf95z4+_D^x6V9u@*yj&k zuhI0j(K(8bEe=xTH3=8w%+sI~EDclr2gt)SvD^l*BnE)rq$aGbufPA0`a3P^->`iW zIRrU>lrgT}ZL(k6tmkbg%zpyLd=}SAk$hrlY|Os8^KLqk5>RiH-Fa$7La6Cca<8!A_;3A}$Jb!M zyS@mn?XML>y>%P**l2_UbZCK_%a$9-cMl%r;4&Ky!E{K2skgbmM+6RezSPTee`55s zB9Gq7nkZ;AlGLJ@M{IF6GWZgYPRRLPp5+Rp^HrzGd^KNw*)AO?7o)A{9qDN`GszxI zil3V}q*p;{i$(X@nn}7J{*^#9110}Fc5aZ+mlCdw9Y-(~dN4a@E-Q+p{uo70f z0jZ&be3?#TN!-qcBANhpy;}?XmsSZF|6rSATo~ z3L})c6$ZW5bGP~a>o|h(O-n-K7qMq+NUtUMb0|&WfJqqtcTq4@|MpgQ+d-bkG95vsp>MadW|LF9H9KNxEyV~2=lrB zkLUl#n?r;M{Qi5PPFRz0e+<%GIR99xtdd8(ImGoz!0Pe&6paM4L$l5!Q^mdJ|wJRa!bwpbx80>U9RE zo0T?Icj)Pm!@s@tUmx~HcTh7L4(B?mJb!(NpN$N4gU546d8VghskB3-N6?m6?MFywdwO}o4<>ywetvJG=e#k|lW2Lt^=F0twYEhVnC8G4zT^j85L0#)Se5ruCE zCvzAx`Adga=$hXWxonWSIhC)x1lB=@z+eOgu*k8c6P{-$Ft?fhG5Wuz^B;db_ryrB zgXEuJk~Q#*&lPu=h4@zOXLPV8u!>|4wndsqZq-jrA83%a-_fwK zu^rc{-T^m;=R-IWsL@+5<-e{(O4wU{rb*;fX$}LkZuxVcRGqP*}L=3e`2$s0T-xD|(#06C)Es8k?Y(DhnB(bqLfO zOn^b2l9raVWE^8Q&@2lEHcFGzy5)p_PVm3q$7w+ruP=9NHbK2D9aJa6Yjd$MF^6`h zDmFp*KKMjprobsG>Oq6Xo38G@P%C+&Tf`eY5pV7?Sw?tgU&GhczHk5RhM!aR{!$Fl zx#a*8^|U6dJ13>s<sNfMGFq#tkNm0ur$A3UZf{j9W zz%B0$DWx)$KMyWV5gq;{LG1f;pX`^F0k2+APldS&q&BT$I-Hn}a!9(K+pcjornYP- ze`~hD$cS3#Q&5p95!xRG?vNXSbR}sZPgN8bM}R?WO7ijoAlb;oqI<#bFXQX${0|p( z!xygFd`b{VQBj0-2{9WDj0JBz;yrr{WTpj&jiW8=H*=y3v2UwOcyxSeWmv)ec!ke1 zetSS&J9dQ8t@YDtuWikZPnMF}7ey(Bk-nEr?`9v&lVTVtUtUU&YJD)FzM@Z^D=oh@ z7qs<2Ye4+g{-*75dlH7)gxN%qJetcuG6!n?IuhA?w@Wnd*(am=jZmo&20!&xP=&18gE{me_uTU2DO^KuPKp+v8{Y! 
zNrG(O3-0DQCk`hwpx~l1O^qJkWz6AUu(N05dlWr5b8*K=#X}6-_sYK;xgw@R;u_|FnDmT8&eJ-d=q% z{P6xu?{S8!0=t#m(n|Zt=a+Sq<>rVJ=;9&SS>eOYq4&|acJ`7gvn?t(vgvxz^D-_- zEAmCdi0Uie3>Os^jyjirMf7Wnj*1$szMBd8r}c$Dj$BStvj}_E7oOqqQ$TfTLdh)s zSo=h@!)WJJg#On11u8Uf#9+ny@@b}rb(Wr;%{v+mX&pt!b6U1~`tH_~IBBj5&cOqI zH?zilGO)&@2`9&@RvM#?PfUz}K1xZMT*b-72e9AP7rKQzJS8WH|L3gp92L>IxxP9& zZMo43V>(19_W1Ga{0;BtMFXoIAE+k2IDIW)7whKZg7_Z6iF%G({S_oXd^N%3O4Hr|yRmArqLUOV99natHYaE_b<%@a&`IAc)G?=+MG z^YA~nrvZ{Xg({^Jg@h1OrO(P5vIIk`I<2+9C7nU_O^kk^FbxGH8UI2k*iFBzDTFyW zd!nMlythDWY;Ab1pL;0PHmFW}#hX#o!qS%DDtKa7J6COfWWf`*sCrhTzD1qGRyRia zb1qnaMxa0%?re41n-%|V`wh`!!G!zMq`)%`6t%kRh&Kt5{s-~8xU1WQ)@Z`BSN16- zHDA7%&gAqyo0k=AH#FN7uqC?Gj9c|1P^8a&<2EQ=6ou=$`dI&%P*QDOzg&&%quMwE zY2t3_tZSSZYjvDOS)?8)8dX{?@Vb2Ud~kM|7QooB^W2THrj`32C(tQ{oT6gp-La;Z zOrT3P;6;h1_^TdScO%odMeXrMHv?dkeJ+>-!|S-^nV0HhCT54xW>FMlD} zG-?9jk^j0dGSr^H$x^N*+`uJFoKHPgmGCpMgtDT?M{*ZR4Gs}`*X9XM36`gZs@^4u zZ}7w5*AOWTgG!K(3B(~kBF#;Dd}F62%O7e_WE}(?+?WvLuhJO3a(yt6QRdeYe-Kx- zg2Qf2()8^Ul9HNQe(Y?Iqh`jU&LIW;N5o}Wup%!f9>jSutv^6=VC~`KO@scqK81aF zJTzIfFZRA)w7rC+ZNLRqiZ;tM$sPM9Tq$AOTs;W zBT+R*F!R^2@Tc8HD61uKI5EG1W2fU(6no*|t({A8*87r-t3Kf_HYO%Z#l6CRoZk7OXe0978MOn9muRnA-<1@OFr!aD6u5N!p7zq*wusQo!3*&C?U~U%+FddCs}dN z=JoQbpFLQrN*u-YcdPN{Sdv|UuF0j81gY7Lt#KteZaydVnyA2LF3I+-?u zMb?1S0C%% zrKr-MF)HEPSqfIu=ud?r6#EyljXz2fdLC1zyuWzyY5)6)X&^gNH`m4osxIfFVNpoK ziyXiQlyVN+t0ZxpP%x@1DhkeWXOSmnput|m8^T^=+=<#22x4c5=r*3n?j-*3-n(i~ zZuzq>hvb!RN)WrNd}S+ENtGkbgGG9?!BjTo_7Y-l4K_mFH>$<8(o`5&*w~RXyE)5U z%g4ovCmnZSt40v_xh*3_gCbbOyG%Q#4F*uY!X(jZ&vO2+(@U|&`hkX$qob6 zYz1^%Mivwl98va)_@`}79Xv-0u*&;r5(@WP5j&y(2UkqPRq0CJY~P)%cRzd-s|?GF z_efUvUgry#hqeUKD9Jh}dB_fA;=lF@@M`L^+blRZNNlI)l=PR|ow~y7VBfBBA6iu} zRE^CS(1~IFD-@ZN@SX=Wa>N1K5RtLT$x#qDI@k=Q33Qs4AZQgGhUz7uZ%ZxS0wL&R z1GeMo__3|_uBPbpPFPTjJocrqaqNzf#_?WzGq%;e8Rva!w1T#}yH92u1MYs4lG&8_ z09TOb-fccG78=K9SEs3@@_4W^6iSAP;zvWhjv2!%>2B8tf)#tNJKg?!jy0H%|4c-F z1Y1T|G=Hw#{14Dz z!EM$3EdaiXK?A+K?j-WKvTA{1wtmgbagpYO1#;KbSi`}meaGUINgwK--rMe0zP>$G z=vZ(@b|+5X%;kZm6q=L#D>i&2c8+9(L~+-`D069dVg!9N+WS8?yx_7<%HWeCbsHSE zC!*=PXHKZGLfnLnqWqI=NIA2`JMV$zPpyus+&@X7^b7oHYK>BIY~MQnBBs{v{7rS> z>@{x`q&InHq*yLzq7!%QF1Av6tfq{s4tmZhZP$yDx_OeF>&wM)fOkbJ86Vk?`(8Ujnzp#CT) zctW0D&uDd~3#&0qMC`ZYOi`SxBzAzzUmt*YLEcB!%*D4SO@9<@EMia~Fa5_& zDf6*-Jg$GVeID}aS;6352MbiXPe0{(Rhobl~@x}eTQkx7&7_$i!Jh-LZzF!mNuRjykb zs3IzgG=iiwf`oL3bhmT}(yT>yinMfhmq^#5!Jv`uRze!28}7S|?LOz8|BgF`V~^$5 zf$)8A%=yeG^j8NmWl^i4)9hy#`~8yK6NcxYzw3lp(mnyLX#la;g$>ZNXMrTThAx}3 zpFodbh6UEUqM{sJJ@?2ZtMD&9xcGQ=E+CVDz3v&0mK+zFmkiE4(?b!m5(JNd_`&+( zwly4`17w{cNkMzbH{Kec7>y<)Ne|tF(34KKc9A-?!aKXI0|aw4++|cl{WM!Njcd~WkY7$=z*nY0YSu_=WY!o`x*)n6$ndW6I_IxKn*#}_r50%%qS zrkr7|uY;>n4Y`9TGipu{`tH9Q#yGWVKBekS>g{+Do2>1At*o-%(aB3p{;1j<;5YGUOpi0M z^5EngU*K*apEdvVtz;M4eq!%tlYGKD$CDY6BfIW9x~4Q*%2QHX!`n&4iG2t?IsvcM z=cVb5@T|SSVQCQuJ)1n2!f)$2fYQ?k@LisA&3*~HHT^`L&)=)xKQgyDg^)KwC2c-* zQ{5#mK4kg^8cr{3S*vrnsm~3laj;1@{r8nKta?dbm5#xF)7dMBvpUjCwpn_yP<+vr zi)F^Hm3=dus#+llon;wKKW(>}DR*sha-8Vl&+}f~r$(>KD7T;KLynL60+uMpW?PTt zncuUBs&1X|gA#RN$LBJ+u^+T7Yk)32;K^RT#LmCL(u6w(hl&K5y&bW#(p|pe5uqer zd{6ui@-*bz+IH)ahPbGRWV+ZGR7-V>8@hao2Qet0wqC6OGPx{Qj=E@$lZ>Qs%1A$_ z?Glm>aFM6kD*sjH4)7s$nrh-XOx0NFac>k=+CLixel=p(m-E-MdEdsV#kzPjNV>Rc z-)3Ea{@9VK??dz^Pk%{^GH9Ez`v@lJB972?$h3C?RySNx(RGrV9>UxE2+Er-uq)sL zDn%|HmXpduIn2qX9WbXh-Rug3J+Kz{_iZUj*@HEIAm0>Nl0(~`fVRJ%wqfPBZSnj8 zDM8_&YG!nbTC#H0TB+CRCu)`Nw-y%fREe=j@FbPu-Vf)U+T z!qcAE-G%4g1r2fW+C6PAl3R8Wy7RH%(mGHyfxR^`rFim=jOkPtk_1)N13Gw-h~=@G zzmATT;$H$YZL<;CAzKbFkX+RHiQvn&f!YiFoFGpXK1vc5f3(x zannRU@?+V%1)hU^Xwl8u;yLn*#+D!i{a_cAY 
ztoQHVdY%oZ4*_8utKBIWQjHWK4MrVa00%`yV4t!EbfaV#@z;CtEfIz)*C!nUV}}3^ z%5YQ#NhbC%JkmJ9092x^0_A-3qv~>E<(~zw_qMQLQNhwq-)&s&j7i%)4$-1;LlHNF z7+#Z$pjFI8o5t$wQls-p{`((XocX*+&v32IUz=;8XH5&+m!}3^Lu0UfR);*UycVPc zGqRC*0d&Rv?BK4ziMO#lMS6g`vQ#ez#@kzD0E`%se*nOUo*t#@rU%PtOzgQbZBoi) zo0e6((S3$AZ_yXM#6;@sx_MTWUMc1)Ez0QJAhwCC zJS!{9x?D&voOWrEDHZ4B^kobZc5lqd5`p=hu(>dhbDUS*tG!urN*89c=Vo>wiMW5K z(4*>k2R7fCFby;Zl11(m zt#U6!OlziPLFa~cxQ2y`1fWsghwqG*#=B#mz6iw8uLIm4aYdBJ{MyI3TH8RXk$SW} zn~a_XjPoTxSCi4$$jeOUUB&dX(FB|0DZ>2u+_6%L_#K9>c{DsOh~* zFli$t6)J*%r>{Dun8xz0i4e@V$G*I}65|9IQRnAA&1Jl-njo`#9B9%{0O@V=`NSo< zsqjqEOJIxcnHIZ1zt73FFpFbboYtp~7Wro2Bk&A>xF45Y7dKWv8FDyIc zWL84xH72(kJbx@FiNHXIeCbIR_HM{zf8tfR+3ZH^bEaEAVTXa`d!TTbC8X(6pR2Zr zw~Py8A{Pmas|9>sEV3#(X(GSLN|VCdLF)I3c^qY~Z}h-sX2j0#VU$tk=vIk9q4a!* zBg)*2WL1HKNWXS0OTcSGQRzq7OTE0BYVq{!sZ^e2NSHQ9Bu~R{;0%gZ95ai$1v2Pw z(?z-pq3bZu-YmLx6TLI;b?seA_A1`TjOt}1id2l4h?CriiP3yYl zj2d~_%D-N|hS&Gfad*Y^cGt_tj9MNw*h;9o0>rM`2>j7_ZQG!@ ziOA>g(D)9}4_602n}+>uU#Lk)7Aj*TrBH1sSfk{k#tHWK2uW`B2|;cWjt+^m$8F|u z-JKhSm>0ZAZuDv$4Y)gjvb5ucO7s$9gsd+<>WDJ|t9)`qU+fVK>D_#k{bGfLDfbcm8UqK ztErN!x@Zo1S9D9_)SAgrSqhcLMT|~_pgJSErmNAf=PHmzr>w)e`kU8Qb$kIx+HEOR zQBfIablGPOlF{Bq|JB9K1_%Zq4>Au)3Yj-ZidFfBq{QGmVgg5)QM0{p!$RmO8Z7jF>7Q+e<_1LLC!+wbS-q2nJpBY}u3{ihGg%Q(^{aqA03mQzY%= zZj*OZh0Mh`aJNY75s}-4?9I0CyyOFQ!5CtWZzwOzJm8RPexe!<6AKkF;Mt=3IN6Jt zC-JU}>Xm2+OTvb~m(L!@wRbA@`oY7+ekCtbt&Qpd3@Vsc)a_j8dH*xij!CH%n|#}t zBBiX?$E5^k4ghUF_?~Pv1i+$#FmdM2s>a3?U{@;!Tu%?zy6#J+V8EfBU%xcTpFyWs zt5M>fTLW#(0kAYQRjooY>gPMw&I)I+A-vjN2E7f#S+tZ71+0d7k+Z@ZKu)|7ec2As z=;Q#`FcKHaH|bBca|=7$gpx<{iuE&Pv+5wd)7y<$lS%iS6V@Ie=9Kc_hprb$taTlr zb09Fm;wv~5r9IES7sWwGZEU8;tz5Ht2!jWi%SOOl+L*b=n=I_)wL+x;|JQlW+6Tn3 zfr(q~@7rm-;(u(qC~qBa-0dGb1Ud$)@&)#Ca%Fw}Ow^RvD}tXl>}$bevWU$5f=h-U z07?cx%kJGiz$4VYUhQn4M(*}FXCF&s>64*Tg|a7))kaS}0@Sr}<2YJ7ii3!rp@)yW{LhBW~wmMc#=6Ljv9gl|@l}@#E zUv5l9nvc@6g|_Odl8>gc`>BGB+UhVwQlIm(1E4R01or?Z(KjdCyfcFIpx4S)RhR|C zqO3J9uJt<(q7g<2pV9_Bj&U$QPGmOe!-U+c)njlqrTh$|smzzox@-Ph2eN4oiL$YX&tc(x{R>$Z!2KsSr=cc42J`uboLk8+=) zd05uLSjG)LMpv^y~tGbJyDL=C-5ysdS}(9QKD?7nU_c_~%-oF$X< z>y*+H_oxv0O80rJk95!*y0)?0(CE~%G_&~M`xrvG8Ckt!orZm+Vlvzi$1ZK7R$?DGM zceGmKj#@K z6yDc7Tn-suB#G}dq>>>J(TV^lLi^ge`*A`d%VMgGtxlJaz1<;$VN>9JgF5xBA zLC7V`+y2f`!>*j}+clX636?U&)Liy2VW0E$NxZdAtrM|cDV?yCHc`oVNqBm0HztbwZSEM2 z+h)O6)7EOHHd5qe3s7pVR`f6^duwXPCof+G_?ulcOjtCACob{mP~<-=7PSjTK}ZW~5GK$r!=#TaP89S`?32z= zE7g^i>s#f)MZQR&Z=Td>PTMM9?8_9>U@jkxo3AX(_w`$0;vE>4uH(@gBYN+m6Qi>z z)FftBK%{ocUEv!3;Wt|zGIePQSha7=Gb|nt0yMGS*?N(1lW&`*JX%}9!-{7l zDwFeFWYsPG6tK3(B^?ouv&F;5>i0n0ZWd4zt{%sMI2TITt2;lBvA`B z+6CCTs8vYqViG2a7`;Jl29u=v>QP@nb1SZ6Ioln_3sEN17&6$}1<`vl8y30_qc6;N z;l}@O;MTZA`fT>A3PiK&+S-stqTeULY~btKR}wlZrv_Bj-9xwEJ_`RHd-f7L)EY&} z%6;jXms$7x33eN2GUIw@zf5+j2yPb+cINFY>F?98tHC-b#(Kv5V98EIw~MrY5s*#E zCXw6iaQ5r!CD(cL19Gp*O5SoNJGyt>d1{4gvGa27t1lA$24yhWgdMizmbmm~q!?+E zfSsr>Y&339i{+RU7o~Uf*#Im!CAIOL$YwYwFA>pjRd_ z_Jh#in@Q|UYUvN8x+n3sPpQBtkD!+zKgu2R-8YYgqmS)(sN-)%&b6YW@yp?6MHZVW ze5xwMC{tfBvR@buOlxl1L$0vBm$HOTs!sOdmXK&*SoyT#t74OqT!v}&CvihY3^)~A zFL(|qH?lW5zZ%773w|@y7`lv> zl&wa$_iauFeEoqvgTFx?g?tq}Lt4f^JC|%gU#QA3Kj{s^$4U0U~58P}gQJpw3A`5i*b&-(_@FTV!{s1l^ zS(W|#TIaK^Z9jg4wNajRYs1R3@*z_L-uyG+9bqboLKV!Sym542&Per!sp@zWWTjpQ z2FIpu2=^2+$ASUbG4g8I>QICH%lw7`74YJjkl!4I$nLz>2yOU5Siz=j{W3Ec z<0Sn01a0d$&Sa@!PB+iQ%izKb=K1jALXYhfc9Id2v$6}O_!7EG_~Lgug10=r(2nLS z3-`o^E_q+lu_pUCK&DeWT6$1uCc9)$kNbJ7aZC&3XN9*!Y+AP{9WOZ5hL0R@|sJh8(0!CJHsvxYMcDL3DzZ)KXcJ!g%)SLE@ehODvb z7;jM`)NLPwDT=6RLB0(LfheFq_Ce#909ujp2Wwr?1mgBaoHOy7l+}e8lNdJ&H#r?( zsG{O0T4uZ<;zWI!NlSKd9m+VG*#&+y5u%=d zBop)kV!M%|4n(%T&Z_IkyO;F(_&UwdM 
z9~pw@wYk21wP$E9vm0mhgEK0?=V z;cyDJT#g1ptzK`|S_}dA=SzGp+aN^98x(y?xdID!l(en0A<(urA&Elc>xk3l!uL@Kt z+s|KujSinavfaBl8*fn^N5NBkno1%j9;hAaCf^hm6(3nkmf)-7aU!kESZ!HZepu+Y zHUEZBOrdOiqP5K_X4PP#P0}Qw71SHWUf1w6*@rsJmnb9tCpY0=xEMWRMSn4I!W94O zSxBC`Z%$r{;bY0b5847>pV}lh`}@ms`_^CZ__GmQ#@^JhuzO5F?y-oZICM%A@ti7^w~4B8raGPU@9w;ah7!@#ckaYD zQuLgMdtGFYsYj!xf*@5r`kfHFoo3|=PlsN&X}0a@#EWC;w2oYnuwJUkfzV@V{_7=5 zIdtYnV$7sN$v}NOf?{yY>VRsN_0vi$Nq+OXT7{Rhx+3;UU# zq5<(CemyXLv5%gJ!WTLM!>Q2kL2F>t@mwihtm1tOupFlzBhYvS267Tb(O=Ai-n@C! z%Khb6843<&Fl#Wb#AR9&4`nZ_&R#L=C1)Aue`|}QOZ8C{bwn{sdQWu5c`@`Lb;o_> z!9$%W$(O_cwNgPZn>JFH+sDAdsLM4iQBL5o)7tt0U8|LxCfjbtmeiC>kG0_37g@Ex z|992i@KzQUB&0Hm$ag}9#eaLQ^CATVBw)5)gXhs-6X9M=$ZZ{ zllx&u*B6n{oBa zzwkNm7`($M2TJe;Q3HIyEifO+i!MR5I`(PHu~30xmVkk`h&$9aWhdy2eXhr5_6i#{Z(6T4m+aR5-E~jDOnTG)})uIfAv!f*u z`?yvJ>2I>>e?ruGnjQ=q?{UKnVD(QiIJ8hqGX8N?bLhR}2=Ch^Y${ka`z!r#n!byH zeBp8-=foz$Jg55%Vre?1WsjPYYoPjyr-a=wI1|=S$4^@*IFdA$f)wtzFR@tI? zTuOAgLj&ES`@k9Qo`nz+bUXH4?`QU!#V%@uU{WcQvdlx&Mv7(5vP{{J-nYTapt}dx z{n5~)SXiFQL}CVt(Ba#0a&D;8io2cvBuySwKc1$;dYJseH}3ZK#vyKMwoujO3Hxju zl*evD97D~U&@cRzvCYhL&lWx08o;=PH)W|_SEu&CJ!6&o;vb&Vsyf1!dGBm_{9C8H ze{RBm{t$fNjJ^kcP78w`b#l7H02-(B^e)xPWU@Y8$Thb=-IbK>W9NX4Y)?~G3kO=9 z3RV6%t_5nl4BFhL+--E1qsA_>L!0|Yk1`p{-#L_D78gRGSx_nhP4Tw&R>z3}DutJz zx&11Q$qBr6vWgafIuld8GL181|Kl5*Acx65b*^MhvJ|bT3h4xB6Xh3g{8Zy<2J;@t zP2s_3LC@$V#rf0a>Ff6Q_t^+E(cV7({cdg`5XoiCvSjgGytVmXJh~eQCKTRr5K}1I zIn4wQLxm|laO_wI)RUXAkk}vNvC4B}xGI51xT(GCLAj#-#$FG>I9udfN^{DtJ2;FJ zMn&fqkU5`5gd3G_IG&fwE=@Lsz}fN~U!T@9M57dMwO+OEz7$Rt^u~jaxk*THW&mwA z7fc@qKze0W&Nl#HN&*meIeXUxW!hBB`9p|rA%qQWU{d(|OL{Fx!;=`E*Nqokc#BE> zy?!)=r8kMH>y(p!tmuA$m~`ArZx)Ab8Ge=P%dCcMT{_Ki2vXr>GuAC(y6$yRS63~< zdp$na0|^gCy5Ed`&@DW=i8M&$Pi*zze?5?aG(^pcOt$hLbfj55A3d_L2 z{KqB-C?r}?nif=rCtiW&G>fbOmHD~bc9$8_8Xt6ra%4v)S#%d!&7>!=7^RIM?P7X`GWE-4z<7K(w_TEOT`$Og_ZJgptJ5yYacTqK#IN&J&2~b^lo>|NfBb zFIa(6tdC^@Mopa`F80^d5kZ zx)ZwWmS_(Nb~D6)a`w$_p##HbG#nFK)W#mXQPjOS?S#zdi?qs}g_GlX z5)qZiXi=$^$k`C54FYS$EJZ4Ush1zs!RtoShwtSc^e42mWG<2^z|fu5`l}$^gn1Px z*O3PUlR1aE(z+mT#qdQ<%^)ya=y~Q6w|1A0om^*92u3A$(1i6T!Xssh$xq4QCT~v` zI}B~OrXAO;761VFa7dJQzSbsNGX$OiYY!qH3Iv;g>)*pkVD^yM{1ndU|8@IJc>{%m zL(r7}n* z{Hh#wxT%v=LD>nbArm8r?DAKoYta|BK>q|NOkY(n6L?)Jh)ZBFRSS;jt|EjAzYU?} z9CrJt1iFPPk)q7LCWf5Lz^Q%C!MYUsMl?l=e0hVyHa8L5Oq$Kj+>HuoJm-ymPFnV; zRH7#(*K^0Fp!htfX}Of;KqKN-$p_A@g9V0e!hx#TzuF_Pu(sq#KG(c=6u8?+`e#l5 zsT_VS@hW8a3r(feBlT)A3w%M)gm!Q6y)qQNat`Q0&ohM#TMnldS z^7X_g=8ibIz@@~d2CcqZ_kG=~odvPZ-@G0LdWGBn(VyLq0tY1Br;|V3bhMc$X`dA{yM^%{%H~G% zB^Mo{v>R;{o?axsnT&!S;J!;ps=W) zdUuDtbnuRkOv0QH|JRkZN`lZkIUq$1}f3?90ZV zbYCwVrzq;QA@q-h7{De5-m=xI_ovaYS22~46WsUfw2arREJoa??$VYo+?nmxh0fC@ zX6zBC&?hmSq$KkMynF_SSMq_dSlQCjvVFE36H$)CJ1Get&V7F;pU%(vs{H|cIHS|x zI0!sZ4vSkrXz+mlvJEMR0_O|qyPYdZ8WmY}a#eu0>s2yZS<4%j#d{+5=|wZ(ZwurK z?%Y!mShA>Rn0WQbL}ev1lqJiq<~s4y=Gn`Fr_?;-zzJ8KKHUuy{&35mz`>D92jP}9 z^dWE?6>sReu2Dwyhr;2b5x}>hrK1};{Aa0=3|zr!@B(&?|I&`xyC6Wf(fbUoTxn?x zhu1<-zbQ)+cKlRYSqgtB%Z$~`Ez+hiPte>zWvdedd|n;u1?5W%-iDQf! 
zd;v@HpbcqL55x)MFBzmyOq}#@TCn2_tHQ;gZ$;3w?c}qGEgr3(`!0xLwQ$Wy_W^K~ zvz5&(3g?8ma(8TdXUuUX@Bbt#ulxYosHl<54EZ8<2h-Id%p~^ysspQ{j^YHS}E@)(U7xsL|I5r0GMS=I+FCJ}fIX z;LP3|22U@Quoy4N)opSe1kSwj#&#^X#4z0qeu4ksaQkYH@fzKuzH_1W@1-?>a<6KH zG$ke1@cwA!i#e%nUje}=J}ROYq_E|E)LKqUmZaOsCroqS>a7~O#&>vOH&fCo;_bN` zP@DQyy?E+|C#9L9V|h}85BMkwzaZHo5m5Z>-Mm?n1ufXCXc_GM3k{FD8r6-aPpl$H z#WWiq8uFCu`K?=T-|$Pc6xQm`%8qaAKUgi_9Z#avVcB-$4Gm_%!KtUTImFpR-lktHpC3o&lKi&>FExThUevy4XJU`rmBhP~N~ z^O@KBW48~giv9O%4l)%t$CkVmMS)e@ZW=fF_(JNr6wVlR(qJS$5G5NpaE~@&4GDqj zs!^-#rFS!6H%e?4QQA#x$zeuD%gJ2z-<=ejCgjbuV7b)oGKp?krNfC=o<+_nv8N9n z+&icB;XJTo^hp+kZ3(5k(BWC2y>cf`v-jWCz!;=V)?#j^oF@1QYQf*tKsne-PIO+4 zJbzDZdpv|<2c&uXVOdxDC={S@IHnUESMKa(&rXpAk67NKTe3rWYt_r@Fe%qR#TW}O z^<(7AXb&KxDa=hbqcyB3h~?$wr3nRHeDcJA?bhn|b@7e~e;(s-6Xb542+cy< zFA7&L-~O0qF2JI9oFTLK*YUtasGTckCD8ngTF)~*03_VQC>%3_ap`PgysdfT~E7O zU3gGg#Mx0zLosq?2hmSYt|i{BAocde$v}~VmoPz9lMf=YMK;{#ZF}g~th|lD_zQt- z!kBWZ`(^6b9mkxY0UF%4!~sRMb&}+mt#+ZOTL2|#{eIHx{~)4w-8G31f&F*kcLOPf z7HC}MA4(G5{lh-KfmqMwJ;v>eLaMp`>Mat{Dx3d$opG}Lh?Kv=ABFTpYZfzJDp1Pp z0qT@~A7V7!-8GRu>>(RQ4cvP@vVCRfH?>(N%#ZByhB$kbgaWLoT-AT-?M#Th#~`NH z4VgIaBkp0#QYzC40UEF7j>?#*x_iOn9}n*zBvJ(956ZAtwYM)77+&QXU3b|mcvT+R z9Bu1Di5A&!_Hm9Bnd2CQaAHACgM)*U)vd!H_iJ<#M%9`5){)wAr3{a&=PH-4g!Nk& ze^(XS7~T8u`sI~E$v2%DkwA99t5>b|S@Lwds~hzwFv3%vMPNtb5nF5an6q&KZ?FW2 z9tkwJuvi0$VO06U1sgwZ{e3Rry02&PZnt8aJ*BmQ{&U#qM18nU)gGnnZ)ixfhY@`m zN_F0cddrDU8apVOYyQB>ei@4Z3q=9Qf7UZSIKDP&tC z9BhV4*}|L;G2-C0+E*E>f%hm-VR{vx$n@*JPU+1WQIRmpabBqDND4}^&4X8(Y=?Jw zEtqi}Xz@I%v#)&~Zj)%u?zu_a$>3w$tzoHSf|xF{!bzqq*HO!dE9JN*`Qry9EuByQrNdcXMRVf@_I#YjNHty+VH`-?RNwjLNRFlCJ4;KwuKKtTi=byt+tL}r>P%mMj{08< z;(HfCOEG^PyT-S%ce%q*lxO2*}StjN4yg+Q}h8FzQ=7rD%IxjU8um7ZhXo#k?o zWBOs{gO|~C%PE&nJ1h5%?%Qptmv;ino#g}}48z!_#ScCzVaY&?E}eHNIINF8Vk)Yp zpUaME*y=ajB{oLhMiQMcn9y^^xqU7(FqYZqjlFzM<2a+bg8|c>&oPn;EFMjF-;=~= zF@B)qMnB&5chnBwTOaU*ZU`OV*=E}${(Usov`FAmf)EOr_fs#R>S*;6zg;?DQl-ZH z7y?;tN1+xlS)LmHE?8_#C@Ef{{ziJM9mzyoGDG-eY0}^iqi)p{fs8*m)WKFJmHS28 z>XO6Ft7)rUB4q*xpoFidN#Lj*^@ai&v^E`n@G=Z(5Z)yZ$TEMWUs&zZ7$dLU4Ae_^ z$VrhPGLgP}E<;hl`Oa~P*YUMRhkXylHm~ zO>YzZA1%f2t}&h(2!(s*g|z=wYgaw?hH(4dYN9UV`|h|kQGG&NotI~bIavoY;8iM? 
zZ22;|LU@9!;USI{5J8voOiP~%f#}mc?1Y=Y>Q(O&v6oX-W(Nnu^2$KDJ~X7{x*V&< z`cyMbT;^7DZ*@XlAF~OS+fid?Ki)z!=F8_3HV4Wj4NU=b$?0n6c0kXFT~cu@(d{hp zowYh*k^we@+3Y#oskRr}nz`B0OwvD>OZ&PIy;qAY%~OTLz%nbe5>h;t(!ABj^M8xu z+9{`gQw{#KQuSB>G?-npT+sa0TK(L#tXQT6!Y2R+ziSD>@opeXJjFt3&{GBPrBvdRSlo~!9=U}W);__MmgRo<92>Oq; zUyqR&uDf-ZkQbQ#q&>A~W-u9)kkmghy`M{no5vf}(x&vQ3k&U01i(_e%4SiF?=Y}Z zFrUe;(W0ll?+J$db*qsipGnX=z8V{yLrZ2+1Z=uT(gx!?Y^T@30*n-z=qBo3LM5Lc zN5#|E-fHSl{_;J~`m+tC^c2U}tXfLWoyB)2GRF@Z&Z&q<`ID9Fqu|UcqvN;)dU3US zZHD(ir+z9yA@t2U!Pa(RI*IVc)4yvOe3Kaja78qVB^d+Y4gXp9Q$B(gM4x=_pqL-A zZg#LoU$*M@Jz1Ch*4XZt_mY0n{K!^kB?@LF)sbabI3<~xMy?KFr=A;EZIWIgk8((* z6WZ4V&*2#y5Bc>R_`E?>#;}%ElPHigu!j=!B?)DsH-UTtb2C0r{+|UF*HQxh&@M|m ze^ili;W3im>}A(N;a3^6KGW|JA;$NVJAYD*PR)t2sY__dnDYY^g(%E>B_ZMU_n)-( zGs3XWX`D7NaVk0@{3^4zVv}jw`$?M@kd;u$ceV^8r{+yCKBpUH53B_&mWx;0N2)WF8&hEtzr z?bre-PO{SmTsg0VXt2iWNPMZn6SNWFE_=*zPU|vzC$;8Bqpu-X;?Z4)IW1e##4OV8 z*-Ly;&K^l_=j@qC$nsPed!I_wk6=C%r3_)tdv}a~V>ubf=n+ZXWMiISep%&t>B{%2 za8wG+W9r5*_Q`wlz++em%Mc5K}q)NS*MZ?f{sYDR84X z0MW8PoVl=cfEWIml%>yqT3M{h!3+BQh9HaI=Q(_SGAxl%jjG?kr~6oOle+jYpi5Qj zy$ROh{SY!Kx5;kih3$A-f5l|+_}t0~B#d;?6gku`>nODlwX<_M5LsN3P$C3 zx)$Qnd3Lrg=ui__`NLx9AFs&?az3Vp(guIezU*wLRrx9riCtM(y69eQ zXz`l9GkTiS*P_}~rzxg8V4%s^_yggEs_J{n8g+;N9PMQAV1dn&x??$1EzA3;rbtTD z59ql5Krp6Q-Vh#N;ZxxP^L4aN!ic0nrtNpVRTIMBk}lo1~7mPMMQn&KyQu zilMqF!f-Hho_mIG(`#P7ZAo+geiZXNE%U&?#p=(4tPO6lv%rC#|L2Z@A7rv5g{b;{ z>~FcIQ3*9it#+ZmRDN$j6<;J-e|5qRf$isf$=Tfc<=)SSXd ztrMK=_}hfIPgjOy5PhD5D;*0MQ?L&s{BPO&e|){KOGqT-XG^vEj9~GwB*23S*8pcq z5dmOdYHxhOFvpf~@`HmSS~(U}Wh4F$7<_uQ>?c*{usOnr=H5O5Gz1LF))=VzKX2>*{NT+9H`37aCq4pG zT+T%#z?vBbw)B!OUc5MqGP(x@wl0n>ePs4r;EbY)8jc4vJJyu}P21R}aa!u&mMA)H z=~3)pg^-On%9cQSqflT^L$kSP=K^kFf>E^klrJbf*rAUCGC^F{yT85o-;MNVIsMl+ z$oT>he>7Ctp_MKS3kh)*d`FoT70mD7y(0_$_gKU%m5xNF0;Mf4~bg4-dXjJQjbL zY~Y6>=SqXb^8AMB>vZDYSD~YKU0j1 zbhmA;heAXB>b`40sz_R{83)|`CB)h@A?i)8)s`*alPpYu?RO9iM4_ez6eEU-vu&%Hm$P~OtaYIF?XMOu+$ z@|#%_5uFC|lb-cB6t^_VjC%*oM(}ale;Khvj`?|lSj4V1`}Kbf760qq{r#=CvSfyi zpJ^B5YMIeXGE%}9q!39m#{r%}(WGB8hgIIyqJkd4(}0S8C8 zACoE7Auw^q>d>h$ya$Zc1M}okr2v~ls_2(qx=I8%N}16cCHIN}M{XYY6->`I4*jms zB-cjP&?I8&@)<6xL{-69#);-~7V0POPMiwMP9Lnl+^`?$2h@89zCXOvf3*Yu;0(V% zd?=tAuoS8a~VH5 zxV4h%ly-lF%)Au%bc1&thf+8L3StW$j5^>4?*M1u;Rzi8$+8&;eT|8a7w4F@SMEO7 z|5>?cl9411?A6(ARgbR=CdEzRPZP!_JFq7RjHr*UvpRK1Pl;_deJ+_>+TjQVIEn0$ zMS;!}D-uzzbIqUnKL|>5kdSswl7V73jZOhv|Mw3y;Q!c>;H`4%AWq2!UV*eqFm}@} z6&Cqwh`~MF7KBc3v)hJCEn=z>m(*Wr(y#j56r>8-Z%!~36VXpcZAb^drYAkjqJ%>b z1^H=1E-qde*%x>oA_mEMYFe96sT^5>TXTS zO@E~6#lVROkIFXZnLK^Lv7m*$&Lo2t`GP0d$6UOR9L~cYi&M-PH{E-vCs@~@JNjw{ zsN1T{M``KxzFKpR5E+1GuQ-(q_CF1q{^NxLJq!3L=c>%2(?)>bY=K^jCw*GR_y%PG z8a?UO@aInJxWgzEDAE8M8(UBwzh0x*`;NsyhnDl%ovN{_37cSgj-Qgh3QCgn$9t!? 
z*O$l9JRYtb$QPNfqzZw zaQN-bx}eWoJ!4T|OuBwoufzx$MCTTzmx4Ga&Eqp*aWY z^u)Usc=RqV8Wu3HYCS zZ{UJWAOasACS)~5-vTFK4pr5NH*9(fKD3L1@CtvG#ECgSf|^S{t~D0G%xITbqQ5X zdTEDCe}C|JQlqN!mp2ObCQ}N62h|`jwUYsd=Rqr$yn?9c?cQV#`5v4Q0*;ccl_0+V zxMTn4MNV?d`@wrLf7Se#v7`))0PA0hB{pzz>mYI(7|0aHQk@=ZN6#Og?s3J)YO~e) z4tHc^Y&yWaT%s0`WNliClI~oy9|D;>rEzmLbFPI9(oN$h;VKUCeQ$0ZyPhWfZS=!%CM zy{(j^g5fOrVEad2LZi;?F5B%PjDh)cq{9>(|b?&aHIJ%AW%S{uC z9goo$Y6V~S;DUi1T?bA=)j>BvmsHW#PH`6aNpcfpzGhl_T=0qJQuw^j zhY2-o?_$eaj%HY5c#AwZ36tiQ0`4WfeI-C1(+1(H7rnC+nCvS8EDifVr=*;)>jeGn zS5w~zK6|7>{baGB-;9gKvpGZEH(g6^P)(Ri_p?*=!G7$(Cp(u`gPB`q6m96+CcYh2jsib0Ul-RHlgYLf+z z=zgkuc&nI|haj0v%$9IIZvzxZb;o(!0=|0zN@d5+v)ir>I8WS6D8LxT%nw+XOC8s=0k4)dH6^8UceJ5;#t80%0fko>aAZP?NDpx38WK6o3SMoM_vU<30#)P}4n*66*yd zCCO;4><>rPhJims(ruH{lN4LjlCiBlcZ_wWqDffA$G(30Fmi%UO|QTfZBZKYexrc+ zprsur-S&_6N8QCs+$W68qHk3APGM9~$f(}Sp5}&kUg+p>s8cpACQeWc zJl$#WrK`aPJes{tPo29ha0CGxC3@5?!4|c6`4s*1j&{t1PKx%-`BhgxM+4Tzf;??# za9&w>_%S~6K+`+mP!ywll*XVY-^ev6;p>2r*|HZ~D(EYx@bOO%`tM88-U?xhO5}Kb zRJt;iWt5Z+k?1*nmYnUKDx~f;@@r8+CPXNgrEB2wbgAdz)9Ms>LYg$VAhH3uPb4|W zsAks_w6jX2nvjq3CMkW9h?D6~wjGK`Zq-gbeIMU9m2wZCJA|GarJN=@&o5QNgN8J- zudXjm@oobmF{6*9w6fRr+7csou)vSaU1Nx!28TPtIDVkD<3W(KY7}`(fv7QTYw2Ce zi!CC~;7@5+p*zFYN+8&Xv`hNVRUo{zP1o?AFDLx-IpeUGy3vMf8dEy;^^yRQH!9G(-+ZIqwQH3Xo>U9O*RgBDX83AZ{6FWn_Hq z(Gh0pX#2<4)jOKnx|eO7Ld2Bo?X0Ez1KWbaLJQlM0E3;>AVHPFi^x$Tl*Okijxa&l z$&dyrb4XMaDQ41(-lJ(TM7Pk)q+O1P@Vm8b%zQ1K{3#gpzdV0K8=3<->C1tOwL?`s z0Q38wVQ4b8tf3LRI}d$iUpL70l*-DSd6D<3>jo0dvH@qLk-3k z!F=&&XRc?!D=xjeppxcMqpbGZ9WAk+V=hF*mr%B65VZ~v-oV=r|N2XpqWDl2EE zc2q?{F*4{vwlP_zm1b1jDOp@1w0CgOaG~fKz23CYd{jnWY9k5wKq|_+bwPyI7=+wuQ)UVTecmNyu(D>Hl?l!y?7rl!#SI>$BSNAhS7suvLlixS}qeUm4awyMz zUvZh&Q|J%Be*AbvqvFNZm)iWGo!sQ&@!aBlg&{81syN$cr^#vGwro>YIyK$Ox@=nS zU$tCqw3zP9HOa#CZ~q0o|2Yl}r9UJ=KQm5SPL%0WRA-2)dxEsJY8Em9fpn2asD474 zJsRB9A?VW+;0=WXN`*W)1ajVvM|n8^`1-thIZiGC*a;Byf*~J5%W1Eby)K>I@`+er zlN0zS51r-t4r9VvXIQmTe{}Jr}>A--U3e8jdgH=UtU?>A;Z?sOG0l{}p z`KyjSmo52}Z9T5PL-bb?LU01av4C^S@IPVlWQ%)amAsJ6u!V6J;}zhhNrDW3qSdjr)~KUf@&9)Kshc!0$jT0S4*5{G;K1*dv>WS z>;eB+LYs`oDZXw~(%Zcq)3EV^FGk(A$LU#Dh$ANGPc zoGy6!7&d95tG}7wS_wQFIpO6I7x#k)8(gMw_Kx{FOw+hS5j)!s8(tY^O%0AKQRIhP zIuYykzu9kbp1!LxO(HTK4yEdEHo|tqQ3!-ig?_x{~vGS}=shdaK_ zJ>36~^TE0u{m1#GJ!lRTTwJ*DB;(d_89$ZOl1Nr8FLgyvUl55m?|T3MBA$P0ilMMk zzQfL_+sJ7|fFqc}hg4OFuwl>r!nzr-o*aC=LR#E`myVv-b7q|)``U{i0RkfrR>9$I z=D~U@YQfNY9>-=6h*zRw@e>-4({y|h%s^gTeilO9NQSZ)1mEi{EkUKA zWsY86Xlbj?xzC`9t4~1sL!iDdI>#*}?QgR4)v|@KOA~%%0u@$J18{r9&(~J79mF`nRWK#G4we zMr(9(bqaz$Eb-BmB~45j zg%M5;Z2A)82-&T%4u8=h$b!6#{RPQdsnTlf1}$_TufJ#C?Z;#Zj&Cym5QO5rG1oi3o0_pzDRfMP;~hrLZt-fZ1*-n zcWh^ZC$@YuZq#d(KVjo*NXF(?GW+mFBKiXd@x>3|kUThmi+rRp#(6w6`AW((&$7~K z@s@4|!1fO{dJ_+iv>!P{JjIQ}-7oHYTte6wms?|{=BZUog}Xl!%p{ zG-RG1#l}%gI<;C42`@hip%J`0Vipm>vN*U<<#H4!cL+Rh+jA!6r@B_Y*Dn{{ZSXt#9oz1GMCE!voEVt<+49Zr!X*EVmt!~zO zo&}fX1g#DF;wTQcCgx<1(BXbsOYeY-7h_e*^UB%_RBI_+_Ko~X$_>#c=G&8esr^T@ zGWiJE$=mWwFXZGPY2l$PjG%xX@l*Y)_uX`SPAj~Ra1EcZ@}vlCAv z%+V4Z^Jy8#!gapQ%W_wDX_i==($SfIQZR5c^TnK zZ01j*#;6}HVV{ARk|;J^u0D$Cv*d1j+`}`iYYzx&aIfT$C}!&tES%H>G)_*N2_decz~l-udMH0 zuYGLer%yKL_KRQIbDTe4;I@9en|9GF1MRkQU(srYSuUo}kAPgC4n-VbNnlvrDeL=e zAc*OzK<(#b@!MQlcf&gBA79d{xG+srmG8UWK(y29q~%@b^(Qs z+4@|!ybX$_J2MbW%97W@$p4mGVwrBpXGkuU%v#-xdJ^4 zO9N6B@XvWddc{>Av*O9N5Nq+JKB#1D*n=M{WmIbOE&K9vUIg22L9PO;$h~#7nuVs# z`^5y33l_42`F)=sR@E1Lt`8Rv_iAX_DXqhc&!Q(OFr1B@K0F>v>>skBmpq?+R6^kh zvDD*-i%?mW_9 z+b`jmaPBH@Q|(~(s>y+0&Ij^w>>H^`UtSVoMz{eTz7Thm+JYqyA!nM@(3$rehaK#@UFQmlR%*IgCS`ub1`p=--zKcfRrKrny8ply zSVl%bh|2YSHHf*r&{#n!!Nk8$b;L|#z9;)!Rer=rnerr+#IP`ZDB11MP1ii(BCZta 
zRZ6{eF{qC$9h91Pmktb&d)6$}ZooFqsrE!iU;S^Sf)m3V_&`$gX;rVRUN6N)|Fm+* z^yL-JI3w2IYM?B%FF1E33WbvG<+yvrsb;3 zqMWLBu4%f?XP<1V3*xukUMXt$^IZKd6OLk99K0{%ljf$UI{)%+D7Zhyd5nPRii6&P zJ>y(HxJ;jMNBkaZ;Rzg3Z+o(}hP}4|`yOD5#C;PipVwIE3N>K~^a@Afr2}Ch|4bMB z0;;#?GMzxb6kU}XL8X~g<5^}s{Aw2_>^b?$?m<@^JNPshAVm$QMnOHD$MQRJ52w+^ zr3h{5QZ#< zwg=7E*7yxUyHd03*;25kQr`4zWo-(RG$EQp-XA9$TO;H3wm88~Dg8TK^U&&3+RVUu z{)L1CVkTAsh5n1TIj&MENb+hi*Y&0qt=Um#RuS|8nd=AhR~)zUd7r74SAzixiLDv5 zXXh!1?RB~OV@GR833^W3*bJDaX=rLH&%F-h4dr0PL6tDP+sVkxvD+IrXW9>tzPySV zS8~nZw;ma~{7l?4t!iY>7brMsmooEsAkw?mU!+3(JUV*oX}ok`PQFfq&$T~nbdm>e zgBaV>Z);c^VZRQ)yy{E;#A%Gw^UgFqqjO0LE?viZq?ol-|Kg$0sTNd)^Ha(r`u0~} zy*1d*GqgxBpB+4*>Gh(fq;_H}=km_HaTe>VHxc(Q7u4^@(Nk^m`|vwh~3w1Z-daKXaUnIoS>gDw$2UBquKLD ziG@Gn+j*rjYz&tqx^O4KeaUwGll%IdA%LhQFgwwGdp2#j3XkH`qz*^ErhdMk*?MAK z6na8{pT8GrFYnE%;`ihVPt($_mLir!85dS%y+U_;fd{A+FKs}`?eZ$eL;?N!Kt(I- zl<5^d(CD*BgwhI^^aQpG@%x#KzQ6US-42Jc;RgNg)#n=dt9M2R?H?LuyPd;ttM0$E zVR>xD=?L880ZvT47c_ss5FMzfoF zPgvIMziqG7M7$g-FqCBxeblSMhkY+CdpUw7n;&c#yEE0=sLJ1nSYS*uKWUQX8hSm* zqUy)6aR%%5QugHW`!k(^5#Ouw8@|2~$8U}|jC$=@?sSBDO}3hR{qjNHPu0u9l}Yw8 zik`#P4OsdTm}+{7u5p|-++1sSdt-6F-khMR1C@}lWM?fMDLbP&V(V$Ob_&u3-_;>H zNOm3Z>#SsHBGanz@IHl;A);?5y>9EW@kcW)-7|)MGpC0=X+-{%c;G9u;2@{$tlRQx z>@2qInv&J@koc35u(Q+M+F^8lbxI>ktY~xIu9cX?g?jV55rY$iN&-vub$hB|ailI9 zXGq_8Qp+8sb}ZC+>P4+f;a+omYBhxD;hkUiB+Xg+i??&tnHer>S4G%-1HYom)R765 zv#;v-(DH&93Kr3(V9mt}$9W^;INL{=ow4<|gTgOyA8(5n?hSf<^ZU6DT?3NFnZA>w#;fNS*-?yjXB7fZf)3rP4W&_4hS$|pS=>qqP%YTzlZ!Ix-n^v}wG7!t zMdX}gV$$*uVfCWi)@!(SjgX|=Z@L)Q62s#KQ(?7TOkIzU0uRHGG?05&D>dPbq&D0M z+-p0A-Z@XH&s4CW8ymk$T}*@Fav>VJ8cDe3{@;75lOzJryH}E{dNCNTP}jTr;LR9= zcW!qrO^k?$-X@}Wl8B)K<-nMB@Xz(sFCpT`$K_!XMH8c-pq29*%+?riJ=^+Vmj=aNbGkhHF(Mm#%(Rw_}+RCR(MuL(3Z+5!tBT#CVEI0jeE ze=(nJEsGklv=MU5$aiTCF> z`dfuQdgfq)f~U7dEt*q}m)l{j=G7J3SMw7Zr~`}DrUu2ix9;@_D)%t=IGNw+hsLsn z;$B_*k$JKaj1GqqzI`_vRiQ{f6QTKeZu@Ae*TZ$rYpIH}glNuAaa`-UBbd`_H8gr^t-?6ffuyQ4T5%kC_gPFr5^+;a=xd`d>JHrAL&lg{Pv6fyxhMezls6)tX=Ei z{3A@&vm0ZcW2^f~H_&RZB>O3i;OD8fMC%oCl+2{%=hs0;jrVnqQ3PJ+9M{q-EhhFK zJI+sp|7^3O^zhLW0@#^Y>ze+{-#?uTEmIsJL#N5dn}uH4h+?aCl;iiCi5c;w5&Q2> z{%?Z+8<_Fik(eVn=-;C{G7pK0kdN9(dk1r_Nj1fYHeEYeGSf}=Tqls^CdZyxU4gj; z!+dqsNd}h6JCWapa&&~J**pP%(}2m@3qdRiHec4j$WagO%1lZL2YrqM;eX6W|6xJW zt#E-XT>#Ld030a$+0x?l5kUI9h$N;L1;8>4u8Tcyz3OZVO>Frv=cP<>u8CMDVYbh> zHEuRUnr`q?_8=r_EGTwo2P30Yur$f6lIu#hgB!M6hlb;SSODgO7(Fj&4%HULfD&pD zyhV)R;2nOTTW*(P*_Zdu^;#)lv$!@8oHttdNBvNtMpKZ>Ib=eGiN<7dSyL0zCcJnt zQTJ$PcQj^cu(U1v4%qyWWzItk8euk5cJ;WT^h!R+*Z-g1!7p3JjQSu(A>pFIBVkTg zq3`3w-hl#A1l{n}_6%i|kF*lnf`Wp$Xi9OXU%zG)o_N9&x0$c1+_!`jXO^#E0Z|bV zm?=slVaNblrl+SX-$>r~|E5~m8ZTUVshUk~9cav9FSm__Q|ChyI`#pDFaz>D4{)Y{ zjN&ZIwisS?*Y%wjpLD5@F;wuaot^$=+y28xA5D4jy8Kck=WBbEx^e?W!pS`Wa{&jZ zT4HtLaqkCxPK$SnK>*4w-I1=5S3;oJQRX5=d~A{oTW(0A8qEc@15s(kS&Cklm_tSA>6HFbG^A|3W@yxVZ;5t{a28Tg8>U6Cd${SNRrJ>8f^P`V;|J9--^EU!QpFgN##-nyK;E`_%+$#Ek_eU zw^Y{5ltruvUqxB7-CS!5Dqk;^iGr#x;UIrB6RNszJK|0$cZnOdQRRm{+P2cV7a@L; zH~I?)=CZ%PC0pH8w{f-C9+O1#pbt$3imJOC z)H19hB={{n9QiX3+)ewnBs#95HP`@62-xJdR^i%vhuB=rG2xL2Pg2kgBs<#v;FvJYJ9tuinbt|@x>Nat1NSyW-< z-rce4ID;dx#7>LE1RbPuXHU#oMq%pblABSes_%*;xTExf&ebvp#QMylDDigWI0mpH zX)jisDwqN5EdX1$K9Cw4@DJJe-hKK9Sn8M4`=5{Ew2Ti(bdCteE9La#k`ZoJ+6<=z zHKIm~y!}~}+SAi8{7a@xr{p*so$0NFLudLj85ajF-jJ12I}{{m^%(k@xQe51zL+mJ zm9a~Wy%~qPvetf$tG5n7-x-lA9R};)*WkBH{6F9OpBu(8>_Afa{QXk<&%Dv%+a>-e zMNJy~=ITSY1zy!D3HBNJkteZRyl$dbbDXL$HTqHOYg~+tq&e}DeAT&fyZ&Ldm1bC& zDfoP(!T^unoSJR3`Hol8&FgG_|M`x8_z~&CxXXb#!k#VBs@HH}PHiw-kSBNoWy5>j z086I!3TLeD{&N(Ch9 zdaEisxSv94R6;1ZKo$!Q5JxYH>ecy0&ELWQw*As;Au 
zhnzPu+^BpwC+|s%J*S}aD-8df-Vv=ttMyvfZr-ZBjX_I5>son)I5}Ichte&!UNmFp zkoZ%j3Tvk*O)zw6OfkGz$Zb20??3S=-5!@Dd0soxzOLKn?eh40Id1C@A3lHq>t2hl z$5*Kx^xb{^o-fihbDu8bC&t%ccuM1XJ(i!Z7{sCbz9o^DZaUJLpKuXTyZ=Fh)ADYw zZ4&b=wU}M`YwL3h-?`nQGkNc^4`dFVu|~k8yB%d$FreP|glbBgIzkZz|Q)KYkQ6`rvO^l2~M2XlSwBM<_q14=RzQYCuGKQst=K zzXY)?AclCr7xWdGUL7$#_AfV?_{&yO0V}!=vJ?t80d8(?=E;Qx3*o)BPIMOpqBa!w zT`QP*kQWz66DeH^Q6{zhV+Tw`y`)v;l0O(Q7eGV}d9Y#e2E^;ZiBK^sP&bx>E{oTV zhvV9q9|}@3VK~sUB4?SQJVijU50Wf=2g)2yV#Hg9NQ0053u2TrFUc zoEQz6Mj_{IiK40qCA-?<&-Gp66iej!33nmrG?6`C&3ZB#@uO(m=(_QId{qbH1}^zLAk`p zpYAMW1Dwg6?2qy9lm3`C@`k;&B~QkXaw_3};b{};Yze641wP%Q!wEZLxjF2dxHw$d z>&g`hsk0oDBzk@;B~rHl4C*T^WwgD<{i%jZuod%MYF-fJ%i@YxUZe7+7hxt2Z_pPc zoWqs2qn}D5V1-)(6&gA7tvpBpwSYn9U}E|J^KtbNY2MDUQ7{hV+rgTj3u%~s)M`Dz z|BWm~s>_riY576--K_%l(HGqxZ+|9ogAgRXT zt#?jLk`0&7Y!51UzIr<$WZzQBrLgW(^2*~)tbEMn#-1D6j~x<74> zzlOOvI-dzH$%bHci6V=x^xHrtqMNovKt2-5?*`4!WckqT6KR zV`I$$KDRX_OJcfiKq5ej8-56xS?-vWNqJURFH5V)#FmxcA?wi1TP4H3UZh>Pu1UUI zQ6~c1D~}9OTqdxSWnuP@Sy;4}GpRYxiMQIaKT8@o-su6Ba(k_p_h|#B@{PL0tN%n8C)cS-lT7_Q3b~TI)KCGe34- z$fLKmRLPiVxLCMj?XD*}e3oe?S1vbt|?0d7*KOqn88-C5Vwsd7&pa76EjHTVlA^Y^d1<{D0ePM|86F+s<8&uba- zSOMGQcyJZT5zr3g;;zbVVv>~Y>)D0YfJHd)b_}6=B?u`ctDsN{ILmkEE2Qq_ z0j?Orgopz^{n%m;V_jaSY4pf?wY=@ z;rr3i(Md*RS)Rvd6!k8g3pLVk0j7Q@Mt1}euu)}HLL{&nB^FtJK%PNKD8kFz4Sft2 zNC1(+RjV1nqA4!X1cx5T`ZA$Btl44Roh^M{lx{myU$i?LwVz>i<<8fy?RP0x-&hr| zsX5L+e;gXHpAn_tn4L13Vcu3R%C*hT7dZzb#8r9|f;_d?)GWBVi^KOba;}i?l+EP! zX=k{z?WqpCv#xxb$-P0>?HRP6v2=yP)uTJ9Hhk*aP60VxtR`JM3jWe4G@+if1Z2DQn$XmB z)_+Y>f1E_0^W@+5haP0wuy(ks8j(6=$Zo72Sqn55G^ZGj9y|#!|5w!F5? zRdulZ+h}hxvyZUor}id8!}eDgGaAP$xMc>xqwrd!;SBR-*)fRemCGC0*0# zs@OfD@%1*b?^TdFbbzneJ!qCL$^7iq#KV0nQcXed->$Bo2d7?Ptc;+eO=R^6d4%lE zn-bD#d4os-()eD(3H$*c%|Sq(n&P$7YmF03T7e@n0DjbqDCs9BNzZTcfLUJz(iO&) zy9Q`9Hn_!IK3A)hOL=N@u*`Ns@zlR3s6Wyr8w=bJEt4e>p7a4fZm{BnVzGzwPyQ0D zDu+)!InCPy@OF56{}V_RM0c03?Q9HMTdqttZa^zljD(9dKt0)+c>@4X6BPG0!`WHL z_1?>5kG*Uf#%t0}ubjI)0-f8ogJ4wpI3+DDR&oiRN)z;w*A}CE{;eqbx!(-V*$ny3 ze4S_*1fOSX5M~rY${7%%xfRP@xp0XZ+9lI(@n~phIDz&pN^%1HZT(gRWVM$@Yisua z;VmvIDhhN&2D3o!7%9_5C*oowsRIf5Fi^=G-zGv4P*Ph5bcZ?`b}P2;No;R%{55p` z&4ru(6fQm-_jiY>qI3lX1xrBhuB2;3@UBDOPGAyJ)g5q?q+Lmnmy;`ov+;;OC`!!) zYc*UEO*~I~`xHmIyhKs`oSz@=G!gCaBg0?WhJXHg0Y01txxkG8w$Q2NXZYso<<5v>v*FgKn3;(4V{Fcm*!9D-ulM)>Co4I-5 zHqjsdmA_V+Kfh7s9Q+nu-J)NmUH`mWeQrX*tcDY4_74m3r}vqD8h%U0*W~|o871h; z!`%L9SroJJhc4Z7E*7FY{zvSGRrWyWOu;SI|Mis7gK68bWp&E`?PWEAeu4|_NA|xx zu>c;bMJ4phrMx?8Qcw#3v@yHF*%5SWW_R zttBrn@1oM|8iae3}SY@2^A9S=gFRbWV5= z^+LXRPWi`d)^P!pjEgSd1xWF!>eyU>FfiORAm59sABXkpm!i^{9Jm>{an?>`5wL|X zBBe)&PTvQB#TjQCQg=&8PpsVkgd|>To=75rJtNM6gWr*#uJ~_>V8#u^wQ>V`1nc}8 zH!?#`@qBDS_y)%1>#-90vI43C3o9!;0lDXhja(&wnv(JxSH-vS??g$jLVB8gez9op z$&+KH)M6@SHvQmOJSp)AXzxXY#cpyza0n7&ILWq^{tZqH zV4o&J9={4VY{K$}@V^~EpA1qL=;IwGBz#3{1>hK0n!TA>Mm#tWkE>9&y62j4(+BXk zHa9y2uRIDV)d6w?sp5f#Fa}4^Qc8AJf!o^prIIrVSKJ3cn^@=k9wFd;%HYOp@DKX8 zI0Lsd;ClwS)f+L;2-^=9mgiJebxmBkz&f_*Nt%P?`}D&>%>|Q_1!M~PUy{Vj2s3G| zg$c$HXbUF)w1by{S-h}w$xjL~=h+Yx`*z7ibv~hVR|dcX~h6VM_ zzlqa-d=nYs;z>E);2a+FzZd3x6n_9Lh>X|Ivgef5A?XJREl~(08845) zX1#QS^^c44*Hx327T3#1Kp?2_pCdKoM(pTrYV#wI3-VcyEa;9CP0sEDCZ=9$HOzq` zi3bKPM)pET2|_j|zmOMdWVAz{H=jOD>(!Z$sHB4Wz*O8J1t~z1%HR^$UNXTXjTD=! 
zeKTuDlu3ay2N;ByPZ0h~@E7mK2Se$;L&PFuD{RnjFbV`$O;mXt3?p2UoSVrOgW0a~ zxHX^>zEvJ;xU-2Zh4`R_Msa3e^~K|gD@{cbH7NUEOc&hxk|&bKL{VzeQO8Kr(Ak>p z@y!zaD6A?8J2`XBqEiW(xRj!7gSvsRv`1GV_*!}(x6JxTtQXvTnhN(c3rmB^>VhzMpbdWG(jIh`7 zH?j*LvJHay!JOZHPV^VcG{tc+N+e&H`rK_)8RI}8_>#l;$LPv$uqFKC<<6|o`cu71u4vtA`dwF{A zo3!(aKZDWm;@5^VrEnPyLI;tikub%-g`y0ylVV;~vntQocAZ=|z^YbYvSnfL7~l%7 zd}*&=e>=;+*qfU;%S0M@ zu;!(KWPmnxFi*H<>*GF@6aH@ER3*V&6|6;Br!bewV7@R?KquKf{q$T1H2i#4LuHCv zYqOn;uwsOq=GV)QS=DS!1zvhO7Y!tg<#0te7tFQ`6gu(OGxvu~_HEEvMcuL`6I+4Z zZv_PfbyVp1-1PVjz{jTTdR3%=W@OJwBSUVR%Rc01C1Gk%-{|-K%^8Cr^%1Z!fLUUm zCBPX?my3YkJbwfUXYMp$DvhW4EKJ}LQ-8yG0}}yIK}R4~Ayb>Aft)LBw+;8z^n72;{K5@x$!xY{ueFBg&=Qf@bWUUR9i$MW5jE znzEzWT`(3~{qQ%^x<4%1Z(Dy%n9*kFN^(S|_z1cHq&5S9*OIO}FiN6f);tE+eDsDZ zAqZk8R<47A$!Dj9zHvM0%o?w~?V4zrwv^Cy`l$r&&!%v{Rqm^t^>$PXGJofHl=muQcTw+D%L5--AD(8}J>3Yrm zpb!fSOV`HOJ724jX%Uy-VM zlD1dszd$wJn+gfMTyYERqCRk{k~4(luh{3%kpZmtcXy?zC8VDO#7stroA_d8ZcRaQ z@>$3MULa2HJs{v~-~WM2D5~I;9_!QTRx$ava>9()XX31+EIuMnqxl483j(>5kJF8e zj2>k~a1TsBI8KSjvtoksFNM21cNVhNy!o z1w}$@xnk7gbRR?_9o<&0Ap7A`F13?pXWT?_YQ^Dai*z^-*#;a%djjtxz-{SzgZShH z`YETHV2OC@6K4*pe`t79QJg%Qp%tp^A+o0S!6QS9b5CX1?e&+5q}uaSO2cNexpO_| zD9;e@sa#B))0D0KM_|*OZBIqVoq5By<8FSr^5&iG?21jr$*&!EpNnMd+|j}2T6I_Z zCWqt-t*Mn-jh-onC5E1Es8Gi!!>V-D%{X$AAszH!HF0R~eOB5R|R{CUhW~ma9!3U1y8%Rf6D*kPm_}8;0 zEj?C--vJ7iVq|wq3qTX-G-vj1D`b97uJMpMPOtDo;2b!?rELNo2#?jBk$gg9qy-i_ zIK8tDtXQn6`K=suTYun%k*qoiu6h+!+XkH9MWxB=y)81oSi9k9sS>NQkcgY~^1iCw z9q(E9>ZFf}xxYrWr}9EH*Q3_#jy0`dPt0d)$33?h_a^{Yntd!vU5iX>+HS)9@t{>S^&<`ynlsOcj19tH$srIcdpvCU0Q1;R^Z@9~J~7H~~4wn+bk zp|NqSlpubN^4Gt{6)YXLEQ109pPk zuT^wsOwdrpN#xTlpMT;6B_1LktI#mf7_X+3)zPaqh@PYK-nC^>WnpBDs!F=}`%jSP zI7mdIR1~KEa0wh#nszs)F_?ZZm%KI0M?oYtKg;jAHq()$OhXypk)e{g(5WC+43?~w za~YW&ZWrQ8_`UZ%N5)frJh~ zr6t!1QP#gH`kz55#X)!~2F;OA5w47~mjNzyO!2&EGxql8V7gBm6|LIDz)SUWurL4)R9-3H3NkFrJk@GIlqF0}EXzH#b#03*t^auWoi9MIX8 zve5_3q>f!9Q;xR&H^}dXFFt`Xkf2v{9}#G8-{twe5#R*rz#-#baIQaIE?uW^q;rq= zoRZ|7kYrzBu7v645-iUZ^V%-)dFzAFLsMUGObQjHFBQ#+v|*{n^^0_IxiKf%)EaVo z@{vTKiNz{%S3ju6bvI?lRIHlIjtY&29k(d<`%G_P2E`%&tichj!y|LL;apsK&DLoe zO$ib2go0Gt)1}Aaok{@R@^j+0=E3YNb!SE*@|Gk!qaQcj%MH>p2pNcokd$R($i7XA zL-qy=Kig?VyysrA%OWJ;c642E*owg+wla{TWBxp4pri@G7lf&U!S#YkJ1`Bx8K-1! zvcC$NN7+HN05z9Jz;W7JI9qT$rf}yGLFEiPaP4`;rkx+H5MBRp-m!m#>t6Pd3ycdU zn_n59aCkTpdyDu3rNrgklxI^T1wFTl*DQ}hH=!R_vlTX#t$aRff8~3j-$TL`HD*S< zWXB6u3>TMQhPW}Em{h;Be3PTqxl(4Jv=RDS4TD@Ov<MEHN<%8?0)B5a_uW1xsMs zqP9Np(~f5ywl}zMo%P|zV5%UwkN;!?9ubJ#0V(xBhQuoL5#*pcLc}TK>5uU_?<4dj znya#zVoBJeT}hx7sS`MFW`PQ~)2oO>tI+6aY|}YS)br=07uPYwSJs}bwyP47?J*nt zb7Y@8eNZIXTqCq$g<06)F?#s*h0J0MFZDftA8{HH4Ss@pBGm4=<&Y8UHsne>Y;1F;!KseG1Z?L)i+YRa5Uu!j1=xy#Z-o7B3bzp%1-y zZ;(g3GW<$rHtVZsOrxyOa*X`Zs`Q9e`=K)19I)WpO4^BjA6GZ* z-W3T|R!e`h{H?5Q^@?}C2H!vWA5#8isR7VpUGO#6j0j@Qslz6mNMn+#X{L74yHSr- zxmaQ6oF7~aRIceKe~5Q{gl>+Z4pz?3MB5Mibt!oKW`s7_KAB$;`SL_o=~8yyRTkN! 
z(?D~;Q{nt-LfQLsek*(LTT_e}nJa=2;!uaUo+@DO$N^GYe?CY@9k#%`ywd$}dUv}h zrXt=-ZA_?c3RTo1ZkvtzbD2eJ8;B_As<~H}vclzr##f2bV4!_Me3`O#M20=efogLi zD;A$Kp<2>S$70e#&@j^xGXWv^liQeDzaV`1hX`x!rowNv zRvrhYufvbmw52ZSHM9^;mBK#YK>b2YHMR5hyYYd4neoL*bJ!twOk3!*C z>uGmvl-puQBi>=fLmV?#0?a$jnJSIJN>fMp1LjkGZw_9m@2@(R6Q+jT2Y7)!$`riT6O<9VCt(m@)_2YR{GD zVcBo)@D_JhwnUa6p;^y*MW|}7o~rDB99&j!t3r4W3fXa2=#Yl>rQH?SoX4)H&)T_i zR=y?UPB~I_0`cLMrQXEE;arwwOfsQw8Dyia>jEx-)X6u6o~k^>r)ORXKlRlXFwe`Y z9<(~%|6?)o)ZkSX->qa6oZ+LRHii}F3&FXY8ru>;&u~f6?gr>VR@Xm=Xn!B-K=mY{&VG^ty0dZ(>IBL z3#2g1+$qLW`G!n*8hJ_?!j2p}#0x>HXT+74bAno^1C#(R&(mg06yq6JGV4ok$x$U9 zsdrEIluH>tS}(chw52WlglA(+hCik+O^RSfYZRe)B>x?0q)H$tlG57BYyrBdkvWal#1@(CZR2Vq-}GbVX)49CD|cmqOQ)H)dLQbmCA` z&Ry?7^PxA04+N(r$FJprpoom4d1?1|n1mUnkE$9UgqiTa!c1$#01RS$s~lgtIl}J# zH(tUD9rI8(z!s2BaDs-%a!&g8tx@DQln+D%v?!YA)TUiwEVm^xn&MqAi4@hIjLOh< zp7YO_OB6zqDLV&!O?aRBF&OCz^K~sy8T=aIhx-MPTJZeBjs|)r^R*M7Ypw5AZ5ABV zF*%Ug-+{15WHtvTf3f+(P*TAOWca*gtNVJrd!?fYq}c9*D;6| zXc^0FQzH@t-Hg*ejMl9QDlK7tR(P1zh4&Ss^FN?N>>754h_e2aN zonFvpONSE}rM792mv_$-BmdB~i&8)>px3I7R02eP_Od9!)&Z{)otjPrez`}HBz#1^ z+F|}w72;cD>v{Oxp;o_@@{;h8B3P8oGKby9JPTQj_#;!4Yd>>(W_p@JP{h*)^*so{ ze_5Hv;m_&;tY*=xK{1ymo-|^jgs|C-4YVlNe4nCSdCa3d`4~9w+ZT7$|G#Sw?MG ziy>l-DJU9n@aQXhJQ|U!xLP(+{)8tM_w#kN>V)UQrA#EO8a_t6HK4`*@(LS(XEL;y*1X#b02D{sAKX{?z;(>)y+Ji7{9IA&;K@+gevJLXftcWWR{kGzvbC2d8 zV5Wa>`)Jw9?y=W7_?xHvb-<&6=_MF6`0s^eZ;^I{H`qEHczC}(sDJxR&}$vo`fpZj zOEZIi1~&g=_5Cfq2svN)E7adU-Y+lk_wN=#g25sYT>XFWryoHWb?i<5~|G(|k!rEa|Gqa)$fnR0pzaOXHkI`GLBs&X>_2O@NX1|35|KsTui6j?z p4F?B@{^ZdEW%3 Date: Mon, 11 Mar 2024 16:37:12 +0100 Subject: [PATCH 045/255] docs(README): update badges, links [ci skip] --- README.md | 48 ++++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index d6d6521d40..8237049814 100644 --- a/README.md +++ b/README.md @@ -1,17 +1,20 @@ ## 🐸Coqui.ai News - 📣 ⓍTTSv2 is here with 16 languages and better performance across the board. -- 📣 ⓍTTS fine-tuning code is out. Check the [example recipes](https://github.com/coqui-ai/TTS/tree/dev/recipes/ljspeech). +- 📣 ⓍTTS fine-tuning code is out. Check the [example recipes](https://github.com/eginhard/coqui-tts/tree/dev/recipes/ljspeech). - 📣 ⓍTTS can now stream with <200ms latency. -- 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://tts.readthedocs.io/en/dev/models/xtts.html) -- 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://tts.readthedocs.io/en/dev/models/bark.html) +- 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released +- [Blog Post](https://coqui.ai/blog/tts/open_xtts), +- [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://coqui-tts.readthedocs.io/en/dev/models/xtts.html) +- 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference +- with unconstrained voice cloning. [Docs](https://coqui-tts.readthedocs.io/en/dev/models/bark.html) - 📣 You can use [~1100 Fairseq models](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS. -- 📣 🐸TTS now supports 🐢Tortoise with faster inference. [Docs](https://tts.readthedocs.io/en/dev/models/tortoise.html) +- 📣 🐸TTS now supports 🐢Tortoise with faster inference. [Docs](https://coqui-tts.readthedocs.io/en/dev/models/tortoise.html)
-## +## **🐸TTS is a library for advanced Text-to-Speech generation.** @@ -26,22 +29,14 @@ ______________________________________________________________________ [![Discord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) [![License]()](https://opensource.org/licenses/MPL-2.0) [![PyPI version](https://badge.fury.io/py/TTS.svg)](https://badge.fury.io/py/TTS) -[![Covenant](https://camo.githubusercontent.com/7d620efaa3eac1c5b060ece5d6aacfcc8b81a74a04d05cd0398689c01c4463bb/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6e7472696275746f72253230436f76656e616e742d76322e3025323061646f707465642d6666363962342e737667)](https://github.com/coqui-ai/TTS/blob/master/CODE_OF_CONDUCT.md) +[![Covenant](https://camo.githubusercontent.com/7d620efaa3eac1c5b060ece5d6aacfcc8b81a74a04d05cd0398689c01c4463bb/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6e7472696275746f72253230436f76656e616e742d76322e3025323061646f707465642d6666363962342e737667)](https://github.com/eginhard/coqui-tts/blob/master/CODE_OF_CONDUCT.md) [![Downloads](https://pepy.tech/badge/tts)](https://pepy.tech/project/tts) [![DOI](https://zenodo.org/badge/265612440.svg)](https://zenodo.org/badge/latestdoi/265612440) -![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/aux_tests.yml/badge.svg) -![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/data_tests.yml/badge.svg) -![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/docker.yaml/badge.svg) -![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/inference_tests.yml/badge.svg) -![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/style_check.yml/badge.svg) -![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/text_tests.yml/badge.svg) -![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/tts_tests.yml/badge.svg) -![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/vocoder_tests.yml/badge.svg) -![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/zoo_tests0.yml/badge.svg) -![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/zoo_tests1.yml/badge.svg) -![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/zoo_tests2.yml/badge.svg) -[![Docs]()](https://tts.readthedocs.io/en/latest/) +![GithubActions](https://github.com/eginhard/coqui-tts/actions/workflows/tests.yml/badge.svg) +![GithubActions](https://github.com/eginhard/coqui-tts/actions/workflows/docker.yaml/badge.svg) +![GithubActions](https://github.com/eginhard/coqui-tts/actions/workflows/style_check.yml/badge.svg) +[![Docs]()](https://coqui-tts.readthedocs.io/en/latest/)
@@ -57,8 +52,8 @@ Please use our dedicated channels for questions and discussion. Help is much mor | 👩‍💻 **Usage Questions** | [GitHub Discussions] | | 🗯 **General Discussion** | [GitHub Discussions] or [Discord] | -[github issue tracker]: https://github.com/coqui-ai/tts/issues -[github discussions]: https://github.com/coqui-ai/TTS/discussions +[github issue tracker]: https://github.com/eginhard/coqui-tts/issues +[github discussions]: https://github.com/eginhard/coqui-tts/discussions [discord]: https://discord.gg/5eXr5seRrv [Tutorials and Examples]: https://github.com/coqui-ai/TTS/wiki/TTS-Notebooks-and-Tutorials @@ -66,9 +61,9 @@ Please use our dedicated channels for questions and discussion. Help is much mor ## 🔗 Links and Resources | Type | Links | | ------------------------------- | --------------------------------------- | -| 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/) -| 💾 **Installation** | [TTS/README.md](https://github.com/coqui-ai/TTS/tree/dev#installation)| -| 👩‍💻 **Contributing** | [CONTRIBUTING.md](https://github.com/coqui-ai/TTS/blob/main/CONTRIBUTING.md)| +| 💼 **Documentation** | [ReadTheDocs](https://coqui-tts.readthedocs.io/en/latest/) +| 💾 **Installation** | [TTS/README.md](https://github.com/eginhard/coqui-tts/tree/dev#installation)| +| 👩‍💻 **Contributing** | [CONTRIBUTING.md](https://github.com/eginhard/coqui-tts/blob/main/CONTRIBUTING.md)| | 📌 **Road Map** | [Main Development Plans](https://github.com/coqui-ai/TTS/issues/378) | 🚀 **Released Models** | [TTS Releases](https://github.com/coqui-ai/TTS/releases) and [Experimental Models](https://github.com/coqui-ai/TTS/wiki/Experimental-Released-Models)| | 📰 **Papers** | [TTS Papers](https://github.com/erogol/TTS-papers)| @@ -140,7 +135,7 @@ You can also help us implement more models. ## Installation 🐸TTS is tested on Ubuntu 18.04 with **python >= 3.9, < 3.12.**. -If you are only interested in [synthesizing speech](https://tts.readthedocs.io/en/latest/inference.html) with the released 🐸TTS models, installing from PyPI is the easiest option. +If you are only interested in [synthesizing speech](https://coqui-tts.readthedocs.io/en/latest/inference.html) with the released 🐸TTS models, installing from PyPI is the easiest option. ```bash pip install TTS @@ -149,7 +144,7 @@ pip install TTS If you plan to code or train models, clone 🐸TTS and install it locally. ```bash -git clone https://github.com/coqui-ai/TTS +git clone https://github.com/eginhard/coqui-tts pip install -e .[all,dev,notebooks,server] # Select the relevant extras ``` @@ -174,7 +169,8 @@ python3 TTS/server/server.py --model_name tts_models/en/vctk/vits # To start a s ``` You can then enjoy the TTS server [here](http://[::1]:5002/) -More details about the docker images (like GPU support) can be found [here](https://tts.readthedocs.io/en/latest/docker_images.html) +More details about the docker images (like GPU support) can be found +[here](https://coqui-tts.readthedocs.io/en/latest/docker_images.html) ## Synthesizing speech by 🐸TTS From bdbfc23e5cdfe53cff40e69f806c0d7a24b77bbc Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 12 Mar 2024 14:53:49 +0100 Subject: [PATCH 046/255] docs(README): fix list format --- README.md | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 8237049814..11b1ddb701 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,8 @@ - 📣 ⓍTTSv2 is here with 16 languages and better performance across the board. - 📣 ⓍTTS fine-tuning code is out. 
Check the [example recipes](https://github.com/eginhard/coqui-tts/tree/dev/recipes/ljspeech). - 📣 ⓍTTS can now stream with <200ms latency. -- 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released -- [Blog Post](https://coqui.ai/blog/tts/open_xtts), -- [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://coqui-tts.readthedocs.io/en/dev/models/xtts.html) -- 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference -- with unconstrained voice cloning. [Docs](https://coqui-tts.readthedocs.io/en/dev/models/bark.html) +- 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://coqui-tts.readthedocs.io/en/dev/models/xtts.html) +- 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://coqui-tts.readthedocs.io/en/dev/models/bark.html) - 📣 You can use [~1100 Fairseq models](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS. - 📣 🐸TTS now supports 🐢Tortoise with faster inference. [Docs](https://coqui-tts.readthedocs.io/en/dev/models/tortoise.html) From a7753708fbcb6ce88eb9bab3c114711e0735bc2b Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 12 Mar 2024 15:06:42 +0100 Subject: [PATCH 047/255] refactor: remove duplicate methods available in Trainer --- TTS/bin/extract_tts_spectrograms.py | 2 +- TTS/bin/train_encoder.py | 2 +- TTS/encoder/utils/training.py | 3 +- TTS/utils/generic_utils.py | 99 +------------------------- tests/__init__.py | 3 +- tests/tts_tests/test_tacotron_model.py | 6 +- tests/tts_tests2/test_glow_tts.py | 6 +- tests/vc_tests/test_freevc.py | 6 +- 8 files changed, 10 insertions(+), 117 deletions(-) diff --git a/TTS/bin/extract_tts_spectrograms.py b/TTS/bin/extract_tts_spectrograms.py index c6048626b3..16ad36b8dc 100755 --- a/TTS/bin/extract_tts_spectrograms.py +++ b/TTS/bin/extract_tts_spectrograms.py @@ -8,6 +8,7 @@ import torch from torch.utils.data import DataLoader from tqdm import tqdm +from trainer.generic_utils import count_parameters from TTS.config import load_config from TTS.tts.datasets import TTSDataset, load_tts_samples @@ -16,7 +17,6 @@ from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.utils.audio import AudioProcessor from TTS.utils.audio.numpy_transforms import quantize -from TTS.utils.generic_utils import count_parameters use_cuda = torch.cuda.is_available() diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index a32ad00f56..6a8cd7b444 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -8,6 +8,7 @@ import torch from torch.utils.data import DataLoader +from trainer.generic_utils import count_parameters, remove_experiment_folder from trainer.io import copy_model_files, save_best_model, save_checkpoint from trainer.torch import NoamLR from trainer.trainer_utils import get_optimizer @@ -18,7 +19,6 @@ from TTS.encoder.utils.visual import plot_embeddings from TTS.tts.datasets import load_tts_samples from TTS.utils.audio import AudioProcessor -from TTS.utils.generic_utils import count_parameters, remove_experiment_folder from TTS.utils.samplers import PerfectBatchSampler from TTS.utils.training import check_update diff --git a/TTS/encoder/utils/training.py b/TTS/encoder/utils/training.py index ff8f271d80..7692478d6b 100644 --- a/TTS/encoder/utils/training.py +++ b/TTS/encoder/utils/training.py @@ -3,13 +3,14 @@ from coqpit import Coqpit from trainer 
import TrainerArgs, get_last_checkpoint +from trainer.generic_utils import get_experiment_folder_path from trainer.io import copy_model_files from trainer.logging import logger_factory from trainer.logging.console_logger import ConsoleLogger from TTS.config import load_config, register_config from TTS.tts.utils.text.characters import parse_symbols -from TTS.utils.generic_utils import get_experiment_folder_path, get_git_branch +from TTS.utils.generic_utils import get_git_branch @dataclass diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index 4fa4741ab7..e0cd3ad85f 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -9,26 +9,8 @@ from pathlib import Path from typing import Dict -import fsspec -import torch - - -def to_cuda(x: torch.Tensor) -> torch.Tensor: - if x is None: - return None - if torch.is_tensor(x): - x = x.contiguous() - if torch.cuda.is_available(): - x = x.cuda(non_blocking=True) - return x - - -def get_cuda(): - use_cuda = torch.cuda.is_available() - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - return use_cuda, device - +# TODO: This method is duplicated in Trainer but out of date there def get_git_branch(): try: out = subprocess.check_output(["git", "branch"]).decode("utf8") @@ -41,47 +23,6 @@ def get_git_branch(): return current -def get_commit_hash(): - """https://stackoverflow.com/questions/14989858/get-the-current-git-hash-in-a-python-script""" - # try: - # subprocess.check_output(['git', 'diff-index', '--quiet', - # 'HEAD']) # Verify client is clean - # except: - # raise RuntimeError( - # " !! Commit before training to get the commit hash.") - try: - commit = subprocess.check_output(["git", "rev-parse", "--short", "HEAD"]).decode().strip() - # Not copying .git folder into docker container - except (subprocess.CalledProcessError, FileNotFoundError): - commit = "0000000" - return commit - - -def get_experiment_folder_path(root_path, model_name): - """Get an experiment folder path with the current date and time""" - date_str = datetime.datetime.now().strftime("%B-%d-%Y_%I+%M%p") - commit_hash = get_commit_hash() - output_folder = os.path.join(root_path, model_name + "-" + date_str + "-" + commit_hash) - return output_folder - - -def remove_experiment_folder(experiment_path): - """Check folder if there is a checkpoint, otherwise remove the folder""" - fs = fsspec.get_mapper(experiment_path).fs - checkpoint_files = fs.glob(experiment_path + "/*.pth") - if not checkpoint_files: - if fs.exists(experiment_path): - fs.rm(experiment_path, recursive=True) - print(" ! Run is removed from {}".format(experiment_path)) - else: - print(" ! 
Run is kept in {}".format(experiment_path)) - - -def count_parameters(model): - r"""Count number of trainable parameters in a network""" - return sum(p.numel() for p in model.parameters() if p.requires_grad) - - def to_camel(text): text = text.capitalize() text = re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), text) @@ -182,44 +123,6 @@ def format_aux_input(def_args: Dict, kwargs: Dict) -> Dict: return kwargs -class KeepAverage: - def __init__(self): - self.avg_values = {} - self.iters = {} - - def __getitem__(self, key): - return self.avg_values[key] - - def items(self): - return self.avg_values.items() - - def add_value(self, name, init_val=0, init_iter=0): - self.avg_values[name] = init_val - self.iters[name] = init_iter - - def update_value(self, name, value, weighted_avg=False): - if name not in self.avg_values: - # add value if not exist before - self.add_value(name, init_val=value) - else: - # else update existing value - if weighted_avg: - self.avg_values[name] = 0.99 * self.avg_values[name] + 0.01 * value - self.iters[name] += 1 - else: - self.avg_values[name] = self.avg_values[name] * self.iters[name] + value - self.iters[name] += 1 - self.avg_values[name] /= self.iters[name] - - def add_values(self, name_dict): - for key, value in name_dict.items(): - self.add_value(key, init_val=value) - - def update_values(self, value_dict): - for key, value in value_dict.items(): - self.update_value(key, value) - - def get_timestamp(): return datetime.now().strftime("%y%m%d-%H%M%S") diff --git a/tests/__init__.py b/tests/__init__.py index e102a2dfee..f0a8b2f118 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -1,7 +1,8 @@ import os +from trainer.generic_utils import get_cuda + from TTS.config import BaseDatasetConfig -from TTS.utils.generic_utils import get_cuda def get_device_id(): diff --git a/tests/tts_tests/test_tacotron_model.py b/tests/tts_tests/test_tacotron_model.py index 2ca068f6fe..7ec3f0df1b 100644 --- a/tests/tts_tests/test_tacotron_model.py +++ b/tests/tts_tests/test_tacotron_model.py @@ -4,6 +4,7 @@ import torch from torch import nn, optim +from trainer.generic_utils import count_parameters from tests import get_tests_input_path from TTS.tts.configs.shared_configs import CapacitronVAEConfig, GSTConfig @@ -24,11 +25,6 @@ WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") -def count_parameters(model): - r"""Count number of trainable parameters in a network""" - return sum(p.numel() for p in model.parameters() if p.requires_grad) - - class TacotronTrainTest(unittest.TestCase): @staticmethod def test_train_step(): diff --git a/tests/tts_tests2/test_glow_tts.py b/tests/tts_tests2/test_glow_tts.py index 2a723f105f..b93e701f19 100644 --- a/tests/tts_tests2/test_glow_tts.py +++ b/tests/tts_tests2/test_glow_tts.py @@ -4,6 +4,7 @@ import torch from torch import optim +from trainer.generic_utils import count_parameters from trainer.logging.tensorboard_logger import TensorboardLogger from tests import get_tests_data_path, get_tests_input_path, get_tests_output_path @@ -26,11 +27,6 @@ BATCH_SIZE = 3 -def count_parameters(model): - r"""Count number of trainable parameters in a network""" - return sum(p.numel() for p in model.parameters() if p.requires_grad) - - class TestGlowTTS(unittest.TestCase): @staticmethod def _create_inputs(batch_size=8): diff --git a/tests/vc_tests/test_freevc.py b/tests/vc_tests/test_freevc.py index c9e6cedf11..c90551b494 100644 --- a/tests/vc_tests/test_freevc.py +++ b/tests/vc_tests/test_freevc.py @@ -2,6 +2,7 @@ import unittest 
import torch +from trainer.generic_utils import count_parameters from tests import get_tests_input_path from TTS.vc.models.freevc import FreeVC, FreeVCConfig @@ -19,11 +20,6 @@ BATCH_SIZE = 3 -def count_parameters(model): - r"""Count number of trainable parameters in a network""" - return sum(p.numel() for p in model.parameters() if p.requires_grad) - - class TestFreeVC(unittest.TestCase): def _create_inputs(self, config, batch_size=2): input_dummy = torch.rand(batch_size, 30 * config.audio["hop_length"]).to(device) From 89a061f1d1ba3a97f5176fdca16eb2d0a2f1f0b6 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 12 Mar 2024 18:06:50 +0100 Subject: [PATCH 048/255] docs(tts.models.vits): clarify use of discriminator/generator [ci skip] --- TTS/tts/models/vits.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index e91d26b9ed..b376f74204 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -1233,7 +1233,7 @@ def train_step(self, batch: dict, criterion: nn.Module, optimizer_idx: int) -> T Args: batch (Dict): Input tensors. criterion (nn.Module): Loss layer designed for the model. - optimizer_idx (int): Index of optimizer to use. 0 for the generator and 1 for the discriminator networks. + optimizer_idx (int): Index of optimizer to use. 0 for the discriminator and 1 for the generator networks. Returns: Tuple[Dict, Dict]: Model ouputs and computed losses. @@ -1651,13 +1651,16 @@ def get_data_loader( def get_optimizer(self) -> List: """Initiate and return the GAN optimizers based on the config parameters. - It returnes 2 optimizers in a list. First one is for the generator and the second one is for the discriminator. + + It returns 2 optimizers in a list. First one is for the discriminator + and the second one is for the generator. + Returns: List: optimizers. 
""" - # select generator parameters optimizer0 = get_optimizer(self.config.optimizer, self.config.optimizer_params, self.config.lr_disc, self.disc) + # select generator parameters gen_parameters = chain(params for k, params in self.named_parameters() if not k.startswith("disc.")) optimizer1 = get_optimizer( self.config.optimizer, self.config.optimizer_params, self.config.lr_gen, parameters=gen_parameters From e95f8950eb673ee7c37a49bdf2542def4aef5f47 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 13 Mar 2024 12:06:27 +0100 Subject: [PATCH 049/255] fix: torch.stft will soon require return_complex=True Refactor that removes the deprecation warning: torch.view_as_real(torch.stft(*, return_complex=True)) is equal to torch.stft(*, return_complex=False) https://pytorch.org/docs/stable/generated/torch.stft.html --- TTS/tts/models/delightful_tts.py | 48 +++++++++++++------------ TTS/tts/models/vits.py | 48 +++++++++++++------------ TTS/utils/audio/torch_transforms.py | 24 +++++++------ TTS/vc/modules/freevc/mel_processing.py | 48 +++++++++++++------------ 4 files changed, 91 insertions(+), 77 deletions(-) diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index b1cf886bea..a4aa563f48 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -179,17 +179,19 @@ def _wav_to_spec(y, n_fft, hop_length, win_length, center=False): ) y = y.squeeze(1) - spec = torch.stft( - y, - n_fft, - hop_length=hop_length, - win_length=win_length, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=False, + spec = torch.view_as_real( + torch.stft( + y, + n_fft, + hop_length=hop_length, + win_length=win_length, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=True, + ) ) return spec @@ -274,17 +276,19 @@ def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fm ) y = y.squeeze(1) - spec = torch.stft( - y, - n_fft, - hop_length=hop_length, - win_length=win_length, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=False, + spec = torch.view_as_real( + torch.stft( + y, + n_fft, + hop_length=hop_length, + win_length=win_length, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=True, + ) ) spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index e91d26b9ed..0d2187d206 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -121,17 +121,19 @@ def wav_to_spec(y, n_fft, hop_length, win_length, center=False): ) y = y.squeeze(1) - spec = torch.stft( - y, - n_fft, - hop_length=hop_length, - win_length=win_length, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=False, + spec = torch.view_as_real( + torch.stft( + y, + n_fft, + hop_length=hop_length, + win_length=win_length, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=True, + ) ) spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) @@ -189,17 +191,19 @@ def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fm ) y = y.squeeze(1) - spec = torch.stft( - y, - n_fft, - hop_length=hop_length, - 
win_length=win_length, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=False, + spec = torch.view_as_real( + torch.stft( + y, + n_fft, + hop_length=hop_length, + win_length=win_length, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=True, + ) ) spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) diff --git a/TTS/utils/audio/torch_transforms.py b/TTS/utils/audio/torch_transforms.py index fd40ebb048..632969c51a 100644 --- a/TTS/utils/audio/torch_transforms.py +++ b/TTS/utils/audio/torch_transforms.py @@ -119,17 +119,19 @@ def __call__(self, x): padding = int((self.n_fft - self.hop_length) / 2) x = torch.nn.functional.pad(x, (padding, padding), mode="reflect") # B x D x T x 2 - o = torch.stft( - x.squeeze(1), - self.n_fft, - self.hop_length, - self.win_length, - self.window, - center=True, - pad_mode="reflect", # compatible with audio.py - normalized=self.normalized, - onesided=True, - return_complex=False, + o = torch.view_as_real( + torch.stft( + x.squeeze(1), + self.n_fft, + self.hop_length, + self.win_length, + self.window, + center=True, + pad_mode="reflect", # compatible with audio.py + normalized=self.normalized, + onesided=True, + return_complex=True, + ) ) M = o[:, :, :, 0] P = o[:, :, :, 1] diff --git a/TTS/vc/modules/freevc/mel_processing.py b/TTS/vc/modules/freevc/mel_processing.py index 2dcbf21493..1955e758ac 100644 --- a/TTS/vc/modules/freevc/mel_processing.py +++ b/TTS/vc/modules/freevc/mel_processing.py @@ -54,17 +54,19 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False) ) y = y.squeeze(1) - spec = torch.stft( - y, - n_fft, - hop_length=hop_size, - win_length=win_size, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=False, + spec = torch.view_as_real( + torch.stft( + y, + n_fft, + hop_length=hop_size, + win_length=win_size, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=True, + ) ) spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) @@ -104,17 +106,19 @@ def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, ) y = y.squeeze(1) - spec = torch.stft( - y, - n_fft, - hop_length=hop_size, - win_length=win_size, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=False, + spec = torch.view_as_real( + torch.stft( + y, + n_fft, + hop_length=hop_size, + win_length=win_size, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=True, + ) ) spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) From adbcba06dac530ce410f213a5a56e773c7f63b84 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 14 Mar 2024 20:48:29 +0100 Subject: [PATCH 050/255] refactor(dataset): get audio length with torchaudio Removes a (GPL) dependency --- TTS/tts/datasets/dataset.py | 8 ++++---- requirements.txt | 1 - 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py index 9d0c45add9..257d1c3100 100644 --- a/TTS/tts/datasets/dataset.py +++ b/TTS/tts/datasets/dataset.py @@ -4,9 +4,9 @@ import random from typing import Dict, List, Union -import mutagen import numpy as np import torch +import torchaudio 
import tqdm from torch.utils.data import Dataset @@ -43,15 +43,15 @@ def string2filename(string): return filename -def get_audio_size(audiopath): +def get_audio_size(audiopath) -> int: + """Return the number of samples in the audio file.""" extension = audiopath.rpartition(".")[-1].lower() if extension not in {"mp3", "wav", "flac"}: raise RuntimeError( f"The audio format {extension} is not supported, please convert the audio files to mp3, flac, or wav format!" ) - audio_info = mutagen.File(audiopath).info - return int(audio_info.length * audio_info.sample_rate) + return torchaudio.info(audiopath).num_frames class TTSDataset(Dataset): diff --git a/requirements.txt b/requirements.txt index 6d5fbc245f..a01efaa648 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,7 +12,6 @@ anyascii>=0.3.0 pyyaml>=6.0 fsspec[http]>=2023.6.0 # <= 2023.9.1 makes aux tests fail packaging>=23.1 -mutagen==1.47.0 # deps for inference pysbd>=0.3.4 # deps for notebooks From 7630abb43fbe62ceddb43bbfbfc739d061a1914c Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 18 Nov 2023 14:11:01 +0100 Subject: [PATCH 051/255] refactor(bin.find_unique_chars): use existing function --- TTS/bin/find_unique_chars.py | 14 ++------------ TTS/tts/datasets/__init__.py | 2 +- 2 files changed, 3 insertions(+), 13 deletions(-) diff --git a/TTS/bin/find_unique_chars.py b/TTS/bin/find_unique_chars.py index 81f2f4465f..f476ca5ddb 100644 --- a/TTS/bin/find_unique_chars.py +++ b/TTS/bin/find_unique_chars.py @@ -4,7 +4,7 @@ from argparse import RawTextHelpFormatter from TTS.config import load_config -from TTS.tts.datasets import load_tts_samples +from TTS.tts.datasets import find_unique_chars, load_tts_samples def main(): @@ -29,17 +29,7 @@ def main(): ) items = train_items + eval_items - - texts = "".join(item["text"] for item in items) - chars = set(texts) - lower_chars = filter(lambda c: c.islower(), chars) - chars_force_lower = [c.lower() for c in chars] - chars_force_lower = set(chars_force_lower) - - print(f" > Number of unique characters: {len(chars)}") - print(f" > Unique characters: {''.join(sorted(chars))}") - print(f" > Unique lower characters: {''.join(sorted(lower_chars))}") - print(f" > Unique all forced to lower characters: {''.join(sorted(chars_force_lower))}") + find_unique_chars(items) if __name__ == "__main__": diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py index 192138561f..4f354fa0be 100644 --- a/TTS/tts/datasets/__init__.py +++ b/TTS/tts/datasets/__init__.py @@ -167,7 +167,7 @@ def _get_formatter_by_name(name): def find_unique_chars(data_samples, verbose=True): - texts = "".join(item[0] for item in data_samples) + texts = "".join(item["text"] for item in data_samples) chars = set(texts) lower_chars = filter(lambda c: c.islower(), chars) chars_force_lower = [c.lower() for c in chars] From d76d0eff1c76816eebae5ecbcd39a0e197eaf378 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 30 Mar 2024 22:29:01 +0100 Subject: [PATCH 052/255] ci(tests.yml): run apt-get update before installing espeak https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/customizing-github-hosted-runners#installing-software-on-ubuntu-runners --- .github/workflows/tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index da5352d1fc..20308aab6c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,6 +30,7 @@ jobs: - name: Install Espeak if: contains(fromJSON('["inference_tests", 
"test_text", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) run: | + sudo apt-get update sudo apt-get install espeak espeak-ng - name: Install dependencies run: | From d7727241255adeb78bc6ebe72005837ff0799c1f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 3 Apr 2024 11:28:22 +0200 Subject: [PATCH 053/255] fix: update repository links, package names, metadata --- CITATION.cff | 4 +-- CONTRIBUTING.md | 26 +++++++++---------- README.md | 10 ++++--- TTS/server/README.md | 4 +-- TTS/server/server.py | 2 +- TTS/server/templates/index.html | 4 +-- TTS/tts/models/xtts.py | 6 ++--- TTS/tts/utils/text/phonemizers/__init__.py | 2 +- docs/source/faq.md | 4 +-- docs/source/inference.md | 10 +++---- docs/source/installation.md | 14 +++++----- docs/source/main_classes/trainer_api.md | 2 +- docs/source/models/xtts.md | 14 ++++------ docs/source/tutorial_for_nervous_beginners.md | 12 ++++----- notebooks/Tutorial_1_use-pretrained-TTS.ipynb | 2 +- ...utorial_2_train_your_first_TTS_model.ipynb | 4 +-- recipes/bel-alex73/README.md | 2 +- setup.py | 16 +++++++----- 18 files changed, 70 insertions(+), 68 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 6b0c8f19af..28eb65e23c 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -10,8 +10,8 @@ authors: version: 1.4 doi: 10.5281/zenodo.6334862 license: "MPL-2.0" -url: "https://www.coqui.ai" -repository-code: "https://github.com/coqui-ai/TTS" +url: "https://github.com/eginhard/coqui-tts" +repository-code: "https://github.com/eginhard/coqui-tts" keywords: - machine learning - deep learning diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a83b8c8296..8a0fe3904a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ Welcome to the 🐸TTS! -This repository is governed by [the Contributor Covenant Code of Conduct](https://github.com/coqui-ai/TTS/blob/main/CODE_OF_CONDUCT.md). +This repository is governed by [the Contributor Covenant Code of Conduct](https://github.com/eginhard/coqui-tts/blob/main/CODE_OF_CONDUCT.md). ## Where to start. We welcome everyone who likes to contribute to 🐸TTS. @@ -15,13 +15,13 @@ If you like to contribute code, squash a bug but if you don't know where to star You can pick something out of our road map. We keep the progess of the project in this simple issue thread. It has new model proposals or developmental updates etc. -- [Github Issues Tracker](https://github.com/coqui-ai/TTS/issues) +- [Github Issues Tracker](https://github.com/eginhard/coqui-tts/issues) This is a place to find feature requests, bugs. Issues with the ```good first issue``` tag are good place for beginners to take on. -- ✨**PR**✨ [pages](https://github.com/coqui-ai/TTS/pulls) with the ```🚀new version``` tag. +- ✨**PR**✨ [pages](https://github.com/eginhard/coqui-tts/pulls) with the ```🚀new version``` tag. We list all the target improvements for the next version. You can pick one of them and start contributing. @@ -46,14 +46,14 @@ Let us know if you encounter a problem along the way. The following steps are tested on an Ubuntu system. -1. Fork 🐸TTS[https://github.com/coqui-ai/TTS] by clicking the fork button at the top right corner of the project page. +1. Fork 🐸TTS[https://github.com/eginhard/coqui-tts] by clicking the fork button at the top right corner of the project page. 2. Clone 🐸TTS and add the main repo as a new remote named ```upstream```. 
```bash - $ git clone git@github.com:/TTS.git - $ cd TTS - $ git remote add upstream https://github.com/coqui-ai/TTS.git + $ git clone git@github.com:/coqui-tts.git + $ cd coqui-tts + $ git remote add upstream https://github.com/eginhard/coqui-tts.git ``` 3. Install 🐸TTS for development. @@ -105,7 +105,7 @@ The following steps are tested on an Ubuntu system. ```bash $ git fetch upstream - $ git rebase upstream/master + $ git rebase upstream/main # or for the development version $ git rebase upstream/dev ``` @@ -124,7 +124,7 @@ The following steps are tested on an Ubuntu system. 13. Let's discuss until it is perfect. 💪 - We might ask you for certain changes that would appear in the ✨**PR**✨'s page under 🐸TTS[https://github.com/coqui-ai/TTS/pulls]. + We might ask you for certain changes that would appear in the ✨**PR**✨'s page under 🐸TTS[https://github.com/eginhard/coqui-tts/pulls]. 14. Once things look perfect, We merge it to the ```dev``` branch and make it ready for the next version. @@ -132,14 +132,14 @@ The following steps are tested on an Ubuntu system. If you prefer working within a Docker container as your development environment, you can do the following: -1. Fork 🐸TTS[https://github.com/coqui-ai/TTS] by clicking the fork button at the top right corner of the project page. +1. Fork 🐸TTS[https://github.com/eginhard/coqui-tts] by clicking the fork button at the top right corner of the project page. 2. Clone 🐸TTS and add the main repo as a new remote named ```upsteam```. ```bash - $ git clone git@github.com:/TTS.git - $ cd TTS - $ git remote add upstream https://github.com/coqui-ai/TTS.git + $ git clone git@github.com:/coqui-tts.git + $ cd coqui-tts + $ git remote add upstream https://github.com/eginhard/coqui-tts.git ``` 3. Build the Docker Image as your development environment (it installs all of the dependencies for you): diff --git a/README.md b/README.md index 11b1ddb701..782b48ab69 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ ______________________________________________________________________ [![Discord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) [![License]()](https://opensource.org/licenses/MPL-2.0) [![PyPI version](https://badge.fury.io/py/TTS.svg)](https://badge.fury.io/py/TTS) -[![Covenant](https://camo.githubusercontent.com/7d620efaa3eac1c5b060ece5d6aacfcc8b81a74a04d05cd0398689c01c4463bb/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6e7472696275746f72253230436f76656e616e742d76322e3025323061646f707465642d6666363962342e737667)](https://github.com/eginhard/coqui-tts/blob/master/CODE_OF_CONDUCT.md) +[![Covenant](https://camo.githubusercontent.com/7d620efaa3eac1c5b060ece5d6aacfcc8b81a74a04d05cd0398689c01c4463bb/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6e7472696275746f72253230436f76656e616e742d76322e3025323061646f707465642d6666363962342e737667)](https://github.com/eginhard/coqui-tts/blob/main/CODE_OF_CONDUCT.md) [![Downloads](https://pepy.tech/badge/tts)](https://pepy.tech/project/tts) [![DOI](https://zenodo.org/badge/265612440.svg)](https://zenodo.org/badge/latestdoi/265612440) @@ -62,7 +62,7 @@ Please use our dedicated channels for questions and discussion. 
Help is much mor | 💾 **Installation** | [TTS/README.md](https://github.com/eginhard/coqui-tts/tree/dev#installation)| | 👩‍💻 **Contributing** | [CONTRIBUTING.md](https://github.com/eginhard/coqui-tts/blob/main/CONTRIBUTING.md)| | 📌 **Road Map** | [Main Development Plans](https://github.com/coqui-ai/TTS/issues/378) -| 🚀 **Released Models** | [TTS Releases](https://github.com/coqui-ai/TTS/releases) and [Experimental Models](https://github.com/coqui-ai/TTS/wiki/Experimental-Released-Models)| +| 🚀 **Released Models** | [Standard models](https://github.com/eginhard/coqui-tts/blob/dev/TTS/.models.json) and [Fairseq models in ~1100 languages](https://github.com/eginhard/coqui-tts#example-text-to-speech-using-fairseq-models-in-1100-languages-)| | 📰 **Papers** | [TTS Papers](https://github.com/erogol/TTS-papers)| ## Features @@ -135,7 +135,7 @@ You can also help us implement more models. If you are only interested in [synthesizing speech](https://coqui-tts.readthedocs.io/en/latest/inference.html) with the released 🐸TTS models, installing from PyPI is the easiest option. ```bash -pip install TTS +pip install coqui-tts ``` If you plan to code or train models, clone 🐸TTS and install it locally. @@ -152,7 +152,9 @@ $ make system-deps # intended to be used on Ubuntu (Debian). Let us know if you $ make install ``` -If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](https://stackoverflow.com/questions/66726331/how-can-i-run-mozilla-tts-coqui-tts-training-with-cuda-on-a-windows-system). +If you are on Windows, 👑@GuyPaddock wrote installation instructions +[here](https://stackoverflow.com/questions/66726331/how-can-i-run-mozilla-tts-coqui-tts-training-with-cuda-on-a-windows-system) +(note that these are out of date, e.g. you need to have at least Python 3.9). ## Docker Image diff --git a/TTS/server/README.md b/TTS/server/README.md index 9536e0d55a..f5df08011b 100644 --- a/TTS/server/README.md +++ b/TTS/server/README.md @@ -1,8 +1,8 @@ # :frog: TTS demo server Before you use the server, make sure you -[install](https://github.com/coqui-ai/TTS/tree/dev#install-tts)) :frog: TTS +[install](https://github.com/eginhard/coqui-tts/tree/dev#install-tts)) :frog: TTS properly and install the additional dependencies with `pip install -TTS[server]`. Then, you can follow the steps below. +coqui-tts[server]`. Then, you can follow the steps below. **Note:** If you install :frog:TTS using ```pip```, you can also use the ```tts-server``` end point on the terminal. diff --git a/TTS/server/server.py b/TTS/server/server.py index d117494060..01bd79a137 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -12,7 +12,7 @@ try: from flask import Flask, render_template, render_template_string, request, send_file except ImportError as e: - raise ImportError("Server requires requires flask, use `pip install TTS[server]`.") from e + raise ImportError("Server requires requires flask, use `pip install coqui-tts[server]`.") from e from TTS.config import load_config from TTS.utils.manage import ModelManager diff --git a/TTS/server/templates/index.html b/TTS/server/templates/index.html index 6354d3919d..f5f547c7bf 100644 --- a/TTS/server/templates/index.html +++ b/TTS/server/templates/index.html @@ -30,7 +30,7 @@ -
Fork me on GitHub @@ -151,4 +151,4 @@ - \ No newline at end of file + diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index e42288fe83..1c73c42ce9 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -695,12 +695,12 @@ def inference_stream( def forward(self): raise NotImplementedError( - "XTTS has a dedicated trainer, please check the XTTS docs: https://tts.readthedocs.io/en/dev/models/xtts.html#training" + "XTTS has a dedicated trainer, please check the XTTS docs: https://coqui-tts.readthedocs.io/en/dev/models/xtts.html#training" ) def eval_step(self): raise NotImplementedError( - "XTTS has a dedicated trainer, please check the XTTS docs: https://tts.readthedocs.io/en/dev/models/xtts.html#training" + "XTTS has a dedicated trainer, please check the XTTS docs: https://coqui-tts.readthedocs.io/en/dev/models/xtts.html#training" ) @staticmethod @@ -789,5 +789,5 @@ def load_checkpoint( def train_step(self): raise NotImplementedError( - "XTTS has a dedicated trainer, please check the XTTS docs: https://tts.readthedocs.io/en/dev/models/xtts.html#training" + "XTTS has a dedicated trainer, please check the XTTS docs: https://coqui-tts.readthedocs.io/en/dev/models/xtts.html#training" ) diff --git a/TTS/tts/utils/text/phonemizers/__init__.py b/TTS/tts/utils/text/phonemizers/__init__.py index 744ccb3e70..446f288302 100644 --- a/TTS/tts/utils/text/phonemizers/__init__.py +++ b/TTS/tts/utils/text/phonemizers/__init__.py @@ -63,7 +63,7 @@ def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer: return ZH_CN_Phonemizer(**kwargs) if name == "ja_jp_phonemizer": if JA_JP_Phonemizer is None: - raise ValueError(" ❗ You need to install JA phonemizer dependencies. Try `pip install TTS[ja]`.") + raise ValueError(" ❗ You need to install JA phonemizer dependencies. Try `pip install coqui-tts[ja]`.") return JA_JP_Phonemizer(**kwargs) if name == "ko_kr_phonemizer": return KO_KR_Phonemizer(**kwargs) diff --git a/docs/source/faq.md b/docs/source/faq.md index fa48c4a9fb..14be9d4c9c 100644 --- a/docs/source/faq.md +++ b/docs/source/faq.md @@ -3,7 +3,7 @@ We tried to collect common issues and questions we receive about 🐸TTS. It is ## Errors with a pre-trained model. How can I resolve this? - Make sure you use the right commit version of 🐸TTS. Each pre-trained model has its corresponding version that needs to be used. It is defined on the model table. -- If it is still problematic, post your problem on [Discussions](https://github.com/coqui-ai/TTS/discussions). Please give as many details as possible (error message, your TTS version, your TTS model and config.json etc.) +- If it is still problematic, post your problem on [Discussions](https://github.com/eginhard/coqui-tts/discussions). Please give as many details as possible (error message, your TTS version, your TTS model and config.json etc.) - If you feel like it's a bug to be fixed, then prefer Github issues with the same level of scrutiny. ## What are the requirements of a good 🐸TTS dataset? @@ -16,7 +16,7 @@ We tried to collect common issues and questions we receive about 🐸TTS. It is - If you need faster models, consider SpeedySpeech, GlowTTS or AlignTTS. Keep in mind that SpeedySpeech requires a pre-trained Tacotron or Tacotron2 model to compute text-to-speech alignments. ## How can I train my own `tts` model? -0. Check your dataset with notebooks in [dataset_analysis](https://github.com/coqui-ai/TTS/tree/master/notebooks/dataset_analysis) folder. 
Use [this notebook](https://github.com/coqui-ai/TTS/blob/master/notebooks/dataset_analysis/CheckSpectrograms.ipynb) to find the right audio processing parameters. A better set of parameters results in a better audio synthesis. +0. Check your dataset with notebooks in [dataset_analysis](https://github.com/eginhard/coqui-tts/tree/main/notebooks/dataset_analysis) folder. Use [this notebook](https://github.com/eginhard/coqui-tts/blob/main/notebooks/dataset_analysis/CheckSpectrograms.ipynb) to find the right audio processing parameters. A better set of parameters results in a better audio synthesis. 1. Write your own dataset `formatter` in `datasets/formatters.py` or format your dataset as one of the supported datasets, like LJSpeech. A `formatter` parses the metadata file and converts a list of training samples. diff --git a/docs/source/inference.md b/docs/source/inference.md index 2c57f6182c..0b05965f46 100644 --- a/docs/source/inference.md +++ b/docs/source/inference.md @@ -4,7 +4,7 @@ First, you need to install TTS. We recommend using PyPi. You need to call the command below: ```bash -$ pip install TTS +$ pip install coqui-tts ``` After the installation, 2 terminal commands are available. @@ -14,7 +14,7 @@ After the installation, 2 terminal commands are available. 3. In 🐍Python. - `from TTS.api import TTS` ## On the Commandline - `tts` -![cli.gif](https://github.com/coqui-ai/TTS/raw/main/images/tts_cli.gif) +![cli.gif](https://github.com/eginhard/coqui-tts/raw/main/images/tts_cli.gif) After the installation, 🐸TTS provides a CLI interface for synthesizing speech using pre-trained models. You can either use your own model or the release models under 🐸TTS. @@ -81,11 +81,11 @@ tts --model_name "voice_conversion///" ## On the Demo Server - `tts-server` - -![server.gif](https://github.com/coqui-ai/TTS/raw/main/images/demo_server.gif) + +![server.gif](https://github.com/eginhard/coqui-tts/raw/main/images/demo_server.gif) You can boot up a demo 🐸TTS server to run an inference with your models (make -sure to install the additional dependencies with `pip install TTS[server]`). +sure to install the additional dependencies with `pip install coqui-tts[server]`). Note that the server is not optimized for performance but gives you an easy way to interact with the models. diff --git a/docs/source/installation.md b/docs/source/installation.md index 8aaec01c9e..92743a9db4 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -1,6 +1,6 @@ # Installation -🐸TTS supports python >=3.9 <3.12.0 and tested on Ubuntu 18.10, 19.10, 20.10. +🐸TTS supports python >=3.9 <3.12.0 and was tested on Ubuntu 20.04 and 22.04. ## Using `pip` @@ -9,13 +9,13 @@ You can install from PyPI as follows: ```bash -pip install TTS # from PyPI +pip install coqui-tts # from PyPI ``` Or install from Github: ```bash -pip install git+https://github.com/coqui-ai/TTS # from Github +pip install git+https://github.com/eginhard/coqui-tts # from Github ``` ## Installing From Source @@ -23,11 +23,13 @@ pip install git+https://github.com/coqui-ai/TTS # from Github This is recommended for development and more control over 🐸TTS. ```bash -git clone https://github.com/coqui-ai/TTS/ -cd TTS +git clone https://github.com/eginhard/coqui-tts +cd coqui-tts make system-deps # only on Linux systems. 
make install ``` ## On Windows -If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](https://stackoverflow.com/questions/66726331/ +If you are on Windows, 👑@GuyPaddock wrote installation instructions +[here](https://stackoverflow.com/questions/66726331/) (note that these are out +of date, e.g. you need to have at least Python 3.9) diff --git a/docs/source/main_classes/trainer_api.md b/docs/source/main_classes/trainer_api.md index 876e09e5b6..335294aa4d 100644 --- a/docs/source/main_classes/trainer_api.md +++ b/docs/source/main_classes/trainer_api.md @@ -1,3 +1,3 @@ # Trainer API -We made the trainer a separate project on https://github.com/coqui-ai/Trainer +We made the trainer a separate project on https://github.com/eginhard/coqui-trainer diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index b979d04f6e..014b161669 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -3,9 +3,6 @@ ⓍTTS has important model changes that make cross-language voice cloning and multi-lingual speech generation super easy. There is no need for an excessive amount of training data that spans countless hours. -This is the same model that powers [Coqui Studio](https://coqui.ai/), and [Coqui API](https://docs.coqui.ai/docs), however we apply -a few tricks to make it faster and support streaming inference. - ### Features - Voice cloning. - Cross-language voice cloning. @@ -32,21 +29,20 @@ Stay tuned as we continue to add support for more languages. If you have any lan This model is licensed under [Coqui Public Model License](https://coqui.ai/cpml). ### Contact -Come and join in our 🐸Community. We're active on [Discord](https://discord.gg/fBC58unbKE) and [Twitter](https://twitter.com/coqui_ai). -You can also mail us at info@coqui.ai. +Come and join in our 🐸Community. We're active on [Discord](https://discord.gg/fBC58unbKE) and [Github](https://github.com/eginhard/coqui-tts/discussions). ### Inference #### 🐸TTS Command line -You can check all supported languages with the following command: +You can check all supported languages with the following command: ```console tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 \ --list_language_idx ``` -You can check all Coqui available speakers with the following command: +You can check all Coqui available speakers with the following command: ```console tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 \ @@ -280,7 +276,7 @@ To make the `XTTS_v2` fine-tuning more accessible for users that do not have goo The Colab Notebook is available [here](https://colab.research.google.com/drive/1GiI4_X724M8q2W-zZ-jXo7cWTV7RfaH-?usp=sharing). -To learn how to use this Colab Notebook please check the [XTTS fine-tuning video](). +To learn how to use this Colab Notebook please check the [XTTS fine-tuning video](https://www.youtube.com/watch?v=8tpDiiouGxc). If you are not able to acess the video you need to follow the steps: @@ -294,7 +290,7 @@ If you are not able to acess the video you need to follow the steps: ##### Run demo locally To run the demo locally you need to do the following steps: -1. Install 🐸 TTS following the instructions available [here](https://tts.readthedocs.io/en/dev/installation.html#installation). +1. Install 🐸 TTS following the instructions available [here](https://coqui-tts.readthedocs.io/en/latest/installation.html). 2. Install the Gradio demo requirements with the command `python3 -m pip install -r TTS/demos/xtts_ft_demo/requirements.txt` 3. 
Run the Gradio demo using the command `python3 TTS/demos/xtts_ft_demo/xtts_demo.py` 4. Follow the steps presented in the [tutorial video](https://www.youtube.com/watch?v=8tpDiiouGxc&feature=youtu.be) to be able to fine-tune and test the fine-tuned model. diff --git a/docs/source/tutorial_for_nervous_beginners.md b/docs/source/tutorial_for_nervous_beginners.md index db753e801b..dda2abbc36 100644 --- a/docs/source/tutorial_for_nervous_beginners.md +++ b/docs/source/tutorial_for_nervous_beginners.md @@ -5,14 +5,14 @@ User friendly installation. Recommended only for synthesizing voice. ```bash -$ pip install TTS +$ pip install coqui-tts ``` Developer friendly installation. ```bash -$ git clone https://github.com/coqui-ai/TTS -$ cd TTS +$ git clone https://github.com/eginhard/coqui-tts +$ cd coqui-tts $ pip install -e . ``` @@ -109,15 +109,15 @@ $ tts -h # see the help $ tts --list_models # list the available models. ``` -![cli.gif](https://github.com/coqui-ai/TTS/raw/main/images/tts_cli.gif) +![cli.gif](https://github.com/eginhard/coqui-tts/raw/main/images/tts_cli.gif) You can call `tts-server` to start a local demo server that you can open on your favorite web browser and 🗣️ (make sure to install the additional -dependencies with `pip install TTS[server]`). +dependencies with `pip install coqui-tts[server]`). ```bash $ tts-server -h # see the help $ tts-server --list_models # list the available models. ``` -![server.gif](https://github.com/coqui-ai/TTS/raw/main/images/demo_server.gif) +![server.gif](https://github.com/eginhard/coqui-tts/raw/main/images/demo_server.gif) diff --git a/notebooks/Tutorial_1_use-pretrained-TTS.ipynb b/notebooks/Tutorial_1_use-pretrained-TTS.ipynb index 87d04c499d..3c2e9de924 100644 --- a/notebooks/Tutorial_1_use-pretrained-TTS.ipynb +++ b/notebooks/Tutorial_1_use-pretrained-TTS.ipynb @@ -41,7 +41,7 @@ "outputs": [], "source": [ "! pip install -U pip\n", - "! pip install TTS" + "! pip install coqui-tts" ] }, { diff --git a/notebooks/Tutorial_2_train_your_first_TTS_model.ipynb b/notebooks/Tutorial_2_train_your_first_TTS_model.ipynb index 0f580a85b6..c4186670c9 100644 --- a/notebooks/Tutorial_2_train_your_first_TTS_model.ipynb +++ b/notebooks/Tutorial_2_train_your_first_TTS_model.ipynb @@ -32,7 +32,7 @@ "source": [ "## Install Coqui TTS\n", "! pip install -U pip\n", - "! pip install TTS" + "! pip install coqui-tts" ] }, { @@ -44,7 +44,7 @@ "\n", "### **First things first**: we need some data.\n", "\n", - "We're training a Text-to-Speech model, so we need some _text_ and we need some _speech_. Specificially, we want _transcribed speech_. The speech must be divided into audio clips and each clip needs transcription. More details about data requirements such as recording characteristics, background noise and vocabulary coverage can be found in the [🐸TTS documentation](https://tts.readthedocs.io/en/latest/formatting_your_dataset.html).\n", + "We're training a Text-to-Speech model, so we need some _text_ and we need some _speech_. Specificially, we want _transcribed speech_. The speech must be divided into audio clips and each clip needs transcription. More details about data requirements such as recording characteristics, background noise and vocabulary coverage can be found in the [🐸TTS documentation](https://coqui-tts.readthedocs.io/en/latest/formatting_your_dataset.html).\n", "\n", "If you have a single audio file and you need to **split** it into clips. It is also important to use a lossless audio file format to prevent compression artifacts. 
We recommend using **wav** file format.\n", "\n", diff --git a/recipes/bel-alex73/README.md b/recipes/bel-alex73/README.md index ad378dd998..6075d3102d 100644 --- a/recipes/bel-alex73/README.md +++ b/recipes/bel-alex73/README.md @@ -39,7 +39,7 @@ Docker container was created for simplify local running. You can run `docker-pre ## Training - with GPU -You need to upload Coqui-TTS(/mycomputer/TTS/) and storage/ directory(/mycomputer/storage/) to some computer with GPU. We don't need cv-corpus/ and fanetyka/ directories for training. Install gcc, then run `pip install -e .[all,dev,notebooks]` to prepare modules. GlowTTS and HifiGan models should be learned separately based on /storage/filtered_dataset only, i.e. they are not dependent from each other. below means list of GPU ids from zero("0,1,2,3" for systems with 4 GPU). See details on the https://tts.readthedocs.io/en/latest/tutorial_for_nervous_beginners.html(multi-gpu training). +You need to upload Coqui-TTS(/mycomputer/TTS/) and storage/ directory(/mycomputer/storage/) to some computer with GPU. We don't need cv-corpus/ and fanetyka/ directories for training. Install gcc, then run `pip install -e .[all,dev,notebooks]` to prepare modules. GlowTTS and HifiGan models should be learned separately based on /storage/filtered_dataset only, i.e. they are not dependent from each other. below means list of GPU ids from zero("0,1,2,3" for systems with 4 GPU). See details on the https://coqui-tts.readthedocs.io/en/latest/tutorial_for_nervous_beginners.html (multi-gpu training). Current setup created for 24GiB GPU. You need to change batch_size if you have more or less GPU memory. Also, you can try to set lr(learning rate) to lower value in the end of training GlowTTS. diff --git a/setup.py b/setup.py index 2465f1a6b0..a25b7674f6 100644 --- a/setup.py +++ b/setup.py @@ -79,12 +79,14 @@ def pip_install(package_name): ) ] setup( - name="TTS", + name="coqui-tts", version=version, - url="https://github.com/coqui-ai/TTS", + url="https://github.com/eginhard/coqui-tts", author="Eren Gölge", author_email="egolge@coqui.ai", - description="Deep learning for Text to Speech by Coqui.", + maintainer="Enno Hermann", + maintainer_email="enno.hermann@gmail.com", + description="Deep learning for Text to Speech.", long_description=README, long_description_content_type="text/markdown", license="MPL-2.0", @@ -101,10 +103,10 @@ def pip_install(package_name): ] }, project_urls={ - "Documentation": "https://github.com/coqui-ai/TTS/wiki", - "Tracker": "https://github.com/coqui-ai/TTS/issues", - "Repository": "https://github.com/coqui-ai/TTS", - "Discussions": "https://github.com/coqui-ai/TTS/discussions", + "Documentation": "https://coqui-tts.readthedocs.io", + "Tracker": "https://github.com/eginhard/coqui-tts/issues", + "Repository": "https://github.com/eginhard/coqui-tts", + "Discussions": "https://github.com/eginhard/coqui-tts/discussions", }, cmdclass={ "build_py": build_py, From 7fe6a011207d3c385b2bd07ee35dea997174a161 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 3 Apr 2024 12:26:13 +0200 Subject: [PATCH 054/255] ci(pypi-release): update actions, use trusted publishing https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ --- .github/workflows/pypi-release.yml | 41 +++++++++++++----------------- 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 2bbcf3cd70..f81f5a7493 100644 --- 
a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -10,7 +10,7 @@ jobs: build-sdist: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Verify tag matches version run: | set -ex @@ -19,7 +19,7 @@ jobs: if [[ "v$version" != "$tag" ]]; then exit 1 fi - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v5 with: python-version: 3.9 - run: | @@ -28,7 +28,7 @@ jobs: python -m build - run: | pip install dist/*.tar.gz - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4 with: name: sdist path: dist/*.tar.gz @@ -38,8 +38,8 @@ jobs: matrix: python-version: ["3.9", "3.10", "3.11"] steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install pip requirements @@ -50,45 +50,38 @@ jobs: run: | python setup.py bdist_wheel --plat-name=manylinux1_x86_64 python -m pip install dist/*-manylinux*.whl - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v4 with: name: wheel-${{ matrix.python-version }} path: dist/*-manylinux*.whl publish-artifacts: runs-on: ubuntu-20.04 needs: [build-sdist, build-wheels] + environment: + name: release + url: https://pypi.org/p/coqui-tts + permissions: + id-token: write steps: - run: | mkdir dist - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v4 with: name: "sdist" path: "dist/" - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v4 with: name: "wheel-3.9" path: "dist/" - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v4 with: name: "wheel-3.10" path: "dist/" - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v4 with: name: "wheel-3.11" path: "dist/" - run: | ls -lh dist/ - - name: Setup PyPI config - run: | - cat << EOF > ~/.pypirc - [pypi] - username=__token__ - password=${{ secrets.PYPI_TOKEN }} - EOF - - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - run: | - python -m pip install twine - - run: | - twine upload --repository pypi dist/* + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 From dd3768d4b1b14ef3e8512daeca7af6f61f30267e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 3 Apr 2024 12:30:11 +0200 Subject: [PATCH 055/255] chore: update version to v0.22.1 --- TTS/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/VERSION b/TTS/VERSION index 2157409059..a723ece79b 100644 --- a/TTS/VERSION +++ b/TTS/VERSION @@ -1 +1 @@ -0.22.0 +0.22.1 From 00f8d47bcffdfb8b61a20132a226d89273f0d13a Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 2 Apr 2024 16:24:38 +0200 Subject: [PATCH 056/255] ci: switch back from uv to pip Reverts c59f0ca1ce5113c650a123fbe9634b3c27c42cc7 (#13) Too many CI test timeouts from installing torch/nvidia packages with uv: https://github.com/astral-sh/uv/issues/1912 --- .github/workflows/tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 20308aab6c..b056e3073d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -38,14 +38,14 @@ jobs: sudo apt-get install -y --no-install-recommends git make gcc make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel uv + run: python3 -m pip install --upgrade pip setuptools wheel - name: 
Replace scarf urls if: contains(fromJSON('["data_tests", "inference_tests", "test_aux", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m uv pip install --system "TTS[all] @ ." + python3 -m pip install .[all] python3 setup.py egg_info - name: Unit tests run: make ${{ matrix.subset }} From b6ab85a05028a54c268e102e2d3ce3701efaa16e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 13 Nov 2023 15:04:52 +0100 Subject: [PATCH 057/255] fix: use logging instead of print statements Fixes #1691 --- TTS/api.py | 3 ++ TTS/encoder/dataset.py | 15 +++--- TTS/encoder/losses.py | 12 +++-- TTS/encoder/models/base_encoder.py | 10 ++-- TTS/encoder/utils/generic_utils.py | 11 ++-- TTS/encoder/utils/prepare_voxceleb.py | 30 ++++++----- TTS/server/server.py | 11 ++-- TTS/tts/datasets/__init__.py | 16 +++--- TTS/tts/datasets/dataset.py | 53 +++++++++---------- TTS/tts/datasets/formatters.py | 21 ++++---- TTS/tts/layers/bark/hubert/hubert_manager.py | 11 ++-- TTS/tts/layers/bark/hubert/tokenizer.py | 11 ++-- .../layers/delightful_tts/acoustic_model.py | 5 +- TTS/tts/layers/losses.py | 7 ++- TTS/tts/layers/overflow/common_layers.py | 7 ++- TTS/tts/layers/tacotron/tacotron.py | 6 ++- TTS/tts/layers/tacotron/tacotron2.py | 8 ++- TTS/tts/layers/tortoise/audio_utils.py | 7 ++- TTS/tts/layers/tortoise/dpm_solver.py | 5 +- TTS/tts/layers/tortoise/utils.py | 7 ++- TTS/tts/layers/xtts/dvae.py | 5 +- TTS/tts/layers/xtts/hifigan_decoder.py | 16 +++--- TTS/tts/layers/xtts/tokenizer.py | 9 +++- TTS/tts/layers/xtts/trainer/dataset.py | 20 ++++--- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 15 +++--- TTS/tts/layers/xtts/zh_num2words.py | 26 +++++---- TTS/tts/models/__init__.py | 5 +- TTS/tts/models/base_tacotron.py | 9 ++-- TTS/tts/models/base_tts.py | 21 ++++---- TTS/tts/models/delightful_tts.py | 23 ++++---- TTS/tts/models/forward_tts.py | 5 +- TTS/tts/models/glow_tts.py | 9 ++-- TTS/tts/models/neuralhmm_tts.py | 23 +++++--- TTS/tts/models/overflow.py | 23 +++++--- TTS/tts/models/tortoise.py | 11 ++-- TTS/tts/models/vits.py | 31 ++++++----- TTS/tts/models/xtts.py | 5 +- TTS/tts/utils/speakers.py | 17 +++--- TTS/tts/utils/text/characters.py | 19 ++++--- TTS/tts/utils/text/phonemizers/base.py | 7 ++- .../utils/text/phonemizers/espeak_wrapper.py | 10 ++-- .../text/phonemizers/multi_phonemizer.py | 7 ++- TTS/tts/utils/text/tokenizer.py | 19 ++++--- TTS/utils/audio/numpy_transforms.py | 5 +- TTS/utils/audio/processor.py | 9 ++-- TTS/utils/download.py | 19 ++++--- TTS/utils/downloaders.py | 13 ++--- TTS/utils/generic_utils.py | 6 ++- TTS/utils/manage.py | 48 +++++++++-------- TTS/utils/synthesizer.py | 15 +++--- TTS/utils/training.py | 8 ++- TTS/utils/vad.py | 10 ++-- TTS/vc/models/__init__.py | 5 +- TTS/vc/models/base_vc.py | 21 ++++---- TTS/vc/models/freevc.py | 7 ++- TTS/vc/modules/freevc/mel_processing.py | 12 +++-- .../freevc/speaker_encoder/speaker_encoder.py | 9 ++-- TTS/vc/modules/freevc/wavlm/__init__.py | 5 +- TTS/vocoder/datasets/gan_dataset.py | 1 - TTS/vocoder/datasets/wavernn_dataset.py | 8 ++- TTS/vocoder/models/__init__.py | 9 ++-- TTS/vocoder/models/hifigan_generator.py | 6 ++- .../models/parallel_wavegan_discriminator.py | 7 ++- .../models/parallel_wavegan_generator.py | 7 ++- TTS/vocoder/models/univnet_generator.py | 7 ++- TTS/vocoder/utils/generic_utils.py | 7 ++- 66 files changed, 518 
insertions(+), 317 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 992fbe69e9..6d618d29db 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -1,3 +1,4 @@ +import logging import tempfile import warnings from pathlib import Path @@ -9,6 +10,8 @@ from TTS.utils.manage import ModelManager from TTS.utils.synthesizer import Synthesizer +logger = logging.getLogger(__name__) + class TTS(nn.Module): """TODO: Add voice conversion and Capacitron support.""" diff --git a/TTS/encoder/dataset.py b/TTS/encoder/dataset.py index 582b1fe9ca..7e4286c5d9 100644 --- a/TTS/encoder/dataset.py +++ b/TTS/encoder/dataset.py @@ -1,3 +1,4 @@ +import logging import random import torch @@ -5,6 +6,8 @@ from TTS.encoder.utils.generic_utils import AugmentWAV +logger = logging.getLogger(__name__) + class EncoderDataset(Dataset): def __init__( @@ -51,12 +54,12 @@ def __init__( self.gaussian_augmentation_config = augmentation_config["gaussian"] if self.verbose: - print("\n > DataLoader initialization") - print(f" | > Classes per Batch: {num_classes_in_batch}") - print(f" | > Number of instances : {len(self.items)}") - print(f" | > Sequence length: {self.seq_len}") - print(f" | > Num Classes: {len(self.classes)}") - print(f" | > Classes: {self.classes}") + logger.info("DataLoader initialization") + logger.info(" | Classes per batch: %d", num_classes_in_batch) + logger.info(" | Number of instances: %d", len(self.items)) + logger.info(" | Sequence length: %d", self.seq_len) + logger.info(" | Number of classes: %d", len(self.classes)) + logger.info(" | Classes: %d", self.classes) def load_wav(self, filename): audio = self.ap.load_wav(filename, sr=self.ap.sample_rate) diff --git a/TTS/encoder/losses.py b/TTS/encoder/losses.py index 5b5aa0fc48..2e27848c31 100644 --- a/TTS/encoder/losses.py +++ b/TTS/encoder/losses.py @@ -1,7 +1,11 @@ +import logging + import torch import torch.nn.functional as F from torch import nn +logger = logging.getLogger(__name__) + # adapted from https://github.com/cvqluu/GE2E-Loss class GE2ELoss(nn.Module): @@ -23,7 +27,7 @@ def __init__(self, init_w=10.0, init_b=-5.0, loss_method="softmax"): self.b = nn.Parameter(torch.tensor(init_b)) self.loss_method = loss_method - print(" > Initialized Generalized End-to-End loss") + logger.info("Initialized Generalized End-to-End loss") assert self.loss_method in ["softmax", "contrast"] @@ -139,7 +143,7 @@ def __init__(self, init_w=10.0, init_b=-5.0): self.b = nn.Parameter(torch.tensor(init_b)) self.criterion = torch.nn.CrossEntropyLoss() - print(" > Initialized Angular Prototypical loss") + logger.info("Initialized Angular Prototypical loss") def forward(self, x, _label=None): """ @@ -177,7 +181,7 @@ def __init__(self, embedding_dim, n_speakers): self.criterion = torch.nn.CrossEntropyLoss() self.fc = nn.Linear(embedding_dim, n_speakers) - print("Initialised Softmax Loss") + logger.info("Initialised Softmax Loss") def forward(self, x, label=None): # reshape for compatibility @@ -212,7 +216,7 @@ def __init__(self, embedding_dim, n_speakers, init_w=10.0, init_b=-5.0): self.softmax = SoftmaxLoss(embedding_dim, n_speakers) self.angleproto = AngleProtoLoss(init_w, init_b) - print("Initialised SoftmaxAnglePrototypical Loss") + logger.info("Initialised SoftmaxAnglePrototypical Loss") def forward(self, x, label=None): """ diff --git a/TTS/encoder/models/base_encoder.py b/TTS/encoder/models/base_encoder.py index 957ea3c4ca..374062463d 100644 --- a/TTS/encoder/models/base_encoder.py +++ b/TTS/encoder/models/base_encoder.py @@ -1,3 +1,5 @@ +import logging + import numpy as np 
import torch import torchaudio @@ -8,6 +10,8 @@ from TTS.utils.generic_utils import set_init_dict from TTS.utils.io import load_fsspec +logger = logging.getLogger(__name__) + class PreEmphasis(nn.Module): def __init__(self, coefficient=0.97): @@ -118,13 +122,13 @@ def load_checkpoint( state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) try: self.load_state_dict(state["model"]) - print(" > Model fully restored. ") + logger.info("Model fully restored. ") except (KeyError, RuntimeError) as error: # If eval raise the error if eval: raise error - print(" > Partial model initialization.") + logger.info("Partial model initialization.") model_dict = self.state_dict() model_dict = set_init_dict(model_dict, state["model"], c) self.load_state_dict(model_dict) @@ -135,7 +139,7 @@ def load_checkpoint( try: criterion.load_state_dict(state["criterion"]) except (KeyError, RuntimeError) as error: - print(" > Criterion load ignored because of:", error) + logger.exception("Criterion load ignored because of: %s", error) # instance and load the criterion for the encoder classifier in inference time if ( diff --git a/TTS/encoder/utils/generic_utils.py b/TTS/encoder/utils/generic_utils.py index 88ed71d3f4..495b4def5a 100644 --- a/TTS/encoder/utils/generic_utils.py +++ b/TTS/encoder/utils/generic_utils.py @@ -1,4 +1,5 @@ import glob +import logging import os import random @@ -8,6 +9,8 @@ from TTS.encoder.models.lstm import LSTMSpeakerEncoder from TTS.encoder.models.resnet import ResNetSpeakerEncoder +logger = logging.getLogger(__name__) + class AugmentWAV(object): def __init__(self, ap, augmentation_config): @@ -38,8 +41,10 @@ def __init__(self, ap, augmentation_config): self.noise_list[noise_dir] = [] self.noise_list[noise_dir].append(wav_file) - print( - f" | > Using Additive Noise Augmentation: with {len(additive_files)} audios instances from {self.additive_noise_types}" + logger.info( + "Using Additive Noise Augmentation: with %d audios instances from %s", + len(additive_files), + self.additive_noise_types, ) self.use_rir = False @@ -50,7 +55,7 @@ def __init__(self, ap, augmentation_config): self.rir_files = glob.glob(os.path.join(self.rir_config["rir_path"], "**/*.wav"), recursive=True) self.use_rir = True - print(f" | > Using RIR Noise Augmentation: with {len(self.rir_files)} audios instances") + logger.info("Using RIR Noise Augmentation: with %d audios instances", len(self.rir_files)) self.create_augmentation_global_list() diff --git a/TTS/encoder/utils/prepare_voxceleb.py b/TTS/encoder/utils/prepare_voxceleb.py index 5a68c3075a..8f571dd2c7 100644 --- a/TTS/encoder/utils/prepare_voxceleb.py +++ b/TTS/encoder/utils/prepare_voxceleb.py @@ -21,13 +21,15 @@ import csv import hashlib +import logging import os import subprocess import sys import zipfile import soundfile as sf -from absl import logging + +logger = logging.getLogger(__name__) SUBSETS = { "vox1_dev_wav": [ @@ -77,14 +79,14 @@ def download_and_extract(directory, subset, urls): zip_filepath = os.path.join(directory, url.split("/")[-1]) if os.path.exists(zip_filepath): continue - logging.info("Downloading %s to %s" % (url, zip_filepath)) + logger.info("Downloading %s to %s" % (url, zip_filepath)) subprocess.call( "wget %s --user %s --password %s -O %s" % (url, USER["user"], USER["password"], zip_filepath), shell=True, ) statinfo = os.stat(zip_filepath) - logging.info("Successfully downloaded %s, size(bytes): %d" % (url, statinfo.st_size)) + logger.info("Successfully downloaded %s, size(bytes): %d" % (url, 
statinfo.st_size)) # concatenate all parts into zip files if ".zip" not in zip_filepath: @@ -118,9 +120,9 @@ def exec_cmd(cmd): try: retcode = subprocess.call(cmd, shell=True) if retcode < 0: - logging.info(f"Child was terminated by signal {retcode}") + logger.info(f"Child was terminated by signal {retcode}") except OSError as e: - logging.info(f"Execution failed: {e}") + logger.info(f"Execution failed: {e}") retcode = -999 return retcode @@ -134,11 +136,11 @@ def decode_aac_with_ffmpeg(aac_file, wav_file): bool, True if success. """ cmd = f"ffmpeg -i {aac_file} {wav_file}" - logging.info(f"Decoding aac file using command line: {cmd}") + logger.info(f"Decoding aac file using command line: {cmd}") ret = exec_cmd(cmd) if ret != 0: - logging.error(f"Failed to decode aac file with retcode {ret}") - logging.error("Please check your ffmpeg installation.") + logger.error(f"Failed to decode aac file with retcode {ret}") + logger.error("Please check your ffmpeg installation.") return False return True @@ -152,7 +154,7 @@ def convert_audio_and_make_label(input_dir, subset, output_dir, output_file): output_file: the name of the newly generated csv file. e.g. vox1_dev_wav.csv """ - logging.info("Preprocessing audio and label for subset %s" % subset) + logger.info("Preprocessing audio and label for subset %s" % subset) source_dir = os.path.join(input_dir, subset) files = [] @@ -190,7 +192,7 @@ def convert_audio_and_make_label(input_dir, subset, output_dir, output_file): writer.writerow(["wav_filename", "wav_length_ms", "speaker_id", "speaker_name"]) for wav_file in files: writer.writerow(wav_file) - logging.info("Successfully generated csv file {}".format(csv_file_path)) + logger.info("Successfully generated csv file {}".format(csv_file_path)) def processor(directory, subset, force_process): @@ -203,16 +205,16 @@ def processor(directory, subset, force_process): if not force_process and os.path.exists(subset_csv): return subset_csv - logging.info("Downloading and process the voxceleb in %s", directory) - logging.info("Preparing subset %s", subset) + logger.info("Downloading and process the voxceleb in %s", directory) + logger.info("Preparing subset %s", subset) download_and_extract(directory, subset, urls[subset]) convert_audio_and_make_label(directory, subset, directory, subset + ".csv") - logging.info("Finished downloading and processing") + logger.info("Finished downloading and processing") return subset_csv if __name__ == "__main__": - logging.set_verbosity(logging.INFO) + logging.getLogger("TTS").setLevel(logging.INFO) if len(sys.argv) != 4: print("Usage: python prepare_data.py save_directory user password") sys.exit() diff --git a/TTS/server/server.py b/TTS/server/server.py index 01bd79a137..ddf630a64d 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -2,6 +2,7 @@ import argparse import io import json +import logging import os import sys from pathlib import Path @@ -18,6 +19,8 @@ from TTS.utils.manage import ModelManager from TTS.utils.synthesizer import Synthesizer +logger = logging.getLogger(__name__) + def create_argparser(): def convert_boolean(x): @@ -200,9 +203,9 @@ def tts(): style_wav = request.headers.get("style-wav") or request.values.get("style_wav", "") style_wav = style_wav_uri_to_dict(style_wav) - print(f" > Model input: {text}") - print(f" > Speaker Idx: {speaker_idx}") - print(f" > Language Idx: {language_idx}") + logger.info("Model input: %s", text) + logger.info("Speaker idx: %s", speaker_idx) + logger.info("Language idx: %s", language_idx) wavs = 
synthesizer.tts(text, speaker_name=speaker_idx, language_name=language_idx, style_wav=style_wav) out = io.BytesIO() synthesizer.save_wav(wavs, out) @@ -246,7 +249,7 @@ def mary_tts_api_process(): text = data.get("INPUT_TEXT", [""])[0] else: text = request.args.get("INPUT_TEXT", "") - print(f" > Model input: {text}") + logger.info("Model input: %s", text) wavs = synthesizer.tts(text) out = io.BytesIO() synthesizer.save_wav(wavs, out) diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py index 4f354fa0be..f9f2cb2e37 100644 --- a/TTS/tts/datasets/__init__.py +++ b/TTS/tts/datasets/__init__.py @@ -1,3 +1,4 @@ +import logging import os import sys from collections import Counter @@ -9,6 +10,8 @@ from TTS.tts.datasets.dataset import * from TTS.tts.datasets.formatters import * +logger = logging.getLogger(__name__) + def split_dataset(items, eval_split_max_size=None, eval_split_size=0.01): """Split a dataset into train and eval. Consider speaker distribution in multi-speaker training. @@ -122,7 +125,7 @@ def load_tts_samples( meta_data_train = add_extra_keys(meta_data_train, language, dataset_name) - print(f" | > Found {len(meta_data_train)} files in {Path(root_path).resolve()}") + logger.info("Found %d files in %s", len(meta_data_train), Path(root_path).resolve()) # load evaluation split if set if eval_split: if meta_file_val: @@ -166,16 +169,15 @@ def _get_formatter_by_name(name): return getattr(thismodule, name.lower()) -def find_unique_chars(data_samples, verbose=True): +def find_unique_chars(data_samples): texts = "".join(item["text"] for item in data_samples) chars = set(texts) lower_chars = filter(lambda c: c.islower(), chars) chars_force_lower = [c.lower() for c in chars] chars_force_lower = set(chars_force_lower) - if verbose: - print(f" > Number of unique characters: {len(chars)}") - print(f" > Unique characters: {''.join(sorted(chars))}") - print(f" > Unique lower characters: {''.join(sorted(lower_chars))}") - print(f" > Unique all forced to lower characters: {''.join(sorted(chars_force_lower))}") + logger.info("Number of unique characters: %d", len(chars)) + logger.info("Unique characters: %s", "".join(sorted(chars))) + logger.info("Unique lower characters: %s", "".join(sorted(lower_chars))) + logger.info("Unique all forced to lower characters: %s", "".join(sorted(chars_force_lower))) return chars_force_lower diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py index 257d1c3100..dd879565dc 100644 --- a/TTS/tts/datasets/dataset.py +++ b/TTS/tts/datasets/dataset.py @@ -1,5 +1,6 @@ import base64 import collections +import logging import os import random from typing import Dict, List, Union @@ -14,6 +15,8 @@ from TTS.utils.audio import AudioProcessor from TTS.utils.audio.numpy_transforms import compute_energy as calculate_energy +logger = logging.getLogger(__name__) + # to prevent too many open files error as suggested here # https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936 torch.multiprocessing.set_sharing_strategy("file_system") @@ -214,11 +217,10 @@ def __getitem__(self, idx): def print_logs(self, level: int = 0) -> None: indent = "\t" * level - print("\n") - print(f"{indent}> DataLoader initialization") - print(f"{indent}| > Tokenizer:") + logger.info("%sDataLoader initialization", indent) + logger.info("%s| Tokenizer:", indent) self.tokenizer.print_logs(level + 1) - print(f"{indent}| > Number of instances : {len(self.samples)}") + logger.info("%s| Number of instances : %d", indent, len(self.samples)) def load_wav(self, 
filename): waveform = self.ap.load_wav(filename) @@ -390,17 +392,15 @@ def preprocess_samples(self): text_lengths = [s["text_length"] for s in samples] self.samples = samples - if self.verbose: - print(" | > Preprocessing samples") - print(" | > Max text length: {}".format(np.max(text_lengths))) - print(" | > Min text length: {}".format(np.min(text_lengths))) - print(" | > Avg text length: {}".format(np.mean(text_lengths))) - print(" | ") - print(" | > Max audio length: {}".format(np.max(audio_lengths))) - print(" | > Min audio length: {}".format(np.min(audio_lengths))) - print(" | > Avg audio length: {}".format(np.mean(audio_lengths))) - print(f" | > Num. instances discarded samples: {len(ignore_idx)}") - print(" | > Batch group size: {}.".format(self.batch_group_size)) + logger.info("Preprocessing samples") + logger.info("Max text length: {}".format(np.max(text_lengths))) + logger.info("Min text length: {}".format(np.min(text_lengths))) + logger.info("Avg text length: {}".format(np.mean(text_lengths))) + logger.info("Max audio length: {}".format(np.max(audio_lengths))) + logger.info("Min audio length: {}".format(np.min(audio_lengths))) + logger.info("Avg audio length: {}".format(np.mean(audio_lengths))) + logger.info("Num. instances discarded samples: %d", len(ignore_idx)) + logger.info("Batch group size: {}.".format(self.batch_group_size)) @staticmethod def _sort_batch(batch, text_lengths): @@ -643,7 +643,7 @@ def precompute(self, num_workers=1): We use pytorch dataloader because we are lazy. """ - print("[*] Pre-computing phonemes...") + logger.info("Pre-computing phonemes...") with tqdm.tqdm(total=len(self)) as pbar: batch_size = num_workers if num_workers > 0 else 1 dataloder = torch.utils.data.DataLoader( @@ -665,11 +665,10 @@ def collate_fn(self, batch): def print_logs(self, level: int = 0) -> None: indent = "\t" * level - print("\n") - print(f"{indent}> PhonemeDataset ") - print(f"{indent}| > Tokenizer:") + logger.info("%sPhonemeDataset", indent) + logger.info("%s| Tokenizer:", indent) self.tokenizer.print_logs(level + 1) - print(f"{indent}| > Number of instances : {len(self.samples)}") + logger.info("%s| Number of instances : %d", indent, len(self.samples)) class F0Dataset: @@ -732,7 +731,7 @@ def __len__(self): return len(self.samples) def precompute(self, num_workers=0): - print("[*] Pre-computing F0s...") + logger.info("Pre-computing F0s...") with tqdm.tqdm(total=len(self)) as pbar: batch_size = num_workers if num_workers > 0 else 1 # we do not normalize at preproessing @@ -819,9 +818,8 @@ def collate_fn(self, batch): def print_logs(self, level: int = 0) -> None: indent = "\t" * level - print("\n") - print(f"{indent}> F0Dataset ") - print(f"{indent}| > Number of instances : {len(self.samples)}") + logger.info("%sF0Dataset", indent) + logger.info("%s| Number of instances : %d", indent, len(self.samples)) class EnergyDataset: @@ -883,7 +881,7 @@ def __len__(self): return len(self.samples) def precompute(self, num_workers=0): - print("[*] Pre-computing energys...") + logger.info("Pre-computing energys...") with tqdm.tqdm(total=len(self)) as pbar: batch_size = num_workers if num_workers > 0 else 1 # we do not normalize at preproessing @@ -971,6 +969,5 @@ def collate_fn(self, batch): def print_logs(self, level: int = 0) -> None: indent = "\t" * level - print("\n") - print(f"{indent}> energyDataset ") - print(f"{indent}| > Number of instances : {len(self.samples)}") + logger.info("%senergyDataset") + logger.info("%s| Number of instances : %d", indent, len(self.samples)) diff --git 
a/TTS/tts/datasets/formatters.py b/TTS/tts/datasets/formatters.py index 09fbd094e8..ff1a76e2c9 100644 --- a/TTS/tts/datasets/formatters.py +++ b/TTS/tts/datasets/formatters.py @@ -1,4 +1,5 @@ import csv +import logging import os import re import xml.etree.ElementTree as ET @@ -8,6 +9,8 @@ from tqdm import tqdm +logger = logging.getLogger(__name__) + ######################## # DATASETS ######################## @@ -23,7 +26,7 @@ def cml_tts(root_path, meta_file, ignored_speakers=None): num_cols = len(lines[0].split("|")) # take the first row as reference for idx, line in enumerate(lines[1:]): if len(line.split("|")) != num_cols: - print(f" > Missing column in line {idx + 1} -> {line.strip()}") + logger.warning("Missing column in line %d -> %s", idx + 1, line.strip()) # load metadata with open(Path(root_path) / meta_file, newline="", encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="|") @@ -50,7 +53,7 @@ def cml_tts(root_path, meta_file, ignored_speakers=None): } ) if not_found_counter > 0: - print(f" | > [!] {not_found_counter} files not found") + logger.warning("%d files not found", not_found_counter) return items @@ -63,7 +66,7 @@ def coqui(root_path, meta_file, ignored_speakers=None): num_cols = len(lines[0].split("|")) # take the first row as reference for idx, line in enumerate(lines[1:]): if len(line.split("|")) != num_cols: - print(f" > Missing column in line {idx + 1} -> {line.strip()}") + logger.warning("Missing column in line %d -> %s", idx + 1, line.strip()) # load metadata with open(Path(root_path) / meta_file, newline="", encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="|") @@ -90,7 +93,7 @@ def coqui(root_path, meta_file, ignored_speakers=None): } ) if not_found_counter > 0: - print(f" | > [!] {not_found_counter} files not found") + logger.warning("%d files not found", not_found_counter) return items @@ -173,7 +176,7 @@ def mailabs(root_path, meta_files=None, ignored_speakers=None): if isinstance(ignored_speakers, list): if speaker_name in ignored_speakers: continue - print(" | > {}".format(csv_file)) + logger.info(csv_file) with open(txt_file, "r", encoding="utf-8") as ttf: for line in ttf: cols = line.split("|") @@ -188,7 +191,7 @@ def mailabs(root_path, meta_files=None, ignored_speakers=None): ) else: # M-AI-Labs have some missing samples, so just print the warning - print("> File %s does not exist!" % (wav_file)) + logger.warning("File %s does not exist!", wav_file) return items @@ -253,7 +256,7 @@ def sam_accenture(root_path, meta_file, **kwargs): # pylint: disable=unused-arg text = item.text wav_file = os.path.join(root_path, "vo_voice_quality_transformation", item.get("id") + ".wav") if not os.path.exists(wav_file): - print(f" [!] {wav_file} in metafile does not exist. Skipping...") + logger.warning("%s in metafile does not exist. Skipping...", wav_file) continue items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path}) return items @@ -374,7 +377,7 @@ def custom_turkish(root_path, meta_file, **kwargs): # pylint: disable=unused-ar continue text = cols[1].strip() items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path}) - print(f" [!] {len(skipped_files)} files skipped. They don't exist...") + logger.warning("%d files skipped. 
They don't exist...") return items @@ -442,7 +445,7 @@ def vctk(root_path, meta_files=None, wavs_path="wav48_silence_trimmed", mic="mic {"text": text, "audio_file": wav_file, "speaker_name": "VCTK_" + speaker_id, "root_path": root_path} ) else: - print(f" [!] wav files don't exist - {wav_file}") + logger.warning("Wav file doesn't exist - %s", wav_file) return items diff --git a/TTS/tts/layers/bark/hubert/hubert_manager.py b/TTS/tts/layers/bark/hubert/hubert_manager.py index 4bc1992941..fd936a9157 100644 --- a/TTS/tts/layers/bark/hubert/hubert_manager.py +++ b/TTS/tts/layers/bark/hubert/hubert_manager.py @@ -1,11 +1,14 @@ # From https://github.com/gitmylo/bark-voice-cloning-HuBERT-quantizer +import logging import os.path import shutil import urllib.request import huggingface_hub +logger = logging.getLogger(__name__) + class HubertManager: @staticmethod @@ -13,9 +16,9 @@ def make_sure_hubert_installed( download_url: str = "https://dl.fbaipublicfiles.com/hubert/hubert_base_ls960.pt", model_path: str = "" ): if not os.path.isfile(model_path): - print("Downloading HuBERT base model") + logger.info("Downloading HuBERT base model") urllib.request.urlretrieve(download_url, model_path) - print("Downloaded HuBERT") + logger.info("Downloaded HuBERT") return model_path return None @@ -27,9 +30,9 @@ def make_sure_tokenizer_installed( ): model_dir = os.path.dirname(model_path) if not os.path.isfile(model_path): - print("Downloading HuBERT custom tokenizer") + logger.info("Downloading HuBERT custom tokenizer") huggingface_hub.hf_hub_download(repo, model, local_dir=model_dir, local_dir_use_symlinks=False) shutil.move(os.path.join(model_dir, model), model_path) - print("Downloaded tokenizer") + logger.info("Downloaded tokenizer") return model_path return None diff --git a/TTS/tts/layers/bark/hubert/tokenizer.py b/TTS/tts/layers/bark/hubert/tokenizer.py index 3070241f1c..cd9579799a 100644 --- a/TTS/tts/layers/bark/hubert/tokenizer.py +++ b/TTS/tts/layers/bark/hubert/tokenizer.py @@ -5,6 +5,7 @@ """ import json +import logging import os.path from zipfile import ZipFile @@ -12,6 +13,8 @@ import torch from torch import nn, optim +logger = logging.getLogger(__name__) + class HubertTokenizer(nn.Module): def __init__(self, hidden_size=1024, input_size=768, output_size=10000, version=0): @@ -85,7 +88,7 @@ def train_step(self, x_train, y_train, log_loss=False): # Print loss if log_loss: - print("Loss", loss.item()) + logger.info("Loss %.3f", loss.item()) # Backward pass loss.backward() @@ -157,10 +160,10 @@ def auto_train(data_path, save_path="model.pth", load_model: str = None, save_ep data_x, data_y = [], [] if load_model and os.path.isfile(load_model): - print("Loading model from", load_model) + logger.info("Loading model from %s", load_model) model_training = HubertTokenizer.load_from_checkpoint(load_model, "cuda") else: - print("Creating new model.") + logger.info("Creating new model.") model_training = HubertTokenizer(version=1).to("cuda") # Settings for the model to run without lstm save_path = os.path.join(data_path, save_path) base_save_path = ".".join(save_path.split(".")[:-1]) @@ -191,5 +194,5 @@ def auto_train(data_path, save_path="model.pth", load_model: str = None, save_ep save_p_2 = f"{base_save_path}_epoch_{epoch}.pth" model_training.save(save_p) model_training.save(save_p_2) - print(f"Epoch {epoch} completed") + logger.info("Epoch %d completed", epoch) epoch += 1 diff --git a/TTS/tts/layers/delightful_tts/acoustic_model.py b/TTS/tts/layers/delightful_tts/acoustic_model.py index 
74ec204281..83989f9ba4 100644 --- a/TTS/tts/layers/delightful_tts/acoustic_model.py +++ b/TTS/tts/layers/delightful_tts/acoustic_model.py @@ -1,4 +1,5 @@ ### credit: https://github.com/dunky11/voicesmith +import logging from typing import Callable, Dict, Tuple import torch @@ -20,6 +21,8 @@ from TTS.tts.layers.generic.aligner import AlignmentNetwork from TTS.tts.utils.helpers import generate_path, maximum_path, sequence_mask +logger = logging.getLogger(__name__) + class AcousticModel(torch.nn.Module): def __init__( @@ -217,7 +220,7 @@ def _set_speaker_input(self, aux_input: Dict): def _init_speaker_embedding(self): # pylint: disable=attribute-defined-outside-init if self.num_speakers > 0: - print(" > initialization of speaker-embedding layers.") + logger.info("Initialization of speaker-embedding layers.") self.embedded_speaker_dim = self.args.speaker_embedding_channels self.emb_g = nn.Embedding(self.num_speakers, self.embedded_speaker_dim) diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py index de5f408c48..cd6cd0aeb2 100644 --- a/TTS/tts/layers/losses.py +++ b/TTS/tts/layers/losses.py @@ -1,3 +1,4 @@ +import logging import math import numpy as np @@ -10,6 +11,8 @@ from TTS.tts.utils.ssim import SSIMLoss as _SSIMLoss from TTS.utils.audio.torch_transforms import TorchSTFT +logger = logging.getLogger(__name__) + # pylint: disable=abstract-method # relates https://github.com/pytorch/pytorch/issues/42305 @@ -132,11 +135,11 @@ def forward(self, y_hat, y, length): ssim_loss = self.loss_func((y_norm * mask).unsqueeze(1), (y_hat_norm * mask).unsqueeze(1)) if ssim_loss.item() > 1.0: - print(f" > SSIM loss is out-of-range {ssim_loss.item()}, setting it 1.0") + logger.info("SSIM loss is out-of-range (%.2f), setting it to 1.0", ssim_loss.item()) ssim_loss = torch.tensor(1.0, device=ssim_loss.device) if ssim_loss.item() < 0.0: - print(f" > SSIM loss is out-of-range {ssim_loss.item()}, setting it 0.0") + logger.info("SSIM loss is out-of-range (%.2f), setting it to 0.0", ssim_loss.item()) ssim_loss = torch.tensor(0.0, device=ssim_loss.device) return ssim_loss diff --git a/TTS/tts/layers/overflow/common_layers.py b/TTS/tts/layers/overflow/common_layers.py index b036dd1bda..9f77af293c 100644 --- a/TTS/tts/layers/overflow/common_layers.py +++ b/TTS/tts/layers/overflow/common_layers.py @@ -1,3 +1,4 @@ +import logging from typing import List, Tuple import torch @@ -8,6 +9,8 @@ from TTS.tts.layers.tacotron.common_layers import Linear from TTS.tts.layers.tacotron.tacotron2 import ConvBNBlock +logger = logging.getLogger(__name__) + class Encoder(nn.Module): r"""Neural HMM Encoder @@ -213,8 +216,8 @@ def _floor_std(self, std): original_tensor = std.clone().detach() std = torch.clamp(std, min=self.std_floor) if torch.any(original_tensor != std): - print( - "[*] Standard deviation was floored! The model is preventing overfitting, nothing serious to worry about" + logger.info( + "Standard deviation was floored! 
The model is preventing overfitting, nothing serious to worry about" ) return std diff --git a/TTS/tts/layers/tacotron/tacotron.py b/TTS/tts/layers/tacotron/tacotron.py index 7a47c35ef6..32643dfcee 100644 --- a/TTS/tts/layers/tacotron/tacotron.py +++ b/TTS/tts/layers/tacotron/tacotron.py @@ -1,12 +1,16 @@ # coding: utf-8 # adapted from https://github.com/r9y9/tacotron_pytorch +import logging + import torch from torch import nn from .attentions import init_attn from .common_layers import Prenet +logger = logging.getLogger(__name__) + class BatchNormConv1d(nn.Module): r"""A wrapper for Conv1d with BatchNorm. It sets the activation @@ -480,7 +484,7 @@ def inference(self, inputs): if t > inputs.shape[1] / 4 and (stop_token > 0.6 or attention[:, -1].item() > 0.6): break if t > self.max_decoder_steps: - print(" | > Decoder stopped with 'max_decoder_steps") + logger.info("Decoder stopped with `max_decoder_steps` %d", self.max_decoder_steps) break return self._parse_outputs(outputs, attentions, stop_tokens) diff --git a/TTS/tts/layers/tacotron/tacotron2.py b/TTS/tts/layers/tacotron/tacotron2.py index c79b709972..727bf9ecfd 100644 --- a/TTS/tts/layers/tacotron/tacotron2.py +++ b/TTS/tts/layers/tacotron/tacotron2.py @@ -1,3 +1,5 @@ +import logging + import torch from torch import nn from torch.nn import functional as F @@ -5,6 +7,8 @@ from .attentions import init_attn from .common_layers import Linear, Prenet +logger = logging.getLogger(__name__) + # pylint: disable=no-value-for-parameter # pylint: disable=unexpected-keyword-arg @@ -356,7 +360,7 @@ def inference(self, inputs): if stop_token > self.stop_threshold and t > inputs.shape[0] // 2: break if len(outputs) == self.max_decoder_steps: - print(f" > Decoder stopped with `max_decoder_steps` {self.max_decoder_steps}") + logger.info("Decoder stopped with `max_decoder_steps` %d", self.max_decoder_steps) break memory = self._update_memory(decoder_output) @@ -389,7 +393,7 @@ def inference_truncated(self, inputs): if stop_token > 0.7: break if len(outputs) == self.max_decoder_steps: - print(" | > Decoder stopped with 'max_decoder_steps") + logger.info("Decoder stopped with `max_decoder_steps` %d", self.max_decoder_steps) break self.memory_truncated = decoder_output diff --git a/TTS/tts/layers/tortoise/audio_utils.py b/TTS/tts/layers/tortoise/audio_utils.py index 70711ed7a4..0b8701227b 100644 --- a/TTS/tts/layers/tortoise/audio_utils.py +++ b/TTS/tts/layers/tortoise/audio_utils.py @@ -1,3 +1,4 @@ +import logging import os from glob import glob from typing import Dict, List @@ -10,6 +11,8 @@ from TTS.utils.audio.torch_transforms import TorchSTFT +logger = logging.getLogger(__name__) + def load_wav_to_torch(full_path): sampling_rate, data = read(full_path) @@ -28,7 +31,7 @@ def check_audio(audio, audiopath: str): # Check some assumptions about audio range. This should be automatically fixed in load_wav_to_torch, but might not be in some edge cases, where we should squawk. # '2' is arbitrarily chosen since it seems like audio will often "overdrive" the [-1,1] bounds. if torch.any(audio > 2) or not torch.any(audio < 0): - print(f"Error with {audiopath}. Max={audio.max()} min={audio.min()}") + logger.error("Error with %s. Max=%.2f min=%.2f", audiopath, audio.max(), audio.min()) audio.clip_(-1, 1) @@ -136,7 +139,7 @@ def load_voices(voices: List[str], extra_voice_dirs: List[str] = []): for voice in voices: if voice == "random": if len(voices) > 1: - print("Cannot combine a random voice with a non-random voice. 
Just using a random voice.") + logger.warning("Cannot combine a random voice with a non-random voice. Just using a random voice.") return None, None clip, latent = load_voice(voice, extra_voice_dirs) if latent is None: diff --git a/TTS/tts/layers/tortoise/dpm_solver.py b/TTS/tts/layers/tortoise/dpm_solver.py index c70888df42..6a1d8ff784 100644 --- a/TTS/tts/layers/tortoise/dpm_solver.py +++ b/TTS/tts/layers/tortoise/dpm_solver.py @@ -1,7 +1,10 @@ +import logging import math import torch +logger = logging.getLogger(__name__) + class NoiseScheduleVP: def __init__( @@ -1171,7 +1174,7 @@ def norm_fn(v): lambda_0 - lambda_s, ) nfe += order - print("adaptive solver nfe", nfe) + logger.debug("adaptive solver nfe %d", nfe) return x def add_noise(self, x, t, noise=None): diff --git a/TTS/tts/layers/tortoise/utils.py b/TTS/tts/layers/tortoise/utils.py index 810a9e7f7a..898121f793 100644 --- a/TTS/tts/layers/tortoise/utils.py +++ b/TTS/tts/layers/tortoise/utils.py @@ -1,8 +1,11 @@ +import logging import os from urllib import request from tqdm import tqdm +logger = logging.getLogger(__name__) + DEFAULT_MODELS_DIR = os.path.join(os.path.expanduser("~"), ".cache", "tortoise", "models") MODELS_DIR = os.environ.get("TORTOISE_MODELS_DIR", DEFAULT_MODELS_DIR) MODELS_DIR = "/data/speech_synth/models/" @@ -28,10 +31,10 @@ def download_models(specific_models=None): model_path = os.path.join(MODELS_DIR, model_name) if os.path.exists(model_path): continue - print(f"Downloading {model_name} from {url}...") + logger.info("Downloading %s from %s...", model_name, url) with tqdm(unit="B", unit_scale=True, unit_divisor=1024, miniters=1) as t: request.urlretrieve(url, model_path, lambda nb, bs, fs, t=t: t.update(nb * bs - t.n)) - print("Done.") + logger.info("Done.") def get_model_path(model_name, models_dir=MODELS_DIR): diff --git a/TTS/tts/layers/xtts/dvae.py b/TTS/tts/layers/xtts/dvae.py index 8598f0b47a..4a37307e74 100644 --- a/TTS/tts/layers/xtts/dvae.py +++ b/TTS/tts/layers/xtts/dvae.py @@ -1,4 +1,5 @@ import functools +import logging from math import sqrt import torch @@ -8,6 +9,8 @@ import torchaudio from einops import rearrange +logger = logging.getLogger(__name__) + def default(val, d): return val if val is not None else d @@ -79,7 +82,7 @@ def forward(self, input, return_soft_codes=False): self.embed_avg = (ea * ~mask + rand_embed).permute(1, 0) self.cluster_size = self.cluster_size * ~mask.squeeze() if torch.any(mask): - print(f"Reset {torch.sum(mask)} embedding codes.") + logger.info("Reset %d embedding codes.", torch.sum(mask)) self.codes = None self.codes_full = False diff --git a/TTS/tts/layers/xtts/hifigan_decoder.py b/TTS/tts/layers/xtts/hifigan_decoder.py index 9add7826e6..42f64e6807 100644 --- a/TTS/tts/layers/xtts/hifigan_decoder.py +++ b/TTS/tts/layers/xtts/hifigan_decoder.py @@ -1,3 +1,5 @@ +import logging + import torch import torchaudio from torch import nn @@ -8,6 +10,8 @@ from TTS.utils.io import load_fsspec +logger = logging.getLogger(__name__) + LRELU_SLOPE = 0.1 @@ -316,7 +320,7 @@ def inference(self, c): return self.forward(c) def remove_weight_norm(self): - print("Removing weight norm...") + logger.info("Removing weight norm...") for l in self.ups: remove_parametrizations(l, "weight") for l in self.resblocks: @@ -390,7 +394,7 @@ def set_init_dict(model_dict, checkpoint_state, c): # Partial initialization: if there is a mismatch with new and old layer, it is skipped. 
for k, v in checkpoint_state.items(): if k not in model_dict: - print(" | > Layer missing in the model definition: {}".format(k)) + logger.warning("Layer missing in the model definition: %s", k) # 1. filter out unnecessary keys pretrained_dict = {k: v for k, v in checkpoint_state.items() if k in model_dict} # 2. filter out different size layers @@ -401,7 +405,7 @@ def set_init_dict(model_dict, checkpoint_state, c): pretrained_dict = {k: v for k, v in pretrained_dict.items() if reinit_layer_name not in k} # 4. overwrite entries in the existing state dict model_dict.update(pretrained_dict) - print(" | > {} / {} layers are restored.".format(len(pretrained_dict), len(model_dict))) + logger.info("%d / %d layers are restored.", len(pretrained_dict), len(model_dict)) return model_dict @@ -579,13 +583,13 @@ def load_checkpoint( state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) try: self.load_state_dict(state["model"]) - print(" > Model fully restored. ") + logger.info("Model fully restored.") except (KeyError, RuntimeError) as error: # If eval raise the error if eval: raise error - print(" > Partial model initialization.") + logger.info("Partial model initialization.") model_dict = self.state_dict() model_dict = set_init_dict(model_dict, state["model"]) self.load_state_dict(model_dict) @@ -596,7 +600,7 @@ def load_checkpoint( try: criterion.load_state_dict(state["criterion"]) except (KeyError, RuntimeError) as error: - print(" > Criterion load ignored because of:", error) + logger.exception("Criterion load ignored because of: %s", error) if use_cuda: self.cuda() diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py index 1a3cc47aaf..d4c3f0bbb8 100644 --- a/TTS/tts/layers/xtts/tokenizer.py +++ b/TTS/tts/layers/xtts/tokenizer.py @@ -1,3 +1,4 @@ +import logging import os import re import textwrap @@ -17,6 +18,8 @@ from TTS.tts.layers.xtts.zh_num2words import TextNorm as zh_num2words +logger = logging.getLogger(__name__) + def get_spacy_lang(lang): if lang == "zh": @@ -623,8 +626,10 @@ def check_input_length(self, txt, lang): lang = lang.split("-")[0] # remove the region limit = self.char_limits.get(lang, 250) if len(txt) > limit: - print( - f"[!] Warning: The text length exceeds the character limit of {limit} for language '{lang}', this might cause truncated audio." 
+ logger.warning( + "The text length exceeds the character limit of %d for language '%s', this might cause truncated audio.", + limit, + lang, ) def preprocess_text(self, txt, lang): diff --git a/TTS/tts/layers/xtts/trainer/dataset.py b/TTS/tts/layers/xtts/trainer/dataset.py index 0a19997a47..e598232665 100644 --- a/TTS/tts/layers/xtts/trainer/dataset.py +++ b/TTS/tts/layers/xtts/trainer/dataset.py @@ -1,3 +1,4 @@ +import logging import random import sys @@ -7,6 +8,8 @@ from TTS.tts.models.xtts import load_audio +logger = logging.getLogger(__name__) + torch.set_num_threads(1) @@ -70,13 +73,13 @@ def __init__(self, config, samples, tokenizer, sample_rate, is_eval=False): random.shuffle(self.samples) # order by language self.samples = key_samples_by_col(self.samples, "language") - print(" > Sampling by language:", self.samples.keys()) + logger.info("Sampling by language: %s", self.samples.keys()) else: # for evaluation load and check samples that are corrupted to ensures the reproducibility self.check_eval_samples() def check_eval_samples(self): - print(" > Filtering invalid eval samples!!") + logger.info("Filtering invalid eval samples!!") new_samples = [] for sample in self.samples: try: @@ -92,7 +95,7 @@ def check_eval_samples(self): continue new_samples.append(sample) self.samples = new_samples - print(" > Total eval samples after filtering:", len(self.samples)) + logger.info("Total eval samples after filtering: %d", len(self.samples)) def get_text(self, text, lang): tokens = self.tokenizer.encode(text, lang) @@ -150,7 +153,7 @@ def __getitem__(self, index): # ignore samples that we already know that is not valid ones if sample_id in self.failed_samples: if self.debug_failures: - print(f"Ignoring sample {sample['audio_file']} because it was already ignored before !!") + logger.info("Ignoring sample %s because it was already ignored before !!", sample["audio_file"]) # call get item again to get other sample return self[1] @@ -159,7 +162,7 @@ def __getitem__(self, index): tseq, audiopath, wav, cond, cond_len, cond_idxs = self.load_item(sample) except: if self.debug_failures: - print(f"error loading {sample['audio_file']} {sys.exc_info()}") + logger.warning("Error loading %s %s", sample["audio_file"], sys.exc_info()) self.failed_samples.add(sample_id) return self[1] @@ -172,8 +175,11 @@ def __getitem__(self, index): # Basically, this audio file is nonexistent or too long to be supported by the dataset. # It's hard to handle this situation properly. Best bet is to return the a random valid token and skew the dataset somewhat as a result. 
if self.debug_failures and wav is not None and tseq is not None: - print( - f"error loading {sample['audio_file']}: ranges are out of bounds; {wav.shape[-1]}, {tseq.shape[0]}" + logger.warning( + "Error loading %s: ranges are out of bounds: %d, %d", + sample["audio_file"], + wav.shape[-1], + tseq.shape[0], ) self.failed_samples.add(sample_id) return self[1] diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index daf9fc7e4f..0f161324f8 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -1,3 +1,4 @@ +import logging from dataclasses import dataclass, field from typing import Dict, List, Tuple, Union @@ -19,6 +20,8 @@ from TTS.tts.models.xtts import Xtts, XttsArgs, XttsAudioConfig from TTS.utils.io import load_fsspec +logger = logging.getLogger(__name__) + @dataclass class GPTTrainerConfig(XttsConfig): @@ -57,7 +60,7 @@ def callback_clearml_load_save(operation_type, model_info): # return None means skip the file upload/log, returning model_info will continue with the log/upload # you can also change the upload destination file name model_info.upload_filename or check the local file size with Path(model_info.local_model_path).stat().st_size assert operation_type in ("load", "save") - # print(operation_type, model_info.__dict__) + logger.debug("%s %s", operation_type, model_info.__dict__) if "similarities.pth" in model_info.__dict__["local_model_path"]: return None @@ -91,7 +94,7 @@ def __init__(self, config: Coqpit): gpt_checkpoint = torch.load(self.args.gpt_checkpoint, map_location=torch.device("cpu")) # deal with coqui Trainer exported model if "model" in gpt_checkpoint.keys() and "config" in gpt_checkpoint.keys(): - print("Coqui Trainer checkpoint detected! Converting it!") + logger.info("Coqui Trainer checkpoint detected! Converting it!") gpt_checkpoint = gpt_checkpoint["model"] states_keys = list(gpt_checkpoint.keys()) for key in states_keys: @@ -110,7 +113,7 @@ def __init__(self, config: Coqpit): num_new_tokens = ( self.xtts.gpt.text_embedding.weight.shape[0] - gpt_checkpoint["text_embedding.weight"].shape[0] ) - print(f" > Loading checkpoint with {num_new_tokens} additional tokens.") + logger.info("Loading checkpoint with %d additional tokens.", num_new_tokens) # add new tokens to a linear layer (text_head) emb_g = gpt_checkpoint["text_embedding.weight"] @@ -137,7 +140,7 @@ def __init__(self, config: Coqpit): gpt_checkpoint["text_head.bias"] = text_head_bias self.xtts.gpt.load_state_dict(gpt_checkpoint, strict=True) - print(">> GPT weights restored from:", self.args.gpt_checkpoint) + logger.info("GPT weights restored from: %s", self.args.gpt_checkpoint) # Mel spectrogram extractor for conditioning if self.args.gpt_use_perceiver_resampler: @@ -183,7 +186,7 @@ def __init__(self, config: Coqpit): if self.args.dvae_checkpoint: dvae_checkpoint = torch.load(self.args.dvae_checkpoint, map_location=torch.device("cpu")) self.dvae.load_state_dict(dvae_checkpoint, strict=False) - print(">> DVAE weights restored from:", self.args.dvae_checkpoint) + logger.info("DVAE weights restored from: %s", self.args.dvae_checkpoint) else: raise RuntimeError( "You need to specify config.model_args.dvae_checkpoint path to be able to train the GPT decoder!!" 
@@ -229,7 +232,7 @@ def test_run(self, assets) -> Tuple[Dict, Dict]: # pylint: disable=W0613 # init gpt for inference mode self.xtts.gpt.init_gpt_for_inference(kv_cache=self.args.kv_cache, use_deepspeed=False) self.xtts.gpt.eval() - print(" | > Synthesizing test sentences.") + logger.info("Synthesizing test sentences.") for idx, s_info in enumerate(self.config.test_sentences): wav = self.xtts.synthesize( s_info["text"], diff --git a/TTS/tts/layers/xtts/zh_num2words.py b/TTS/tts/layers/xtts/zh_num2words.py index 7d8f658160..69b8dae952 100644 --- a/TTS/tts/layers/xtts/zh_num2words.py +++ b/TTS/tts/layers/xtts/zh_num2words.py @@ -4,10 +4,13 @@ import argparse import csv +import logging import re import string import sys +logger = logging.getLogger(__name__) + # fmt: off # ================================================================================ # # basic constant @@ -923,12 +926,13 @@ def percentage2chntext(self): def normalize_nsw(raw_text): text = "^" + raw_text + "$" + logger.debug(text) # 规范化日期 pattern = re.compile(r"\D+((([089]\d|(19|20)\d{2})年)?(\d{1,2}月(\d{1,2}[日号])?)?)") matchers = pattern.findall(text) if matchers: - # print('date') + logger.debug("date") for matcher in matchers: text = text.replace(matcher[0], Date(date=matcher[0]).date2chntext(), 1) @@ -936,7 +940,7 @@ def normalize_nsw(raw_text): pattern = re.compile(r"\D+((\d+(\.\d+)?)[多余几]?" + CURRENCY_UNITS + r"(\d" + CURRENCY_UNITS + r"?)?)") matchers = pattern.findall(text) if matchers: - # print('money') + logger.debug("money") for matcher in matchers: text = text.replace(matcher[0], Money(money=matcher[0]).money2chntext(), 1) @@ -949,14 +953,14 @@ def normalize_nsw(raw_text): pattern = re.compile(r"\D((\+?86 ?)?1([38]\d|5[0-35-9]|7[678]|9[89])\d{8})\D") matchers = pattern.findall(text) if matchers: - # print('telephone') + logger.debug("telephone") for matcher in matchers: text = text.replace(matcher[0], TelePhone(telephone=matcher[0]).telephone2chntext(), 1) # 固话 pattern = re.compile(r"\D((0(10|2[1-3]|[3-9]\d{2})-?)?[1-9]\d{6,7})\D") matchers = pattern.findall(text) if matchers: - # print('fixed telephone') + logger.debug("fixed telephone") for matcher in matchers: text = text.replace(matcher[0], TelePhone(telephone=matcher[0]).telephone2chntext(fixed=True), 1) @@ -964,7 +968,7 @@ def normalize_nsw(raw_text): pattern = re.compile(r"(\d+/\d+)") matchers = pattern.findall(text) if matchers: - # print('fraction') + logger.debug("fraction") for matcher in matchers: text = text.replace(matcher, Fraction(fraction=matcher).fraction2chntext(), 1) @@ -973,7 +977,7 @@ def normalize_nsw(raw_text): pattern = re.compile(r"(\d+(\.\d+)?%)") matchers = pattern.findall(text) if matchers: - # print('percentage') + logger.debug("percentage") for matcher in matchers: text = text.replace(matcher[0], Percentage(percentage=matcher[0]).percentage2chntext(), 1) @@ -981,7 +985,7 @@ def normalize_nsw(raw_text): pattern = re.compile(r"(\d+(\.\d+)?)[多余几]?" 
+ COM_QUANTIFIERS) matchers = pattern.findall(text) if matchers: - # print('cardinal+quantifier') + logger.debug("cardinal+quantifier") for matcher in matchers: text = text.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1) @@ -989,7 +993,7 @@ def normalize_nsw(raw_text): pattern = re.compile(r"(\d{4,32})") matchers = pattern.findall(text) if matchers: - # print('digit') + logger.debug("digit") for matcher in matchers: text = text.replace(matcher, Digit(digit=matcher).digit2chntext(), 1) @@ -997,7 +1001,7 @@ def normalize_nsw(raw_text): pattern = re.compile(r"(\d+(\.\d+)?)") matchers = pattern.findall(text) if matchers: - # print('cardinal') + logger.debug("cardinal") for matcher in matchers: text = text.replace(matcher[0], Cardinal(cardinal=matcher[0]).cardinal2chntext(), 1) @@ -1005,7 +1009,7 @@ def normalize_nsw(raw_text): pattern = re.compile(r"(([a-zA-Z]+)二([a-zA-Z]+))") matchers = pattern.findall(text) if matchers: - # print('particular') + logger.debug("particular") for matcher in matchers: text = text.replace(matcher[0], matcher[1] + "2" + matcher[2], 1) @@ -1103,7 +1107,7 @@ def __call__(self, text): if self.check_chars: for c in text: if not IN_VALID_CHARS.get(c): - print(f"WARNING: illegal char {c} in: {text}", file=sys.stderr) + logger.warning("Illegal char %s in: %s", c, text) return "" if self.remove_space: diff --git a/TTS/tts/models/__init__.py b/TTS/tts/models/__init__.py index 2bd2e5f087..ebfa171c80 100644 --- a/TTS/tts/models/__init__.py +++ b/TTS/tts/models/__init__.py @@ -1,10 +1,13 @@ +import logging from typing import Dict, List, Union from TTS.utils.generic_utils import find_module +logger = logging.getLogger(__name__) + def setup_model(config: "Coqpit", samples: Union[List[List], List[Dict]] = None) -> "BaseTTS": - print(" > Using model: {}".format(config.model)) + logger.info("Using model: %s", config.model) # fetch the right model implementation. if "base_model" in config and config["base_model"] is not None: MyModel = find_module("TTS.tts.models", config.base_model.lower()) diff --git a/TTS/tts/models/base_tacotron.py b/TTS/tts/models/base_tacotron.py index f38dace235..33e1c11ab7 100644 --- a/TTS/tts/models/base_tacotron.py +++ b/TTS/tts/models/base_tacotron.py @@ -1,4 +1,5 @@ import copy +import logging from abc import abstractmethod from typing import Dict, Tuple @@ -17,6 +18,8 @@ from TTS.utils.io import load_fsspec from TTS.utils.training import gradual_training_scheduler +logger = logging.getLogger(__name__) + class BaseTacotron(BaseTTS): """Base class shared by Tacotron and Tacotron2""" @@ -116,7 +119,7 @@ def load_checkpoint( self.decoder.set_r(config.r) if eval: self.eval() - print(f" > Model's reduction rate `r` is set to: {self.decoder.r}") + logger.info("Model's reduction rate `r` is set to: %d", self.decoder.r) assert not self.training def get_criterion(self) -> nn.Module: @@ -148,7 +151,7 @@ def test_run(self, assets: Dict) -> Tuple[Dict, Dict]: Returns: Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard. 
""" - print(" | > Synthesizing test sentences.") + logger.info("Synthesizing test sentences.") test_audios = {} test_figures = {} test_sentences = self.config.test_sentences @@ -302,4 +305,4 @@ def on_epoch_start(self, trainer): self.decoder.set_r(r) if trainer.config.bidirectional_decoder: trainer.model.decoder_backward.set_r(r) - print(f"\n > Number of output frames: {self.decoder.r}") + logger.info("Number of output frames: %d", self.decoder.r) diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py index 0aa5edc647..dd0082315a 100644 --- a/TTS/tts/models/base_tts.py +++ b/TTS/tts/models/base_tts.py @@ -1,3 +1,4 @@ +import logging import os import random from typing import Dict, List, Tuple, Union @@ -18,6 +19,8 @@ from TTS.tts.utils.synthesis import synthesis from TTS.tts.utils.visual import plot_alignment, plot_spectrogram +logger = logging.getLogger(__name__) + # pylint: skip-file @@ -105,7 +108,7 @@ def init_multispeaker(self, config: Coqpit, data: List = None): ) # init speaker embedding layer if config.use_speaker_embedding and not config.use_d_vector_file: - print(" > Init speaker_embedding layer.") + logger.info("Init speaker_embedding layer.") self.speaker_embedding = nn.Embedding(self.num_speakers, self.embedded_speaker_dim) self.speaker_embedding.weight.data.normal_(0, 0.3) @@ -245,12 +248,12 @@ def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1): if getattr(config, "use_language_weighted_sampler", False): alpha = getattr(config, "language_weighted_sampler_alpha", 1.0) - print(" > Using Language weighted sampler with alpha:", alpha) + logger.info("Using Language weighted sampler with alpha: %.2f", alpha) weights = get_language_balancer_weights(data_items) * alpha if getattr(config, "use_speaker_weighted_sampler", False): alpha = getattr(config, "speaker_weighted_sampler_alpha", 1.0) - print(" > Using Speaker weighted sampler with alpha:", alpha) + logger.info("Using Speaker weighted sampler with alpha: %.2f", alpha) if weights is not None: weights += get_speaker_balancer_weights(data_items) * alpha else: @@ -258,7 +261,7 @@ def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1): if getattr(config, "use_length_weighted_sampler", False): alpha = getattr(config, "length_weighted_sampler_alpha", 1.0) - print(" > Using Length weighted sampler with alpha:", alpha) + logger.info("Using Length weighted sampler with alpha: %.2f", alpha) if weights is not None: weights += get_length_balancer_weights(data_items) * alpha else: @@ -390,7 +393,7 @@ def test_run(self, assets: Dict) -> Tuple[Dict, Dict]: Returns: Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard. 
""" - print(" | > Synthesizing test sentences.") + logger.info("Synthesizing test sentences.") test_audios = {} test_figures = {} test_sentences = self.config.test_sentences @@ -429,8 +432,8 @@ def on_init_start(self, trainer): if hasattr(trainer.config, "model_args"): trainer.config.model_args.speakers_file = output_path trainer.config.save_json(os.path.join(trainer.output_path, "config.json")) - print(f" > `speakers.pth` is saved to {output_path}.") - print(" > `speakers_file` is updated in the config.json.") + logger.info("`speakers.pth` is saved to: %s", output_path) + logger.info("`speakers_file` is updated in the config.json.") if self.language_manager is not None: output_path = os.path.join(trainer.output_path, "language_ids.json") @@ -439,8 +442,8 @@ def on_init_start(self, trainer): if hasattr(trainer.config, "model_args"): trainer.config.model_args.language_ids_file = output_path trainer.config.save_json(os.path.join(trainer.output_path, "config.json")) - print(f" > `language_ids.json` is saved to {output_path}.") - print(" > `language_ids_file` is updated in the config.json.") + logger.info("`language_ids.json` is saved to: %s", output_path) + logger.info("`language_ids_file` is updated in the config.json.") class BaseTTSE2E(BaseTTS): diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index a4aa563f48..91ef9a691f 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -1,3 +1,4 @@ +import logging import os from dataclasses import dataclass, field from itertools import chain @@ -36,6 +37,8 @@ from TTS.vocoder.models.hifigan_generator import HifiganGenerator from TTS.vocoder.utils.generic_utils import plot_results +logger = logging.getLogger(__name__) + def id_to_torch(aux_id, cuda=False): if aux_id is not None: @@ -162,9 +165,9 @@ def _wav_to_spec(y, n_fft, hop_length, win_length, center=False): y = y.squeeze(1) if torch.min(y) < -1.0: - print("min value is ", torch.min(y)) + logger.info("min value is %.3f", torch.min(y)) if torch.max(y) > 1.0: - print("max value is ", torch.max(y)) + logger.info("max value is %.3f", torch.max(y)) global hann_window # pylint: disable=global-statement dtype_device = str(y.dtype) + "_" + str(y.device) @@ -253,9 +256,9 @@ def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fm y = y.squeeze(1) if torch.min(y) < -1.0: - print("min value is ", torch.min(y)) + logger.info("min value is %.3f", torch.min(y)) if torch.max(y) > 1.0: - print("max value is ", torch.max(y)) + logger.info("max value is %.3f", torch.max(y)) global mel_basis, hann_window # pylint: disable=global-statement mel_basis_key = name_mel_basis(y, n_fft, fmax) @@ -408,7 +411,7 @@ def __getitem__(self, idx): try: token_ids = self.get_token_ids(idx, item["text"]) except: - print(idx, item) + logger.exception("%s %s", idx, item) # pylint: disable=raise-missing-from raise OSError f0 = None @@ -773,7 +776,7 @@ def init_multispeaker(self, config: Coqpit): def _init_speaker_embedding(self): # pylint: disable=attribute-defined-outside-init if self.num_speakers > 0: - print(" > initialization of speaker-embedding layers.") + logger.info("Initialization of speaker-embedding layers.") self.embedded_speaker_dim = self.args.speaker_embedding_channels self.args.embedded_speaker_dim = self.args.speaker_embedding_channels @@ -1291,7 +1294,7 @@ def test_run(self, assets) -> Tuple[Dict, Dict]: Returns: Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard. 
""" - print(" | > Synthesizing test sentences.") + logger.info("Synthesizing test sentences.") test_audios = {} test_figures = {} test_sentences = self.config.test_sentences @@ -1405,14 +1408,14 @@ def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1): data_items = dataset.samples if getattr(config, "use_weighted_sampler", False): for attr_name, alpha in config.weighted_sampler_attrs.items(): - print(f" > Using weighted sampler for attribute '{attr_name}' with alpha '{alpha}'") + logger.info("Using weighted sampler for attribute '%s' with alpha %.2f", attr_name, alpha) multi_dict = config.weighted_sampler_multipliers.get(attr_name, None) - print(multi_dict) + logger.info(multi_dict) weights, attr_names, attr_weights = get_attribute_balancer_weights( attr_name=attr_name, items=data_items, multi_dict=multi_dict ) weights = weights * alpha - print(f" > Attribute weights for '{attr_names}' \n | > {attr_weights}") + logger.info("Attribute weights for '%s' \n | > %s", attr_names, attr_weights) if weights is not None: sampler = WeightedRandomSampler(weights, len(weights)) diff --git a/TTS/tts/models/forward_tts.py b/TTS/tts/models/forward_tts.py index 1d3a13d433..b108a554d5 100644 --- a/TTS/tts/models/forward_tts.py +++ b/TTS/tts/models/forward_tts.py @@ -1,3 +1,4 @@ +import logging from dataclasses import dataclass, field from typing import Dict, List, Tuple, Union @@ -18,6 +19,8 @@ from TTS.tts.utils.visual import plot_alignment, plot_avg_energy, plot_avg_pitch, plot_spectrogram from TTS.utils.io import load_fsspec +logger = logging.getLogger(__name__) + @dataclass class ForwardTTSArgs(Coqpit): @@ -303,7 +306,7 @@ def init_multispeaker(self, config: Coqpit): self.proj_g = nn.Linear(in_features=self.args.d_vector_dim, out_features=self.args.hidden_channels) # init speaker embedding layer if config.use_speaker_embedding and not config.use_d_vector_file: - print(" > Init speaker_embedding layer.") + logger.info("Init speaker_embedding layer.") self.emb_g = nn.Embedding(self.num_speakers, self.args.hidden_channels) nn.init.uniform_(self.emb_g.weight, -0.1, 0.1) diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py index bfd1a2b618..90bc9f2ece 100644 --- a/TTS/tts/models/glow_tts.py +++ b/TTS/tts/models/glow_tts.py @@ -1,3 +1,4 @@ +import logging import math from typing import Dict, List, Tuple, Union @@ -18,6 +19,8 @@ from TTS.tts.utils.visual import plot_alignment, plot_spectrogram from TTS.utils.io import load_fsspec +logger = logging.getLogger(__name__) + class GlowTTS(BaseTTS): """GlowTTS model. @@ -127,7 +130,7 @@ def init_multispeaker(self, config: Coqpit): ), " [!] d-vector dimension mismatch b/w config and speaker manager." # init speaker embedding layer if config.use_speaker_embedding and not config.use_d_vector_file: - print(" > Init speaker_embedding layer.") + logger.info("Init speaker_embedding layer.") self.embedded_speaker_dim = self.hidden_channels_enc self.emb_g = nn.Embedding(self.num_speakers, self.hidden_channels_enc) nn.init.uniform_(self.emb_g.weight, -0.1, 0.1) @@ -479,13 +482,13 @@ def test_run(self, assets: Dict) -> Tuple[Dict, Dict]: Returns: Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard. """ - print(" | > Synthesizing test sentences.") + logger.info("Synthesizing test sentences.") test_audios = {} test_figures = {} test_sentences = self.config.test_sentences aux_inputs = self._get_test_aux_input() if len(test_sentences) == 0: - print(" | [!] 
No test sentences provided.") + logger.warning("No test sentences provided.") else: for idx, sen in enumerate(test_sentences): outputs = synthesis( diff --git a/TTS/tts/models/neuralhmm_tts.py b/TTS/tts/models/neuralhmm_tts.py index e241410872..6158d30382 100644 --- a/TTS/tts/models/neuralhmm_tts.py +++ b/TTS/tts/models/neuralhmm_tts.py @@ -1,3 +1,4 @@ +import logging import os from typing import Dict, List, Union @@ -19,6 +20,8 @@ from TTS.utils.generic_utils import format_aux_input from TTS.utils.io import load_fsspec +logger = logging.getLogger(__name__) + class NeuralhmmTTS(BaseTTS): """Neural HMM TTS model. @@ -266,14 +269,17 @@ def on_init_start(self, trainer): dataloader = trainer.get_train_dataloader( training_assets=None, samples=trainer.train_samples, verbose=False ) - print( - f" | > Data parameters not found for: {trainer.config.mel_statistics_parameter_path}. Computing mel normalization parameters..." + logger.info( + "Data parameters not found for: %s. Computing mel normalization parameters...", + trainer.config.mel_statistics_parameter_path, ) data_mean, data_std, init_transition_prob = OverflowUtils.get_data_parameters_for_flat_start( dataloader, trainer.config.out_channels, trainer.config.state_per_phone ) - print( - f" | > Saving data parameters to: {trainer.config.mel_statistics_parameter_path}: value: {data_mean, data_std, init_transition_prob}" + logger.info( + "Saving data parameters to: %s: value: %s", + trainer.config.mel_statistics_parameter_path, + (data_mean, data_std, init_transition_prob), ) statistics = { "mean": data_mean.item(), @@ -283,8 +289,9 @@ def on_init_start(self, trainer): torch.save(statistics, trainer.config.mel_statistics_parameter_path) else: - print( - f" | > Data parameters found for: {trainer.config.mel_statistics_parameter_path}. Loading mel normalization parameters..." + logger.info( + "Data parameters found for: %s. Loading mel normalization parameters...", + trainer.config.mel_statistics_parameter_path, ) statistics = torch.load(trainer.config.mel_statistics_parameter_path) data_mean, data_std, init_transition_prob = ( @@ -292,7 +299,7 @@ def on_init_start(self, trainer): statistics["std"], statistics["init_transition_prob"], ) - print(f" | > Data parameters loaded with value: {data_mean, data_std, init_transition_prob}") + logger.info("Data parameters loaded with value: %s", (data_mean, data_std, init_transition_prob)) trainer.config.flat_start_params["transition_p"] = ( init_transition_prob.item() if torch.is_tensor(init_transition_prob) else init_transition_prob @@ -318,7 +325,7 @@ def _create_logs(self, batch, outputs, ap): # pylint: disable=no-self-use, unus } # sample one item from the batch -1 will give the smalles item - print(" | > Synthesising audio from the model...") + logger.info("Synthesising audio from the model...") inference_output = self.inference( batch["text_input"][-1].unsqueeze(0), aux_input={"x_lengths": batch["text_lengths"][-1].unsqueeze(0)} ) diff --git a/TTS/tts/models/overflow.py b/TTS/tts/models/overflow.py index 92b3c767de..cc0c5cd3f0 100644 --- a/TTS/tts/models/overflow.py +++ b/TTS/tts/models/overflow.py @@ -1,3 +1,4 @@ +import logging import os from typing import Dict, List, Union @@ -20,6 +21,8 @@ from TTS.utils.generic_utils import format_aux_input from TTS.utils.io import load_fsspec +logger = logging.getLogger(__name__) + class Overflow(BaseTTS): """OverFlow TTS model. 
@@ -282,14 +285,17 @@ def on_init_start(self, trainer): dataloader = trainer.get_train_dataloader( training_assets=None, samples=trainer.train_samples, verbose=False ) - print( - f" | > Data parameters not found for: {trainer.config.mel_statistics_parameter_path}. Computing mel normalization parameters..." + logger.info( + "Data parameters not found for: %s. Computing mel normalization parameters...", + trainer.config.mel_statistics_parameter_path, ) data_mean, data_std, init_transition_prob = OverflowUtils.get_data_parameters_for_flat_start( dataloader, trainer.config.out_channels, trainer.config.state_per_phone ) - print( - f" | > Saving data parameters to: {trainer.config.mel_statistics_parameter_path}: value: {data_mean, data_std, init_transition_prob}" + logger.info( + "Saving data parameters to: %s: value: %s", + trainer.config.mel_statistics_parameter_path, + (data_mean, data_std, init_transition_prob), ) statistics = { "mean": data_mean.item(), @@ -299,8 +305,9 @@ def on_init_start(self, trainer): torch.save(statistics, trainer.config.mel_statistics_parameter_path) else: - print( - f" | > Data parameters found for: {trainer.config.mel_statistics_parameter_path}. Loading mel normalization parameters..." + logger.info( + "Data parameters found for: %s. Loading mel normalization parameters...", + trainer.config.mel_statistics_parameter_path, ) statistics = torch.load(trainer.config.mel_statistics_parameter_path) data_mean, data_std, init_transition_prob = ( @@ -308,7 +315,7 @@ def on_init_start(self, trainer): statistics["std"], statistics["init_transition_prob"], ) - print(f" | > Data parameters loaded with value: {data_mean, data_std, init_transition_prob}") + logger.info("Data parameters loaded with value: %s", (data_mean, data_std, init_transition_prob)) trainer.config.flat_start_params["transition_p"] = ( init_transition_prob.item() if torch.is_tensor(init_transition_prob) else init_transition_prob @@ -334,7 +341,7 @@ def _create_logs(self, batch, outputs, ap): # pylint: disable=no-self-use, unus } # sample one item from the batch -1 will give the smalles item - print(" | > Synthesising audio from the model...") + logger.info("Synthesising audio from the model...") inference_output = self.inference( batch["text_input"][-1].unsqueeze(0), aux_input={"x_lengths": batch["text_lengths"][-1].unsqueeze(0)} ) diff --git a/TTS/tts/models/tortoise.py b/TTS/tts/models/tortoise.py index 99e0107fdf..17303c69f7 100644 --- a/TTS/tts/models/tortoise.py +++ b/TTS/tts/models/tortoise.py @@ -1,3 +1,4 @@ +import logging import os import random from contextlib import contextmanager @@ -23,6 +24,8 @@ from TTS.tts.layers.tortoise.wav2vec_alignment import Wav2VecAlignment from TTS.tts.models.base_tts import BaseTTS +logger = logging.getLogger(__name__) + def pad_or_truncate(t, length): """ @@ -100,7 +103,7 @@ def fix_autoregressive_output(codes, stop_token, complain=True): stop_token_indices = (codes == stop_token).nonzero() if len(stop_token_indices) == 0: if complain: - print( + logger.warning( "No stop tokens found in one of the generated voice clips. This typically means the spoken audio is " "too long. In some cases, the output will still be good, though. Listen to it and if it is missing words, " "try breaking up your input text." 
@@ -713,8 +716,7 @@ def inference( 83 # This is the token for coding silence, which is fixed in place with "fix_autoregressive_output" ) self.autoregressive = self.autoregressive.to(self.device) - if verbose: - print("Generating autoregressive samples..") + logger.info("Generating autoregressive samples..") with ( self.temporary_cuda(self.autoregressive) as autoregressive, torch.autocast(device_type="cuda", dtype=torch.float16, enabled=half), @@ -775,8 +777,7 @@ def inference( ) del auto_conditioning - if verbose: - print("Transforming autoregressive outputs into audio..") + logger.info("Transforming autoregressive outputs into audio..") wav_candidates = [] for b in range(best_results.shape[0]): codes = best_results[b].unsqueeze(0) diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index 9bc743b213..eea9b59eb7 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -1,3 +1,4 @@ +import logging import math import os from dataclasses import dataclass, field, replace @@ -38,6 +39,8 @@ from TTS.vocoder.models.hifigan_generator import HifiganGenerator from TTS.vocoder.utils.generic_utils import plot_results +logger = logging.getLogger(__name__) + ############################## # IO / Feature extraction ############################## @@ -104,9 +107,9 @@ def wav_to_spec(y, n_fft, hop_length, win_length, center=False): y = y.squeeze(1) if torch.min(y) < -1.0: - print("min value is ", torch.min(y)) + logger.info("min value is %.3f", torch.min(y)) if torch.max(y) > 1.0: - print("max value is ", torch.max(y)) + logger.info("max value is %.3f", torch.max(y)) global hann_window dtype_device = str(y.dtype) + "_" + str(y.device) @@ -170,9 +173,9 @@ def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fm y = y.squeeze(1) if torch.min(y) < -1.0: - print("min value is ", torch.min(y)) + logger.info("min value is %.3f", torch.min(y)) if torch.max(y) > 1.0: - print("max value is ", torch.max(y)) + logger.info("max value is %.3f", torch.max(y)) global mel_basis, hann_window dtype_device = str(y.dtype) + "_" + str(y.device) @@ -764,7 +767,7 @@ def init_multispeaker(self, config: Coqpit): ) self.speaker_manager.encoder.eval() - print(" > External Speaker Encoder Loaded !!") + logger.info("External Speaker Encoder Loaded !!") if ( hasattr(self.speaker_manager.encoder, "audio_config") @@ -778,7 +781,7 @@ def init_multispeaker(self, config: Coqpit): def _init_speaker_embedding(self): # pylint: disable=attribute-defined-outside-init if self.num_speakers > 0: - print(" > initialization of speaker-embedding layers.") + logger.info("Initialization of speaker-embedding layers.") self.embedded_speaker_dim = self.args.speaker_embedding_channels self.emb_g = nn.Embedding(self.num_speakers, self.embedded_speaker_dim) @@ -798,7 +801,7 @@ def init_multilingual(self, config: Coqpit): self.language_manager = LanguageManager(language_ids_file_path=config.language_ids_file) if self.args.use_language_embedding and self.language_manager: - print(" > initialization of language-embedding layers.") + logger.info("Initialization of language-embedding layers.") self.num_languages = self.language_manager.num_languages self.embedded_language_dim = self.args.embedded_language_dim self.emb_l = nn.Embedding(self.num_languages, self.embedded_language_dim) @@ -833,7 +836,7 @@ def on_init_end(self, trainer): # pylint: disable=W0613 for key, value in after_dict.items(): if value == before_dict[key]: raise RuntimeError(" [!] 
The weights of Duration Predictor was not reinit check it !") - print(" > Duration Predictor was reinit.") + logger.info("Duration Predictor was reinit.") if self.args.reinit_text_encoder: before_dict = get_module_weights_sum(self.text_encoder) @@ -843,7 +846,7 @@ def on_init_end(self, trainer): # pylint: disable=W0613 for key, value in after_dict.items(): if value == before_dict[key]: raise RuntimeError(" [!] The weights of Text Encoder was not reinit check it !") - print(" > Text Encoder was reinit.") + logger.info("Text Encoder was reinit.") def get_aux_input(self, aux_input: Dict): sid, g, lid, _ = self._set_cond_input(aux_input) @@ -1437,7 +1440,7 @@ def test_run(self, assets) -> Tuple[Dict, Dict]: Returns: Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard. """ - print(" | > Synthesizing test sentences.") + logger.info("Synthesizing test sentences.") test_audios = {} test_figures = {} test_sentences = self.config.test_sentences @@ -1554,14 +1557,14 @@ def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1, is_eval=F data_items = dataset.samples if getattr(config, "use_weighted_sampler", False): for attr_name, alpha in config.weighted_sampler_attrs.items(): - print(f" > Using weighted sampler for attribute '{attr_name}' with alpha '{alpha}'") + logger.info("Using weighted sampler for attribute '%s' with alpha %.3f", attr_name, alpha) multi_dict = config.weighted_sampler_multipliers.get(attr_name, None) - print(multi_dict) + logger.info(multi_dict) weights, attr_names, attr_weights = get_attribute_balancer_weights( attr_name=attr_name, items=data_items, multi_dict=multi_dict ) weights = weights * alpha - print(f" > Attribute weights for '{attr_names}' \n | > {attr_weights}") + logger.info("Attribute weights for '%s' \n | > %s", attr_names, attr_weights) # input_audio_lenghts = [os.path.getsize(x["audio_file"]) for x in data_items] @@ -1719,7 +1722,7 @@ def load_checkpoint( # handle fine-tuning from a checkpoint with additional speakers if hasattr(self, "emb_g") and state["model"]["emb_g.weight"].shape != self.emb_g.weight.shape: num_new_speakers = self.emb_g.weight.shape[0] - state["model"]["emb_g.weight"].shape[0] - print(f" > Loading checkpoint with {num_new_speakers} additional speakers.") + logger.info("Loading checkpoint with %d additional speakers.", num_new_speakers) emb_g = state["model"]["emb_g.weight"] new_row = torch.randn(num_new_speakers, emb_g.shape[1]) emb_g = torch.cat([emb_g, new_row], axis=0) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 1c73c42ce9..df49cf54fd 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -1,3 +1,4 @@ +import logging import os from dataclasses import dataclass @@ -15,6 +16,8 @@ from TTS.tts.models.base_tts import BaseTTS from TTS.utils.io import load_fsspec +logger = logging.getLogger(__name__) + init_stream_support() @@ -82,7 +85,7 @@ def load_audio(audiopath, sampling_rate): # Check some assumptions about audio range. This should be automatically fixed in load_wav_to_torch, but might not be in some edge cases, where we should squawk. # '10' is arbitrarily chosen since it seems like audio will often "overdrive" the [-1,1] bounds. if torch.any(audio > 10) or not torch.any(audio < 0): - print(f"Error with {audiopath}. Max={audio.max()} min={audio.min()}") + logger.error("Error with %s. 
Max=%.2f min=%.2f", audiopath, audio.max(), audio.min()) # clip audio invalid values audio.clip_(-1, 1) return audio diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py index e49695268d..5229af81c5 100644 --- a/TTS/tts/utils/speakers.py +++ b/TTS/tts/utils/speakers.py @@ -1,4 +1,5 @@ import json +import logging import os from typing import Any, Dict, List, Union @@ -10,6 +11,8 @@ from TTS.config import get_from_config_or_model_args_with_default from TTS.tts.utils.managers import EmbeddingManager +logger = logging.getLogger(__name__) + class SpeakerManager(EmbeddingManager): """Manage the speakers for multi-speaker 🐸TTS models. Load a datafile and parse the information @@ -170,7 +173,9 @@ def get_speaker_manager(c: Coqpit, data: List = None, restore_path: str = None, if c.use_d_vector_file: # restore speaker manager with the embedding file if not os.path.exists(speakers_file): - print("WARNING: speakers.json was not found in restore_path, trying to use CONFIG.d_vector_file") + logger.warning( + "speakers.json was not found in %s, trying to use CONFIG.d_vector_file", restore_path + ) if not os.path.exists(c.d_vector_file): raise RuntimeError( "You must copy the file speakers.json to restore_path, or set a valid file in CONFIG.d_vector_file" @@ -193,16 +198,16 @@ def get_speaker_manager(c: Coqpit, data: List = None, restore_path: str = None, speaker_manager.load_ids_from_file(c.speakers_file) if speaker_manager.num_speakers > 0: - print( - " > Speaker manager is loaded with {} speakers: {}".format( - speaker_manager.num_speakers, ", ".join(speaker_manager.name_to_id) - ) + logger.info( + "Speaker manager is loaded with %d speakers: %s", + speaker_manager.num_speakers, + ", ".join(speaker_manager.name_to_id), ) # save file if path is defined if out_path: out_file_path = os.path.join(out_path, "speakers.json") - print(f" > Saving `speakers.json` to {out_file_path}.") + logger.info("Saving `speakers.json` to %s", out_file_path) if c.use_d_vector_file and c.d_vector_file: speaker_manager.save_embeddings_to_file(out_file_path) else: diff --git a/TTS/tts/utils/text/characters.py b/TTS/tts/utils/text/characters.py index 37c7a7ca23..c622b93c59 100644 --- a/TTS/tts/utils/text/characters.py +++ b/TTS/tts/utils/text/characters.py @@ -1,8 +1,11 @@ +import logging from dataclasses import replace from typing import Dict from TTS.tts.configs.shared_configs import CharactersConfig +logger = logging.getLogger(__name__) + def parse_symbols(): return { @@ -305,14 +308,14 @@ def print_log(self, level: int = 0): Prints the vocabulary in a nice format. 
""" indent = "\t" * level - print(f"{indent}| > Characters: {self._characters}") - print(f"{indent}| > Punctuations: {self._punctuations}") - print(f"{indent}| > Pad: {self._pad}") - print(f"{indent}| > EOS: {self._eos}") - print(f"{indent}| > BOS: {self._bos}") - print(f"{indent}| > Blank: {self._blank}") - print(f"{indent}| > Vocab: {self.vocab}") - print(f"{indent}| > Num chars: {self.num_chars}") + logger.info("%s| Characters: %s", indent, self._characters) + logger.info("%s| Punctuations: %s", indent, self._punctuations) + logger.info("%s| Pad: %s", indent, self._pad) + logger.info("%s| EOS: %s", indent, self._eos) + logger.info("%s| BOS: %s", indent, self._bos) + logger.info("%s| Blank: %s", indent, self._blank) + logger.info("%s| Vocab: %s", indent, self.vocab) + logger.info("%s| Num chars: %d", indent, self.num_chars) @staticmethod def init_from_config(config: "Coqpit"): # pylint: disable=unused-argument diff --git a/TTS/tts/utils/text/phonemizers/base.py b/TTS/tts/utils/text/phonemizers/base.py index 4fc7987415..5e701df458 100644 --- a/TTS/tts/utils/text/phonemizers/base.py +++ b/TTS/tts/utils/text/phonemizers/base.py @@ -1,8 +1,11 @@ import abc +import logging from typing import List, Tuple from TTS.tts.utils.text.punctuation import Punctuation +logger = logging.getLogger(__name__) + class BasePhonemizer(abc.ABC): """Base phonemizer class @@ -136,5 +139,5 @@ def phonemize(self, text: str, separator="|", language: str = None) -> str: # p def print_logs(self, level: int = 0): indent = "\t" * level - print(f"{indent}| > phoneme language: {self.language}") - print(f"{indent}| > phoneme backend: {self.name()}") + logger.info("%s| phoneme language: %s", indent, self.language) + logger.info("%s| phoneme backend: %s", indent, self.name()) diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index 328e52f369..d1d2335037 100644 --- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -8,6 +8,8 @@ from TTS.tts.utils.text.phonemizers.base import BasePhonemizer from TTS.tts.utils.text.punctuation import Punctuation +logger = logging.getLogger(__name__) + def is_tool(name): from shutil import which @@ -53,7 +55,7 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]: "1", # UTF8 text encoding ] cmd.extend(args) - logging.debug("espeakng: executing %s", repr(cmd)) + logger.debug("espeakng: executing %s", repr(cmd)) with subprocess.Popen( cmd, @@ -189,7 +191,7 @@ def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str: # compute phonemes phonemes = "" for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True): - logging.debug("line: %s", repr(line)) + logger.debug("line: %s", repr(line)) ph_decoded = line.decode("utf8").strip() # espeak: # version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n" @@ -227,7 +229,7 @@ def supported_languages() -> Dict: lang_code = cols[1] lang_name = cols[3] langs[lang_code] = lang_name - logging.debug("line: %s", repr(line)) + logger.debug("line: %s", repr(line)) count += 1 return langs @@ -240,7 +242,7 @@ def version(self) -> str: args = ["--version"] for line in _espeak_exe(self.backend, args, sync=True): version = line.decode("utf8").strip().split()[2] - logging.debug("line: %s", repr(line)) + logger.debug("line: %s", repr(line)) return version @classmethod diff --git a/TTS/tts/utils/text/phonemizers/multi_phonemizer.py b/TTS/tts/utils/text/phonemizers/multi_phonemizer.py index 
62a9c39322..1a9e98b091 100644 --- a/TTS/tts/utils/text/phonemizers/multi_phonemizer.py +++ b/TTS/tts/utils/text/phonemizers/multi_phonemizer.py @@ -1,7 +1,10 @@ +import logging from typing import Dict, List from TTS.tts.utils.text.phonemizers import DEF_LANG_TO_PHONEMIZER, get_phonemizer_by_name +logger = logging.getLogger(__name__) + class MultiPhonemizer: """🐸TTS multi-phonemizer that operates phonemizers for multiple langugages @@ -46,8 +49,8 @@ def supported_languages(self) -> List: def print_logs(self, level: int = 0): indent = "\t" * level - print(f"{indent}| > phoneme language: {self.supported_languages()}") - print(f"{indent}| > phoneme backend: {self.name()}") + logger.info("%s| phoneme language: %s", indent, self.supported_languages()) + logger.info("%s| phoneme backend: %s", indent, self.name()) # if __name__ == "__main__": diff --git a/TTS/tts/utils/text/tokenizer.py b/TTS/tts/utils/text/tokenizer.py index b7faf86e8a..9aff7dd4bb 100644 --- a/TTS/tts/utils/text/tokenizer.py +++ b/TTS/tts/utils/text/tokenizer.py @@ -1,3 +1,4 @@ +import logging from typing import Callable, Dict, List, Union from TTS.tts.utils.text import cleaners @@ -6,6 +7,8 @@ from TTS.tts.utils.text.phonemizers.multi_phonemizer import MultiPhonemizer from TTS.utils.generic_utils import get_import_path, import_class +logger = logging.getLogger(__name__) + class TTSTokenizer: """🐸TTS tokenizer to convert input characters to token IDs and back. @@ -73,8 +76,8 @@ def encode(self, text: str) -> List[int]: # discard but store not found characters if char not in self.not_found_characters: self.not_found_characters.append(char) - print(text) - print(f" [!] Character {repr(char)} not found in the vocabulary. Discarding it.") + logger.warning(text) + logger.warning("Character %s not found in the vocabulary. 
Discarding it.", repr(char)) return token_ids def decode(self, token_ids: List[int]) -> str: @@ -135,16 +138,16 @@ def intersperse_blank_char(self, char_sequence: List[str], use_blank_char: bool def print_logs(self, level: int = 0): indent = "\t" * level - print(f"{indent}| > add_blank: {self.add_blank}") - print(f"{indent}| > use_eos_bos: {self.use_eos_bos}") - print(f"{indent}| > use_phonemes: {self.use_phonemes}") + logger.info("%s| add_blank: %s", indent, self.add_blank) + logger.info("%s| use_eos_bos: %s", indent, self.use_eos_bos) + logger.info("%s| use_phonemes: %s", indent, self.use_phonemes) if self.use_phonemes: - print(f"{indent}| > phonemizer:") + logger.info("%s| phonemizer:", indent) self.phonemizer.print_logs(level + 1) if len(self.not_found_characters) > 0: - print(f"{indent}| > {len(self.not_found_characters)} not found characters:") + logger.info("%s| %d characters not found:", indent, len(self.not_found_characters)) for char in self.not_found_characters: - print(f"{indent}| > {char}") + logger.info("%s| %s", indent, char) @staticmethod def init_from_config(config: "Coqpit", characters: "BaseCharacters" = None): diff --git a/TTS/utils/audio/numpy_transforms.py b/TTS/utils/audio/numpy_transforms.py index af88569fc3..4a8972480c 100644 --- a/TTS/utils/audio/numpy_transforms.py +++ b/TTS/utils/audio/numpy_transforms.py @@ -1,3 +1,4 @@ +import logging from io import BytesIO from typing import Tuple @@ -7,6 +8,8 @@ import soundfile as sf from librosa import magphase, pyin +logger = logging.getLogger(__name__) + # For using kwargs # pylint: disable=unused-argument @@ -222,7 +225,7 @@ def griffin_lim(*, spec: np.ndarray = None, num_iter=60, **kwargs) -> np.ndarray S_complex = np.abs(spec).astype(complex) y = istft(y=S_complex * angles, **kwargs) if not np.isfinite(y).all(): - print(" [!] Waveform is not finite everywhere. Skipping the GL.") + logger.warning("Waveform is not finite everywhere. Skipping the GL.") return np.array([0.0]) for _ in range(num_iter): angles = np.exp(1j * np.angle(stft(y=y, **kwargs))) diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py index c53bad562e..e2eb924e57 100644 --- a/TTS/utils/audio/processor.py +++ b/TTS/utils/audio/processor.py @@ -1,3 +1,4 @@ +import logging from io import BytesIO from typing import Dict, Tuple @@ -26,6 +27,8 @@ volume_norm, ) +logger = logging.getLogger(__name__) + # pylint: disable=too-many-public-methods @@ -229,9 +232,9 @@ def __init__( ), f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}" members = vars(self) if verbose: - print(" > Setting up Audio Processor...") + logger.info("Setting up Audio Processor...") for key, value in members.items(): - print(" | > {}:{}".format(key, value)) + logger.info(" | %s: %s", key, value) # create spectrogram utils self.mel_basis = build_mel_basis( sample_rate=self.sample_rate, @@ -595,7 +598,7 @@ def load_wav(self, filename: str, sr: int = None) -> np.ndarray: try: x = self.trim_silence(x) except ValueError: - print(f" [!] 
File cannot be trimmed for silence - {filename}") + logger.exception("File cannot be trimmed for silence - %s", filename) if self.do_sound_norm: x = self.sound_norm(x) if self.do_rms_norm: diff --git a/TTS/utils/download.py b/TTS/utils/download.py index 37e6ed3cee..e94b1d68c8 100644 --- a/TTS/utils/download.py +++ b/TTS/utils/download.py @@ -12,6 +12,8 @@ from torch.utils.model_zoo import tqdm +logger = logging.getLogger(__name__) + def stream_url( url: str, start_byte: Optional[int] = None, block_size: int = 32 * 1024, progress_bar: bool = True @@ -149,20 +151,20 @@ def extract_archive(from_path: str, to_path: Optional[str] = None, overwrite: bo Returns: list: List of paths to extracted files even if not overwritten. """ - + logger.info("Extracting archive file...") if to_path is None: to_path = os.path.dirname(from_path) try: with tarfile.open(from_path, "r") as tar: - logging.info("Opened tar file %s.", from_path) + logger.info("Opened tar file %s.", from_path) files = [] for file_ in tar: # type: Any file_path = os.path.join(to_path, file_.name) if file_.isfile(): files.append(file_path) if os.path.exists(file_path): - logging.info("%s already extracted.", file_path) + logger.info("%s already extracted.", file_path) if not overwrite: continue tar.extract(file_, to_path) @@ -172,12 +174,12 @@ def extract_archive(from_path: str, to_path: Optional[str] = None, overwrite: bo try: with zipfile.ZipFile(from_path, "r") as zfile: - logging.info("Opened zip file %s.", from_path) + logger.info("Opened zip file %s.", from_path) files = zfile.namelist() for file_ in files: file_path = os.path.join(to_path, file_) if os.path.exists(file_path): - logging.info("%s already extracted.", file_path) + logger.info("%s already extracted.", file_path) if not overwrite: continue zfile.extract(file_, to_path) @@ -201,9 +203,10 @@ def download_kaggle_dataset(dataset_path: str, dataset_name: str, output_path: s import kaggle # pylint: disable=import-outside-toplevel kaggle.api.authenticate() - print(f"""\nDownloading {dataset_name}...""") + logger.info("Downloading %s...", dataset_name) kaggle.api.dataset_download_files(dataset_path, path=data_path, unzip=True) except OSError: - print( - f"""[!] 
in order to download kaggle datasets, you need to have a kaggle api token stored in your {os.path.join(expanduser('~'), '.kaggle/kaggle.json')}""" + logger.exception( + "In order to download kaggle datasets, you need to have a kaggle api token stored in your %s", + os.path.join(expanduser("~"), ".kaggle/kaggle.json"), ) diff --git a/TTS/utils/downloaders.py b/TTS/utils/downloaders.py index 104dc7b94e..8705873982 100644 --- a/TTS/utils/downloaders.py +++ b/TTS/utils/downloaders.py @@ -1,8 +1,11 @@ +import logging import os from typing import Optional from TTS.utils.download import download_kaggle_dataset, download_url, extract_archive +logger = logging.getLogger(__name__) + def download_ljspeech(path: str): """Download and extract LJSpeech dataset @@ -15,7 +18,6 @@ def download_ljspeech(path: str): download_url(url, path) basename = os.path.basename(url) archive = os.path.join(path, basename) - print(" > Extracting archive file...") extract_archive(archive) @@ -35,7 +37,6 @@ def download_vctk(path: str, use_kaggle: Optional[bool] = False): download_url(url, path) basename = os.path.basename(url) archive = os.path.join(path, basename) - print(" > Extracting archive file...") extract_archive(archive) @@ -71,19 +72,17 @@ def download_libri_tts(path: str, subset: Optional[str] = "all"): os.makedirs(path, exist_ok=True) if subset == "all": for sub, val in subset_dict.items(): - print(f" > Downloading {sub}...") + logger.info("Downloading %s...", sub) download_url(val, path) basename = os.path.basename(val) archive = os.path.join(path, basename) - print(" > Extracting archive file...") extract_archive(archive) - print(" > All subsets downloaded") + logger.info("All subsets downloaded") else: url = subset_dict[subset] download_url(url, path) basename = os.path.basename(url) archive = os.path.join(path, basename) - print(" > Extracting archive file...") extract_archive(archive) @@ -98,7 +97,6 @@ def download_thorsten_de(path: str): download_url(url, path) basename = os.path.basename(url) archive = os.path.join(path, basename) - print(" > Extracting archive file...") extract_archive(archive) @@ -122,5 +120,4 @@ def download_mailabs(path: str, language: str = "english"): download_url(url, path) basename = os.path.basename(url) archive = os.path.join(path, basename) - print(" > Extracting archive file...") extract_archive(archive) diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index e0cd3ad85f..a2af8ffbb3 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -9,6 +9,8 @@ from pathlib import Path from typing import Dict +logger = logging.getLogger(__name__) + # TODO: This method is duplicated in Trainer but out of date there def get_git_branch(): @@ -91,7 +93,7 @@ def set_init_dict(model_dict, checkpoint_state, c): # Partial initialization: if there is a mismatch with new and old layer, it is skipped. for k, v in checkpoint_state.items(): if k not in model_dict: - print(" | > Layer missing in the model definition: {}".format(k)) + logger.warning("Layer missing in the model definition: %s", k) # 1. filter out unnecessary keys pretrained_dict = {k: v for k, v in checkpoint_state.items() if k in model_dict} # 2. filter out different size layers @@ -102,7 +104,7 @@ def set_init_dict(model_dict, checkpoint_state, c): pretrained_dict = {k: v for k, v in pretrained_dict.items() if reinit_layer_name not in k} # 4.
overwrite entries in the existing state dict model_dict.update(pretrained_dict) - print(" | > {} / {} layers are restored.".format(len(pretrained_dict), len(model_dict))) + logger.info("%d / %d layers are restored.", len(pretrained_dict), len(model_dict)) return model_dict diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index ca16183d37..0dfb501269 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -1,4 +1,5 @@ import json +import logging import os import re import tarfile @@ -14,6 +15,8 @@ from TTS.config import load_config, read_json_with_comments from TTS.utils.generic_utils import get_user_data_dir +logger = logging.getLogger(__name__) + LICENSE_URLS = { "cc by-nc-nd 4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/", "mpl": "https://www.mozilla.org/en-US/MPL/2.0/", @@ -69,18 +72,17 @@ def read_models_file(self, file_path): def _list_models(self, model_type, model_count=0): if self.verbose: - print("\n Name format: type/language/dataset/model") + logger.info("") + logger.info("Name format: type/language/dataset/model") model_list = [] for lang in self.models_dict[model_type]: for dataset in self.models_dict[model_type][lang]: for model in self.models_dict[model_type][lang][dataset]: model_full_name = f"{model_type}--{lang}--{dataset}--{model}" - output_path = os.path.join(self.output_prefix, model_full_name) if self.verbose: - if os.path.exists(output_path): - print(f" {model_count}: {model_type}/{lang}/{dataset}/{model} [already downloaded]") - else: - print(f" {model_count}: {model_type}/{lang}/{dataset}/{model}") + output_path = Path(self.output_prefix) / model_full_name + downloaded = " [already downloaded]" if output_path.is_dir() else "" + logger.info(" %2d: %s/%s/%s/%s%s", model_count, model_type, lang, dataset, model, downloaded) model_list.append(f"{model_type}/{lang}/{dataset}/{model}") model_count += 1 return model_list @@ -197,18 +199,18 @@ def list_vc_models(self): def list_langs(self): """Print all the available languages""" - print(" Name format: type/language") + logger.info("Name format: type/language") for model_type in self.models_dict: for lang in self.models_dict[model_type]: - print(f" >: {model_type}/{lang} ") + logger.info(" %s/%s", model_type, lang) def list_datasets(self): """Print all the datasets""" - print(" Name format: type/language/dataset") + logger.info("Name format: type/language/dataset") for model_type in self.models_dict: for lang in self.models_dict[model_type]: for dataset in self.models_dict[model_type][lang]: - print(f" >: {model_type}/{lang}/{dataset}") + logger.info(" %s/%s/%s", model_type, lang, dataset) @staticmethod def print_model_license(model_item: Dict): @@ -218,13 +220,13 @@ def print_model_license(model_item: Dict): model_item (dict): model item in the models.json """ if "license" in model_item and model_item["license"].strip() != "": - print(f" > Model's license - {model_item['license']}") + logger.info("Model's license - %s", model_item["license"]) if model_item["license"].lower() in LICENSE_URLS: - print(f" > Check {LICENSE_URLS[model_item['license'].lower()]} for more info.") + logger.info("Check %s for more info.", LICENSE_URLS[model_item["license"].lower()]) else: - print(" > Check https://opensource.org/licenses for more info.") + logger.info("Check https://opensource.org/licenses for more info.") else: - print(" > Model's license - No license information available") + logger.info("Model's license - No license information available") def _download_github_model(self, model_item: Dict, output_path: 
str): if isinstance(model_item["github_rls_url"], list): @@ -336,7 +338,7 @@ def create_dir_and_download_model(self, model_name, model_item, output_path): if not self.ask_tos(output_path): os.rmdir(output_path) raise Exception(" [!] You must agree to the terms of service to use this model.") - print(f" > Downloading model to {output_path}") + logger.info("Downloading model to %s", output_path) try: if "fairseq" in model_name: self.download_fairseq_model(model_name, output_path) @@ -346,7 +348,7 @@ def create_dir_and_download_model(self, model_name, model_item, output_path): self._download_hf_model(model_item, output_path) except requests.RequestException as e: - print(f" > Failed to download the model file to {output_path}") + logger.exception("Failed to download the model file to %s", output_path) rmtree(output_path) raise e self.print_model_license(model_item=model_item) @@ -364,7 +366,7 @@ def check_if_configs_are_equal(self, model_name, model_item, output_path): config_remote = json.load(f) if not config_local == config_remote: - print(f" > {model_name} is already downloaded however it has been changed. Redownloading it...") + logger.info("%s is already downloaded however it has been changed. Redownloading it...", model_name) self.create_dir_and_download_model(model_name, model_item, output_path) def download_model(self, model_name): @@ -390,12 +392,12 @@ def download_model(self, model_name): if os.path.isfile(md5sum_file): with open(md5sum_file, mode="r") as f: if not f.read() == md5sum: - print(f" > {model_name} has been updated, clearing model cache...") + logger.info("%s has been updated, clearing model cache...", model_name) self.create_dir_and_download_model(model_name, model_item, output_path) else: - print(f" > {model_name} is already downloaded.") + logger.info("%s is already downloaded.", model_name) else: - print(f" > {model_name} has been updated, clearing model cache...") + logger.info("%s has been updated, clearing model cache...", model_name) self.create_dir_and_download_model(model_name, model_item, output_path) # if the configs are different, redownload it # ToDo: we need a better way to handle it @@ -405,7 +407,7 @@ def download_model(self, model_name): except: pass else: - print(f" > {model_name} is already downloaded.") + logger.info("%s is already downloaded.", model_name) else: self.create_dir_and_download_model(model_name, model_item, output_path) @@ -544,7 +546,7 @@ def _download_zip_file(file_url, output_folder, progress_bar): z.extractall(output_folder) os.remove(temp_zip_name) # delete zip after extract except zipfile.BadZipFile: - print(f" > Error: Bad zip file - {file_url}") + logger.exception("Bad zip file - %s", file_url) raise zipfile.BadZipFile # pylint: disable=raise-missing-from # move the files to the outer path for file_path in z.namelist(): @@ -580,7 +582,7 @@ def _download_tar_file(file_url, output_folder, progress_bar): tar_names = t.getnames() os.remove(temp_tar_name) # delete tar after extract except tarfile.ReadError: - print(f" > Error: Bad tar file - {file_url}") + logger.exception("Bad tar file - %s", file_url) raise tarfile.ReadError # pylint: disable=raise-missing-from # move the files to the outer path for file_path in os.listdir(os.path.join(output_folder, tar_names[0])): diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index 6165fb5e8a..2bb1e39cb2 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -1,3 +1,4 @@ +import logging import os import time from typing import List @@ -21,6 +22,8 @@ from 
TTS.vocoder.models import setup_model as setup_vocoder_model from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input +logger = logging.getLogger(__name__) + class Synthesizer(nn.Module): def __init__( @@ -294,9 +297,9 @@ def tts( if text: sens = [text] if split_sentences: - print(" > Text splitted to sentences.") sens = self.split_into_sentences(text) - print(sens) + logger.info("Text split into sentences.") + logger.info("Input: %s", sens) # handle multi-speaker if "voice_dir" in kwargs: @@ -420,7 +423,7 @@ def tts( self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate, ] if scale_factor[1] != 1: - print(" > interpolating tts model output.") + logger.info("Interpolating TTS model output.") vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input) else: vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) # pylint: disable=not-callable @@ -484,7 +487,7 @@ def tts( self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate, ] if scale_factor[1] != 1: - print(" > interpolating tts model output.") + logger.info("Interpolating TTS model output.") vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input) else: vocoder_input = torch.tensor(vocoder_input).unsqueeze(0) # pylint: disable=not-callable @@ -500,6 +503,6 @@ def tts( # compute stats process_time = time.time() - start_time audio_time = len(wavs) / self.tts_config.audio["sample_rate"] - print(f" > Processing time: {process_time}") - print(f" > Real-time factor: {process_time / audio_time}") + logger.info("Processing time: %.3f", process_time) + logger.info("Real-time factor: %.3f", process_time / audio_time) return wavs diff --git a/TTS/utils/training.py b/TTS/utils/training.py index b51f55e92b..57885005f1 100644 --- a/TTS/utils/training.py +++ b/TTS/utils/training.py @@ -1,6 +1,10 @@ +import logging + import numpy as np import torch +logger = logging.getLogger(__name__) + def check_update(model, grad_clip, ignore_stopnet=False, amp_opt_params=None): r"""Check model gradient against unexpected jumps and failures""" @@ -21,11 +25,11 @@ def check_update(model, grad_clip, ignore_stopnet=False, amp_opt_params=None): # compatibility with different torch versions if isinstance(grad_norm, float): if np.isinf(grad_norm): - print(" | > Gradient is INF !!") + logger.warning("Gradient is INF !!") skip_flag = True else: if torch.isinf(grad_norm): - print(" | > Gradient is INF !!") + logger.warning("Gradient is INF !!") skip_flag = True return grad_norm, skip_flag diff --git a/TTS/utils/vad.py b/TTS/utils/vad.py index aefce2b50b..49c8dc6b66 100644 --- a/TTS/utils/vad.py +++ b/TTS/utils/vad.py @@ -1,6 +1,10 @@ +import logging + import torch import torchaudio +logger = logging.getLogger(__name__) + def read_audio(path): wav, sr = torchaudio.load(path) @@ -54,8 +58,8 @@ def remove_silence( # read ground truth wav and resample the audio for the VAD try: wav, gt_sample_rate = read_audio(audio_path) - except: - print(f"> ❗ Failed to read {audio_path}") + except Exception: + logger.exception("Failed to read %s", audio_path) return None, False # if needed, resample the audio for the VAD model @@ -80,7 +84,7 @@ def remove_silence( wav = collect_chunks(new_speech_timestamps, wav) is_speech = True else: - print(f"> The file {audio_path} probably does not have speech please check it !!") + logger.warning("The file %s probably does not have speech please check it!", audio_path) is_speech = False # save diff --git a/TTS/vc/models/__init__.py b/TTS/vc/models/__init__.py index 
5a09b4e53e..a498b292b7 100644 --- a/TTS/vc/models/__init__.py +++ b/TTS/vc/models/__init__.py @@ -1,7 +1,10 @@ import importlib +import logging import re from typing import Dict, List, Union +logger = logging.getLogger(__name__) + def to_camel(text): text = text.capitalize() @@ -9,7 +12,7 @@ def to_camel(text): def setup_model(config: "Coqpit", samples: Union[List[List], List[Dict]] = None) -> "BaseVC": - print(" > Using model: {}".format(config.model)) + logger.info("Using model: %s", config.model) # fetch the right model implementation. if "model" in config and config["model"].lower() == "freevc": MyModel = importlib.import_module("TTS.vc.models.freevc").FreeVC diff --git a/TTS/vc/models/base_vc.py b/TTS/vc/models/base_vc.py index 78f1556b71..d68d8364cd 100644 --- a/TTS/vc/models/base_vc.py +++ b/TTS/vc/models/base_vc.py @@ -1,3 +1,4 @@ +import logging import os import random from typing import Dict, List, Tuple, Union @@ -20,6 +21,8 @@ # pylint: skip-file +logger = logging.getLogger(__name__) + class BaseVC(BaseTrainerModel): """Base `vc` class. Every new `vc` model must inherit this. @@ -93,7 +96,7 @@ def init_multispeaker(self, config: Coqpit, data: List = None): ) # init speaker embedding layer if config.use_speaker_embedding and not config.use_d_vector_file: - print(" > Init speaker_embedding layer.") + logger.info("Init speaker_embedding layer.") self.speaker_embedding = nn.Embedding(self.num_speakers, self.embedded_speaker_dim) self.speaker_embedding.weight.data.normal_(0, 0.3) @@ -233,12 +236,12 @@ def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1): if getattr(config, "use_language_weighted_sampler", False): alpha = getattr(config, "language_weighted_sampler_alpha", 1.0) - print(" > Using Language weighted sampler with alpha:", alpha) + logger.info("Using Language weighted sampler with alpha: %.2f", alpha) weights = get_language_balancer_weights(data_items) * alpha if getattr(config, "use_speaker_weighted_sampler", False): alpha = getattr(config, "speaker_weighted_sampler_alpha", 1.0) - print(" > Using Speaker weighted sampler with alpha:", alpha) + logger.info("Using Speaker weighted sampler with alpha: %.2f", alpha) if weights is not None: weights += get_speaker_balancer_weights(data_items) * alpha else: @@ -246,7 +249,7 @@ def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1): if getattr(config, "use_length_weighted_sampler", False): alpha = getattr(config, "length_weighted_sampler_alpha", 1.0) - print(" > Using Length weighted sampler with alpha:", alpha) + logger.info("Using Length weighted sampler with alpha: %.2f", alpha) if weights is not None: weights += get_length_balancer_weights(data_items) * alpha else: @@ -378,7 +381,7 @@ def test_run(self, assets: Dict) -> Tuple[Dict, Dict]: Returns: Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard. 
""" - print(" | > Synthesizing test sentences.") + logger.info("Synthesizing test sentences.") test_audios = {} test_figures = {} test_sentences = self.config.test_sentences @@ -417,8 +420,8 @@ def on_init_start(self, trainer): if hasattr(trainer.config, "model_args"): trainer.config.model_args.speakers_file = output_path trainer.config.save_json(os.path.join(trainer.output_path, "config.json")) - print(f" > `speakers.pth` is saved to {output_path}.") - print(" > `speakers_file` is updated in the config.json.") + logger.info("`speakers.pth` is saved to %s", output_path) + logger.info("`speakers_file` is updated in the config.json.") if self.language_manager is not None: output_path = os.path.join(trainer.output_path, "language_ids.json") @@ -427,5 +430,5 @@ def on_init_start(self, trainer): if hasattr(trainer.config, "model_args"): trainer.config.model_args.language_ids_file = output_path trainer.config.save_json(os.path.join(trainer.output_path, "config.json")) - print(f" > `language_ids.json` is saved to {output_path}.") - print(" > `language_ids_file` is updated in the config.json.") + logger.info("`language_ids.json` is saved to %s", output_path) + logger.info("`language_ids_file` is updated in the config.json.") diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index 8f2a35d204..f410313729 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -1,3 +1,4 @@ +import logging from typing import Dict, List, Optional, Tuple, Union import librosa @@ -22,6 +23,8 @@ from TTS.vc.modules.freevc.speaker_encoder.speaker_encoder import SpeakerEncoder as SpeakerEncoderEx from TTS.vc.modules.freevc.wavlm import get_wavlm +logger = logging.getLogger(__name__) + class ResidualCouplingBlock(nn.Module): def __init__(self, channels, hidden_channels, kernel_size, dilation_rate, n_layers, n_flows=4, gin_channels=0): @@ -152,7 +155,7 @@ def forward(self, x, g=None): return x def remove_weight_norm(self): - print("Removing weight norm...") + logger.info("Removing weight norm...") for l in self.ups: remove_parametrizations(l, "weight") for l in self.resblocks: @@ -377,7 +380,7 @@ def device(self): def load_pretrained_speaker_encoder(self): """Load pretrained speaker encoder model as mentioned in the paper.""" - print(" > Loading pretrained speaker encoder model ...") + logger.info("Loading pretrained speaker encoder model ...") self.enc_spk_ex = SpeakerEncoderEx( "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/speaker_encoder.pt" ) diff --git a/TTS/vc/modules/freevc/mel_processing.py b/TTS/vc/modules/freevc/mel_processing.py index 1955e758ac..a3e251891a 100644 --- a/TTS/vc/modules/freevc/mel_processing.py +++ b/TTS/vc/modules/freevc/mel_processing.py @@ -1,7 +1,11 @@ +import logging + import torch import torch.utils.data from librosa.filters import mel as librosa_mel_fn +logger = logging.getLogger(__name__) + MAX_WAV_VALUE = 32768.0 @@ -39,9 +43,9 @@ def spectral_de_normalize_torch(magnitudes): def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False): if torch.min(y) < -1.0: - print("min value is ", torch.min(y)) + logger.info("Min value is: %.3f", torch.min(y)) if torch.max(y) > 1.0: - print("max value is ", torch.max(y)) + logger.info("Max value is: %.3f", torch.max(y)) global hann_window dtype_device = str(y.dtype) + "_" + str(y.device) @@ -87,9 +91,9 @@ def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax): def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False): if 
torch.min(y) < -1.0: - print("min value is ", torch.min(y)) + logger.info("Min value is: %.3f", torch.min(y)) if torch.max(y) > 1.0: - print("max value is ", torch.max(y)) + logger.info("Max value is: %.3f", torch.max(y)) global mel_basis, hann_window dtype_device = str(y.dtype) + "_" + str(y.device) diff --git a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py index 7f811ac3ab..2636400b90 100644 --- a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py +++ b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py @@ -1,3 +1,4 @@ +import logging from time import perf_counter as timer from typing import List, Union @@ -17,9 +18,11 @@ sampling_rate, ) +logger = logging.getLogger(__name__) + class SpeakerEncoder(nn.Module): - def __init__(self, weights_fpath, device: Union[str, torch.device] = None, verbose=True): + def __init__(self, weights_fpath, device: Union[str, torch.device] = None): """ :param device: either a torch device or the name of a torch device (e.g. "cpu", "cuda"). If None, defaults to cuda if it is available on your machine, otherwise the model will @@ -50,9 +53,7 @@ def __init__(self, weights_fpath, device: Union[str, torch.device] = None, verbo self.load_state_dict(checkpoint["model_state"], strict=False) self.to(device) - - if verbose: - print("Loaded the voice encoder model on %s in %.2f seconds." % (device.type, timer() - start)) + logger.info("Loaded the voice encoder model on %s in %.2f seconds.", device.type, timer() - start) def forward(self, mels: torch.FloatTensor): """ diff --git a/TTS/vc/modules/freevc/wavlm/__init__.py b/TTS/vc/modules/freevc/wavlm/__init__.py index 6edada407b..0033d22c48 100644 --- a/TTS/vc/modules/freevc/wavlm/__init__.py +++ b/TTS/vc/modules/freevc/wavlm/__init__.py @@ -1,3 +1,4 @@ +import logging import os import urllib.request @@ -6,6 +7,8 @@ from TTS.utils.generic_utils import get_user_data_dir from TTS.vc.modules.freevc.wavlm.wavlm import WavLM, WavLMConfig +logger = logging.getLogger(__name__) + model_uri = "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/WavLM-Large.pt" @@ -20,7 +23,7 @@ def get_wavlm(device="cpu"): output_path = os.path.join(output_path, "WavLM-Large.pt") if not os.path.exists(output_path): - print(f" > Downloading WavLM model to {output_path} ...") + logger.info("Downloading WavLM model to %s ...", output_path) urllib.request.urlretrieve(model_uri, output_path) checkpoint = torch.load(output_path, map_location=torch.device(device)) diff --git a/TTS/vocoder/datasets/gan_dataset.py b/TTS/vocoder/datasets/gan_dataset.py index 50c38c4deb..b5e30fada9 100644 --- a/TTS/vocoder/datasets/gan_dataset.py +++ b/TTS/vocoder/datasets/gan_dataset.py @@ -109,7 +109,6 @@ def load_item(self, idx): if self.compute_feat: # compute features from wav wavpath = self.item_list[idx] - # print(wavpath) if self.use_cache and self.cache[idx] is not None: audio, mel = self.cache[idx] diff --git a/TTS/vocoder/datasets/wavernn_dataset.py b/TTS/vocoder/datasets/wavernn_dataset.py index a67c5b31a0..533feaa530 100644 --- a/TTS/vocoder/datasets/wavernn_dataset.py +++ b/TTS/vocoder/datasets/wavernn_dataset.py @@ -1,9 +1,13 @@ +import logging + import numpy as np import torch from torch.utils.data import Dataset from TTS.utils.audio.numpy_transforms import mulaw_encode, quantize +logger = logging.getLogger(__name__) + class WaveRNNDataset(Dataset): """ @@ -60,7 +64,7 @@ def load_item(self, index): else: min_audio_len = audio.shape[0] + (2 * self.pad * self.hop_len) 
if audio.shape[0] < min_audio_len: - print(" [!] Instance is too short! : {}".format(wavpath)) + logger.warning("Instance is too short: %s", wavpath) audio = np.pad(audio, [0, min_audio_len - audio.shape[0] + self.hop_len]) mel = self.ap.melspectrogram(audio) @@ -80,7 +84,7 @@ def load_item(self, index): mel = np.load(feat_path.replace("/quant/", "/mel/")) if mel.shape[-1] < self.mel_len + 2 * self.pad: - print(" [!] Instance is too short! : {}".format(wavpath)) + logger.warning("Instance is too short: %s", wavpath) self.item_list[index] = self.item_list[index + 1] feat_path = self.item_list[index] mel = np.load(feat_path.replace("/quant/", "/mel/")) diff --git a/TTS/vocoder/models/__init__.py b/TTS/vocoder/models/__init__.py index 65901617b6..7a1716f16d 100644 --- a/TTS/vocoder/models/__init__.py +++ b/TTS/vocoder/models/__init__.py @@ -1,8 +1,11 @@ import importlib +import logging import re from coqpit import Coqpit +logger = logging.getLogger(__name__) + def to_camel(text): text = text.capitalize() @@ -27,13 +30,13 @@ def setup_model(config: Coqpit): MyModel = getattr(MyModel, to_camel(config.model)) except ModuleNotFoundError as e: raise ValueError(f"Model {config.model} not exist!") from e - print(" > Vocoder Model: {}".format(config.model)) + logger.info("Vocoder model: %s", config.model) return MyModel.init_from_config(config) def setup_generator(c): """TODO: use config object as arguments""" - print(" > Generator Model: {}".format(c.generator_model)) + logger.info("Generator model: %s", c.generator_model) MyModel = importlib.import_module("TTS.vocoder.models." + c.generator_model.lower()) MyModel = getattr(MyModel, to_camel(c.generator_model)) # this is to preserve the Wavernn class name (instead of Wavernn) @@ -96,7 +99,7 @@ def setup_generator(c): def setup_discriminator(c): """TODO: use config objekt as arguments""" - print(" > Discriminator Model: {}".format(c.discriminator_model)) + logger.info("Discriminator model: %s", c.discriminator_model) if "parallel_wavegan" in c.discriminator_model: MyModel = importlib.import_module("TTS.vocoder.models.parallel_wavegan_discriminator") else: diff --git a/TTS/vocoder/models/hifigan_generator.py b/TTS/vocoder/models/hifigan_generator.py index 9247532259..b9561f6ff6 100644 --- a/TTS/vocoder/models/hifigan_generator.py +++ b/TTS/vocoder/models/hifigan_generator.py @@ -1,4 +1,6 @@ # adopted from https://github.com/jik876/hifi-gan/blob/master/models.py +import logging + import torch from torch import nn from torch.nn import Conv1d, ConvTranspose1d @@ -8,6 +10,8 @@ from TTS.utils.io import load_fsspec +logger = logging.getLogger(__name__) + LRELU_SLOPE = 0.1 @@ -282,7 +286,7 @@ def inference(self, c): return self.forward(c) def remove_weight_norm(self): - print("Removing weight norm...") + logger.info("Removing weight norm...") for l in self.ups: remove_parametrizations(l, "weight") for l in self.resblocks: diff --git a/TTS/vocoder/models/parallel_wavegan_discriminator.py b/TTS/vocoder/models/parallel_wavegan_discriminator.py index d02af75f05..211d45d91c 100644 --- a/TTS/vocoder/models/parallel_wavegan_discriminator.py +++ b/TTS/vocoder/models/parallel_wavegan_discriminator.py @@ -1,3 +1,4 @@ +import logging import math import torch @@ -6,6 +7,8 @@ from TTS.vocoder.layers.parallel_wavegan import ResidualBlock +logger = logging.getLogger(__name__) + class ParallelWaveganDiscriminator(nn.Module): """PWGAN discriminator as in https://arxiv.org/abs/1910.11480. 
@@ -76,7 +79,7 @@ def _apply_weight_norm(m): def remove_weight_norm(self): def _remove_weight_norm(m): try: - # print(f"Weight norm is removed from {m}.") + logger.info("Weight norm is removed from %s", m) remove_parametrizations(m, "weight") except ValueError: # this module didn't have weight norm return @@ -179,7 +182,7 @@ def _apply_weight_norm(m): def remove_weight_norm(self): def _remove_weight_norm(m): try: - print(f"Weight norm is removed from {m}.") + logger.info("Weight norm is removed from %s", m) remove_parametrizations(m, "weight") except ValueError: # this module didn't have weight norm return diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py index 8338d94653..96684d2a0a 100644 --- a/TTS/vocoder/models/parallel_wavegan_generator.py +++ b/TTS/vocoder/models/parallel_wavegan_generator.py @@ -1,3 +1,4 @@ +import logging import math import numpy as np @@ -8,6 +9,8 @@ from TTS.vocoder.layers.parallel_wavegan import ResidualBlock from TTS.vocoder.layers.upsample import ConvUpsample +logger = logging.getLogger(__name__) + class ParallelWaveganGenerator(torch.nn.Module): """PWGAN generator as in https://arxiv.org/pdf/1910.11480.pdf. @@ -126,7 +129,7 @@ def inference(self, c): def remove_weight_norm(self): def _remove_weight_norm(m): try: - # print(f"Weight norm is removed from {m}.") + logger.info("Weight norm is removed from %s", m) remove_parametrizations(m, "weight") except ValueError: # this module didn't have weight norm return @@ -137,7 +140,7 @@ def apply_weight_norm(self): def _apply_weight_norm(m): if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)): torch.nn.utils.parametrizations.weight_norm(m) - # print(f"Weight norm is applied to {m}.") + logger.info("Weight norm is applied to %s", m) self.apply(_apply_weight_norm) diff --git a/TTS/vocoder/models/univnet_generator.py b/TTS/vocoder/models/univnet_generator.py index 5e66b70df8..72e57a9c39 100644 --- a/TTS/vocoder/models/univnet_generator.py +++ b/TTS/vocoder/models/univnet_generator.py @@ -1,3 +1,4 @@ +import logging from typing import List import numpy as np @@ -7,6 +8,8 @@ from TTS.vocoder.layers.lvc_block import LVCBlock +logger = logging.getLogger(__name__) + LRELU_SLOPE = 0.1 @@ -113,7 +116,7 @@ def remove_weight_norm(self): def _remove_weight_norm(m): try: - # print(f"Weight norm is removed from {m}.") + logger.info("Weight norm is removed from %s", m) parametrize.remove_parametrizations(m, "weight") except ValueError: # this module didn't have weight norm return @@ -126,7 +129,7 @@ def apply_weight_norm(self): def _apply_weight_norm(m): if isinstance(m, (torch.nn.Conv1d, torch.nn.Conv2d)): torch.nn.utils.parametrizations.weight_norm(m) - # print(f"Weight norm is applied to {m}.") + logger.info("Weight norm is applied to %s", m) self.apply(_apply_weight_norm) diff --git a/TTS/vocoder/utils/generic_utils.py b/TTS/vocoder/utils/generic_utils.py index 113240fd75..ac797d97f7 100644 --- a/TTS/vocoder/utils/generic_utils.py +++ b/TTS/vocoder/utils/generic_utils.py @@ -1,3 +1,4 @@ +import logging from typing import Dict import numpy as np @@ -7,6 +8,8 @@ from TTS.tts.utils.visual import plot_spectrogram from TTS.utils.audio import AudioProcessor +logger = logging.getLogger(__name__) + def interpolate_vocoder_input(scale_factor, spec): """Interpolate spectrogram by the scale factor. @@ -20,12 +23,12 @@ def interpolate_vocoder_input(scale_factor, spec): Returns: torch.tensor: interpolated spectrogram. 
""" - print(" > before interpolation :", spec.shape) + logger.info("Before interpolation: %s", spec.shape) spec = torch.tensor(spec).unsqueeze(0).unsqueeze(0) # pylint: disable=not-callable spec = torch.nn.functional.interpolate( spec, scale_factor=scale_factor, recompute_scale_factor=True, mode="bilinear", align_corners=False ).squeeze(0) - print(" > after interpolation :", spec.shape) + logger.info("After interpolation: %s", spec.shape) return spec From b711e19cb6783251cb5f771e75e9f6d1385513f6 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 18 Nov 2023 14:26:44 +0100 Subject: [PATCH 058/255] refactor: remove verbose arguments Can be handled by adjusting logging levels instead. --- TTS/api.py | 4 ++-- TTS/bin/extract_tts_spectrograms.py | 5 ++--- TTS/bin/train_encoder.py | 9 ++++---- TTS/bin/tune_wavegrad.py | 1 - TTS/encoder/dataset.py | 16 ++++++--------- TTS/server/server.py | 1 + TTS/tts/datasets/dataset.py | 11 +--------- TTS/tts/models/base_tts.py | 1 - TTS/tts/models/delightful_tts.py | 5 +---- TTS/tts/models/glow_tts.py | 7 +++---- TTS/tts/models/neuralhmm_tts.py | 5 ++--- TTS/tts/models/overflow.py | 5 ++--- TTS/tts/models/vits.py | 5 ++--- TTS/utils/audio/processor.py | 18 ++++++---------- TTS/utils/manage.py | 16 ++++++--------- TTS/utils/synthesizer.py | 2 +- TTS/vc/models/base_vc.py | 1 - TTS/vc/models/freevc.py | 2 +- TTS/vocoder/datasets/__init__.py | 5 +---- TTS/vocoder/datasets/gan_dataset.py | 2 -- TTS/vocoder/datasets/wavegrad_dataset.py | 2 -- TTS/vocoder/datasets/wavernn_dataset.py | 5 +---- TTS/vocoder/models/gan.py | 5 ++--- TTS/vocoder/models/wavegrad.py | 1 - TTS/vocoder/models/wavernn.py | 1 - tests/tts_tests/test_vits.py | 22 ++++++++++---------- tests/tts_tests2/test_glow_tts.py | 26 ++++++++++++------------ 27 files changed, 69 insertions(+), 114 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 6d618d29db..250ed1a0d9 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -62,7 +62,7 @@ def __init__( gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False. 
""" super().__init__() - self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar, verbose=False) + self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar) self.config = load_config(config_path) if config_path else None self.synthesizer = None self.voice_converter = None @@ -125,7 +125,7 @@ def get_models_file_path(): @staticmethod def list_models(): - return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False, verbose=False).list_models() + return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False).list_models() def download_model_by_name(self, model_name: str): model_path, config_path, model_item = self.manager.download_model(model_name) diff --git a/TTS/bin/extract_tts_spectrograms.py b/TTS/bin/extract_tts_spectrograms.py index 16ad36b8dc..cfb35916c1 100755 --- a/TTS/bin/extract_tts_spectrograms.py +++ b/TTS/bin/extract_tts_spectrograms.py @@ -21,7 +21,7 @@ use_cuda = torch.cuda.is_available() -def setup_loader(ap, r, verbose=False): +def setup_loader(ap, r): tokenizer, _ = TTSTokenizer.init_from_config(c) dataset = TTSDataset( outputs_per_step=r, @@ -37,7 +37,6 @@ def setup_loader(ap, r, verbose=False): phoneme_cache_path=c.phoneme_cache_path, precompute_num_workers=0, use_noise_augment=False, - verbose=verbose, speaker_id_mapping=speaker_manager.name_to_id if c.use_speaker_embedding else None, d_vector_mapping=speaker_manager.embeddings if c.use_d_vector_file else None, ) @@ -257,7 +256,7 @@ def main(args): # pylint: disable=redefined-outer-name print("\n > Model has {} parameters".format(num_params), flush=True) # set r r = 1 if c.model.lower() == "glow_tts" else model.decoder.r - own_loader = setup_loader(ap, r, verbose=True) + own_loader = setup_loader(ap, r) extract_spectrograms( own_loader, diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index 6a8cd7b444..e1f157493e 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +import logging import os import sys import time @@ -31,7 +32,7 @@ print(" > Number of GPUs: ", num_gpus) -def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False): +def setup_loader(ap: AudioProcessor, is_val: bool = False): num_utter_per_class = c.num_utter_per_class if not is_val else c.eval_num_utter_per_class num_classes_in_batch = c.num_classes_in_batch if not is_val else c.eval_num_classes_in_batch @@ -42,7 +43,6 @@ def setup_loader(ap: AudioProcessor, is_val: bool = False, verbose: bool = False voice_len=c.voice_len, num_utter_per_class=num_utter_per_class, num_classes_in_batch=num_classes_in_batch, - verbose=verbose, augmentation_config=c.audio_augmentation if not is_val else None, use_torch_spec=c.model_params.get("use_torch_spec", False), ) @@ -278,9 +278,10 @@ def main(args): # pylint: disable=redefined-outer-name # pylint: disable=redefined-outer-name meta_data_train, meta_data_eval = load_tts_samples(c.datasets, eval_split=True) - train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False, verbose=True) + logging.getLogger("TTS.encoder.dataset").setLevel(logging.INFO) + train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False) if c.run_eval: - eval_data_loader, _, _ = setup_loader(ap, is_val=True, verbose=True) + eval_data_loader, _, _ = setup_loader(ap, is_val=True) else: eval_data_loader = None diff --git a/TTS/bin/tune_wavegrad.py 
b/TTS/bin/tune_wavegrad.py index a4b10009d7..d5bdcfc9e8 100644 --- a/TTS/bin/tune_wavegrad.py +++ b/TTS/bin/tune_wavegrad.py @@ -55,7 +55,6 @@ return_segments=False, use_noise_augment=False, use_cache=False, - verbose=True, ) loader = DataLoader( dataset, diff --git a/TTS/encoder/dataset.py b/TTS/encoder/dataset.py index 7e4286c5d9..81385c6c1f 100644 --- a/TTS/encoder/dataset.py +++ b/TTS/encoder/dataset.py @@ -18,7 +18,6 @@ def __init__( voice_len=1.6, num_classes_in_batch=64, num_utter_per_class=10, - verbose=False, augmentation_config=None, use_torch_spec=None, ): @@ -27,7 +26,6 @@ def __init__( ap (TTS.tts.utils.AudioProcessor): audio processor object. meta_data (list): list of dataset instances. seq_len (int): voice segment length in seconds. - verbose (bool): print diagnostic information. """ super().__init__() self.config = config @@ -36,7 +34,6 @@ def __init__( self.seq_len = int(voice_len * self.sample_rate) self.num_utter_per_class = num_utter_per_class self.ap = ap - self.verbose = verbose self.use_torch_spec = use_torch_spec self.classes, self.items = self.__parse_items() @@ -53,13 +50,12 @@ def __init__( if "gaussian" in augmentation_config.keys(): self.gaussian_augmentation_config = augmentation_config["gaussian"] - if self.verbose: - logger.info("DataLoader initialization") - logger.info(" | Classes per batch: %d", num_classes_in_batch) - logger.info(" | Number of instances: %d", len(self.items)) - logger.info(" | Sequence length: %d", self.seq_len) - logger.info(" | Number of classes: %d", len(self.classes)) - logger.info(" | Classes: %d", self.classes) + logger.info("DataLoader initialization") + logger.info(" | Classes per batch: %d", num_classes_in_batch) + logger.info(" | Number of instances: %d", len(self.items)) + logger.info(" | Sequence length: %d", self.seq_len) + logger.info(" | Number of classes: %d", len(self.classes)) + logger.info(" | Classes: %d", self.classes) def load_wav(self, filename): audio = self.ap.load_wav(filename, sr=self.ap.sample_rate) diff --git a/TTS/server/server.py b/TTS/server/server.py index ddf630a64d..a8f3a08817 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -20,6 +20,7 @@ from TTS.utils.synthesizer import Synthesizer logger = logging.getLogger(__name__) +logging.getLogger("TTS").setLevel(logging.INFO) def create_argparser(): diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py index dd879565dc..3886a8f8c9 100644 --- a/TTS/tts/datasets/dataset.py +++ b/TTS/tts/datasets/dataset.py @@ -82,7 +82,6 @@ def __init__( language_id_mapping: Dict = None, use_noise_augment: bool = False, start_by_longest: bool = False, - verbose: bool = False, ): """Generic 📂 data loader for `tts` models. It is configurable for different outputs and needs. @@ -140,8 +139,6 @@ def __init__( use_noise_augment (bool): Enable adding random noise to wav for augmentation. Defaults to False. start_by_longest (bool): Start by longest sequence. It is especially useful to check OOM. Defaults to False. - - verbose (bool): Print diagnostic information. Defaults to false. 
""" super().__init__() self.batch_group_size = batch_group_size @@ -165,7 +162,6 @@ def __init__( self.use_noise_augment = use_noise_augment self.start_by_longest = start_by_longest - self.verbose = verbose self.rescue_item_idx = 1 self.pitch_computed = False self.tokenizer = tokenizer @@ -183,8 +179,7 @@ def __init__( self.energy_dataset = EnergyDataset( self.samples, self.ap, cache_path=energy_cache_path, precompute_num_workers=precompute_num_workers ) - if self.verbose: - self.print_logs() + self.print_logs() @property def lengths(self): @@ -700,14 +695,12 @@ def __init__( samples: Union[List[List], List[Dict]], ap: "AudioProcessor", audio_config=None, # pylint: disable=unused-argument - verbose=False, cache_path: str = None, precompute_num_workers=0, normalize_f0=True, ): self.samples = samples self.ap = ap - self.verbose = verbose self.cache_path = cache_path self.normalize_f0 = normalize_f0 self.pad_id = 0.0 @@ -850,14 +843,12 @@ def __init__( self, samples: Union[List[List], List[Dict]], ap: "AudioProcessor", - verbose=False, cache_path: str = None, precompute_num_workers=0, normalize_energy=True, ): self.samples = samples self.ap = ap - self.verbose = verbose self.cache_path = cache_path self.normalize_energy = normalize_energy self.pad_id = 0.0 diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py index dd0082315a..7fbc2a3a78 100644 --- a/TTS/tts/models/base_tts.py +++ b/TTS/tts/models/base_tts.py @@ -333,7 +333,6 @@ def get_data_loader( phoneme_cache_path=config.phoneme_cache_path, precompute_num_workers=config.precompute_num_workers, use_noise_augment=False if is_eval else config.use_noise_augment, - verbose=verbose, speaker_id_mapping=speaker_id_mapping, d_vector_mapping=d_vector_mapping if config.use_d_vector_file else None, tokenizer=self.tokenizer, diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index 91ef9a691f..ed318923e9 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -331,7 +331,6 @@ def __init__( self, ap, samples: Union[List[List], List[Dict]], - verbose=False, cache_path: str = None, precompute_num_workers=0, normalize_f0=True, @@ -339,7 +338,6 @@ def __init__( super().__init__( samples=samples, ap=ap, - verbose=verbose, cache_path=cache_path, precompute_num_workers=precompute_num_workers, normalize_f0=normalize_f0, @@ -1455,7 +1453,6 @@ def get_data_loader( compute_f0=config.compute_f0, f0_cache_path=config.f0_cache_path, attn_prior_cache_path=config.attn_prior_cache_path if config.use_attn_priors else None, - verbose=verbose, tokenizer=self.tokenizer, start_by_longest=config.start_by_longest, ) @@ -1532,7 +1529,7 @@ def on_epoch_end(self, trainer): # pylint: disable=unused-argument @staticmethod def init_from_config( - config: "DelightfulTTSConfig", samples: Union[List[List], List[Dict]] = None, verbose=False + config: "DelightfulTTSConfig", samples: Union[List[List], List[Dict]] = None ): # pylint: disable=unused-argument """Initiate model from config diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py index 90bc9f2ece..a4ae012166 100644 --- a/TTS/tts/models/glow_tts.py +++ b/TTS/tts/models/glow_tts.py @@ -56,7 +56,7 @@ class GlowTTS(BaseTTS): >>> from TTS.tts.configs.glow_tts_config import GlowTTSConfig >>> from TTS.tts.models.glow_tts import GlowTTS >>> config = GlowTTSConfig() - >>> model = GlowTTS.init_from_config(config, verbose=False) + >>> model = GlowTTS.init_from_config(config) """ def __init__( @@ -543,18 +543,17 @@ def on_train_step_start(self, 
trainer): self.run_data_dep_init = trainer.total_steps_done < self.data_dep_init_steps @staticmethod - def init_from_config(config: "GlowTTSConfig", samples: Union[List[List], List[Dict]] = None, verbose=True): + def init_from_config(config: "GlowTTSConfig", samples: Union[List[List], List[Dict]] = None): """Initiate model from config Args: config (VitsConfig): Model config. samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training. Defaults to None. - verbose (bool): If True, print init messages. Defaults to True. """ from TTS.utils.audio import AudioProcessor - ap = AudioProcessor.init_from_config(config, verbose) + ap = AudioProcessor.init_from_config(config) tokenizer, new_config = TTSTokenizer.init_from_config(config) speaker_manager = SpeakerManager.init_from_config(config, samples) return GlowTTS(new_config, ap, tokenizer, speaker_manager) diff --git a/TTS/tts/models/neuralhmm_tts.py b/TTS/tts/models/neuralhmm_tts.py index 6158d30382..d5bd9d1311 100644 --- a/TTS/tts/models/neuralhmm_tts.py +++ b/TTS/tts/models/neuralhmm_tts.py @@ -238,18 +238,17 @@ def get_criterion(): return NLLLoss() @staticmethod - def init_from_config(config: "NeuralhmmTTSConfig", samples: Union[List[List], List[Dict]] = None, verbose=True): + def init_from_config(config: "NeuralhmmTTSConfig", samples: Union[List[List], List[Dict]] = None): """Initiate model from config Args: config (VitsConfig): Model config. samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training. Defaults to None. - verbose (bool): If True, print init messages. Defaults to True. """ from TTS.utils.audio import AudioProcessor - ap = AudioProcessor.init_from_config(config, verbose) + ap = AudioProcessor.init_from_config(config) tokenizer, new_config = TTSTokenizer.init_from_config(config) speaker_manager = SpeakerManager.init_from_config(config, samples) return NeuralhmmTTS(new_config, ap, tokenizer, speaker_manager) diff --git a/TTS/tts/models/overflow.py b/TTS/tts/models/overflow.py index cc0c5cd3f0..0218d0452b 100644 --- a/TTS/tts/models/overflow.py +++ b/TTS/tts/models/overflow.py @@ -253,18 +253,17 @@ def get_criterion(): return NLLLoss() @staticmethod - def init_from_config(config: "OverFlowConfig", samples: Union[List[List], List[Dict]] = None, verbose=True): + def init_from_config(config: "OverFlowConfig", samples: Union[List[List], List[Dict]] = None): """Initiate model from config Args: config (VitsConfig): Model config. samples (Union[List[List], List[Dict]]): Training samples to parse speaker ids for training. Defaults to None. - verbose (bool): If True, print init messages. Defaults to True. 
""" from TTS.utils.audio import AudioProcessor - ap = AudioProcessor.init_from_config(config, verbose) + ap = AudioProcessor.init_from_config(config) tokenizer, new_config = TTSTokenizer.init_from_config(config) speaker_manager = SpeakerManager.init_from_config(config, samples) return Overflow(new_config, ap, tokenizer, speaker_manager) diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index eea9b59eb7..2552133753 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -1612,7 +1612,6 @@ def get_data_loader( max_audio_len=config.max_audio_len, phoneme_cache_path=config.phoneme_cache_path, precompute_num_workers=config.precompute_num_workers, - verbose=verbose, tokenizer=self.tokenizer, start_by_longest=config.start_by_longest, ) @@ -1779,7 +1778,7 @@ def load_fairseq_checkpoint( assert not self.training @staticmethod - def init_from_config(config: "VitsConfig", samples: Union[List[List], List[Dict]] = None, verbose=True): + def init_from_config(config: "VitsConfig", samples: Union[List[List], List[Dict]] = None): """Initiate model from config Args: @@ -1802,7 +1801,7 @@ def init_from_config(config: "VitsConfig", samples: Union[List[List], List[Dict] upsample_rate == effective_hop_length ), f" [!] Product of upsample rates must be equal to the hop length - {upsample_rate} vs {effective_hop_length}" - ap = AudioProcessor.init_from_config(config, verbose=verbose) + ap = AudioProcessor.init_from_config(config) tokenizer, new_config = TTSTokenizer.init_from_config(config) speaker_manager = SpeakerManager.init_from_config(config, samples) language_manager = LanguageManager.init_from_config(config) diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py index e2eb924e57..680e29debc 100644 --- a/TTS/utils/audio/processor.py +++ b/TTS/utils/audio/processor.py @@ -135,10 +135,6 @@ class AudioProcessor(object): stats_path (str, optional): Path to the computed stats file. Defaults to None. - - verbose (bool, optional): - enable/disable logging. Defaults to True. - """ def __init__( @@ -175,7 +171,6 @@ def __init__( do_rms_norm=False, db_level=None, stats_path=None, - verbose=True, **_, ): # setup class attributed @@ -231,10 +226,9 @@ def __init__( self.win_length <= self.fft_size ), f" [!] win_length cannot be larger than fft_size - {self.win_length} vs {self.fft_size}" members = vars(self) - if verbose: - logger.info("Setting up Audio Processor...") - for key, value in members.items(): - logger.info(" | %s: %s", key, value) + logger.info("Setting up Audio Processor...") + for key, value in members.items(): + logger.info(" | %s: %s", key, value) # create spectrogram utils self.mel_basis = build_mel_basis( sample_rate=self.sample_rate, @@ -253,10 +247,10 @@ def __init__( self.symmetric_norm = None @staticmethod - def init_from_config(config: "Coqpit", verbose=True): + def init_from_config(config: "Coqpit"): if "audio" in config: - return AudioProcessor(verbose=verbose, **config.audio) - return AudioProcessor(verbose=verbose, **config) + return AudioProcessor(**config.audio) + return AudioProcessor(**config) ### normalization ### def normalize(self, S: np.ndarray) -> np.ndarray: diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index 0dfb501269..0b6e79bacc 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -43,13 +43,11 @@ class ModelManager(object): models_file (str): path to .model.json file. Defaults to None. output_prefix (str): prefix to `tts` to download models. 
Defaults to None progress_bar (bool): print a progress bar when donwloading a file. Defaults to False. - verbose (bool): print info. Defaults to True. """ - def __init__(self, models_file=None, output_prefix=None, progress_bar=False, verbose=True): + def __init__(self, models_file=None, output_prefix=None, progress_bar=False): super().__init__() self.progress_bar = progress_bar - self.verbose = verbose if output_prefix is None: self.output_prefix = get_user_data_dir("tts") else: @@ -71,18 +69,16 @@ def read_models_file(self, file_path): self.models_dict = read_json_with_comments(file_path) def _list_models(self, model_type, model_count=0): - if self.verbose: - logger.info("") - logger.info("Name format: type/language/dataset/model") + logger.info("") + logger.info("Name format: type/language/dataset/model") model_list = [] for lang in self.models_dict[model_type]: for dataset in self.models_dict[model_type][lang]: for model in self.models_dict[model_type][lang][dataset]: model_full_name = f"{model_type}--{lang}--{dataset}--{model}" - if self.verbose: - output_path = Path(self.output_prefix) / model_full_name - downloaded = " [already downloaded]" if output_path.is_dir() else "" - logger.info(" %2d: %s/%s/%s/%s%s", model_count, model_type, lang, dataset, model, downloaded) + output_path = Path(self.output_prefix) / model_full_name + downloaded = " [already downloaded]" if output_path.is_dir() else "" + logger.info(" %2d: %s/%s/%s/%s%s", model_count, model_type, lang, dataset, model, downloaded) model_list.append(f"{model_type}/{lang}/{dataset}/{model}") model_count += 1 return model_list diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index 2bb1e39cb2..50a7893047 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -221,7 +221,7 @@ def _load_vocoder(self, model_file: str, model_config: str, use_cuda: bool) -> N use_cuda (bool): enable/disable CUDA use. """ self.vocoder_config = load_config(model_config) - self.vocoder_ap = AudioProcessor(verbose=False, **self.vocoder_config.audio) + self.vocoder_ap = AudioProcessor(**self.vocoder_config.audio) self.vocoder_model = setup_vocoder_model(self.vocoder_config) self.vocoder_model.load_checkpoint(self.vocoder_config, model_file, eval=True) if use_cuda: diff --git a/TTS/vc/models/base_vc.py b/TTS/vc/models/base_vc.py index d68d8364cd..c387157f19 100644 --- a/TTS/vc/models/base_vc.py +++ b/TTS/vc/models/base_vc.py @@ -321,7 +321,6 @@ def get_data_loader( phoneme_cache_path=config.phoneme_cache_path, precompute_num_workers=config.precompute_num_workers, use_noise_augment=False if is_eval else config.use_noise_augment, - verbose=verbose, speaker_id_mapping=speaker_id_mapping, d_vector_mapping=d_vector_mapping if config.use_d_vector_file else None, tokenizer=None, diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index f410313729..f9e691256e 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -550,7 +550,7 @@ def voice_conversion(self, src, tgt): def eval_step(): ... 
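# --- Illustrative sketch, not part of the patch series above -----------------
# The "remove verbose arguments" refactor assumes callers control console
# output through the standard logging module rather than per-object `verbose`
# flags. Only stdlib logging is used below; the "TTS" logger name and the
# init_from_config(config) calls (now without `verbose`) come from the diffs.
import logging

# Quiet the INFO-level setup messages that AudioProcessor, ModelManager, etc.
# now always emit, or restore them by switching the level back to INFO.
logging.getLogger("TTS").setLevel(logging.WARNING)   # hide init messages
# logging.getLogger("TTS").setLevel(logging.INFO)    # show them again

# Model construction no longer takes a verbose kwarg, e.g.:
#   ap = AudioProcessor.init_from_config(config)
#   model = GlowTTS.init_from_config(config)
# ------------------------------------------------------------------------------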
@staticmethod - def init_from_config(config: FreeVCConfig, samples: Union[List[List], List[Dict]] = None, verbose=True): + def init_from_config(config: FreeVCConfig, samples: Union[List[List], List[Dict]] = None): model = FreeVC(config) return model diff --git a/TTS/vocoder/datasets/__init__.py b/TTS/vocoder/datasets/__init__.py index 871eb0d202..04462817a8 100644 --- a/TTS/vocoder/datasets/__init__.py +++ b/TTS/vocoder/datasets/__init__.py @@ -10,7 +10,7 @@ from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset -def setup_dataset(config: Coqpit, ap: AudioProcessor, is_eval: bool, data_items: List, verbose: bool) -> Dataset: +def setup_dataset(config: Coqpit, ap: AudioProcessor, is_eval: bool, data_items: List) -> Dataset: if config.model.lower() in "gan": dataset = GANDataset( ap=ap, @@ -24,7 +24,6 @@ def setup_dataset(config: Coqpit, ap: AudioProcessor, is_eval: bool, data_items: return_segments=not is_eval, use_noise_augment=config.use_noise_augment, use_cache=config.use_cache, - verbose=verbose, ) dataset.shuffle_mapping() elif config.model.lower() == "wavegrad": @@ -39,7 +38,6 @@ def setup_dataset(config: Coqpit, ap: AudioProcessor, is_eval: bool, data_items: return_segments=True, use_noise_augment=False, use_cache=config.use_cache, - verbose=verbose, ) elif config.model.lower() == "wavernn": dataset = WaveRNNDataset( @@ -51,7 +49,6 @@ def setup_dataset(config: Coqpit, ap: AudioProcessor, is_eval: bool, data_items: mode=config.model_params.mode, mulaw=config.model_params.mulaw, is_training=not is_eval, - verbose=verbose, ) else: raise ValueError(f" [!] Dataset for model {config.model.lower()} cannot be found.") diff --git a/TTS/vocoder/datasets/gan_dataset.py b/TTS/vocoder/datasets/gan_dataset.py index b5e30fada9..0806c0d496 100644 --- a/TTS/vocoder/datasets/gan_dataset.py +++ b/TTS/vocoder/datasets/gan_dataset.py @@ -28,7 +28,6 @@ def __init__( return_segments=True, use_noise_augment=False, use_cache=False, - verbose=False, ): super().__init__() self.ap = ap @@ -43,7 +42,6 @@ def __init__( self.return_segments = return_segments self.use_cache = use_cache self.use_noise_augment = use_noise_augment - self.verbose = verbose assert seq_len % hop_len == 0, " [!] seq_len has to be a multiple of hop_len." self.feat_frame_len = seq_len // hop_len + (2 * conv_pad) diff --git a/TTS/vocoder/datasets/wavegrad_dataset.py b/TTS/vocoder/datasets/wavegrad_dataset.py index 305fe430e3..6f34bccb7c 100644 --- a/TTS/vocoder/datasets/wavegrad_dataset.py +++ b/TTS/vocoder/datasets/wavegrad_dataset.py @@ -28,7 +28,6 @@ def __init__( return_segments=True, use_noise_augment=False, use_cache=False, - verbose=False, ): super().__init__() self.ap = ap @@ -41,7 +40,6 @@ def __init__( self.return_segments = return_segments self.use_cache = use_cache self.use_noise_augment = use_noise_augment - self.verbose = verbose if return_segments: assert seq_len % hop_len == 0, " [!] seq_len has to be a multiple of hop_len." diff --git a/TTS/vocoder/datasets/wavernn_dataset.py b/TTS/vocoder/datasets/wavernn_dataset.py index 533feaa530..4c4f5c48df 100644 --- a/TTS/vocoder/datasets/wavernn_dataset.py +++ b/TTS/vocoder/datasets/wavernn_dataset.py @@ -15,9 +15,7 @@ class WaveRNNDataset(Dataset): and converts them to acoustic features on the fly. 
""" - def __init__( - self, ap, items, seq_len, hop_len, pad, mode, mulaw, is_training=True, verbose=False, return_segments=True - ): + def __init__(self, ap, items, seq_len, hop_len, pad, mode, mulaw, is_training=True, return_segments=True): super().__init__() self.ap = ap self.compute_feat = not isinstance(items[0], (tuple, list)) @@ -29,7 +27,6 @@ def __init__( self.mode = mode self.mulaw = mulaw self.is_training = is_training - self.verbose = verbose self.return_segments = return_segments assert self.seq_len % self.hop_len == 0 diff --git a/TTS/vocoder/models/gan.py b/TTS/vocoder/models/gan.py index 19c30e983e..9b6508d8ba 100644 --- a/TTS/vocoder/models/gan.py +++ b/TTS/vocoder/models/gan.py @@ -349,7 +349,6 @@ def get_data_loader( # pylint: disable=no-self-use, unused-argument return_segments=not is_eval, use_noise_augment=config.use_noise_augment, use_cache=config.use_cache, - verbose=verbose, ) dataset.shuffle_mapping() sampler = DistributedSampler(dataset, shuffle=True) if num_gpus > 1 else None @@ -369,6 +368,6 @@ def get_criterion(self): return [DiscriminatorLoss(self.config), GeneratorLoss(self.config)] @staticmethod - def init_from_config(config: Coqpit, verbose=True) -> "GAN": - ap = AudioProcessor.init_from_config(config, verbose=verbose) + def init_from_config(config: Coqpit) -> "GAN": + ap = AudioProcessor.init_from_config(config) return GAN(config, ap=ap) diff --git a/TTS/vocoder/models/wavegrad.py b/TTS/vocoder/models/wavegrad.py index c1166e0914..70d9edb342 100644 --- a/TTS/vocoder/models/wavegrad.py +++ b/TTS/vocoder/models/wavegrad.py @@ -321,7 +321,6 @@ def get_data_loader(self, config: Coqpit, assets: Dict, is_eval: True, samples: return_segments=True, use_noise_augment=False, use_cache=config.use_cache, - verbose=verbose, ) sampler = DistributedSampler(dataset) if num_gpus > 1 else None loader = DataLoader( diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py index 7f74ba3ebf..62f6ee2d2d 100644 --- a/TTS/vocoder/models/wavernn.py +++ b/TTS/vocoder/models/wavernn.py @@ -623,7 +623,6 @@ def get_data_loader( # pylint: disable=no-self-use mode=config.model_args.mode, mulaw=config.model_args.mulaw, is_training=not is_eval, - verbose=verbose, ) sampler = DistributedSampler(dataset, shuffle=True) if num_gpus > 1 else None loader = DataLoader( diff --git a/tests/tts_tests/test_vits.py b/tests/tts_tests/test_vits.py index e76e29283e..17992773ad 100644 --- a/tests/tts_tests/test_vits.py +++ b/tests/tts_tests/test_vits.py @@ -212,7 +212,7 @@ def test_d_vector_forward(self): d_vector_file=[os.path.join(get_tests_data_path(), "dummy_speakers.json")], ) config = VitsConfig(model_args=args) - model = Vits.init_from_config(config, verbose=False).to(device) + model = Vits.init_from_config(config).to(device) model.train() input_dummy, input_lengths, _, spec, spec_lengths, waveform = self._create_inputs(config, batch_size=batch_size) d_vectors = torch.randn(batch_size, 256).to(device) @@ -357,7 +357,7 @@ def test_d_vector_inference(self): d_vector_file=[os.path.join(get_tests_data_path(), "dummy_speakers.json")], ) config = VitsConfig(model_args=args) - model = Vits.init_from_config(config, verbose=False).to(device) + model = Vits.init_from_config(config).to(device) model.eval() # batch size = 1 input_dummy = torch.randint(0, 24, (1, 128)).long().to(device) @@ -511,7 +511,7 @@ def test_train_step_upsampling_interpolation(self): def test_train_eval_log(self): batch_size = 2 config = VitsConfig(model_args=VitsArgs(num_chars=32, spec_segment_size=10)) - model = 
Vits.init_from_config(config, verbose=False).to(device) + model = Vits.init_from_config(config).to(device) model.run_data_dep_init = False model.train() batch = self._create_batch(config, batch_size) @@ -530,7 +530,7 @@ def test_train_eval_log(self): def test_test_run(self): config = VitsConfig(model_args=VitsArgs(num_chars=32)) - model = Vits.init_from_config(config, verbose=False).to(device) + model = Vits.init_from_config(config).to(device) model.run_data_dep_init = False model.eval() test_figures, test_audios = model.test_run(None) @@ -540,7 +540,7 @@ def test_test_run(self): def test_load_checkpoint(self): chkp_path = os.path.join(get_tests_output_path(), "dummy_glow_tts_checkpoint.pth") config = VitsConfig(VitsArgs(num_chars=32)) - model = Vits.init_from_config(config, verbose=False).to(device) + model = Vits.init_from_config(config).to(device) chkp = {} chkp["model"] = model.state_dict() torch.save(chkp, chkp_path) @@ -551,20 +551,20 @@ def test_load_checkpoint(self): def test_get_criterion(self): config = VitsConfig(VitsArgs(num_chars=32)) - model = Vits.init_from_config(config, verbose=False).to(device) + model = Vits.init_from_config(config).to(device) criterion = model.get_criterion() self.assertTrue(criterion is not None) def test_init_from_config(self): config = VitsConfig(model_args=VitsArgs(num_chars=32)) - model = Vits.init_from_config(config, verbose=False).to(device) + model = Vits.init_from_config(config).to(device) config = VitsConfig(model_args=VitsArgs(num_chars=32, num_speakers=2)) - model = Vits.init_from_config(config, verbose=False).to(device) + model = Vits.init_from_config(config).to(device) self.assertTrue(not hasattr(model, "emb_g")) config = VitsConfig(model_args=VitsArgs(num_chars=32, num_speakers=2, use_speaker_embedding=True)) - model = Vits.init_from_config(config, verbose=False).to(device) + model = Vits.init_from_config(config).to(device) self.assertEqual(model.num_speakers, 2) self.assertTrue(hasattr(model, "emb_g")) @@ -576,7 +576,7 @@ def test_init_from_config(self): speakers_file=os.path.join(get_tests_data_path(), "ljspeech", "speakers.json"), ) ) - model = Vits.init_from_config(config, verbose=False).to(device) + model = Vits.init_from_config(config).to(device) self.assertEqual(model.num_speakers, 10) self.assertTrue(hasattr(model, "emb_g")) @@ -588,7 +588,7 @@ def test_init_from_config(self): d_vector_file=[os.path.join(get_tests_data_path(), "dummy_speakers.json")], ) ) - model = Vits.init_from_config(config, verbose=False).to(device) + model = Vits.init_from_config(config).to(device) self.assertTrue(model.num_speakers == 1) self.assertTrue(not hasattr(model, "emb_g")) self.assertTrue(model.embedded_speaker_dim == config.d_vector_dim) diff --git a/tests/tts_tests2/test_glow_tts.py b/tests/tts_tests2/test_glow_tts.py index b93e701f19..3c7ac51556 100644 --- a/tests/tts_tests2/test_glow_tts.py +++ b/tests/tts_tests2/test_glow_tts.py @@ -132,7 +132,7 @@ def _test_forward_with_d_vector(self, batch_size): d_vector_dim=256, d_vector_file=os.path.join(get_tests_data_path(), "dummy_speakers.json"), ) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) model.train() print(" > Num parameters for GlowTTS model:%s" % (count_parameters(model))) # inference encoder and decoder with MAS @@ -158,7 +158,7 @@ def _test_forward_with_speaker_id(self, batch_size): use_speaker_embedding=True, num_speakers=24, ) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = 
GlowTTS.init_from_config(config).to(device) model.train() print(" > Num parameters for GlowTTS model:%s" % (count_parameters(model))) # inference encoder and decoder with MAS @@ -206,7 +206,7 @@ def _test_inference_with_d_vector(self, batch_size): d_vector_dim=256, d_vector_file=os.path.join(get_tests_data_path(), "dummy_speakers.json"), ) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) model.eval() outputs = model.inference(input_dummy, {"x_lengths": input_lengths, "d_vectors": d_vector}) self._assert_inference_outputs(outputs, input_dummy, mel_spec) @@ -224,7 +224,7 @@ def _test_inference_with_speaker_ids(self, batch_size): use_speaker_embedding=True, num_speakers=24, ) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) outputs = model.inference(input_dummy, {"x_lengths": input_lengths, "speaker_ids": speaker_ids}) self._assert_inference_outputs(outputs, input_dummy, mel_spec) @@ -299,7 +299,7 @@ def test_train_eval_log(self): batch["d_vectors"] = None batch["speaker_ids"] = None config = GlowTTSConfig(num_chars=32) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) model.run_data_dep_init = False model.train() logger = TensorboardLogger( @@ -313,7 +313,7 @@ def test_train_eval_log(self): def test_test_run(self): config = GlowTTSConfig(num_chars=32) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) model.run_data_dep_init = False model.eval() test_figures, test_audios = model.test_run(None) @@ -323,7 +323,7 @@ def test_test_run(self): def test_load_checkpoint(self): chkp_path = os.path.join(get_tests_output_path(), "dummy_glow_tts_checkpoint.pth") config = GlowTTSConfig(num_chars=32) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) chkp = {} chkp["model"] = model.state_dict() torch.save(chkp, chkp_path) @@ -334,21 +334,21 @@ def test_load_checkpoint(self): def test_get_criterion(self): config = GlowTTSConfig(num_chars=32) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) criterion = model.get_criterion() self.assertTrue(criterion is not None) def test_init_from_config(self): config = GlowTTSConfig(num_chars=32) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) config = GlowTTSConfig(num_chars=32, num_speakers=2) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) self.assertTrue(model.num_speakers == 2) self.assertTrue(not hasattr(model, "emb_g")) config = GlowTTSConfig(num_chars=32, num_speakers=2, use_speaker_embedding=True) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) self.assertTrue(model.num_speakers == 2) self.assertTrue(hasattr(model, "emb_g")) @@ -358,7 +358,7 @@ def test_init_from_config(self): use_speaker_embedding=True, speakers_file=os.path.join(get_tests_data_path(), "ljspeech", "speakers.json"), ) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) self.assertTrue(model.num_speakers == 10) self.assertTrue(hasattr(model, "emb_g")) @@ -368,7 +368,7 @@ def 
test_init_from_config(self): d_vector_dim=256, d_vector_file=os.path.join(get_tests_data_path(), "dummy_speakers.json"), ) - model = GlowTTS.init_from_config(config, verbose=False).to(device) + model = GlowTTS.init_from_config(config).to(device) self.assertTrue(model.num_speakers == 1) self.assertTrue(not hasattr(model, "emb_g")) self.assertTrue(model.c_in_channels == config.d_vector_dim) From 9b2d48f8a67e2520947ddc0cd366494a1ba08019 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 2 Apr 2024 11:33:27 +0200 Subject: [PATCH 059/255] feat(utils.generic_utils): improve setup_logger() arguments and output --- TTS/utils/generic_utils.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index a2af8ffbb3..96edd29f60 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -7,7 +7,7 @@ import subprocess import sys from pathlib import Path -from typing import Dict +from typing import Dict, Optional logger = logging.getLogger(__name__) @@ -125,16 +125,29 @@ def format_aux_input(def_args: Dict, kwargs: Dict) -> Dict: return kwargs -def get_timestamp(): +def get_timestamp() -> str: return datetime.now().strftime("%y%m%d-%H%M%S") -def setup_logger(logger_name, root, phase, level=logging.INFO, screen=False, tofile=False): +def setup_logger( + logger_name: str, + level: int = logging.INFO, + *, + formatter: Optional[logging.Formatter] = None, + screen: bool = False, + tofile: bool = False, + log_dir: str = "logs", + log_name: str = "log", +) -> None: lg = logging.getLogger(logger_name) - formatter = logging.Formatter("%(asctime)s.%(msecs)03d - %(levelname)s: %(message)s", datefmt="%y-%m-%d %H:%M:%S") + if formatter is None: + formatter = logging.Formatter( + "%(asctime)s.%(msecs)03d - %(levelname)-8s - %(name)s: %(message)s", datefmt="%y-%m-%d %H:%M:%S" + ) lg.setLevel(level) if tofile: - log_file = os.path.join(root, phase + "_{}.log".format(get_timestamp())) + Path(log_dir).mkdir(exist_ok=True, parents=True) + log_file = Path(log_dir) / f"{log_name}_{get_timestamp()}.log" fh = logging.FileHandler(log_file, mode="w") fh.setFormatter(formatter) lg.addHandler(fh) From ab64844aba3db6f8fb6a0204dee9b0dc617688d3 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 2 Apr 2024 12:15:18 +0200 Subject: [PATCH 060/255] feat(utils.generic_utils): add custom formatter for logging to console --- TTS/utils/generic_utils.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index 96edd29f60..024d50277c 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -129,6 +129,20 @@ def get_timestamp() -> str: return datetime.now().strftime("%y%m%d-%H%M%S") +class ConsoleFormatter(logging.Formatter): + """Custom formatter that prints logging.INFO messages without the level name. 
+ + Source: https://stackoverflow.com/a/62488520 + """ + + def format(self, record): + if record.levelno == logging.INFO: + self._style._fmt = "%(message)s" + else: + self._style._fmt = "%(levelname)s: %(message)s" + return super().format(record) + + def setup_logger( logger_name: str, level: int = logging.INFO, From 7dc5d1eb3d774ba9e574599c0d010ea42aa700c1 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 20 Nov 2023 15:13:37 +0100 Subject: [PATCH 061/255] fix: logging in executables --- TTS/bin/compute_attention_masks.py | 4 ++++ TTS/bin/compute_embeddings.py | 4 ++++ TTS/bin/compute_statistics.py | 4 ++++ TTS/bin/eval_encoder.py | 4 ++++ TTS/bin/extract_tts_spectrograms.py | 4 ++++ TTS/bin/find_unique_chars.py | 4 ++++ TTS/bin/find_unique_phonemes.py | 4 ++++ TTS/bin/remove_silence_using_vad.py | 4 ++++ TTS/bin/synthesize.py | 33 ++++++++++++++++++++--------- TTS/bin/train_encoder.py | 4 +++- TTS/bin/train_tts.py | 4 ++++ TTS/bin/train_vocoder.py | 4 ++++ TTS/bin/tune_wavegrad.py | 4 ++++ 13 files changed, 70 insertions(+), 11 deletions(-) diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py index faadf6901d..207b17e9c4 100644 --- a/TTS/bin/compute_attention_masks.py +++ b/TTS/bin/compute_attention_masks.py @@ -1,5 +1,6 @@ import argparse import importlib +import logging import os from argparse import RawTextHelpFormatter @@ -13,9 +14,12 @@ from TTS.tts.models import setup_model from TTS.tts.utils.text.characters import make_symbols, phonemes, symbols from TTS.utils.audio import AudioProcessor +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger from TTS.utils.io import load_checkpoint if __name__ == "__main__": + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + # pylint: disable=bad-option-value parser = argparse.ArgumentParser( description="""Extract attention masks from trained Tacotron/Tacotron2 models. 
diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py index 5b5a37df73..6795241a73 100644 --- a/TTS/bin/compute_embeddings.py +++ b/TTS/bin/compute_embeddings.py @@ -1,4 +1,5 @@ import argparse +import logging import os from argparse import RawTextHelpFormatter @@ -10,6 +11,7 @@ from TTS.tts.datasets import load_tts_samples from TTS.tts.utils.managers import save_file from TTS.tts.utils.speakers import SpeakerManager +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger def compute_embeddings( @@ -100,6 +102,8 @@ def compute_embeddings( if __name__ == "__main__": + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + parser = argparse.ArgumentParser( description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n""" """ diff --git a/TTS/bin/compute_statistics.py b/TTS/bin/compute_statistics.py index 3ab7ea7a3b..dc5423a691 100755 --- a/TTS/bin/compute_statistics.py +++ b/TTS/bin/compute_statistics.py @@ -3,6 +3,7 @@ import argparse import glob +import logging import os import numpy as np @@ -12,10 +13,13 @@ from TTS.config import load_config from TTS.tts.datasets import load_tts_samples from TTS.utils.audio import AudioProcessor +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger def main(): """Run preprocessing process.""" + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.") parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.") parser.add_argument("out_path", type=str, help="save path (directory and filename).") diff --git a/TTS/bin/eval_encoder.py b/TTS/bin/eval_encoder.py index 60fed13932..8327851ca7 100644 --- a/TTS/bin/eval_encoder.py +++ b/TTS/bin/eval_encoder.py @@ -1,4 +1,5 @@ import argparse +import logging from argparse import RawTextHelpFormatter import torch @@ -7,6 +8,7 @@ from TTS.config import load_config from TTS.tts.datasets import load_tts_samples from TTS.tts.utils.speakers import SpeakerManager +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger def compute_encoder_accuracy(dataset_items, encoder_manager): @@ -51,6 +53,8 @@ def compute_encoder_accuracy(dataset_items, encoder_manager): if __name__ == "__main__": + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + parser = argparse.ArgumentParser( description="""Compute the accuracy of the encoder.\n\n""" """ diff --git a/TTS/bin/extract_tts_spectrograms.py b/TTS/bin/extract_tts_spectrograms.py index cfb35916c1..83f2ca21c4 100755 --- a/TTS/bin/extract_tts_spectrograms.py +++ b/TTS/bin/extract_tts_spectrograms.py @@ -2,6 +2,7 @@ """Extract Mel spectrograms with teacher forcing.""" import argparse +import logging import os import numpy as np @@ -17,6 +18,7 @@ from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.utils.audio import AudioProcessor from TTS.utils.audio.numpy_transforms import quantize +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger use_cuda = torch.cuda.is_available() @@ -271,6 +273,8 @@ def main(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + parser = argparse.ArgumentParser() parser.add_argument("--config_path", type=str, help="Path to config file for training.", 
required=True) parser.add_argument("--checkpoint_path", type=str, help="Model file to be restored.", required=True) diff --git a/TTS/bin/find_unique_chars.py b/TTS/bin/find_unique_chars.py index f476ca5ddb..0519d43769 100644 --- a/TTS/bin/find_unique_chars.py +++ b/TTS/bin/find_unique_chars.py @@ -1,13 +1,17 @@ """Find all the unique characters in a dataset""" import argparse +import logging from argparse import RawTextHelpFormatter from TTS.config import load_config from TTS.tts.datasets import find_unique_chars, load_tts_samples +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger def main(): + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + # pylint: disable=bad-option-value parser = argparse.ArgumentParser( description="""Find all the unique characters or phonemes in a dataset.\n\n""" diff --git a/TTS/bin/find_unique_phonemes.py b/TTS/bin/find_unique_phonemes.py index 48f2e7b740..d99acb9893 100644 --- a/TTS/bin/find_unique_phonemes.py +++ b/TTS/bin/find_unique_phonemes.py @@ -1,6 +1,7 @@ """Find all the unique characters in a dataset""" import argparse +import logging import multiprocessing from argparse import RawTextHelpFormatter @@ -9,6 +10,7 @@ from TTS.config import load_config from TTS.tts.datasets import load_tts_samples from TTS.tts.utils.text.phonemizers import Gruut +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger def compute_phonemes(item): @@ -18,6 +20,8 @@ def compute_phonemes(item): def main(): + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + # pylint: disable=W0601 global c, phonemizer # pylint: disable=bad-option-value diff --git a/TTS/bin/remove_silence_using_vad.py b/TTS/bin/remove_silence_using_vad.py index a1eaf4c9a7..f6d09d6bf1 100755 --- a/TTS/bin/remove_silence_using_vad.py +++ b/TTS/bin/remove_silence_using_vad.py @@ -1,5 +1,6 @@ import argparse import glob +import logging import multiprocessing import os import pathlib @@ -7,6 +8,7 @@ import torch from tqdm import tqdm +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger from TTS.utils.vad import get_vad_model_and_utils, remove_silence torch.set_num_threads(1) @@ -75,6 +77,8 @@ def preprocess_audios(): if __name__ == "__main__": + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + parser = argparse.ArgumentParser( description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end True" ) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index b06c93f7d1..0464cb2943 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -3,12 +3,17 @@ import argparse import contextlib +import logging import sys from argparse import RawTextHelpFormatter # pylint: disable=redefined-outer-name, unused-argument from pathlib import Path +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger + +logger = logging.getLogger(__name__) + description = """ Synthesize speech on command line. @@ -142,6 +147,8 @@ def str2bool(v): def main(): + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + parser = argparse.ArgumentParser( description=description.replace(" ```\n", ""), formatter_class=RawTextHelpFormatter, @@ -435,31 +442,37 @@ def main(): # query speaker ids of a multi-speaker model. 
if args.list_speaker_idxs: - print( - " > Available speaker ids: (Set --speaker_idx flag to one of these values to use the multi-speaker model." + if synthesizer.tts_model.speaker_manager is None: + logger.info("Model only has a single speaker.") + return + logger.info( + "Available speaker ids: (Set --speaker_idx flag to one of these values to use the multi-speaker model." ) - print(synthesizer.tts_model.speaker_manager.name_to_id) + logger.info(synthesizer.tts_model.speaker_manager.name_to_id) return # query langauge ids of a multi-lingual model. if args.list_language_idxs: - print( - " > Available language ids: (Set --language_idx flag to one of these values to use the multi-lingual model." + if synthesizer.tts_model.language_manager is None: + logger.info("Monolingual model.") + return + logger.info( + "Available language ids: (Set --language_idx flag to one of these values to use the multi-lingual model." ) - print(synthesizer.tts_model.language_manager.name_to_id) + logger.info(synthesizer.tts_model.language_manager.name_to_id) return # check the arguments against a multi-speaker model. if synthesizer.tts_speakers_file and (not args.speaker_idx and not args.speaker_wav): - print( - " [!] Looks like you use a multi-speaker model. Define `--speaker_idx` to " + logger.error( + "Looks like you use a multi-speaker model. Define `--speaker_idx` to " "select the target speaker. You can list the available speakers for this model by `--list_speaker_idxs`." ) return # RUN THE SYNTHESIS if args.text: - print(" > Text: {}".format(args.text)) + logger.info("Text: %s", args.text) # kick it if tts_path is not None: @@ -484,8 +497,8 @@ def main(): ) # save the results - print(" > Saving output to {}".format(args.out_path)) synthesizer.save_wav(wav, args.out_path, pipe_out=pipe_out) + logger.info("Saved output to %s", args.out_path) if __name__ == "__main__": diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index e1f157493e..c0292743bf 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -20,6 +20,7 @@ from TTS.encoder.utils.visual import plot_embeddings from TTS.tts.datasets import load_tts_samples from TTS.utils.audio import AudioProcessor +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger from TTS.utils.samplers import PerfectBatchSampler from TTS.utils.training import check_update @@ -278,7 +279,6 @@ def main(args): # pylint: disable=redefined-outer-name # pylint: disable=redefined-outer-name meta_data_train, meta_data_eval = load_tts_samples(c.datasets, eval_split=True) - logging.getLogger("TTS.encoder.dataset").setLevel(logging.INFO) train_data_loader, train_classes, map_classid_to_classname = setup_loader(ap, is_val=False) if c.run_eval: eval_data_loader, _, _ = setup_loader(ap, is_val=True) @@ -317,6 +317,8 @@ def main(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training() try: diff --git a/TTS/bin/train_tts.py b/TTS/bin/train_tts.py index bdb4f6f691..6d6342a762 100644 --- a/TTS/bin/train_tts.py +++ b/TTS/bin/train_tts.py @@ -1,3 +1,4 @@ +import logging import os from dataclasses import dataclass, field @@ -6,6 +7,7 @@ from TTS.config import load_config, register_config from TTS.tts.datasets import load_tts_samples from TTS.tts.models import setup_model +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger @dataclass @@ -15,6 +17,8 @@ class 
TrainTTSArgs(TrainerArgs): def main(): """Run `tts` model training directly by a `config.json` file.""" + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + # init trainer args train_args = TrainTTSArgs() parser = train_args.init_argparse(arg_prefix="") diff --git a/TTS/bin/train_vocoder.py b/TTS/bin/train_vocoder.py index 32ecd7bdc3..221ff4cff0 100644 --- a/TTS/bin/train_vocoder.py +++ b/TTS/bin/train_vocoder.py @@ -1,3 +1,4 @@ +import logging import os from dataclasses import dataclass, field @@ -5,6 +6,7 @@ from TTS.config import load_config, register_config from TTS.utils.audio import AudioProcessor +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger from TTS.vocoder.datasets.preprocess import load_wav_data, load_wav_feat_data from TTS.vocoder.models import setup_model @@ -16,6 +18,8 @@ class TrainVocoderArgs(TrainerArgs): def main(): """Run `tts` model training directly by a `config.json` file.""" + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + # init trainer args train_args = TrainVocoderArgs() parser = train_args.init_argparse(arg_prefix="") diff --git a/TTS/bin/tune_wavegrad.py b/TTS/bin/tune_wavegrad.py index d5bdcfc9e8..df2923952d 100644 --- a/TTS/bin/tune_wavegrad.py +++ b/TTS/bin/tune_wavegrad.py @@ -1,6 +1,7 @@ """Search a good noise schedule for WaveGrad for a given number of inference iterations""" import argparse +import logging from itertools import product as cartesian_product import numpy as np @@ -10,11 +11,14 @@ from TTS.config import load_config from TTS.utils.audio import AudioProcessor +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger from TTS.vocoder.datasets.preprocess import load_wav_data from TTS.vocoder.datasets.wavegrad_dataset import WaveGradDataset from TTS.vocoder.models import setup_model if __name__ == "__main__": + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + parser = argparse.ArgumentParser() parser.add_argument("--model_path", type=str, help="Path to model checkpoint.") parser.add_argument("--config_path", type=str, help="Path to model config file.") From e689fd1d4ac9439a450f645c08457edb2f6d1f0e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 31 Mar 2024 13:21:01 +0200 Subject: [PATCH 062/255] fix(utils.manage): remove bare except, improve messages --- TTS/utils/manage.py | 79 +++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 43 deletions(-) diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index 0b6e79bacc..d4781d54e6 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -97,21 +97,36 @@ def list_models(self): models_name_list.extend(model_list) return models_name_list + def log_model_details(self, model_type, lang, dataset, model): + logger.info("Model type: %s", model_type) + logger.info("Language supported: %s", lang) + logger.info("Dataset used: %s", dataset) + logger.info("Model name: %s", model) + if "description" in self.models_dict[model_type][lang][dataset][model]: + logger.info("Description: %s", self.models_dict[model_type][lang][dataset][model]["description"]) + else: + logger.info("Description: coming soon") + if "default_vocoder" in self.models_dict[model_type][lang][dataset][model]: + logger.info( + "Default vocoder: %s", + self.models_dict[model_type][lang][dataset][model]["default_vocoder"], + ) + def model_info_by_idx(self, model_query): - """Print the description of the model from .models.json file using model_idx + """Print the 
description of the model from .models.json file using model_query_idx Args: - model_query (str): / + model_query (str): / """ model_name_list = [] model_type, model_query_idx = model_query.split("/") try: model_query_idx = int(model_query_idx) if model_query_idx <= 0: - print("> model_query_idx should be a positive integer!") + logger.error("model_query_idx [%d] should be a positive integer!", model_query_idx) return - except: - print("> model_query_idx should be an integer!") + except (TypeError, ValueError): + logger.error("model_query_idx [%s] should be an integer!", model_query_idx) return model_count = 0 if model_type in self.models_dict: @@ -121,22 +136,13 @@ def model_info_by_idx(self, model_query): model_name_list.append(f"{model_type}/{lang}/{dataset}/{model}") model_count += 1 else: - print(f"> model_type {model_type} does not exist in the list.") + logger.error("Model type %s does not exist in the list.", model_type) return if model_query_idx > model_count: - print(f"model query idx exceeds the number of available models [{model_count}] ") + logger.error("model_query_idx exceeds the number of available models [%d]", model_count) else: model_type, lang, dataset, model = model_name_list[model_query_idx - 1].split("/") - print(f"> model type : {model_type}") - print(f"> language supported : {lang}") - print(f"> dataset used : {dataset}") - print(f"> model name : {model}") - if "description" in self.models_dict[model_type][lang][dataset][model]: - print(f"> description : {self.models_dict[model_type][lang][dataset][model]['description']}") - else: - print("> description : coming soon") - if "default_vocoder" in self.models_dict[model_type][lang][dataset][model]: - print(f"> default_vocoder : {self.models_dict[model_type][lang][dataset][model]['default_vocoder']}") + self.log_model_details(model_type, lang, dataset, model) def model_info_by_full_name(self, model_query_name): """Print the description of the model from .models.json file using model_full_name @@ -145,32 +151,19 @@ def model_info_by_full_name(self, model_query_name): model_query_name (str): Format is /// """ model_type, lang, dataset, model = model_query_name.split("/") - if model_type in self.models_dict: - if lang in self.models_dict[model_type]: - if dataset in self.models_dict[model_type][lang]: - if model in self.models_dict[model_type][lang][dataset]: - print(f"> model type : {model_type}") - print(f"> language supported : {lang}") - print(f"> dataset used : {dataset}") - print(f"> model name : {model}") - if "description" in self.models_dict[model_type][lang][dataset][model]: - print( - f"> description : {self.models_dict[model_type][lang][dataset][model]['description']}" - ) - else: - print("> description : coming soon") - if "default_vocoder" in self.models_dict[model_type][lang][dataset][model]: - print( - f"> default_vocoder : {self.models_dict[model_type][lang][dataset][model]['default_vocoder']}" - ) - else: - print(f"> model {model} does not exist for {model_type}/{lang}/{dataset}.") - else: - print(f"> dataset {dataset} does not exist for {model_type}/{lang}.") - else: - print(f"> lang {lang} does not exist for {model_type}.") - else: - print(f"> model_type {model_type} does not exist in the list.") + if model_type not in self.models_dict: + logger.error("Model type %s does not exist in the list.", model_type) + return + if lang not in self.models_dict[model_type]: + logger.error("Language %s does not exist for %s.", lang, model_type) + return + if dataset not in self.models_dict[model_type][lang]: + 
logger.error("Dataset %s does not exist for %s/%s.", dataset, model_type, lang) + return + if model not in self.models_dict[model_type][lang][dataset]: + logger.error("Model %s does not exist for %s/%s/%s.", model, model_type, lang, dataset) + return + self.log_model_details(model_type, lang, dataset, model) def list_tts_models(self): """Print all `TTS` models and return a list of model names From aa40fd277b49669ce53e9a1d008edac036f6d981 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 4 Apr 2024 18:18:25 +0200 Subject: [PATCH 063/255] docs: update links --- CITATION.cff | 4 +-- CONTRIBUTING.md | 24 +++++++-------- README.md | 29 ++++++++++--------- TTS/server/README.md | 2 +- TTS/server/templates/index.html | 2 +- docs/source/faq.md | 4 +-- docs/source/inference.md | 6 ++-- docs/source/installation.md | 6 ++-- docs/source/models/xtts.md | 2 +- docs/source/tutorial_for_nervous_beginners.md | 8 ++--- setup.py | 8 ++--- 11 files changed, 48 insertions(+), 47 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 28eb65e23c..a01a3be642 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -10,8 +10,8 @@ authors: version: 1.4 doi: 10.5281/zenodo.6334862 license: "MPL-2.0" -url: "https://github.com/eginhard/coqui-tts" -repository-code: "https://github.com/eginhard/coqui-tts" +url: "https://github.com/idiap/coqui-ai-TTS" +repository-code: "https://github.com/idiap/coqui-ai-TTS" keywords: - machine learning - deep learning diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8a0fe3904a..d8e8fc61bb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,7 +2,7 @@ Welcome to the 🐸TTS! -This repository is governed by [the Contributor Covenant Code of Conduct](https://github.com/eginhard/coqui-tts/blob/main/CODE_OF_CONDUCT.md). +This repository is governed by [the Contributor Covenant Code of Conduct](https://github.com/idiap/coqui-ai-TTS/blob/main/CODE_OF_CONDUCT.md). ## Where to start. We welcome everyone who likes to contribute to 🐸TTS. @@ -15,13 +15,13 @@ If you like to contribute code, squash a bug but if you don't know where to star You can pick something out of our road map. We keep the progess of the project in this simple issue thread. It has new model proposals or developmental updates etc. -- [Github Issues Tracker](https://github.com/eginhard/coqui-tts/issues) +- [Github Issues Tracker](https://github.com/idiap/coqui-ai-TTS/issues) This is a place to find feature requests, bugs. Issues with the ```good first issue``` tag are good place for beginners to take on. -- ✨**PR**✨ [pages](https://github.com/eginhard/coqui-tts/pulls) with the ```🚀new version``` tag. +- ✨**PR**✨ [pages](https://github.com/idiap/coqui-ai-TTS/pulls) with the ```🚀new version``` tag. We list all the target improvements for the next version. You can pick one of them and start contributing. @@ -46,14 +46,14 @@ Let us know if you encounter a problem along the way. The following steps are tested on an Ubuntu system. -1. Fork 🐸TTS[https://github.com/eginhard/coqui-tts] by clicking the fork button at the top right corner of the project page. +1. Fork 🐸TTS[https://github.com/idiap/coqui-ai-TTS] by clicking the fork button at the top right corner of the project page. 2. Clone 🐸TTS and add the main repo as a new remote named ```upstream```. ```bash - $ git clone git@github.com:/coqui-tts.git - $ cd coqui-tts - $ git remote add upstream https://github.com/eginhard/coqui-tts.git + $ git clone git@github.com:/coqui-ai-TTS.git + $ cd coqui-ai-TTS + $ git remote add upstream https://github.com/idiap/coqui-ai-TTS.git ``` 3. 
Install 🐸TTS for development. @@ -124,7 +124,7 @@ The following steps are tested on an Ubuntu system. 13. Let's discuss until it is perfect. 💪 - We might ask you for certain changes that would appear in the ✨**PR**✨'s page under 🐸TTS[https://github.com/eginhard/coqui-tts/pulls]. + We might ask you for certain changes that would appear in the ✨**PR**✨'s page under 🐸TTS[https://github.com/idiap/coqui-ai-TTS/pulls]. 14. Once things look perfect, We merge it to the ```dev``` branch and make it ready for the next version. @@ -132,14 +132,14 @@ The following steps are tested on an Ubuntu system. If you prefer working within a Docker container as your development environment, you can do the following: -1. Fork 🐸TTS[https://github.com/eginhard/coqui-tts] by clicking the fork button at the top right corner of the project page. +1. Fork 🐸TTS[https://github.com/idiap/coqui-ai-TTS] by clicking the fork button at the top right corner of the project page. 2. Clone 🐸TTS and add the main repo as a new remote named ```upsteam```. ```bash - $ git clone git@github.com:/coqui-tts.git - $ cd coqui-tts - $ git remote add upstream https://github.com/eginhard/coqui-tts.git + $ git clone git@github.com:/coqui-ai-TTS.git + $ cd coqui-ai-TTS + $ git remote add upstream https://github.com/idiap/coqui-ai-TTS.git ``` 3. Build the Docker Image as your development environment (it installs all of the dependencies for you): diff --git a/README.md b/README.md index 782b48ab69..9a863fc696 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,8 @@ -## 🐸Coqui.ai News +## 🐸Coqui TTS News +- 📣 Fork of the [original, unmaintained repository](https://github.com/coqui-ai/TTS). New PyPI package: [coqui-tts](https://pypi.org/project/coqui-tts) - 📣 ⓍTTSv2 is here with 16 languages and better performance across the board. -- 📣 ⓍTTS fine-tuning code is out. Check the [example recipes](https://github.com/eginhard/coqui-tts/tree/dev/recipes/ljspeech). +- 📣 ⓍTTS fine-tuning code is out. Check the [example recipes](https://github.com/idiap/coqui-ai-TTS/tree/dev/recipes/ljspeech). - 📣 ⓍTTS can now stream with <200ms latency. - 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://coqui-tts.readthedocs.io/en/dev/models/xtts.html) - 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://coqui-tts.readthedocs.io/en/dev/models/bark.html) @@ -11,7 +12,7 @@
-## +## **🐸TTS is a library for advanced Text-to-Speech generation.** @@ -26,13 +27,13 @@ ______________________________________________________________________ [![Discord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) [![License]()](https://opensource.org/licenses/MPL-2.0) [![PyPI version](https://badge.fury.io/py/TTS.svg)](https://badge.fury.io/py/TTS) -[![Covenant](https://camo.githubusercontent.com/7d620efaa3eac1c5b060ece5d6aacfcc8b81a74a04d05cd0398689c01c4463bb/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6e7472696275746f72253230436f76656e616e742d76322e3025323061646f707465642d6666363962342e737667)](https://github.com/eginhard/coqui-tts/blob/main/CODE_OF_CONDUCT.md) +[![Covenant](https://camo.githubusercontent.com/7d620efaa3eac1c5b060ece5d6aacfcc8b81a74a04d05cd0398689c01c4463bb/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6e7472696275746f72253230436f76656e616e742d76322e3025323061646f707465642d6666363962342e737667)](https://github.com/idiap/coqui-ai-TTS/blob/main/CODE_OF_CONDUCT.md) [![Downloads](https://pepy.tech/badge/tts)](https://pepy.tech/project/tts) [![DOI](https://zenodo.org/badge/265612440.svg)](https://zenodo.org/badge/latestdoi/265612440) -![GithubActions](https://github.com/eginhard/coqui-tts/actions/workflows/tests.yml/badge.svg) -![GithubActions](https://github.com/eginhard/coqui-tts/actions/workflows/docker.yaml/badge.svg) -![GithubActions](https://github.com/eginhard/coqui-tts/actions/workflows/style_check.yml/badge.svg) +![GithubActions](https://github.com/idiap/coqui-ai-TTS/actions/workflows/tests.yml/badge.svg) +![GithubActions](https://github.com/idiap/coqui-ai-TTS/actions/workflows/docker.yaml/badge.svg) +![GithubActions](https://github.com/idiap/coqui-ai-TTS/actions/workflows/style_check.yml/badge.svg) [![Docs]()](https://coqui-tts.readthedocs.io/en/latest/)
@@ -49,8 +50,8 @@ Please use our dedicated channels for questions and discussion. Help is much mor | 👩‍💻 **Usage Questions** | [GitHub Discussions] | | 🗯 **General Discussion** | [GitHub Discussions] or [Discord] | -[github issue tracker]: https://github.com/eginhard/coqui-tts/issues -[github discussions]: https://github.com/eginhard/coqui-tts/discussions +[github issue tracker]: https://github.com/idiap/coqui-ai-TTS/issues +[github discussions]: https://github.com/idiap/coqui-ai-TTS/discussions [discord]: https://discord.gg/5eXr5seRrv [Tutorials and Examples]: https://github.com/coqui-ai/TTS/wiki/TTS-Notebooks-and-Tutorials @@ -59,10 +60,10 @@ Please use our dedicated channels for questions and discussion. Help is much mor | Type | Links | | ------------------------------- | --------------------------------------- | | 💼 **Documentation** | [ReadTheDocs](https://coqui-tts.readthedocs.io/en/latest/) -| 💾 **Installation** | [TTS/README.md](https://github.com/eginhard/coqui-tts/tree/dev#installation)| -| 👩‍💻 **Contributing** | [CONTRIBUTING.md](https://github.com/eginhard/coqui-tts/blob/main/CONTRIBUTING.md)| +| 💾 **Installation** | [TTS/README.md](https://github.com/idiap/coqui-ai-TTS/tree/dev#installation)| +| 👩‍💻 **Contributing** | [CONTRIBUTING.md](https://github.com/idiap/coqui-ai-TTS/blob/main/CONTRIBUTING.md)| | 📌 **Road Map** | [Main Development Plans](https://github.com/coqui-ai/TTS/issues/378) -| 🚀 **Released Models** | [Standard models](https://github.com/eginhard/coqui-tts/blob/dev/TTS/.models.json) and [Fairseq models in ~1100 languages](https://github.com/eginhard/coqui-tts#example-text-to-speech-using-fairseq-models-in-1100-languages-)| +| 🚀 **Released Models** | [Standard models](https://github.com/idiap/coqui-ai-TTS/blob/dev/TTS/.models.json) and [Fairseq models in ~1100 languages](https://github.com/idiap/coqui-ai-TTS#example-text-to-speech-using-fairseq-models-in-1100-languages-)| | 📰 **Papers** | [TTS Papers](https://github.com/erogol/TTS-papers)| ## Features @@ -130,7 +131,7 @@ Please use our dedicated channels for questions and discussion. Help is much mor You can also help us implement more models. ## Installation -🐸TTS is tested on Ubuntu 18.04 with **python >= 3.9, < 3.12.**. +🐸TTS is tested on Ubuntu 22.04 with **python >= 3.9, < 3.12.**. If you are only interested in [synthesizing speech](https://coqui-tts.readthedocs.io/en/latest/inference.html) with the released 🐸TTS models, installing from PyPI is the easiest option. @@ -141,7 +142,7 @@ pip install coqui-tts If you plan to code or train models, clone 🐸TTS and install it locally. ```bash -git clone https://github.com/eginhard/coqui-tts +git clone https://github.com/idiap/coqui-ai-TTS pip install -e .[all,dev,notebooks,server] # Select the relevant extras ``` diff --git a/TTS/server/README.md b/TTS/server/README.md index f5df08011b..3b27575aea 100644 --- a/TTS/server/README.md +++ b/TTS/server/README.md @@ -1,6 +1,6 @@ # :frog: TTS demo server Before you use the server, make sure you -[install](https://github.com/eginhard/coqui-tts/tree/dev#install-tts)) :frog: TTS +[install](https://github.com/idiap/coqui-ai-TTS/tree/dev#install-tts)) :frog: TTS properly and install the additional dependencies with `pip install coqui-tts[server]`. Then, you can follow the steps below. 
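For reference, a minimal sketch of using the installed package from Python, which the installation and server docs above lead into; it is not part of this patch, and the model name and output path are placeholders chosen only for illustration (available model names can be listed with the `tts --list_models` command shown elsewhere in these docs):

```python
# Minimal usage sketch (illustrative only): synthesize speech with a released
# model after `pip install coqui-tts`. The model name below is a placeholder.
from TTS.api import TTS

tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")

# Write the synthesized audio to a WAV file.
tts.tts_to_file(text="Hello from Coqui TTS.", file_path="output.wav")
```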
diff --git a/TTS/server/templates/index.html b/TTS/server/templates/index.html index f5f547c7bf..6bfd5ae2cb 100644 --- a/TTS/server/templates/index.html +++ b/TTS/server/templates/index.html @@ -30,7 +30,7 @@ - Fork me on GitHub diff --git a/docs/source/faq.md b/docs/source/faq.md index 14be9d4c9c..1090aaa35c 100644 --- a/docs/source/faq.md +++ b/docs/source/faq.md @@ -3,7 +3,7 @@ We tried to collect common issues and questions we receive about 🐸TTS. It is ## Errors with a pre-trained model. How can I resolve this? - Make sure you use the right commit version of 🐸TTS. Each pre-trained model has its corresponding version that needs to be used. It is defined on the model table. -- If it is still problematic, post your problem on [Discussions](https://github.com/eginhard/coqui-tts/discussions). Please give as many details as possible (error message, your TTS version, your TTS model and config.json etc.) +- If it is still problematic, post your problem on [Discussions](https://github.com/idiap/coqui-ai-TTS/discussions). Please give as many details as possible (error message, your TTS version, your TTS model and config.json etc.) - If you feel like it's a bug to be fixed, then prefer Github issues with the same level of scrutiny. ## What are the requirements of a good 🐸TTS dataset? @@ -16,7 +16,7 @@ We tried to collect common issues and questions we receive about 🐸TTS. It is - If you need faster models, consider SpeedySpeech, GlowTTS or AlignTTS. Keep in mind that SpeedySpeech requires a pre-trained Tacotron or Tacotron2 model to compute text-to-speech alignments. ## How can I train my own `tts` model? -0. Check your dataset with notebooks in [dataset_analysis](https://github.com/eginhard/coqui-tts/tree/main/notebooks/dataset_analysis) folder. Use [this notebook](https://github.com/eginhard/coqui-tts/blob/main/notebooks/dataset_analysis/CheckSpectrograms.ipynb) to find the right audio processing parameters. A better set of parameters results in a better audio synthesis. +0. Check your dataset with notebooks in [dataset_analysis](https://github.com/idiap/coqui-ai-TTS/tree/main/notebooks/dataset_analysis) folder. Use [this notebook](https://github.com/idiap/coqui-ai-TTS/blob/main/notebooks/dataset_analysis/CheckSpectrograms.ipynb) to find the right audio processing parameters. A better set of parameters results in a better audio synthesis. 1. Write your own dataset `formatter` in `datasets/formatters.py` or format your dataset as one of the supported datasets, like LJSpeech. A `formatter` parses the metadata file and converts a list of training samples. diff --git a/docs/source/inference.md b/docs/source/inference.md index 0b05965f46..4cb8f45a71 100644 --- a/docs/source/inference.md +++ b/docs/source/inference.md @@ -14,7 +14,7 @@ After the installation, 2 terminal commands are available. 3. In 🐍Python. - `from TTS.api import TTS` ## On the Commandline - `tts` -![cli.gif](https://github.com/eginhard/coqui-tts/raw/main/images/tts_cli.gif) +![cli.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/tts_cli.gif) After the installation, 🐸TTS provides a CLI interface for synthesizing speech using pre-trained models. You can either use your own model or the release models under 🐸TTS. 
@@ -81,8 +81,8 @@ tts --model_name "voice_conversion///" ## On the Demo Server - `tts-server` - -![server.gif](https://github.com/eginhard/coqui-tts/raw/main/images/demo_server.gif) + +![server.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/demo_server.gif) You can boot up a demo 🐸TTS server to run an inference with your models (make sure to install the additional dependencies with `pip install coqui-tts[server]`). diff --git a/docs/source/installation.md b/docs/source/installation.md index 92743a9db4..f6c4245690 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -15,7 +15,7 @@ pip install coqui-tts # from PyPI Or install from Github: ```bash -pip install git+https://github.com/eginhard/coqui-tts # from Github +pip install git+https://github.com/idiap/coqui-ai-TTS # from Github ``` ## Installing From Source @@ -23,8 +23,8 @@ pip install git+https://github.com/eginhard/coqui-tts # from Github This is recommended for development and more control over 🐸TTS. ```bash -git clone https://github.com/eginhard/coqui-tts -cd coqui-tts +git clone https://github.com/idiap/coqui-ai-TTS +cd coqui-ai-TTS make system-deps # only on Linux systems. make install ``` diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index 014b161669..de16674134 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -29,7 +29,7 @@ Stay tuned as we continue to add support for more languages. If you have any lan This model is licensed under [Coqui Public Model License](https://coqui.ai/cpml). ### Contact -Come and join in our 🐸Community. We're active on [Discord](https://discord.gg/fBC58unbKE) and [Github](https://github.com/eginhard/coqui-tts/discussions). +Come and join in our 🐸Community. We're active on [Discord](https://discord.gg/fBC58unbKE) and [Github](https://github.com/idiap/coqui-ai-TTS/discussions). ### Inference diff --git a/docs/source/tutorial_for_nervous_beginners.md b/docs/source/tutorial_for_nervous_beginners.md index dda2abbc36..b417c4c45a 100644 --- a/docs/source/tutorial_for_nervous_beginners.md +++ b/docs/source/tutorial_for_nervous_beginners.md @@ -11,8 +11,8 @@ $ pip install coqui-tts Developer friendly installation. ```bash -$ git clone https://github.com/eginhard/coqui-tts -$ cd coqui-tts +$ git clone https://github.com/idiap/coqui-ai-TTS +$ cd coqui-ai-TTS $ pip install -e . ``` @@ -109,7 +109,7 @@ $ tts -h # see the help $ tts --list_models # list the available models. ``` -![cli.gif](https://github.com/eginhard/coqui-tts/raw/main/images/tts_cli.gif) +![cli.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/tts_cli.gif) You can call `tts-server` to start a local demo server that you can open on @@ -120,4 +120,4 @@ dependencies with `pip install coqui-tts[server]`). $ tts-server -h # see the help $ tts-server --list_models # list the available models. 
``` -![server.gif](https://github.com/eginhard/coqui-tts/raw/main/images/demo_server.gif) +![server.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/demo_server.gif) diff --git a/setup.py b/setup.py index a25b7674f6..b8bd3ee0cd 100644 --- a/setup.py +++ b/setup.py @@ -81,7 +81,7 @@ def pip_install(package_name): setup( name="coqui-tts", version=version, - url="https://github.com/eginhard/coqui-tts", + url="https://github.com/idiap/coqui-ai-TTS", author="Eren Gölge", author_email="egolge@coqui.ai", maintainer="Enno Hermann", @@ -104,9 +104,9 @@ def pip_install(package_name): }, project_urls={ "Documentation": "https://coqui-tts.readthedocs.io", - "Tracker": "https://github.com/eginhard/coqui-tts/issues", - "Repository": "https://github.com/eginhard/coqui-tts", - "Discussions": "https://github.com/eginhard/coqui-tts/discussions", + "Tracker": "https://github.com/idiap/coqui-ai-TTS/issues", + "Repository": "https://github.com/idiap/coqui-ai-TTS", + "Discussions": "https://github.com/idiap/coqui-ai-TTS/discussions", }, cmdclass={ "build_py": build_py, From 107e22c6890274d95236f4bb18bf3c5290b78ecc Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 4 Apr 2024 18:18:55 +0200 Subject: [PATCH 064/255] ci(workflows): update actions --- .github/workflows/docker.yaml | 4 ++-- .github/workflows/pypi-release.yml | 6 +++--- .github/workflows/style_check.yml | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 1f15159b42..30b972bdb7 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -10,7 +10,7 @@ on: jobs: docker-build: name: "Build and push Docker image" - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: matrix: arch: ["amd64"] @@ -18,7 +18,7 @@ jobs: - "nvidia/cuda:11.8.0-base-ubuntu22.04" # GPU enabled - "python:3.10.8-slim" # CPU only steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Log in to the Container registry uses: docker/login-action@v1 with: diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index f81f5a7493..644cf561bd 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -8,7 +8,7 @@ defaults: bash jobs: build-sdist: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Verify tag matches version @@ -33,7 +33,7 @@ jobs: name: sdist path: dist/*.tar.gz build-wheels: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest strategy: matrix: python-version: ["3.9", "3.10", "3.11"] @@ -55,7 +55,7 @@ jobs: name: wheel-${{ matrix.python-version }} path: dist/*-manylinux*.whl publish-artifacts: - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest needs: [build-sdist, build-wheels] environment: name: release diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml index e21feeb7f6..c913c233d8 100644 --- a/.github/workflows/style_check.yml +++ b/.github/workflows/style_check.yml @@ -15,9 +15,9 @@ jobs: python-version: [3.9] experimental: [false] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} architecture: x64 From 31f1c8b41475f0d617d0e7be9cb58b08d9c795ab Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 4 Apr 2024 19:38:22 +0200 Subject: [PATCH 065/255] ci(workflows.docker): update image namespace --- 
.github/workflows/docker.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 30b972bdb7..ab4e9960bd 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -29,11 +29,11 @@ jobs: id: compute-tag run: | set -ex - base="ghcr.io/coqui-ai/tts" + base="ghcr.io/idiap/coqui-tts" tags="" # PR build if [[ ${{ matrix.base }} = "python:3.10.8-slim" ]]; then - base="ghcr.io/coqui-ai/tts-cpu" + base="ghcr.io/idiap/coqui-tts-cpu" fi if [[ "${{ startsWith(github.ref, 'refs/heads/') }}" = "true" ]]; then From d41686502e3003b6472ad769115dfd710059a87d Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 8 Apr 2024 12:06:45 +0200 Subject: [PATCH 066/255] feat(xtts): support hindi for sentence-splitting and fine-tuning The XTTS model itself already supports Hindi, it was just in these components. --- TTS/demos/xtts_ft_demo/xtts_demo.py | 2 ++ TTS/tts/layers/xtts/tokenizer.py | 7 ++++++- docs/source/models/xtts.md | 23 +++++++++++++++++++---- 3 files changed, 27 insertions(+), 5 deletions(-) diff --git a/TTS/demos/xtts_ft_demo/xtts_demo.py b/TTS/demos/xtts_ft_demo/xtts_demo.py index 85168c641d..7ac38ed6ee 100644 --- a/TTS/demos/xtts_ft_demo/xtts_demo.py +++ b/TTS/demos/xtts_ft_demo/xtts_demo.py @@ -192,6 +192,7 @@ def read_logs(): "hu", "ko", "ja", + "hi", ], ) progress_data = gr.Label(label="Progress:") @@ -370,6 +371,7 @@ def train_model( "hu", "ko", "ja", + "hi", ], ) tts_text = gr.Textbox( diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py index 1a3cc47aaf..6cbd374f06 100644 --- a/TTS/tts/layers/xtts/tokenizer.py +++ b/TTS/tts/layers/xtts/tokenizer.py @@ -11,6 +11,7 @@ from spacy.lang.ar import Arabic from spacy.lang.en import English from spacy.lang.es import Spanish +from spacy.lang.hi import Hindi from spacy.lang.ja import Japanese from spacy.lang.zh import Chinese from tokenizers import Tokenizer @@ -19,6 +20,7 @@ def get_spacy_lang(lang): + """Return Spacy language used for sentence splitting.""" if lang == "zh": return Chinese() elif lang == "ja": @@ -27,8 +29,10 @@ def get_spacy_lang(lang): return Arabic() elif lang == "es": return Spanish() + elif lang == "hi": + return Hindi() else: - # For most languages, Enlish does the job + # For most languages, English does the job return English() @@ -611,6 +615,7 @@ def __init__(self, vocab_file=None): "ja": 71, "hu": 224, "ko": 95, + "hi": 150, } @cached_property diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index de16674134..cc7c36b729 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -14,16 +14,31 @@ There is no need for an excessive amount of training data that spans countless h ### Updates with v2 - Improved voice cloning. - Voices can be cloned with a single audio file or multiple audio files, without any effect on the runtime. -- 2 new languages: Hungarian and Korean. - Across the board quality improvements. ### Code Current implementation only supports inference and GPT encoder training. ### Languages -As of now, XTTS-v2 supports 16 languages: English (en), Spanish (es), French (fr), German (de), Italian (it), Portuguese (pt), Polish (pl), Turkish (tr), Russian (ru), Dutch (nl), Czech (cs), Arabic (ar), Chinese (zh-cn), Japanese (ja), Hungarian (hu) and Korean (ko). - -Stay tuned as we continue to add support for more languages. If you have any language requests, please feel free to reach out. 
+XTTS-v2 supports 17 languages: + +- Arabic (ar) +- Chinese (zh-cn) +- Czech (cs) +- Dutch (nl) +- English (en) +- French (fr) +- German (de) +- Hindi (hi) +- Hungarian (hu) +- Italian (it) +- Japanese (ja) +- Korean (ko) +- Polish (pl) +- Portuguese (pt) +- Russian (ru) +- Spanish (es) +- Turkish (tr) ### License This model is licensed under [Coqui Public Model License](https://coqui.ai/cpml). From b3c9685aeec48df3c5cca2a7ade83293fb7f641d Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 11 Apr 2024 16:58:12 +0200 Subject: [PATCH 067/255] fix(tokenizer): add debug logging --- TTS/tts/utils/text/tokenizer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/TTS/tts/utils/text/tokenizer.py b/TTS/tts/utils/text/tokenizer.py index 9aff7dd4bb..f653cdf13f 100644 --- a/TTS/tts/utils/text/tokenizer.py +++ b/TTS/tts/utils/text/tokenizer.py @@ -107,10 +107,13 @@ def text_to_ids(self, text: str, language: str = None) -> List[int]: # pylint: 5. Text to token IDs """ # TODO: text cleaner should pick the right routine based on the language + logger.debug("Tokenizer input text: %s", text) if self.text_cleaner is not None: text = self.text_cleaner(text) + logger.debug("Cleaned text: %s", text) if self.use_phonemes: text = self.phonemizer.phonemize(text, separator="", language=language) + logger.debug("Phonemes: %s", text) text = self.encode(text) if self.add_blank: text = self.intersperse_blank_char(text, True) From 794eecb7931dc8f16ba3d3c3619d5abaa42bf0c5 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 11 Apr 2024 17:00:40 +0200 Subject: [PATCH 068/255] docs(README): update badges to new pypi package --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 9a863fc696..d3a0b04830 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,9 @@ ______________________________________________________________________ [![Discord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) [![License]()](https://opensource.org/licenses/MPL-2.0) -[![PyPI version](https://badge.fury.io/py/TTS.svg)](https://badge.fury.io/py/TTS) +[![PyPI version](https://badge.fury.io/py/coqui-tts.svg)](https://badge.fury.io/py/coqui-tts) [![Covenant](https://camo.githubusercontent.com/7d620efaa3eac1c5b060ece5d6aacfcc8b81a74a04d05cd0398689c01c4463bb/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6e7472696275746f72253230436f76656e616e742d76322e3025323061646f707465642d6666363962342e737667)](https://github.com/idiap/coqui-ai-TTS/blob/main/CODE_OF_CONDUCT.md) -[![Downloads](https://pepy.tech/badge/tts)](https://pepy.tech/project/tts) +[![Downloads](https://pepy.tech/badge/coqui-tts)](https://pepy.tech/project/coqui-tts) [![DOI](https://zenodo.org/badge/265612440.svg)](https://zenodo.org/badge/latestdoi/265612440) ![GithubActions](https://github.com/idiap/coqui-ai-TTS/actions/workflows/tests.yml/badge.svg) From f7d69cc1d7585d0e3275a14f3ee479992bc3371b Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 11 Apr 2024 17:01:09 +0200 Subject: [PATCH 069/255] chore: update version to 0.23.0 --- TTS/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/VERSION b/TTS/VERSION index a723ece79b..ca222b7cf3 100644 --- a/TTS/VERSION +++ b/TTS/VERSION @@ -1 +1 @@ -0.22.1 +0.23.0 From 52a52b5e2129ab5705c8229121bdc542ab82581f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 19 Apr 2024 11:57:27 +0200 Subject: [PATCH 070/255] fix(LanguageManager): allow initialisation from 
config with language ids file Previously, running `LanguageManager.init_from_config(config)` would never use the `language_ids_file` if that field is present because it was overwritten in the next line with a new manager that manually parses languages from the datasets in the config. Now that is only used as a fallback. --- TTS/tts/utils/languages.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/TTS/tts/utils/languages.py b/TTS/tts/utils/languages.py index 89e5e1911e..f134daf58e 100644 --- a/TTS/tts/utils/languages.py +++ b/TTS/tts/utils/languages.py @@ -1,5 +1,5 @@ import os -from typing import Any, Dict, List +from typing import Any, Dict, List, Optional import fsspec import numpy as np @@ -85,18 +85,18 @@ def save_ids_to_file(self, file_path: str) -> None: self._save_json(file_path, self.name_to_id) @staticmethod - def init_from_config(config: Coqpit) -> "LanguageManager": + def init_from_config(config: Coqpit) -> Optional["LanguageManager"]: """Initialize the language manager from a Coqpit config. Args: config (Coqpit): Coqpit config. """ - language_manager = None if check_config_and_model_args(config, "use_language_embedding", True): if config.get("language_ids_file", None): - language_manager = LanguageManager(language_ids_file_path=config.language_ids_file) - language_manager = LanguageManager(config=config) - return language_manager + return LanguageManager(language_ids_file_path=config.language_ids_file) + # Fall back to parse language IDs from the config + return LanguageManager(config=config) + return None def _set_file_path(path): From 8b1ed020ffa3034cd945ac4533e9dc589ab44ebc Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 22 Apr 2024 15:06:32 +0200 Subject: [PATCH 071/255] build: add python 3.12 support --- .github/workflows/pypi-release.yml | 6 +++++- .github/workflows/tests.yml | 4 ++-- README.md | 2 +- docs/source/installation.md | 2 +- requirements.ja.txt | 2 +- setup.py | 10 ++++------ 6 files changed, 14 insertions(+), 12 deletions(-) diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 644cf561bd..78ab53986a 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -36,7 +36,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.9", "3.10", "3.11"] + python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -81,6 +81,10 @@ jobs: with: name: "wheel-3.11" path: "dist/" + - uses: actions/download-artifact@v4 + with: + name: "wheel-3.12" + path: "dist/" - run: | ls -lh dist/ - name: Publish package distributions to PyPI diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b056e3073d..6671c9f590 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.9, "3.10", "3.11"] + python-version: [3.9, "3.10", "3.11", "3.12"] subset: ["data_tests", "inference_tests", "test_aux", "test_text", "test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"] steps: - uses: actions/checkout@v4 @@ -63,7 +63,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: - python-version: "3.11" + python-version: "3.12" - uses: actions/download-artifact@v4 with: pattern: coverage-data-* diff --git a/README.md b/README.md index d3a0b04830..901da37ea5 100644 --- a/README.md +++ b/README.md @@ -131,7 +131,7 @@ Please use our dedicated channels 
for questions and discussion. Help is much mor You can also help us implement more models. ## Installation -🐸TTS is tested on Ubuntu 22.04 with **python >= 3.9, < 3.12.**. +🐸TTS is tested on Ubuntu 22.04 with **python >= 3.9, < 3.13.**. If you are only interested in [synthesizing speech](https://coqui-tts.readthedocs.io/en/latest/inference.html) with the released 🐸TTS models, installing from PyPI is the easiest option. diff --git a/docs/source/installation.md b/docs/source/installation.md index f6c4245690..f0b2a00f19 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -1,6 +1,6 @@ # Installation -🐸TTS supports python >=3.9 <3.12.0 and was tested on Ubuntu 20.04 and 22.04. +🐸TTS supports python >=3.9 <3.13.0 and was tested on Ubuntu 22.04. ## Using `pip` diff --git a/requirements.ja.txt b/requirements.ja.txt index 4baab88a91..855b872507 100644 --- a/requirements.ja.txt +++ b/requirements.ja.txt @@ -1,5 +1,5 @@ # These cause some compatibility issues on some systems and are not strictly necessary # japanese g2p deps -mecab-python3==1.0.6 +mecab-python3 unidic-lite==1.0.8 cutlet diff --git a/setup.py b/setup.py index b8bd3ee0cd..02722c18f1 100644 --- a/setup.py +++ b/setup.py @@ -28,13 +28,10 @@ import setuptools.command.build_py import setuptools.command.develop from Cython.Build import cythonize -from packaging.version import Version from setuptools import Extension, find_packages, setup -python_version = sys.version.split()[0] -if Version(python_version) < Version("3.9") or Version(python_version) >= Version("3.12"): - raise RuntimeError("TTS requires python >= 3.9 and < 3.12 " "but your Python version is {}".format(sys.version)) - +if sys.version_info < (3, 9) or sys.version_info >= (3, 13): + raise RuntimeError("Trainer requires python >= 3.6 and <3.13 " "but your Python version is {}".format(sys.version)) cwd = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(cwd, "TTS", "VERSION")) as fin: @@ -121,7 +118,7 @@ def pip_install(package_name): "server": requirements_server, "ja": requirements_ja, }, - python_requires=">=3.9.0, <3.12", + python_requires=">=3.9.0, <3.13", entry_points={"console_scripts": ["tts=TTS.bin.synthesize:main", "tts-server = TTS.server.server:main"]}, classifiers=[ "Programming Language :: Python", @@ -129,6 +126,7 @@ def pip_install(package_name): "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Development Status :: 3 - Alpha", "Intended Audience :: Science/Research", "Intended Audience :: Developers", From f636fabe5138dba4329444e2a1b962aee180bfa4 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 22 Apr 2024 16:10:08 +0200 Subject: [PATCH 072/255] build: switch to forked trainer package --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index a01efaa648..bc7fa67a86 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ umap-learn>=0.5.1 # deps for training matplotlib>=3.7.0 # coqui stack -trainer>=0.0.36 +coqui-tts-trainer>=0.1 # config management coqpit>=0.0.16 # chinese g2p deps From 697d4effbcc4a477398ed9d039f8f7b5fd15cf46 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 22 Apr 2024 16:13:27 +0200 Subject: [PATCH 073/255] Revert "ci: switch back from uv to pip" This reverts commit 00f8d47bcffdfb8b61a20132a226d89273f0d13a. 
uv has fixed https://github.com/astral-sh/uv/issues/1921, which should resolve timeout issues with pytorch/nvidia packages. --- .github/workflows/tests.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6671c9f590..ec4503e21b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -38,14 +38,14 @@ jobs: sudo apt-get install -y --no-install-recommends git make gcc make system-deps - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel + run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Replace scarf urls if: contains(fromJSON('["data_tests", "inference_tests", "test_aux", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m pip install .[all] + python3 -m uv pip install --system "coqui-tts[dev,server,ja] @ ." python3 setup.py egg_info - name: Unit tests run: make ${{ matrix.subset }} From 2675e743b0ca028dab6ce26510e4a7f8d95f0a1b Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 23 Apr 2024 09:57:34 +0200 Subject: [PATCH 074/255] chore: update version to 0.23.1 [ci skip] --- TTS/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/VERSION b/TTS/VERSION index ca222b7cf3..610e28725b 100644 --- a/TTS/VERSION +++ b/TTS/VERSION @@ -1 +1 @@ -0.23.0 +0.23.1 From 7b2289a454068cbc19bc29de9a43fb7fcfbdda9b Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 1 May 2024 12:31:49 +0200 Subject: [PATCH 075/255] fix(espeak_wrapper): capture stderr separately Fixes https://github.com/coqui-ai/TTS/issues/2728 Previously, error messages from espeak were treated as normal output and also converted to phonemes. This captures and logs them separately. 
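As a side note, a simplified sketch of the stderr-handling idea described in this commit message; it uses `subprocess.run` instead of the `Popen`-based streaming that the actual diff below keeps, and the `espeak-ng` command and logger name are only illustrative:

```python
# Simplified sketch of the approach above: keep stdout (phoneme output) and
# stderr (espeak error messages) in separate pipes, and log stderr instead of
# letting it be parsed as phonemes.
import logging
import subprocess

logger = logging.getLogger(__name__)


def run_espeak(args):
    """Run espeak-ng, return stdout lines, and log stderr lines as warnings."""
    result = subprocess.run(["espeak-ng", *args], capture_output=True, check=False)
    for line in result.stderr.splitlines():
        logger.warning("espeakng: %s", line.decode("utf-8").strip())
    return result.stdout.splitlines()
```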
--- TTS/tts/utils/text/phonemizers/espeak_wrapper.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index d1d2335037..d9f4f0793b 100644 --- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -60,9 +60,12 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]: with subprocess.Popen( cmd, stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, + stderr=subprocess.PIPE, ) as p: res = iter(p.stdout.readline, b"") + err = iter(p.stderr.readline, b"") + for line in err: + logger.warning("espeakng: %s", line.decode("utf-8").strip()) if not sync: p.stdout.close() if p.stderr: From 962f9bbbcf1a7f8565581307dbf2b6eff6fd149f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 1 May 2024 13:31:39 +0200 Subject: [PATCH 076/255] refactor(espeak_wrapper): fix ruff lint suggestions --- .../utils/text/phonemizers/espeak_wrapper.py | 84 ++++++++++--------- pyproject.toml | 1 + 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index d9f4f0793b..dd74db6fae 100644 --- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -1,7 +1,9 @@ +"""Wrapper to call the espeak/espeak-ng phonemizer.""" + import logging import re import subprocess -from typing import Dict, List +from typing import Optional from packaging.version import Version @@ -11,7 +13,7 @@ logger = logging.getLogger(__name__) -def is_tool(name): +def _is_tool(name) -> bool: from shutil import which return which(name) is not None @@ -22,23 +24,25 @@ def is_tool(name): espeak_version_pattern = re.compile(r"text-to-speech:\s(?P\d+\.\d+(\.\d+)?)") -def get_espeak_version(): +def get_espeak_version() -> str: + """Return version of the `espeak` binary.""" output = subprocess.getoutput("espeak --version") match = espeak_version_pattern.search(output) return match.group("version") -def get_espeakng_version(): +def get_espeakng_version() -> str: + """Return version of the `espeak-ng` binary.""" output = subprocess.getoutput("espeak-ng --version") return output.split()[3] # priority: espeakng > espeak -if is_tool("espeak-ng"): +if _is_tool("espeak-ng"): _DEF_ESPEAK_LIB = "espeak-ng" _DEF_ESPEAK_VER = get_espeakng_version() -elif is_tool("espeak"): +elif _is_tool("espeak"): _DEF_ESPEAK_LIB = "espeak" _DEF_ESPEAK_VER = get_espeak_version() else: @@ -46,7 +50,7 @@ def get_espeakng_version(): _DEF_ESPEAK_VER = None -def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]: +def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[bytes]: """Run espeak with the given arguments.""" cmd = [ espeak_lib, @@ -73,9 +77,7 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]: if p.stdin: p.stdin.close() return res - res2 = [] - for line in res: - res2.append(line) + res2 = list(res) p.stdout.close() if p.stderr: p.stderr.close() @@ -86,7 +88,7 @@ def _espeak_exe(espeak_lib: str, args: List, sync=False) -> List[str]: class ESpeak(BasePhonemizer): - """ESpeak wrapper calling `espeak` or `espeak-ng` from the command-line the perform G2P + """Wrapper calling `espeak` or `espeak-ng` from the command-line to perform G2P. 
Args: language (str): @@ -111,13 +113,17 @@ class ESpeak(BasePhonemizer): """ - _ESPEAK_LIB = _DEF_ESPEAK_LIB - _ESPEAK_VER = _DEF_ESPEAK_VER - - def __init__(self, language: str, backend=None, punctuations=Punctuation.default_puncs(), keep_puncs=True): - if self._ESPEAK_LIB is None: - raise Exception(" [!] No espeak backend found. Install espeak-ng or espeak to your system.") - self.backend = self._ESPEAK_LIB + def __init__( + self, + language: str, + backend: Optional[str] = None, + punctuations: str = Punctuation.default_puncs(), + keep_puncs: bool = True, + ): + if _DEF_ESPEAK_LIB is None: + msg = "[!] No espeak backend found. Install espeak-ng or espeak to your system." + raise FileNotFoundError(msg) + self.backend = _DEF_ESPEAK_LIB # band-aid for backwards compatibility if language == "en": @@ -130,35 +136,37 @@ def __init__(self, language: str, backend=None, punctuations=Punctuation.default self.backend = backend @property - def backend(self): + def backend(self) -> str: return self._ESPEAK_LIB @property - def backend_version(self): + def backend_version(self) -> str: return self._ESPEAK_VER @backend.setter - def backend(self, backend): + def backend(self, backend: str) -> None: if backend not in ["espeak", "espeak-ng"]: - raise Exception("Unknown backend: %s" % backend) + msg = f"Unknown backend: {backend}" + raise ValueError(msg) self._ESPEAK_LIB = backend self._ESPEAK_VER = get_espeakng_version() if backend == "espeak-ng" else get_espeak_version() def auto_set_espeak_lib(self) -> None: - if is_tool("espeak-ng"): + if _is_tool("espeak-ng"): self._ESPEAK_LIB = "espeak-ng" self._ESPEAK_VER = get_espeakng_version() - elif is_tool("espeak"): + elif _is_tool("espeak"): self._ESPEAK_LIB = "espeak" self._ESPEAK_VER = get_espeak_version() else: - raise Exception("Cannot set backend automatically. espeak-ng or espeak not found") + msg = "Cannot set backend automatically. espeak-ng or espeak not found" + raise FileNotFoundError(msg) @staticmethod - def name(): + def name() -> str: return "espeak" - def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str: + def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False) -> str: """Convert input text to phonemes. Args: @@ -193,7 +201,7 @@ def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str: args.append(text) # compute phonemes phonemes = "" - for line in _espeak_exe(self._ESPEAK_LIB, args, sync=True): + for line in _espeak_exe(self.backend, args, sync=True): logger.debug("line: %s", repr(line)) ph_decoded = line.decode("utf8").strip() # espeak: @@ -210,11 +218,11 @@ def phonemize_espeak(self, text: str, separator: str = "|", tie=False) -> str: phonemes += ph_decoded.strip() return phonemes.replace("_", separator) - def _phonemize(self, text, separator=None): + def _phonemize(self, text: str, separator: str = "") -> str: return self.phonemize_espeak(text, separator, tie=False) @staticmethod - def supported_languages() -> Dict: + def supported_languages() -> dict[str, str]: """Get a dictionary of supported languages. 
Returns: @@ -224,8 +232,7 @@ def supported_languages() -> Dict: return {} args = ["--voices"] langs = {} - count = 0 - for line in _espeak_exe(_DEF_ESPEAK_LIB, args, sync=True): + for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args, sync=True)): line = line.decode("utf8").strip() if count > 0: cols = line.split() @@ -233,7 +240,6 @@ def supported_languages() -> Dict: lang_name = cols[3] langs[lang_code] = lang_name logger.debug("line: %s", repr(line)) - count += 1 return langs def version(self) -> str: @@ -242,16 +248,12 @@ def version(self) -> str: Returns: str: Version of the used backend. """ - args = ["--version"] - for line in _espeak_exe(self.backend, args, sync=True): - version = line.decode("utf8").strip().split()[2] - logger.debug("line: %s", repr(line)) - return version + return self.backend_version @classmethod - def is_available(cls): - """Return true if ESpeak is available else false""" - return is_tool("espeak") or is_tool("espeak-ng") + def is_available(cls) -> bool: + """Return true if ESpeak is available else false.""" + return _is_tool("espeak") or _is_tool("espeak-ng") if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 50d67db97d..8f23a86973 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,6 +8,7 @@ requires = [ ] [tool.ruff] +target-version = "py39" line-length = 120 lint.extend-select = [ "B033", # duplicate-value From 98e21d0f025bb49ca345db706ebea7af8ea654c2 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 1 May 2024 14:28:55 +0200 Subject: [PATCH 077/255] test(losses): change assertEqual to assertAlmostEqual Failed in CI with: AssertionError: 1.401298464324817e-45 != 0.0 --- tests/tts_tests/test_losses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/tts_tests/test_losses.py b/tests/tts_tests/test_losses.py index 522b7bb17c..794478dca3 100644 --- a/tests/tts_tests/test_losses.py +++ b/tests/tts_tests/test_losses.py @@ -216,7 +216,7 @@ def test_in_out(self): # pylint: disable=no-self-use late_x = -200.0 * sequence_mask(length + 1, 100).float() + 100.0 # simulate logits on late stopping loss = layer(true_x, target, length) - self.assertEqual(loss.item(), 0.0) + self.assertAlmostEqual(loss.item(), 0.0) loss = layer(early_x, target, length) self.assertAlmostEqual(loss.item(), 2.1053, places=4) From f4cacd7b7ca82cf2c77ec38499c7caa5cbeda35e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 7 May 2024 11:28:01 +0200 Subject: [PATCH 078/255] build: move metadata from setup.py to pyproject.toml --- pyproject.toml | 46 ++++++++++++++++++++++++++++++++++++++++++++ setup.py | 52 +------------------------------------------------- 2 files changed, 47 insertions(+), 51 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8f23a86973..d10b78f7dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,6 +7,52 @@ requires = [ "packaging", ] +[tool.setuptools.packages.find] +include = ["TTS*"] + +[project] +name = "coqui-tts" +description = "Deep learning for Text to Speech." 
+readme = "README.md" +requires-python = ">=3.9, <3.13" +license = {text = "MPL-2.0"} +authors = [ + {name = "Eren Gölge", email = "egolge@coqui.ai"} +] +maintainers = [ + {name = "Enno Hermann", email = "enno.hermann@gmail.com"} +] +classifiers = [ + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Development Status :: 3 - Alpha", + "Intended Audience :: Science/Research", + "Intended Audience :: Developers", + "Operating System :: POSIX :: Linux", + "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Multimedia :: Sound/Audio :: Speech", + "Topic :: Multimedia :: Sound/Audio", + "Topic :: Multimedia", + "Topic :: Scientific/Engineering :: Artificial Intelligence", +] + +[project.urls] +Homepage = "https://github.com/idiap/coqui-ai-TTS" +Documentation = "https://coqui-tts.readthedocs.io" +Repository = "https://github.com/idiap/coqui-ai-TTS" +Issues = "https://github.com/idiap/coqui-ai-TTS/issues" +Discussions = "https://github.com/idiap/coqui-ai-TTS/discussions" + +[project.scripts] +tts = "TTS.bin.synthesize:main" +tts-server = "TTS.server.server:main" + [tool.ruff] target-version = "py39" line-length = 120 diff --git a/setup.py b/setup.py index 02722c18f1..7180f48baf 100644 --- a/setup.py +++ b/setup.py @@ -28,10 +28,7 @@ import setuptools.command.build_py import setuptools.command.develop from Cython.Build import cythonize -from setuptools import Extension, find_packages, setup - -if sys.version_info < (3, 9) or sys.version_info >= (3, 13): - raise RuntimeError("Trainer requires python >= 3.6 and <3.13 " "but your Python version is {}".format(sys.version)) +from setuptools import Extension, setup cwd = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(cwd, "TTS", "VERSION")) as fin: @@ -66,9 +63,6 @@ def pip_install(package_name): requirements_server = ["flask>=2.0.1"] requirements_all = requirements_dev + requirements_notebooks + requirements_ja + requirements_server -with open("README.md", "r", encoding="utf-8") as readme_file: - README = readme_file.read() - exts = [ Extension( name="TTS.tts.utils.monotonic_align.core", @@ -76,35 +70,12 @@ def pip_install(package_name): ) ] setup( - name="coqui-tts", version=version, - url="https://github.com/idiap/coqui-ai-TTS", - author="Eren Gölge", - author_email="egolge@coqui.ai", - maintainer="Enno Hermann", - maintainer_email="enno.hermann@gmail.com", - description="Deep learning for Text to Speech.", - long_description=README, - long_description_content_type="text/markdown", - license="MPL-2.0", # cython include_dirs=numpy.get_include(), ext_modules=cythonize(exts, language_level=3), # ext_modules=find_cython_extensions(), # package - include_package_data=True, - packages=find_packages(include=["TTS"], exclude=["*.tests", "*tests.*", "tests.*", "*tests", "tests"]), - package_data={ - "TTS": [ - "VERSION", - ] - }, - project_urls={ - "Documentation": "https://coqui-tts.readthedocs.io", - "Tracker": "https://github.com/idiap/coqui-ai-TTS/issues", - "Repository": "https://github.com/idiap/coqui-ai-TTS", - "Discussions": "https://github.com/idiap/coqui-ai-TTS/discussions", - }, cmdclass={ "build_py": build_py, "develop": develop, @@ -118,26 +89,5 @@ def pip_install(package_name): "server": 
requirements_server, "ja": requirements_ja, }, - python_requires=">=3.9.0, <3.13", - entry_points={"console_scripts": ["tts=TTS.bin.synthesize:main", "tts-server = TTS.server.server:main"]}, - classifiers=[ - "Programming Language :: Python", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Development Status :: 3 - Alpha", - "Intended Audience :: Science/Research", - "Intended Audience :: Developers", - "Operating System :: POSIX :: Linux", - "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries :: Python Modules", - "Topic :: Multimedia :: Sound/Audio :: Speech", - "Topic :: Multimedia :: Sound/Audio", - "Topic :: Multimedia", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - ], zip_safe=False, ) From 259d8fc40bd5120aa4a86fc0512cc5b037d356f9 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 7 May 2024 11:52:50 +0200 Subject: [PATCH 079/255] build: store version in pyproject.toml --- .github/workflows/docker.yaml | 2 +- .github/workflows/pypi-release.yml | 2 +- MANIFEST.in | 1 - TTS/VERSION | 1 - TTS/__init__.py | 6 ------ docs/source/conf.py | 6 ++---- pyproject.toml | 1 + setup.py | 3 --- 8 files changed, 5 insertions(+), 17 deletions(-) delete mode 100644 TTS/VERSION diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index ab4e9960bd..3e526e60a8 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -42,7 +42,7 @@ jobs: branch=${github_ref#*refs/heads/} # strip prefix to get branch name tags="${base}:${branch},${base}:${{ github.sha }}," elif [[ "${{ startsWith(github.ref, 'refs/tags/') }}" = "true" ]]; then - VERSION="v$(cat TTS/VERSION)" + VERSION="v$(grep -m 1 version pyproject.toml | grep -P '\d+\.\d+\.\d+' -o)" if [[ "${{ github.ref }}" != "refs/tags/${VERSION}" ]]; then echo "Pushed tag does not match VERSION file. Aborting push." exit 1 diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 78ab53986a..14c956fc70 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -14,7 +14,7 @@ jobs: - name: Verify tag matches version run: | set -ex - version=$(cat TTS/VERSION) + version=$(grep -m 1 version pyproject.toml | grep -P '\d+\.\d+\.\d+' -o) tag="${GITHUB_REF/refs\/tags\/}" if [[ "v$version" != "$tag" ]]; then exit 1 diff --git a/MANIFEST.in b/MANIFEST.in index 321d3999c1..498b33c0fe 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,7 +3,6 @@ include LICENSE.txt include requirements.*.txt include *.cff include requirements.txt -include TTS/VERSION recursive-include TTS *.json recursive-include TTS *.html recursive-include TTS *.png diff --git a/TTS/VERSION b/TTS/VERSION deleted file mode 100644 index 610e28725b..0000000000 --- a/TTS/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.23.1 diff --git a/TTS/__init__.py b/TTS/__init__.py index eaf05db1b9..e69de29bb2 100644 --- a/TTS/__init__.py +++ b/TTS/__init__.py @@ -1,6 +0,0 @@ -import os - -with open(os.path.join(os.path.dirname(__file__), "VERSION"), "r", encoding="utf-8") as f: - version = f.read().strip() - -__version__ = version diff --git a/docs/source/conf.py b/docs/source/conf.py index b85324fd40..200a487f1c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -10,6 +10,7 @@ # add these directories to sys.path here. 
If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # +import importlib.metadata import os import sys @@ -23,13 +24,10 @@ copyright = "2021 Coqui GmbH, 2020 TTS authors" author = 'Coqui GmbH' -with open("../../TTS/VERSION", "r") as ver: - version = ver.read().strip() - # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. -release = version +release = importlib.metadata.version(project) # The main toctree document. master_doc = "index" diff --git a/pyproject.toml b/pyproject.toml index d10b78f7dd..b58716e2b7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,6 +12,7 @@ include = ["TTS*"] [project] name = "coqui-tts" +version = "0.23.1" description = "Deep learning for Text to Speech." readme = "README.md" requires-python = ">=3.9, <3.13" diff --git a/setup.py b/setup.py index 7180f48baf..722959cdc0 100644 --- a/setup.py +++ b/setup.py @@ -31,8 +31,6 @@ from setuptools import Extension, setup cwd = os.path.dirname(os.path.abspath(__file__)) -with open(os.path.join(cwd, "TTS", "VERSION")) as fin: - version = fin.read().strip() class build_py(setuptools.command.build_py.build_py): # pylint: disable=too-many-ancestors @@ -70,7 +68,6 @@ def pip_install(package_name): ) ] setup( - version=version, # cython include_dirs=numpy.get_include(), ext_modules=cythonize(exts, language_level=3), From fb92e13ebb28c5956d402417e33e5d23d965aacf Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 7 May 2024 12:27:58 +0200 Subject: [PATCH 080/255] build: remove unused/obsolete code --- MANIFEST.in | 2 -- Makefile | 6 ----- TTS/tts/utils/monotonic_align/setup.py | 7 ------ pyproject.toml | 5 ++--- setup.cfg | 8 ------- setup.py | 31 -------------------------- 6 files changed, 2 insertions(+), 57 deletions(-) delete mode 100644 TTS/tts/utils/monotonic_align/setup.py delete mode 100644 setup.cfg diff --git a/MANIFEST.in b/MANIFEST.in index 498b33c0fe..5015d0aae8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -10,5 +10,3 @@ recursive-include TTS *.md recursive-include TTS *.py recursive-include TTS *.pyx recursive-include images *.png -recursive-exclude tests * -prune tests* diff --git a/Makefile b/Makefile index a24c41fc0b..3833f7334d 100644 --- a/Makefile +++ b/Makefile @@ -62,15 +62,9 @@ system-deps: ## install linux system deps dev-deps: ## install development deps pip install -r requirements.dev.txt -doc-deps: ## install docs dependencies - pip install -r docs/requirements.txt - build-docs: ## build the docs cd docs && make clean && make build -hub-deps: ## install deps for torch hub use - pip install -r requirements.hub.txt - deps: ## install 🐸 requirements. 
pip install -r requirements.txt diff --git a/TTS/tts/utils/monotonic_align/setup.py b/TTS/tts/utils/monotonic_align/setup.py deleted file mode 100644 index f22bc6a35a..0000000000 --- a/TTS/tts/utils/monotonic_align/setup.py +++ /dev/null @@ -1,7 +0,0 @@ -# from distutils.core import setup -# from Cython.Build import cythonize -# import numpy - -# setup(name='monotonic_align', -# ext_modules=cythonize("core.pyx"), -# include_dirs=[numpy.get_include()]) diff --git a/pyproject.toml b/pyproject.toml index b58716e2b7..952b41e851 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,11 +1,10 @@ [build-system] requires = [ "setuptools", - "wheel", "cython~=0.29.30", - "numpy>=1.22.0", - "packaging", + "numpy>=1.24.3", ] +build-backend = "setuptools.build_meta" [tool.setuptools.packages.find] include = ["TTS*"] diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 1f31cb5dec..0000000000 --- a/setup.cfg +++ /dev/null @@ -1,8 +0,0 @@ -[build_py] -build_lib=temp_build - -[bdist_wheel] -bdist_dir=temp_build - -[install_lib] -build_dir=temp_build diff --git a/setup.py b/setup.py index 722959cdc0..32e4934ca8 100644 --- a/setup.py +++ b/setup.py @@ -21,36 +21,13 @@ # `````` import os -import subprocess -import sys import numpy -import setuptools.command.build_py -import setuptools.command.develop from Cython.Build import cythonize from setuptools import Extension, setup cwd = os.path.dirname(os.path.abspath(__file__)) - -class build_py(setuptools.command.build_py.build_py): # pylint: disable=too-many-ancestors - def run(self): - setuptools.command.build_py.build_py.run(self) - - -class develop(setuptools.command.develop.develop): - def run(self): - setuptools.command.develop.develop.run(self) - - -# The documentation for this feature is in server/README.md -package_data = ["TTS/server/templates/*"] - - -def pip_install(package_name): - subprocess.call([sys.executable, "-m", "pip", "install", package_name]) - - requirements = open(os.path.join(cwd, "requirements.txt"), "r").readlines() with open(os.path.join(cwd, "requirements.notebooks.txt"), "r") as f: requirements_notebooks = f.readlines() @@ -68,16 +45,8 @@ def pip_install(package_name): ) ] setup( - # cython include_dirs=numpy.get_include(), ext_modules=cythonize(exts, language_level=3), - # ext_modules=find_cython_extensions(), - # package - cmdclass={ - "build_py": build_py, - "develop": develop, - # 'build_ext': build_ext - }, install_requires=requirements, extras_require={ "all": requirements_all, From 8d2a562c59c09e8017458240a3bf1228153fa0f0 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 7 May 2024 14:41:40 +0200 Subject: [PATCH 081/255] build: move dependencies into pyproject.toml --- .readthedocs.yml | 5 ++- MANIFEST.in | 2 - Makefile | 5 +-- pyproject.toml | 82 ++++++++++++++++++++++++++++++++++++++ requirements.ja.txt | 5 --- requirements.notebooks.txt | 2 - requirements.txt | 46 --------------------- setup.py | 22 ---------- 8 files changed, 86 insertions(+), 83 deletions(-) delete mode 100644 requirements.ja.txt delete mode 100644 requirements.notebooks.txt delete mode 100644 requirements.txt diff --git a/.readthedocs.yml b/.readthedocs.yml index 266a2cdeb2..e19a4dccb7 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -14,8 +14,9 @@ build: # Optionally set the version of Python and requirements required to build your docs python: install: - - requirements: docs/requirements.txt - - requirements: requirements.txt + - path: . 
+ extra_requirements: + - docs # Build documentation in the docs/ directory with Sphinx sphinx: diff --git a/MANIFEST.in b/MANIFEST.in index 5015d0aae8..8d092ceff2 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,8 +1,6 @@ include README.md include LICENSE.txt -include requirements.*.txt include *.cff -include requirements.txt recursive-include TTS *.json recursive-include TTS *.html recursive-include TTS *.png diff --git a/Makefile b/Makefile index 3833f7334d..4379b5567b 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ .DEFAULT_GOAL := help -.PHONY: test system-deps dev-deps deps style lint install help docs +.PHONY: test system-deps dev-deps style lint install help docs help: @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' @@ -65,9 +65,6 @@ dev-deps: ## install development deps build-docs: ## build the docs cd docs && make clean && make build -deps: ## install 🐸 requirements. - pip install -r requirements.txt - install: ## install 🐸 TTS for development. pip install -e .[all] diff --git a/pyproject.toml b/pyproject.toml index 952b41e851..ef8554f801 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,6 +41,88 @@ classifiers = [ "Topic :: Multimedia", "Topic :: Scientific/Engineering :: Artificial Intelligence", ] +dependencies = [ + # Core + "numpy>=1.24.3", + "cython>=0.29.30", + "scipy>=1.11.2", + "torch>=2.1", + "torchaudio", + "soundfile>=0.12.0", + "librosa>=0.10.1", + "inflect>=5.6.0", + "tqdm>=4.64.1", + "anyascii>=0.3.0", + "pyyaml>=6.0", + "fsspec[http]>=2023.6.0", + "packaging>=23.1", + # Inference + "pysbd>=0.3.4", + # Notebooks + "umap-learn>=0.5.1", + # Training + "matplotlib>=3.7.0", + # Coqui stack + "coqui-tts-trainer>=0.1", + "coqpit>=0.0.16", + # Chinese + "jieba", + "pypinyin", + # Korean + "hangul_romanize", + "jamo", + "g2pkk>=0.1.1", + # Gruut + supported languages + "gruut[de,es,fr]==2.2.3", + # Bangla + "bangla", + "bnnumerizer", + "bnunicodenormalizer", + # Tortoise + "einops>=0.6.0", + "transformers>=4.33.0", + # Bark + "encodec>=0.1.1", + # XTTS + "num2words", + "spacy[ja]>=3" +] + +[project.optional-dependencies] +# Development dependencies +dev = [ + "black==24.2.0", + "coverage[toml]", + "nose2", + "ruff==0.3.0", +] +# Dependencies for building the documentation +docs = [ + "furo", + "myst-parser==2.0.0", + "sphinx==7.2.5", + "sphinx_inline_tabs", + "sphinx_copybutton", + "linkify-it-py", +] +# Only used in notebooks +notebooks = [ + "bokeh==1.4.0", + "pandas>=1.4,<2.0", +] +# For running the TTS server +server = ["flask>=2.0.1"] +# Language-specific dependencies, mainly for G2P +# Japanese +ja = [ + "mecab-python3", + "unidic-lite==1.0.8", + "cutlet", +] +# Installs all extras (except dev and docs) +all = [ + "coqui-tts[notebooks,server,ja]", +] [project.urls] Homepage = "https://github.com/idiap/coqui-ai-TTS" diff --git a/requirements.ja.txt b/requirements.ja.txt deleted file mode 100644 index 855b872507..0000000000 --- a/requirements.ja.txt +++ /dev/null @@ -1,5 +0,0 @@ -# These cause some compatibility issues on some systems and are not strictly necessary -# japanese g2p deps -mecab-python3 -unidic-lite==1.0.8 -cutlet diff --git a/requirements.notebooks.txt b/requirements.notebooks.txt deleted file mode 100644 index 6b7e6e8956..0000000000 --- a/requirements.notebooks.txt +++ /dev/null @@ -1,2 +0,0 @@ -bokeh==1.4.0 -pandas>=1.4,<2.0 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index bc7fa67a86..0000000000 --- a/requirements.txt +++ 
/dev/null @@ -1,46 +0,0 @@ -# core deps -numpy>=1.24.3 -cython>=0.29.30 -scipy>=1.11.2 -torch>=2.1 -torchaudio -soundfile>=0.12.0 -librosa>=0.10.1 -inflect>=5.6.0 -tqdm>=4.64.1 -anyascii>=0.3.0 -pyyaml>=6.0 -fsspec[http]>=2023.6.0 # <= 2023.9.1 makes aux tests fail -packaging>=23.1 -# deps for inference -pysbd>=0.3.4 -# deps for notebooks -umap-learn>=0.5.1 -# deps for training -matplotlib>=3.7.0 -# coqui stack -coqui-tts-trainer>=0.1 -# config management -coqpit>=0.0.16 -# chinese g2p deps -jieba -pypinyin -# korean -hangul_romanize -# gruut+supported langs -gruut[de,es,fr]==2.2.3 -# deps for korean -jamo -g2pkk>=0.1.1 -# deps for bangla -bangla -bnnumerizer -bnunicodenormalizer -#deps for tortoise -einops>=0.6.0 -transformers>=4.33.0 -#deps for bark -encodec>=0.1.1 -# deps for XTTS -num2words -spacy[ja]>=3 diff --git a/setup.py b/setup.py index 32e4934ca8..1cf2def1d3 100644 --- a/setup.py +++ b/setup.py @@ -20,24 +20,10 @@ # .,*++++::::::++++*,. # `````` -import os - import numpy from Cython.Build import cythonize from setuptools import Extension, setup -cwd = os.path.dirname(os.path.abspath(__file__)) - -requirements = open(os.path.join(cwd, "requirements.txt"), "r").readlines() -with open(os.path.join(cwd, "requirements.notebooks.txt"), "r") as f: - requirements_notebooks = f.readlines() -with open(os.path.join(cwd, "requirements.dev.txt"), "r") as f: - requirements_dev = f.readlines() -with open(os.path.join(cwd, "requirements.ja.txt"), "r") as f: - requirements_ja = f.readlines() -requirements_server = ["flask>=2.0.1"] -requirements_all = requirements_dev + requirements_notebooks + requirements_ja + requirements_server - exts = [ Extension( name="TTS.tts.utils.monotonic_align.core", @@ -47,13 +33,5 @@ setup( include_dirs=numpy.get_include(), ext_modules=cythonize(exts, language_level=3), - install_requires=requirements, - extras_require={ - "all": requirements_all, - "dev": requirements_dev, - "notebooks": requirements_notebooks, - "server": requirements_server, - "ja": requirements_ja, - }, zip_safe=False, ) From 5cf1d415558e034dbec3fc5e42f5506e16d7ec5f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 7 May 2024 19:42:43 +0200 Subject: [PATCH 082/255] chore: enable commented pre-commit rules --- .pre-commit-config.yaml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index eeb02fde88..21e4898734 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,10 +3,8 @@ repos: rev: v4.5.0 hooks: - id: check-yaml - # TODO: enable these later; there are plenty of violating - # files that need to be fixed first - # - id: end-of-file-fixer - # - id: trailing-whitespace + - id: end-of-file-fixer + - id: trailing-whitespace - repo: "https://github.com/psf/black" rev: 24.2.0 hooks: From ec50006855f5f52b1337264d47d451a27bb60819 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 7 May 2024 19:44:38 +0200 Subject: [PATCH 083/255] style: run pre-commit Automatic changes from: pre-commit run --all-files --- .dockerignore | 2 +- .github/stale.yml | 1 - .gitignore | 2 +- CITATION.cff | 2 +- CODE_OF_CONDUCT.md | 4 +- LICENSE.txt | 2 +- TTS/demos/xtts_ft_demo/requirements.txt | 2 +- TTS/server/templates/details.html | 2 +- TTS/tts/utils/assets/tortoise/tokenizer.json | 2 +- TTS/vc/modules/freevc/wavlm/config.json | 2 +- dockerfiles/Dockerfile.dev | 1 - docs/requirements.txt | 2 +- docs/source/conf.py | 62 ++++++++++--------- docs/source/docker_images.md | 2 +- docs/source/finetuning.md | 1 - 
docs/source/main_classes/audio_processor.md | 2 +- docs/source/main_classes/dataset.md | 2 +- docs/source/main_classes/gan.md | 2 +- docs/source/main_classes/model_api.md | 2 +- docs/source/main_classes/speaker_manager.md | 2 +- docs/source/models/forward_tts.md | 2 - docs/source/models/overflow.md | 2 +- docs/source/models/tacotron1-2.md | 2 - docs/source/what_makes_a_good_dataset.md | 2 +- hubconf.py | 27 ++++---- notebooks/TestAttention.ipynb | 2 +- notebooks/dataset_analysis/CheckPitch.ipynb | 2 +- notebooks/dataset_analysis/README.md | 2 +- recipes/README.md | 2 +- recipes/blizzard2013/README.md | 2 +- recipes/kokoro/tacotron2-DDC/run.sh | 2 +- .../kokoro/tacotron2-DDC/tacotron2-DDC.json | 2 +- recipes/ljspeech/download_ljspeech.sh | 2 +- tests/bash_tests/test_compute_statistics.sh | 1 - tests/data/dummy_speakers.json | 2 +- tests/data/ljspeech/metadata_flac.csv | 2 +- tests/data/ljspeech/metadata_mp3.csv | 2 +- tests/data/ljspeech/metadata_wav.csv | 2 +- tests/inputs/common_voice.tsv | 10 +-- tests/inputs/dummy_model_config.json | 2 - tests/inputs/language_ids.json | 2 +- tests/inputs/test_align_tts.json | 2 +- tests/inputs/test_speaker_encoder_config.json | 2 +- tests/inputs/test_speedy_speech.json | 2 +- tests/inputs/test_vocoder_audio_config.json | 1 - .../test_vocoder_multiband_melgan_config.json | 1 - tests/inputs/test_vocoder_wavegrad.json | 1 - tests/inputs/test_vocoder_wavernn_config.json | 1 - tests/inputs/xtts_vocab.json | 2 +- 49 files changed, 85 insertions(+), 100 deletions(-) diff --git a/.dockerignore b/.dockerignore index 8d8ad918c9..5b28aa99dc 100644 --- a/.dockerignore +++ b/.dockerignore @@ -6,4 +6,4 @@ TTS.egg-info/ tests/outputs/* tests/train_outputs/* __pycache__/ -*.pyc \ No newline at end of file +*.pyc diff --git a/.github/stale.yml b/.github/stale.yml index e05eaf0b57..dd45bf098f 100644 --- a/.github/stale.yml +++ b/.github/stale.yml @@ -15,4 +15,3 @@ markComment: > for your contributions. You might also look our discussion channels. # Comment to post when closing a stale issue. Set to `false` to disable closeComment: false - diff --git a/.gitignore b/.gitignore index 22ec6e410a..f9708961e2 100644 --- a/.gitignore +++ b/.gitignore @@ -169,4 +169,4 @@ wandb depot/* coqui_recipes/* local_scripts/* -coqui_demos/* \ No newline at end of file +coqui_demos/* diff --git a/CITATION.cff b/CITATION.cff index a01a3be642..0be0d75d78 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -17,4 +17,4 @@ keywords: - deep learning - artificial intelligence - text to speech - - TTS \ No newline at end of file + - TTS diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index b80639d63c..9c83ebcf12 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -119,11 +119,11 @@ This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, available at [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0]. -Community Impact Guidelines were inspired by +Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. For answers to common questions about this code of conduct, see the FAQ at -[https://www.contributor-covenant.org/faq][FAQ]. Translations are available +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at [https://www.contributor-covenant.org/translations][translations]. 
[homepage]: https://www.contributor-covenant.org diff --git a/LICENSE.txt b/LICENSE.txt index 14e2f777f6..a612ad9813 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -35,7 +35,7 @@ Mozilla Public License Version 2.0 means any form of the work other than Source Code Form. 1.7. "Larger Work" - means a work that combines Covered Software with other material, in + means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" diff --git a/TTS/demos/xtts_ft_demo/requirements.txt b/TTS/demos/xtts_ft_demo/requirements.txt index cb5b16f66e..b58f41c546 100644 --- a/TTS/demos/xtts_ft_demo/requirements.txt +++ b/TTS/demos/xtts_ft_demo/requirements.txt @@ -1,2 +1,2 @@ faster_whisper==0.9.0 -gradio==4.7.1 \ No newline at end of file +gradio==4.7.1 diff --git a/TTS/server/templates/details.html b/TTS/server/templates/details.html index 51c9ed85a8..85ff959591 100644 --- a/TTS/server/templates/details.html +++ b/TTS/server/templates/details.html @@ -128,4 +128,4 @@ - \ No newline at end of file + diff --git a/TTS/tts/utils/assets/tortoise/tokenizer.json b/TTS/tts/utils/assets/tortoise/tokenizer.json index a128f27305..c2fb44a729 100644 --- a/TTS/tts/utils/assets/tortoise/tokenizer.json +++ b/TTS/tts/utils/assets/tortoise/tokenizer.json @@ -1 +1 @@ -{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"[STOP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"[SPACE]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":null,"pre_tokenizer":{"type":"Whitespace"},"post_processor":null,"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"[UNK]","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"[STOP]":0,"[UNK]":1,"[SPACE]":2,"!":3,"'":4,"(":5,")":6,",":7,"-":8,".":9,"/":10,":":11,";":12,"?":13,"a":14,"b":15,"c":16,"d":17,"e":18,"f":19,"g":20,"h":21,"i":22,"j":23,"k":24,"l":25,"m":26,"n":27,"o":28,"p":29,"q":30,"r":31,"s":32,"t":33,"u":34,"v":35,"w":36,"x":37,"y":38,"z":39,"th":40,"in":41,"the":42,"an":43,"er":44,"ou":45,"re":46,"on":47,"at":48,"ed":49,"en":50,"to":51,"ing":52,"and":53,"is":54,"as":55,"al":56,"or":57,"of":58,"ar":59,"it":60,"es":61,"he":62,"st":63,"le":64,"om":65,"se":66,"be":67,"ad":68,"ow":69,"ly":70,"ch":71,"wh":72,"that":73,"you":74,"li":75,"ve":76,"ac":77,"ti":78,"ld":79,"me":80,"was":81,"gh":82,"id":83,"ll":84,"wi":85,"ent":86,"for":87,"ay":88,"ro":89,"ver":90,"ic":91,"her":92,"ke":93,"his":94,"no":95,"ut":96,"un":97,"ir":98,"lo":99,"we":100,"ri":101,"ha":102,"with":103,"ght":104,"out":105,"im":106,"ion":107,"all":108,"ab":109,"one":110,"ne":111,"ge":112,"ould":113,"ter":114,"mo":115,"had":116,"ce":117,"she":118,"go":119,"sh":120,"ur":121,"am":122,"so":123,"pe":124,"my":125,"de":126,"are":127,"but":128,"ome":129,"fr":130,"ther":131,"fe":132,"su":133,"do":134,"con":135,"te":136,"ain":137,"ere":138,"po":139,"if":140,"they":141,"us":142,"ag":143,"tr":144,"now":145,"oun":146,"this":147,"have":148,"not":149,"sa":150,"il":151,"up":152,"thing":153,"from":154,"ap":155,"him":156,"ack":157,"ation":158,"ant":159,"our":160,"op":161,"like":162,"ust":163,"ess":164,"bo":165,"ok":166,"ul":167,"ind":168,"ex":169,"com":170,"some":171,"there":172,"ers":173,"co":174,"res":175,"man":176,"ard":177,"pl":178,"wor":179,"way":180,"tion":181,"fo":182,"ca":183
,"were":184,"by":185,"ate":186,"pro":187,"ted":188,"ound":189,"own":190,"would":191,"ts":192,"what":193,"qu":194,"ally":195,"ight":196,"ck":197,"gr":198,"when":199,"ven":200,"can":201,"ough":202,"ine":203,"end":204,"per":205,"ous":206,"od":207,"ide":208,"know":209,"ty":210,"very":211,"si":212,"ak":213,"who":214,"about":215,"ill":216,"them":217,"est":218,"red":219,"ye":220,"could":221,"ong":222,"your":223,"their":224,"em":225,"just":226,"other":227,"into":228,"any":229,"whi":230,"um":231,"tw":232,"ast":233,"der":234,"did":235,"ie":236,"been":237,"ace":238,"ink":239,"ity":240,"back":241,"ting":242,"br":243,"more":244,"ake":245,"pp":246,"then":247,"sp":248,"el":249,"use":250,"bl":251,"said":252,"over":253,"get":254},"merges":["t h","i n","th e","a n","e r","o u","r e","o n","a t","e d","e n","t o","in g","an d","i s","a s","a l","o r","o f","a r","i t","e s","h e","s t","l e","o m","s e","b e","a d","o w","l y","c h","w h","th at","y ou","l i","v e","a c","t i","l d","m e","w as","g h","i d","l l","w i","en t","f or","a y","r o","v er","i c","h er","k e","h is","n o","u t","u n","i r","l o","w e","r i","h a","wi th","gh t","ou t","i m","i on","al l","a b","on e","n e","g e","ou ld","t er","m o","h ad","c e","s he","g o","s h","u r","a m","s o","p e","m y","d e","a re","b ut","om e","f r","the r","f e","s u","d o","c on","t e","a in","er e","p o","i f","the y","u s","a g","t r","n ow","ou n","th is","ha ve","no t","s a","i l","u p","th ing","fr om","a p","h im","ac k","at ion","an t","ou r","o p","li ke","u st","es s","b o","o k","u l","in d","e x","c om","s ome","the re","er s","c o","re s","m an","ar d","p l","w or","w ay","ti on","f o","c a","w ere","b y","at e","p ro","t ed","oun d","ow n","w ould","t s","wh at","q u","al ly","i ght","c k","g r","wh en","v en","c an","ou gh","in e","en d","p er","ou s","o d","id e","k now","t y","ver y","s i","a k","wh o","ab out","i ll","the m","es t","re d","y e","c ould","on g","you r","the ir","e m","j ust","o ther","in to","an y","wh i","u m","t w","as t","d er","d id","i e","be en","ac e","in k","it y","b ack","t ing","b r","mo re","a ke","p p","the n","s p","e l","u se","b l","sa id","o ver","ge t"]}} \ No newline at end of file 
+{"version":"1.0","truncation":null,"padding":null,"added_tokens":[{"id":0,"special":true,"content":"[STOP]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":1,"special":true,"content":"[UNK]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false},{"id":2,"special":true,"content":"[SPACE]","single_word":false,"lstrip":false,"rstrip":false,"normalized":false}],"normalizer":null,"pre_tokenizer":{"type":"Whitespace"},"post_processor":null,"decoder":null,"model":{"type":"BPE","dropout":null,"unk_token":"[UNK]","continuing_subword_prefix":null,"end_of_word_suffix":null,"fuse_unk":false,"vocab":{"[STOP]":0,"[UNK]":1,"[SPACE]":2,"!":3,"'":4,"(":5,")":6,",":7,"-":8,".":9,"/":10,":":11,";":12,"?":13,"a":14,"b":15,"c":16,"d":17,"e":18,"f":19,"g":20,"h":21,"i":22,"j":23,"k":24,"l":25,"m":26,"n":27,"o":28,"p":29,"q":30,"r":31,"s":32,"t":33,"u":34,"v":35,"w":36,"x":37,"y":38,"z":39,"th":40,"in":41,"the":42,"an":43,"er":44,"ou":45,"re":46,"on":47,"at":48,"ed":49,"en":50,"to":51,"ing":52,"and":53,"is":54,"as":55,"al":56,"or":57,"of":58,"ar":59,"it":60,"es":61,"he":62,"st":63,"le":64,"om":65,"se":66,"be":67,"ad":68,"ow":69,"ly":70,"ch":71,"wh":72,"that":73,"you":74,"li":75,"ve":76,"ac":77,"ti":78,"ld":79,"me":80,"was":81,"gh":82,"id":83,"ll":84,"wi":85,"ent":86,"for":87,"ay":88,"ro":89,"ver":90,"ic":91,"her":92,"ke":93,"his":94,"no":95,"ut":96,"un":97,"ir":98,"lo":99,"we":100,"ri":101,"ha":102,"with":103,"ght":104,"out":105,"im":106,"ion":107,"all":108,"ab":109,"one":110,"ne":111,"ge":112,"ould":113,"ter":114,"mo":115,"had":116,"ce":117,"she":118,"go":119,"sh":120,"ur":121,"am":122,"so":123,"pe":124,"my":125,"de":126,"are":127,"but":128,"ome":129,"fr":130,"ther":131,"fe":132,"su":133,"do":134,"con":135,"te":136,"ain":137,"ere":138,"po":139,"if":140,"they":141,"us":142,"ag":143,"tr":144,"now":145,"oun":146,"this":147,"have":148,"not":149,"sa":150,"il":151,"up":152,"thing":153,"from":154,"ap":155,"him":156,"ack":157,"ation":158,"ant":159,"our":160,"op":161,"like":162,"ust":163,"ess":164,"bo":165,"ok":166,"ul":167,"ind":168,"ex":169,"com":170,"some":171,"there":172,"ers":173,"co":174,"res":175,"man":176,"ard":177,"pl":178,"wor":179,"way":180,"tion":181,"fo":182,"ca":183,"were":184,"by":185,"ate":186,"pro":187,"ted":188,"ound":189,"own":190,"would":191,"ts":192,"what":193,"qu":194,"ally":195,"ight":196,"ck":197,"gr":198,"when":199,"ven":200,"can":201,"ough":202,"ine":203,"end":204,"per":205,"ous":206,"od":207,"ide":208,"know":209,"ty":210,"very":211,"si":212,"ak":213,"who":214,"about":215,"ill":216,"them":217,"est":218,"red":219,"ye":220,"could":221,"ong":222,"your":223,"their":224,"em":225,"just":226,"other":227,"into":228,"any":229,"whi":230,"um":231,"tw":232,"ast":233,"der":234,"did":235,"ie":236,"been":237,"ace":238,"ink":239,"ity":240,"back":241,"ting":242,"br":243,"more":244,"ake":245,"pp":246,"then":247,"sp":248,"el":249,"use":250,"bl":251,"said":252,"over":253,"get":254},"merges":["t h","i n","th e","a n","e r","o u","r e","o n","a t","e d","e n","t o","in g","an d","i s","a s","a l","o r","o f","a r","i t","e s","h e","s t","l e","o m","s e","b e","a d","o w","l y","c h","w h","th at","y ou","l i","v e","a c","t i","l d","m e","w as","g h","i d","l l","w i","en t","f or","a y","r o","v er","i c","h er","k e","h is","n o","u t","u n","i r","l o","w e","r i","h a","wi th","gh t","ou t","i m","i on","al l","a b","on e","n e","g e","ou ld","t er","m o","h ad","c e","s he","g o","s h","u r","a m","s o","p e","m y","d e","a re","b ut","om e","f r","the r","f e","s 
u","d o","c on","t e","a in","er e","p o","i f","the y","u s","a g","t r","n ow","ou n","th is","ha ve","no t","s a","i l","u p","th ing","fr om","a p","h im","ac k","at ion","an t","ou r","o p","li ke","u st","es s","b o","o k","u l","in d","e x","c om","s ome","the re","er s","c o","re s","m an","ar d","p l","w or","w ay","ti on","f o","c a","w ere","b y","at e","p ro","t ed","oun d","ow n","w ould","t s","wh at","q u","al ly","i ght","c k","g r","wh en","v en","c an","ou gh","in e","en d","p er","ou s","o d","id e","k now","t y","ver y","s i","a k","wh o","ab out","i ll","the m","es t","re d","y e","c ould","on g","you r","the ir","e m","j ust","o ther","in to","an y","wh i","u m","t w","as t","d er","d id","i e","be en","ac e","in k","it y","b ack","t ing","b r","mo re","a ke","p p","the n","s p","e l","u se","b l","sa id","o ver","ge t"]}} diff --git a/TTS/vc/modules/freevc/wavlm/config.json b/TTS/vc/modules/freevc/wavlm/config.json index c6f851b93d..c2e414cf0b 100644 --- a/TTS/vc/modules/freevc/wavlm/config.json +++ b/TTS/vc/modules/freevc/wavlm/config.json @@ -96,4 +96,4 @@ "transformers_version": "4.15.0.dev0", "use_weighted_layer_sum": false, "vocab_size": 32 - } \ No newline at end of file + } diff --git a/dockerfiles/Dockerfile.dev b/dockerfiles/Dockerfile.dev index 58baee53e2..32e5c6d72f 100644 --- a/dockerfiles/Dockerfile.dev +++ b/dockerfiles/Dockerfile.dev @@ -41,4 +41,3 @@ COPY . /root # Installing the TTS package itself: RUN make install - diff --git a/docs/requirements.txt b/docs/requirements.txt index efbefec44b..86ccae9cca 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -3,4 +3,4 @@ myst-parser == 2.0.0 sphinx == 7.2.5 sphinx_inline_tabs sphinx_copybutton -linkify-it-py \ No newline at end of file +linkify-it-py diff --git a/docs/source/conf.py b/docs/source/conf.py index 200a487f1c..2a9f62b3b0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -14,15 +14,15 @@ import os import sys -sys.path.insert(0, os.path.abspath('../..')) +sys.path.insert(0, os.path.abspath("../..")) # mock deps with system level requirements. autodoc_mock_imports = ["soundfile"] # -- Project information ----------------------------------------------------- -project = 'TTS' +project = "TTS" copyright = "2021 Coqui GmbH, 2020 TTS authors" -author = 'Coqui GmbH' +author = "Coqui GmbH" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -38,32 +38,34 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.intersphinx', - 'sphinx.ext.todo', - 'sphinx.ext.coverage', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', - 'sphinx.ext.autosectionlabel', - 'myst_parser', + "sphinx.ext.autodoc", + "sphinx.ext.autosummary", + "sphinx.ext.doctest", + "sphinx.ext.intersphinx", + "sphinx.ext.todo", + "sphinx.ext.coverage", + "sphinx.ext.napoleon", + "sphinx.ext.viewcode", + "sphinx.ext.autosectionlabel", + "myst_parser", "sphinx_copybutton", "sphinx_inline_tabs", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. 
-exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'TODO/*'] +exclude_patterns = ["_build", "Thumbs.db", ".DS_Store", "TODO/*"] source_suffix = [".rst", ".md"] -myst_enable_extensions = ['linkify',] +myst_enable_extensions = [ + "linkify", +] # 'sphinxcontrib.katex', # 'sphinx.ext.autosectionlabel', @@ -74,17 +76,17 @@ # duplicated section names that are in different documents. autosectionlabel_prefix_document = True -language = 'en' +language = "en" autodoc_inherit_docstrings = False # Disable displaying type annotations, these can be very verbose -autodoc_typehints = 'none' +autodoc_typehints = "none" # Enable overriding of function signatures in the first line of the docstring. autodoc_docstring_signature = True -napoleon_custom_sections = [('Shapes', 'shape')] +napoleon_custom_sections = [("Shapes", "shape")] # -- Options for HTML output ------------------------------------------------- @@ -92,7 +94,7 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = 'furo' +html_theme = "furo" html_tite = "TTS" html_theme_options = { "light_logo": "logo.png", @@ -101,18 +103,18 @@ } html_sidebars = { - '**': [ - "sidebar/scroll-start.html", - "sidebar/brand.html", - "sidebar/search.html", - "sidebar/navigation.html", - "sidebar/ethical-ads.html", - "sidebar/scroll-end.html", - ] - } + "**": [ + "sidebar/scroll-start.html", + "sidebar/brand.html", + "sidebar/search.html", + "sidebar/navigation.html", + "sidebar/ethical-ads.html", + "sidebar/scroll-end.html", + ] +} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] diff --git a/docs/source/docker_images.md b/docs/source/docker_images.md index d08a55837d..8df5185505 100644 --- a/docs/source/docker_images.md +++ b/docs/source/docker_images.md @@ -53,4 +53,4 @@ python3 TTS/server/server.py --list_models #To get the list of available models python3 TTS/server/server.py --model_name tts_models/en/vctk/vits --use_cuda true ``` -Click [there](http://[::1]:5002/) and have fun with the server! \ No newline at end of file +Click [there](http://[::1]:5002/) and have fun with the server! diff --git a/docs/source/finetuning.md b/docs/source/finetuning.md index 069f565137..548e385ec7 100644 --- a/docs/source/finetuning.md +++ b/docs/source/finetuning.md @@ -111,4 +111,3 @@ them and fine-tune it for your own dataset. This will help you in two main ways: --coqpit.run_name "glow-tts-finetune" \ --coqpit.lr 0.00001 ``` - diff --git a/docs/source/main_classes/audio_processor.md b/docs/source/main_classes/audio_processor.md index 600b0db582..98e94a8789 100644 --- a/docs/source/main_classes/audio_processor.md +++ b/docs/source/main_classes/audio_processor.md @@ -22,4 +22,4 @@ also must inherit or initiate `BaseAudioConfig`. ```{eval-rst} .. autoclass:: TTS.config.shared_configs.BaseAudioConfig :members: -``` \ No newline at end of file +``` diff --git a/docs/source/main_classes/dataset.md b/docs/source/main_classes/dataset.md index 92d381aca5..1566488194 100644 --- a/docs/source/main_classes/dataset.md +++ b/docs/source/main_classes/dataset.md @@ -22,4 +22,4 @@ ```{eval-rst} .. 
autoclass:: TTS.vocoder.datasets.wavernn_dataset.WaveRNNDataset :members: -``` \ No newline at end of file +``` diff --git a/docs/source/main_classes/gan.md b/docs/source/main_classes/gan.md index 4524b4b5c5..e143f6431e 100644 --- a/docs/source/main_classes/gan.md +++ b/docs/source/main_classes/gan.md @@ -9,4 +9,4 @@ to do its ✨️. ```{eval-rst} .. autoclass:: TTS.vocoder.models.gan.GAN :members: -``` \ No newline at end of file +``` diff --git a/docs/source/main_classes/model_api.md b/docs/source/main_classes/model_api.md index 0e6f2d9427..71b3d41640 100644 --- a/docs/source/main_classes/model_api.md +++ b/docs/source/main_classes/model_api.md @@ -21,4 +21,4 @@ Model API provides you a set of functions that easily make your model compatible ```{eval-rst} .. autoclass:: TTS.vocoder.models.base_vocoder.BaseVocoder :members: -``` \ No newline at end of file +``` diff --git a/docs/source/main_classes/speaker_manager.md b/docs/source/main_classes/speaker_manager.md index ba4b55dc78..fe98823956 100644 --- a/docs/source/main_classes/speaker_manager.md +++ b/docs/source/main_classes/speaker_manager.md @@ -8,4 +8,4 @@ especially useful for multi-speaker models. ```{eval-rst} .. automodule:: TTS.tts.utils.speakers :members: -``` \ No newline at end of file +``` diff --git a/docs/source/models/forward_tts.md b/docs/source/models/forward_tts.md index f8f941c2fd..d618e4e056 100644 --- a/docs/source/models/forward_tts.md +++ b/docs/source/models/forward_tts.md @@ -61,5 +61,3 @@ Currently we provide the following pre-configured architectures: .. autoclass:: TTS.tts.configs.fast_speech_config.FastSpeechConfig :members: ``` - - diff --git a/docs/source/models/overflow.md b/docs/source/models/overflow.md index 09e270eae5..042ad47474 100644 --- a/docs/source/models/overflow.md +++ b/docs/source/models/overflow.md @@ -33,4 +33,4 @@ are available at https://shivammehta25.github.io/OverFlow/. ```{eval-rst} .. autoclass:: TTS.tts.models.overflow.Overflow :members: -``` \ No newline at end of file +``` diff --git a/docs/source/models/tacotron1-2.md b/docs/source/models/tacotron1-2.md index 25721eba4c..f35cfeca4c 100644 --- a/docs/source/models/tacotron1-2.md +++ b/docs/source/models/tacotron1-2.md @@ -59,5 +59,3 @@ If you have a limited VRAM, then you can try using the Guided Attention Loss or .. autoclass:: TTS.tts.configs.tacotron2_config.Tacotron2Config :members: ``` - - diff --git a/docs/source/what_makes_a_good_dataset.md b/docs/source/what_makes_a_good_dataset.md index 18c87453f7..44a93a39da 100644 --- a/docs/source/what_makes_a_good_dataset.md +++ b/docs/source/what_makes_a_good_dataset.md @@ -17,4 +17,4 @@ If you like to use a bespoken dataset, you might like to perform a couple of qua * **CheckSpectrograms** is to measure the noise level of the clips and find good audio processing parameters. The noise level might be observed by checking spectrograms. If spectrograms look cluttered, especially in silent parts, this dataset might not be a good candidate for a TTS project. If your voice clips are too noisy in the background, it makes things harder for your model to learn the alignment, and the final result might be different than the voice you are given. If the spectrograms look good, then the next step is to find a good set of audio processing parameters, defined in ```config.json```. In the notebook, you can compare different sets of parameters and see the resynthesis results in relation to the given ground-truth. Find the best parameters that give the best possible synthesis performance. 
-Another practical detail is the quantization level of the clips. If your dataset has a very high bit-rate, that might cause slow data-load time and consequently slow training. It is better to reduce the sample-rate of your dataset to around 16000-22050. \ No newline at end of file +Another practical detail is the quantization level of the clips. If your dataset has a very high bit-rate, that might cause slow data-load time and consequently slow training. It is better to reduce the sample-rate of your dataset to around 16000-22050. diff --git a/hubconf.py b/hubconf.py index 0c9c5930fc..6e10928265 100644 --- a/hubconf.py +++ b/hubconf.py @@ -1,15 +1,11 @@ -dependencies = [ - 'torch', 'gdown', 'pysbd', 'gruut', 'anyascii', 'pypinyin', 'coqpit', 'mecab-python3', 'unidic-lite' -] +dependencies = ["torch", "gdown", "pysbd", "gruut", "anyascii", "pypinyin", "coqpit", "mecab-python3", "unidic-lite"] import torch from TTS.utils.manage import ModelManager from TTS.utils.synthesizer import Synthesizer -def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA', - vocoder_name=None, - use_cuda=False): +def tts(model_name="tts_models/en/ljspeech/tacotron2-DCA", vocoder_name=None, use_cuda=False): """TTS entry point for PyTorch Hub that provides a Synthesizer object to synthesize speech from a give text. Example: @@ -28,19 +24,20 @@ def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA', manager = ModelManager() model_path, config_path, model_item = manager.download_model(model_name) - vocoder_name = model_item[ - 'default_vocoder'] if vocoder_name is None else vocoder_name + vocoder_name = model_item["default_vocoder"] if vocoder_name is None else vocoder_name vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name) # create synthesizer - synt = Synthesizer(tts_checkpoint=model_path, - tts_config_path=config_path, - vocoder_checkpoint=vocoder_path, - vocoder_config=vocoder_config_path, - use_cuda=use_cuda) + synt = Synthesizer( + tts_checkpoint=model_path, + tts_config_path=config_path, + vocoder_checkpoint=vocoder_path, + vocoder_config=vocoder_config_path, + use_cuda=use_cuda, + ) return synt -if __name__ == '__main__': - synthesizer = torch.hub.load('coqui-ai/TTS:dev', 'tts', source='github') +if __name__ == "__main__": + synthesizer = torch.hub.load("coqui-ai/TTS:dev", "tts", source="github") synthesizer.tts("This is a test!") diff --git a/notebooks/TestAttention.ipynb b/notebooks/TestAttention.ipynb index 65edf98ca4..d85ca1035a 100644 --- a/notebooks/TestAttention.ipynb +++ b/notebooks/TestAttention.ipynb @@ -185,4 +185,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/notebooks/dataset_analysis/CheckPitch.ipynb b/notebooks/dataset_analysis/CheckPitch.ipynb index 72afbc64a1..ebdac87378 100644 --- a/notebooks/dataset_analysis/CheckPitch.ipynb +++ b/notebooks/dataset_analysis/CheckPitch.ipynb @@ -176,4 +176,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +} diff --git a/notebooks/dataset_analysis/README.md b/notebooks/dataset_analysis/README.md index 79faf52159..9fe40d01a4 100644 --- a/notebooks/dataset_analysis/README.md +++ b/notebooks/dataset_analysis/README.md @@ -2,6 +2,6 @@ By the use of this notebook, you can easily analyze a brand new dataset, find exceptional cases and define your training set. -What we are looking in here is reasonable distribution of instances in terms of sequence-length, audio-length and word-coverage. 
+What we are looking in here is reasonable distribution of instances in terms of sequence-length, audio-length and word-coverage. This notebook is inspired from https://github.com/MycroftAI/mimic2 diff --git a/recipes/README.md b/recipes/README.md index 21a6727d8b..fcc4719aaa 100644 --- a/recipes/README.md +++ b/recipes/README.md @@ -19,4 +19,4 @@ python TTS/bin/resample.py --input_dir recipes/vctk/VCTK/wav48_silence_trimmed - If you train a new model using TTS, feel free to share your training to expand the list of recipes. -You can also open a new discussion and share your progress with the 🐸 community. \ No newline at end of file +You can also open a new discussion and share your progress with the 🐸 community. diff --git a/recipes/blizzard2013/README.md b/recipes/blizzard2013/README.md index 9dcb739728..75f17a5513 100644 --- a/recipes/blizzard2013/README.md +++ b/recipes/blizzard2013/README.md @@ -9,4 +9,4 @@ To get a license and download link for this dataset, you need to visit the [webs You get access to the raw dataset in a couple of days. There are a few preprocessing steps you need to do to be able to use the high fidelity dataset. 1. Get the forced time alignments for the blizzard dataset from [here](https://github.com/mueller91/tts_alignments). -2. Segment the high fidelity audio-book files based on the instructions [here](https://github.com/Tomiinek/Blizzard2013_Segmentation). \ No newline at end of file +2. Segment the high fidelity audio-book files based on the instructions [here](https://github.com/Tomiinek/Blizzard2013_Segmentation). diff --git a/recipes/kokoro/tacotron2-DDC/run.sh b/recipes/kokoro/tacotron2-DDC/run.sh index 69800cf7b4..3f18f2c3fb 100644 --- a/recipes/kokoro/tacotron2-DDC/run.sh +++ b/recipes/kokoro/tacotron2-DDC/run.sh @@ -20,4 +20,4 @@ CUDA_VISIBLE_DEVICES="0" python TTS/bin/train_tts.py --config_path $RUN_DIR/taco --coqpit.output_path $RUN_DIR \ --coqpit.datasets.0.path $RUN_DIR/$CORPUS \ --coqpit.audio.stats_path $RUN_DIR/scale_stats.npy \ - --coqpit.phoneme_cache_path $RUN_DIR/phoneme_cache \ \ No newline at end of file + --coqpit.phoneme_cache_path $RUN_DIR/phoneme_cache \ diff --git a/recipes/kokoro/tacotron2-DDC/tacotron2-DDC.json b/recipes/kokoro/tacotron2-DDC/tacotron2-DDC.json index c2e526f46c..f422203a31 100644 --- a/recipes/kokoro/tacotron2-DDC/tacotron2-DDC.json +++ b/recipes/kokoro/tacotron2-DDC/tacotron2-DDC.json @@ -122,4 +122,4 @@ "use_gst": false, "use_external_speaker_embedding_file": false, "external_speaker_embedding_file": "../../speakers-vctk-en.json" -} \ No newline at end of file +} diff --git a/recipes/ljspeech/download_ljspeech.sh b/recipes/ljspeech/download_ljspeech.sh index 9468988a99..21c3e0e2d7 100644 --- a/recipes/ljspeech/download_ljspeech.sh +++ b/recipes/ljspeech/download_ljspeech.sh @@ -11,4 +11,4 @@ shuf LJSpeech-1.1/metadata.csv > LJSpeech-1.1/metadata_shuf.csv head -n 12000 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_train.csv tail -n 1100 LJSpeech-1.1/metadata_shuf.csv > LJSpeech-1.1/metadata_val.csv mv LJSpeech-1.1 $RUN_DIR/recipes/ljspeech/ -rm LJSpeech-1.1.tar.bz2 \ No newline at end of file +rm LJSpeech-1.1.tar.bz2 diff --git a/tests/bash_tests/test_compute_statistics.sh b/tests/bash_tests/test_compute_statistics.sh index d7f0ab9d4c..721777f852 100755 --- a/tests/bash_tests/test_compute_statistics.sh +++ b/tests/bash_tests/test_compute_statistics.sh @@ -4,4 +4,3 @@ BASEDIR=$(dirname "$0") echo "$BASEDIR" # run training CUDA_VISIBLE_DEVICES="" python TTS/bin/compute_statistics.py --config_path 
$BASEDIR/../inputs/test_glow_tts.json --out_path $BASEDIR/../outputs/scale_stats.npy - diff --git a/tests/data/dummy_speakers.json b/tests/data/dummy_speakers.json index 233533b796..507b57b5a5 100644 --- a/tests/data/dummy_speakers.json +++ b/tests/data/dummy_speakers.json @@ -100222,5 +100222,5 @@ 0.04999300092458725, -0.12125937640666962 ] - } + } } diff --git a/tests/data/ljspeech/metadata_flac.csv b/tests/data/ljspeech/metadata_flac.csv index 43db05ac91..fbde71d07d 100644 --- a/tests/data/ljspeech/metadata_flac.csv +++ b/tests/data/ljspeech/metadata_flac.csv @@ -6,4 +6,4 @@ wavs/LJ001-0004.flac|produced the block books, which were the immediate predeces wavs/LJ001-0005.flac|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2 wavs/LJ001-0006.flac|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2 wavs/LJ001-0007.flac|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3 -wavs/LJ001-0008.flac|has never been surpassed.|has never been surpassed.|ljspeech-3 \ No newline at end of file +wavs/LJ001-0008.flac|has never been surpassed.|has never been surpassed.|ljspeech-3 diff --git a/tests/data/ljspeech/metadata_mp3.csv b/tests/data/ljspeech/metadata_mp3.csv index 109e48b40a..a8c5ec2e76 100644 --- a/tests/data/ljspeech/metadata_mp3.csv +++ b/tests/data/ljspeech/metadata_mp3.csv @@ -6,4 +6,4 @@ wavs/LJ001-0004.mp3|produced the block books, which were the immediate predecess wavs/LJ001-0005.mp3|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2 wavs/LJ001-0006.mp3|And it is worth mention in passing that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2 wavs/LJ001-0007.mp3|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3 -wavs/LJ001-0008.mp3|has never been surpassed.|has never been surpassed.|ljspeech-3 \ No newline at end of file +wavs/LJ001-0008.mp3|has never been surpassed.|has never been surpassed.|ljspeech-3 diff --git a/tests/data/ljspeech/metadata_wav.csv b/tests/data/ljspeech/metadata_wav.csv index aff73f6d40..1af6652e6a 100644 --- a/tests/data/ljspeech/metadata_wav.csv +++ b/tests/data/ljspeech/metadata_wav.csv @@ -6,4 +6,4 @@ wavs/LJ001-0004.wav|produced the block books, which were the immediate predecess wavs/LJ001-0005.wav|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|the invention of movable metal letters in the middle of the fifteenth century may justly be considered as the invention of the art of printing.|ljspeech-2 wavs/LJ001-0006.wav|And it is worth mention in passing 
that, as an example of fine typography,|And it is worth mention in passing that, as an example of fine typography,|ljspeech-2 wavs/LJ001-0007.wav|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about 1455,|the earliest book printed with movable types, the Gutenberg, or "forty-two line Bible" of about fourteen fifty-five,|ljspeech-3 -wavs/LJ001-0008.wav|has never been surpassed.|has never been surpassed.|ljspeech-3 \ No newline at end of file +wavs/LJ001-0008.wav|has never been surpassed.|has never been surpassed.|ljspeech-3 diff --git a/tests/inputs/common_voice.tsv b/tests/inputs/common_voice.tsv index 39fc4190ac..b4351d6739 100644 --- a/tests/inputs/common_voice.tsv +++ b/tests/inputs/common_voice.tsv @@ -1,6 +1,6 @@ client_id path sentence up_votes down_votes age gender accent locale segment -95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005954.mp3 The applicants are invited for coffee and visa is given immediately. 3 0 en -95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005955.mp3 Developmental robotics is related to, but differs from, evolutionary robotics. 2 0 en -95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005956.mp3 The musical was originally directed and choreographed by Alan Lund. 2 0 en -954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737073.mp3 He graduated from Columbia High School, in Brown County, South Dakota. 2 0 en -954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737074.mp3 Competition for limited resources has also resulted in some local conflicts. 2 0 en +95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005954.mp3 The applicants are invited for coffee and visa is given immediately. 3 0 en +95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005955.mp3 Developmental robotics is related to, but differs from, evolutionary robotics. 2 0 en +95324d489b122a800b840e0b0d068f7363a1a6c2cd2e7365672cc7033e38deaa794bd59edcf8196aa35c9791652b9085ac3839a98bb50ebab4a1e8538a94846b common_voice_en_20005956.mp3 The musical was originally directed and choreographed by Alan Lund. 2 0 en +954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737073.mp3 He graduated from Columbia High School, in Brown County, South Dakota. 2 0 en +954a4181ae9fba89d1b1570f2ae148b3ee18ee2311de978e698f598db859f830d93d35574596d713518e8c96cdae01fce7a08c60c2e0a22bcf01e020924440a6 common_voice_en_19737074.mp3 Competition for limited resources has also resulted in some local conflicts. 
2 0 en diff --git a/tests/inputs/dummy_model_config.json b/tests/inputs/dummy_model_config.json index b51bb3a871..3f64c7f3df 100644 --- a/tests/inputs/dummy_model_config.json +++ b/tests/inputs/dummy_model_config.json @@ -98,5 +98,3 @@ "gst_style_tokens": 10 } } - - diff --git a/tests/inputs/language_ids.json b/tests/inputs/language_ids.json index 27bb15206f..80833d8058 100644 --- a/tests/inputs/language_ids.json +++ b/tests/inputs/language_ids.json @@ -2,4 +2,4 @@ "en": 0, "fr-fr": 1, "pt-br": 2 -} \ No newline at end of file +} diff --git a/tests/inputs/test_align_tts.json b/tests/inputs/test_align_tts.json index 3f928c7e92..80721346d5 100644 --- a/tests/inputs/test_align_tts.json +++ b/tests/inputs/test_align_tts.json @@ -155,4 +155,4 @@ "meta_file_attn_mask": null } ] -} \ No newline at end of file +} diff --git a/tests/inputs/test_speaker_encoder_config.json b/tests/inputs/test_speaker_encoder_config.json index bfcc17ab0e..ae125f1327 100644 --- a/tests/inputs/test_speaker_encoder_config.json +++ b/tests/inputs/test_speaker_encoder_config.json @@ -58,4 +58,4 @@ "storage_size": 15 // the size of the in-memory storage with respect to a single batch }, "datasets":null -} \ No newline at end of file +} diff --git a/tests/inputs/test_speedy_speech.json b/tests/inputs/test_speedy_speech.json index 4a7eea5ded..93e4790ca3 100644 --- a/tests/inputs/test_speedy_speech.json +++ b/tests/inputs/test_speedy_speech.json @@ -152,4 +152,4 @@ "meta_file_attn_mask": "tests/data/ljspeech/metadata_attn_mask.txt" } ] -} \ No newline at end of file +} diff --git a/tests/inputs/test_vocoder_audio_config.json b/tests/inputs/test_vocoder_audio_config.json index 08acc48cd3..cdf347c4eb 100644 --- a/tests/inputs/test_vocoder_audio_config.json +++ b/tests/inputs/test_vocoder_audio_config.json @@ -21,4 +21,3 @@ "do_trim_silence": false } } - diff --git a/tests/inputs/test_vocoder_multiband_melgan_config.json b/tests/inputs/test_vocoder_multiband_melgan_config.json index 82afc97727..2b6cc9e4cd 100644 --- a/tests/inputs/test_vocoder_multiband_melgan_config.json +++ b/tests/inputs/test_vocoder_multiband_melgan_config.json @@ -163,4 +163,3 @@ // PATHS "output_path": "tests/train_outputs/" } - diff --git a/tests/inputs/test_vocoder_wavegrad.json b/tests/inputs/test_vocoder_wavegrad.json index 6378c07a6d..bb06bf2448 100644 --- a/tests/inputs/test_vocoder_wavegrad.json +++ b/tests/inputs/test_vocoder_wavegrad.json @@ -113,4 +113,3 @@ // PATHS "output_path": "tests/train_outputs/" } - diff --git a/tests/inputs/test_vocoder_wavernn_config.json b/tests/inputs/test_vocoder_wavernn_config.json index ee4e5f8e42..1dd8a229f2 100644 --- a/tests/inputs/test_vocoder_wavernn_config.json +++ b/tests/inputs/test_vocoder_wavernn_config.json @@ -109,4 +109,3 @@ // PATHS "output_path": "tests/train_outputs/" } - diff --git a/tests/inputs/xtts_vocab.json b/tests/inputs/xtts_vocab.json index a3c6dcec77..e25b4e4863 100644 --- a/tests/inputs/xtts_vocab.json +++ b/tests/inputs/xtts_vocab.json @@ -12666,4 +12666,4 @@ "da kara" ] } -} \ No newline at end of file +} From 0504ae3a0289366fa9eb95e88238ee1344c7715f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 7 May 2024 19:53:06 +0200 Subject: [PATCH 084/255] ci: add script to automatically generate requirements.dev.txt Having this file is still useful to allow installing *only* dev requirements (e.g. 
in CI) with: pip install -r requirements.dev.txt Generate that file automatically from the pyproject.toml based on: https://github.com/numpy/numpydoc/blob/e7c6baf00f5f73a4a8f8318d0cb4e04949c9a5d1/tools/generate_requirements.py --- .pre-commit-config.yaml | 7 ++++++ Makefile | 3 ++- pyproject.toml | 2 ++ requirements.dev.txt | 4 ++++ scripts/generate_requirements.py | 39 ++++++++++++++++++++++++++++++++ 5 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 scripts/generate_requirements.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 21e4898734..f96f6f38ac 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,3 +15,10 @@ repos: hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] + - repo: local + hooks: + - id: generate_requirements.py + name: generate_requirements.py + language: system + entry: python scripts/generate_requirements.py + files: "pyproject.toml|requirements.*\\.txt|tools/generate_requirements.py" diff --git a/Makefile b/Makefile index 4379b5567b..e3372d3071 100644 --- a/Makefile +++ b/Makefile @@ -66,7 +66,8 @@ build-docs: ## build the docs cd docs && make clean && make build install: ## install 🐸 TTS for development. - pip install -e .[all] + pip install -e .[all,dev] + pre-commit install docs: ## build the docs $(MAKE) -C docs clean && $(MAKE) -C docs html diff --git a/pyproject.toml b/pyproject.toml index ef8554f801..ed64714e7b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -94,7 +94,9 @@ dev = [ "black==24.2.0", "coverage[toml]", "nose2", + "pre-commit", "ruff==0.3.0", + "tomli; python_version < '3.11'", ] # Dependencies for building the documentation docs = [ diff --git a/requirements.dev.txt b/requirements.dev.txt index 7f76b2400a..0095dae3c2 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,4 +1,8 @@ +# Generated via scripts/generate_requirements.py and pre-commit hook. +# Do not edit this file; modify pyproject.toml instead. black==24.2.0 coverage[toml] nose2 +pre-commit ruff==0.3.0 +tomli; python_version < '3.11' diff --git a/scripts/generate_requirements.py b/scripts/generate_requirements.py new file mode 100644 index 0000000000..bbd32bafd2 --- /dev/null +++ b/scripts/generate_requirements.py @@ -0,0 +1,39 @@ +#!/usr/bin/env python +"""Generate requirements/*.txt files from pyproject.toml. 
+ +Adapted from: +https://github.com/numpy/numpydoc/blob/e7c6baf00f5f73a4a8f8318d0cb4e04949c9a5d1/tools/generate_requirements.py +""" + +import sys +from pathlib import Path + +try: # standard module since Python 3.11 + import tomllib as toml +except ImportError: + try: # available for older Python via pip + import tomli as toml + except ImportError: + sys.exit("Please install `tomli` first: `pip install tomli`") + +script_pth = Path(__file__) +repo_dir = script_pth.parent.parent +script_relpth = script_pth.relative_to(repo_dir) +header = [ + f"# Generated via {script_relpth.as_posix()} and pre-commit hook.", + "# Do not edit this file; modify pyproject.toml instead.", +] + + +def generate_requirement_file(name: str, req_list: list[str]) -> None: + req_fname = repo_dir / f"requirements.{name}.txt" + req_fname.write_text("\n".join(header + req_list) + "\n") + + +def main() -> None: + pyproject = toml.loads((repo_dir / "pyproject.toml").read_text()) + generate_requirement_file("dev", pyproject["project"]["optional-dependencies"]["dev"]) + + +if __name__ == "__main__": + main() From 129b488614230d5c37cee700a6d65aaf013763f2 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 7 May 2024 23:03:18 +0200 Subject: [PATCH 085/255] build: update pip and setuptools in dockerfile Otherwise a form of this issue occurs due to older setuptools not supporting pyproject.toml: https://github.com/pypa/setuptools/issues/3269 Might be resolved on Ubuntu 24.04 images. --- Dockerfile | 1 + dockerfiles/Dockerfile.dev | 1 + 2 files changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 9fb3005ef4..05c37d78fa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,6 +3,7 @@ FROM ${BASE} RUN apt-get update && apt-get upgrade -y RUN apt-get install -y --no-install-recommends gcc g++ make python3 python3-dev python3-pip python3-venv python3-wheel espeak-ng libsndfile1-dev && rm -rf /var/lib/apt/lists/* +RUN pip3 install -U pip setuptools RUN pip3 install llvmlite --ignore-installed # Install Dependencies: diff --git a/dockerfiles/Dockerfile.dev b/dockerfiles/Dockerfile.dev index 32e5c6d72f..8b77a9a223 100644 --- a/dockerfiles/Dockerfile.dev +++ b/dockerfiles/Dockerfile.dev @@ -11,6 +11,7 @@ RUN apt-get install -y --no-install-recommends \ && rm -rf /var/lib/apt/lists/* # Install Major Python Dependencies: +RUN pip3 install -U pip setuptools RUN pip3 install llvmlite --ignore-installed RUN pip3 install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 RUN rm -rf /root/.cache/pip From e3fed5cf7097d50a2db8041fff27ce0badddc61e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 8 May 2024 09:51:01 +0200 Subject: [PATCH 086/255] build: create separate makefile target for development install --- CONTRIBUTING.md | 2 +- Makefile | 7 +++++-- dockerfiles/Dockerfile.dev | 2 +- docs/source/installation.md | 5 +++++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d8e8fc61bb..e93858f27d 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -60,7 +60,7 @@ The following steps are tested on an Ubuntu system. ```bash $ make system-deps # intended to be used on Ubuntu (Debian). Let us know if you have a different OS. - $ make install + $ make install_dev ``` 4. Create a new branch with an informative name for your goal. 
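For contributors following the updated guide above, a development setup session might look like the following sketch; it assumes the `install_dev` target introduced in the Makefile diff below and a Debian/Ubuntu system:

```bash
# Sketch of a development setup (assumes the install_dev target below exists)
make system-deps              # install system packages (Linux only)
make install_dev              # pip install -e .[all,dev] and pre-commit install
pre-commit run --all-files    # optional: run all configured hooks once
```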
diff --git a/Makefile b/Makefile index e3372d3071..077b4b23e5 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ .DEFAULT_GOAL := help -.PHONY: test system-deps dev-deps style lint install help docs +.PHONY: test system-deps dev-deps style lint install install_dev help docs help: @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' @@ -65,7 +65,10 @@ dev-deps: ## install development deps build-docs: ## build the docs cd docs && make clean && make build -install: ## install 🐸 TTS for development. +install: ## install 🐸 TTS + pip install -e .[all] + +install_dev: ## install 🐸 TTS for development. pip install -e .[all,dev] pre-commit install diff --git a/dockerfiles/Dockerfile.dev b/dockerfiles/Dockerfile.dev index 8b77a9a223..12242d1251 100644 --- a/dockerfiles/Dockerfile.dev +++ b/dockerfiles/Dockerfile.dev @@ -41,4 +41,4 @@ RUN pip install \ COPY . /root # Installing the TTS package itself: -RUN make install +RUN make install_dev diff --git a/docs/source/installation.md b/docs/source/installation.md index f0b2a00f19..405c436643 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -26,7 +26,12 @@ This is recommended for development and more control over 🐸TTS. git clone https://github.com/idiap/coqui-ai-TTS cd coqui-ai-TTS make system-deps # only on Linux systems. + +# Install package and optional extras make install + +# Same as above + dev dependencies and pre-commit +make install_dev ``` ## On Windows From 20e82bc92de97dff7fc1610c1c3e01e3773659db Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 8 May 2024 12:10:29 +0200 Subject: [PATCH 087/255] build: update development dockerfile and test it in ci --- .github/workflows/docker.yaml | 55 +++++++++++++++++++++++++++++++++++ dockerfiles/Dockerfile.dev | 25 ++-------------- 2 files changed, 57 insertions(+), 23 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 3e526e60a8..249816a320 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -63,3 +63,58 @@ jobs: push: ${{ github.event_name == 'push' }} build-args: "BASE=${{ matrix.base }}" tags: ${{ steps.compute-tag.outputs.tags }} + docker-dev-build: + name: "Build the development Docker image" + runs-on: ubuntu-latest + strategy: + matrix: + arch: ["amd64"] + base: + - "nvidia/cuda:11.8.0-base-ubuntu22.04" # GPU enabled + steps: + - uses: actions/checkout@v4 + - name: Log in to the Container registry + uses: docker/login-action@v1 + with: + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + - name: Compute Docker tags, check VERSION file matches tag + id: compute-tag + run: | + set -ex + base="ghcr.io/idiap/coqui-tts-dev" + tags="" # PR build + + if [[ ${{ matrix.base }} = "python:3.10.8-slim" ]]; then + base="ghcr.io/idiap/coqui-tts-dev-cpu" + fi + + if [[ "${{ startsWith(github.ref, 'refs/heads/') }}" = "true" ]]; then + # Push to branch + github_ref="${{ github.ref }}" + branch=${github_ref#*refs/heads/} # strip prefix to get branch name + tags="${base}:${branch},${base}:${{ github.sha }}," + elif [[ "${{ startsWith(github.ref, 'refs/tags/') }}" = "true" ]]; then + VERSION="v$(grep -m 1 version pyproject.toml | grep -P '\d+\.\d+\.\d+' -o)" + if [[ "${{ github.ref }}" != "refs/tags/${VERSION}" ]]; then + echo "Pushed tag does not match VERSION file. Aborting push." 
+ exit 1 + fi + tags="${base}:${VERSION},${base}:latest,${base}:${{ github.sha }}" + fi + echo "::set-output name=tags::${tags}" + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v1 + - name: Build and push + uses: docker/build-push-action@v2 + with: + context: . + file: dockerfiles/Dockerfile.dev + platforms: linux/${{ matrix.arch }} + push: false + build-args: "BASE=${{ matrix.base }}" + tags: ${{ steps.compute-tag.outputs.tags }} diff --git a/dockerfiles/Dockerfile.dev b/dockerfiles/Dockerfile.dev index 12242d1251..af0d3fc0cd 100644 --- a/dockerfiles/Dockerfile.dev +++ b/dockerfiles/Dockerfile.dev @@ -16,29 +16,8 @@ RUN pip3 install llvmlite --ignore-installed RUN pip3 install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cu118 RUN rm -rf /root/.cache/pip -WORKDIR /root - -# Copy Dependency Lock Files: -COPY \ - Makefile \ - pyproject.toml \ - setup.py \ - requirements.dev.txt \ - requirements.ja.txt \ - requirements.notebooks.txt \ - requirements.txt \ - /root/ - -# Install Project Dependencies -# Separate stage to limit re-downloading: -RUN pip install \ - -r requirements.txt \ - -r requirements.dev.txt \ - -r requirements.ja.txt \ - -r requirements.notebooks.txt - # Copy TTS repository contents: +WORKDIR /root COPY . /root -# Installing the TTS package itself: -RUN make install_dev +RUN make install From 4f2eff418ec0ccda9a2c590ad836ebd67119c8d7 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 6 May 2024 18:37:15 +0200 Subject: [PATCH 088/255] chore: enable ruff rules that already pass --- pyproject.toml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ed64714e7b..b2c0a31721 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -162,14 +162,9 @@ lint.extend-select = [ ] lint.ignore = [ - "E501", # line too long "E722", # bare except (TODO: fix these) "E731", # don't use lambdas "E741", # ambiguous variable name - "PLR0912", # too-many-branches - "PLR0913", # too-many-arguments - "PLR0915", # too-many-statements - "UP004", # useless-object-inheritance "F821", # TODO: enable "F841", # TODO: enable "PLW0602", # TODO: enable From ea893c37957ab6399a89225d49de175b96507ac6 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 8 May 2024 12:39:09 +0200 Subject: [PATCH 089/255] fix: make bangla g2p deps optional --- .github/workflows/tests.yml | 2 +- README.md | 26 +++++++++++++++++++++- TTS/tts/utils/text/bangla/phonemizer.py | 9 +++++--- TTS/tts/utils/text/phonemizers/__init__.py | 18 ++++++++++----- pyproject.toml | 16 ++++++++----- 5 files changed, 55 insertions(+), 16 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index ec4503e21b..43815f2ef0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -45,7 +45,7 @@ jobs: sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m uv pip install --system "coqui-tts[dev,server,ja] @ ." + python3 -m uv pip install --system "coqui-tts[dev,server,languages] @ ." python3 setup.py egg_info - name: Unit tests run: make ${{ matrix.subset }} diff --git a/README.md b/README.md index 901da37ea5..d692423e56 100644 --- a/README.md +++ b/README.md @@ -143,9 +143,33 @@ If you plan to code or train models, clone 🐸TTS and install it locally. 
```bash git clone https://github.com/idiap/coqui-ai-TTS -pip install -e .[all,dev,notebooks,server] # Select the relevant extras +pip install -e . ``` +### Optional dependencies + +The following extras allow the installation of optional dependencies: + +| Name | Description | +|------|-------------| +| `all` | All optional dependencies, except `dev` and `docs` | +| `dev` | Development dependencies | +| `dev` | Dependencies for building the documentation | +| `notebooks` | Dependencies only used in notebooks | +| `server` | Dependencies to run the TTS server | +| `bn` | Bangla G2P | +| `ja` | Japanese G2P | +| `languages` | All language-specific dependencies | + +You can install them with one of the following commands: + +```bash +pip install coqui-tts[server,ja] +pip install -e .[server,ja] +``` + +### Platforms + If you are on Ubuntu (Debian), you can also run following commands for installation. ```bash diff --git a/TTS/tts/utils/text/bangla/phonemizer.py b/TTS/tts/utils/text/bangla/phonemizer.py index e15830fe8a..cddcb00fd5 100644 --- a/TTS/tts/utils/text/bangla/phonemizer.py +++ b/TTS/tts/utils/text/bangla/phonemizer.py @@ -1,8 +1,11 @@ import re -import bangla -from bnnumerizer import numerize -from bnunicodenormalizer import Normalizer +try: + import bangla + from bnnumerizer import numerize + from bnunicodenormalizer import Normalizer +except ImportError as e: + raise ImportError("Bangla requires: bangla, bnnumerizer, bnunicodenormalizer") from e # initialize bnorm = Normalizer() diff --git a/TTS/tts/utils/text/phonemizers/__init__.py b/TTS/tts/utils/text/phonemizers/__init__.py index 446f288302..0f32197e6f 100644 --- a/TTS/tts/utils/text/phonemizers/__init__.py +++ b/TTS/tts/utils/text/phonemizers/__init__.py @@ -1,4 +1,3 @@ -from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer from TTS.tts.utils.text.phonemizers.base import BasePhonemizer from TTS.tts.utils.text.phonemizers.belarusian_phonemizer import BEL_Phonemizer from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak @@ -6,12 +5,17 @@ from TTS.tts.utils.text.phonemizers.ko_kr_phonemizer import KO_KR_Phonemizer from TTS.tts.utils.text.phonemizers.zh_cn_phonemizer import ZH_CN_Phonemizer +try: + from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer +except ImportError: + BN_Phonemizer = None + try: from TTS.tts.utils.text.phonemizers.ja_jp_phonemizer import JA_JP_Phonemizer except ImportError: JA_JP_Phonemizer = None -PHONEMIZERS = {b.name(): b for b in (ESpeak, Gruut, KO_KR_Phonemizer, BN_Phonemizer)} +PHONEMIZERS = {b.name(): b for b in (ESpeak, Gruut, KO_KR_Phonemizer)} ESPEAK_LANGS = list(ESpeak.supported_languages().keys()) @@ -34,12 +38,12 @@ DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"] DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name() DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name() -DEF_LANG_TO_PHONEMIZER["bn"] = BN_Phonemizer.name() DEF_LANG_TO_PHONEMIZER["be"] = BEL_Phonemizer.name() -# JA phonemizer has deal breaking dependencies like MeCab for some systems. -# So we only have it when we have it. 
+if BN_Phonemizer is not None: + PHONEMIZERS[BN_Phonemizer.name()] = BN_Phonemizer + DEF_LANG_TO_PHONEMIZER["bn"] = BN_Phonemizer.name() if JA_JP_Phonemizer is not None: PHONEMIZERS[JA_JP_Phonemizer.name()] = JA_JP_Phonemizer DEF_LANG_TO_PHONEMIZER["ja-jp"] = JA_JP_Phonemizer.name() @@ -63,11 +67,13 @@ def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer: return ZH_CN_Phonemizer(**kwargs) if name == "ja_jp_phonemizer": if JA_JP_Phonemizer is None: - raise ValueError(" ❗ You need to install JA phonemizer dependencies. Try `pip install coqui-tts[ja]`.") + raise ValueError("You need to install JA phonemizer dependencies. Try `pip install coqui-tts[ja]`.") return JA_JP_Phonemizer(**kwargs) if name == "ko_kr_phonemizer": return KO_KR_Phonemizer(**kwargs) if name == "bn_phonemizer": + if BN_Phonemizer is None: + raise ValueError("You need to install BN phonemizer dependencies. Try `pip install coqui-tts[bn]`.") return BN_Phonemizer(**kwargs) if name == "be_phonemizer": return BEL_Phonemizer(**kwargs) diff --git a/pyproject.toml b/pyproject.toml index b2c0a31721..79511c87b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,10 +74,6 @@ dependencies = [ "g2pkk>=0.1.1", # Gruut + supported languages "gruut[de,es,fr]==2.2.3", - # Bangla - "bangla", - "bnnumerizer", - "bnunicodenormalizer", # Tortoise "einops>=0.6.0", "transformers>=4.33.0", @@ -115,15 +111,25 @@ notebooks = [ # For running the TTS server server = ["flask>=2.0.1"] # Language-specific dependencies, mainly for G2P +# Bangla +bn = [ + "bangla", + "bnnumerizer", + "bnunicodenormalizer", +] # Japanese ja = [ "mecab-python3", "unidic-lite==1.0.8", "cutlet", ] +# All language-specific dependencies +languages = [ + "coqui-tts[bn,ja]", +] # Installs all extras (except dev and docs) all = [ - "coqui-tts[notebooks,server,ja]", + "coqui-tts[notebooks,server,bn,ja]", ] [project.urls] From 55ed162f2a22e3bf2b390f361a3a8697985404bc Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 8 May 2024 14:03:05 +0200 Subject: [PATCH 090/255] fix: make chinese g2p deps optional --- README.md | 3 ++- TTS/tts/layers/xtts/tokenizer.py | 5 ++++- TTS/tts/utils/text/chinese_mandarin/phonemizer.py | 7 +++++-- TTS/tts/utils/text/phonemizers/__init__.py | 12 ++++++++++-- pyproject.toml | 12 +++++++----- 5 files changed, 28 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index d692423e56..0630fd2bc4 100644 --- a/README.md +++ b/README.md @@ -159,9 +159,10 @@ The following extras allow the installation of optional dependencies: | `server` | Dependencies to run the TTS server | | `bn` | Bangla G2P | | `ja` | Japanese G2P | +| `zh` | Chinese G2P | | `languages` | All language-specific dependencies | -You can install them with one of the following commands: +You can install extras with one of the following commands: ```bash pip install coqui-tts[server,ja] diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py index 1b0a488b04..b1ea5b0d26 100644 --- a/TTS/tts/layers/xtts/tokenizer.py +++ b/TTS/tts/layers/xtts/tokenizer.py @@ -4,7 +4,6 @@ import textwrap from functools import cached_property -import pypinyin import torch from hangul_romanize import Transliter from hangul_romanize.rule import academic @@ -577,6 +576,10 @@ def basic_cleaners(text): def chinese_transliterate(text): + try: + import pypinyin + except ImportError as e: + raise ImportError("Chinese requires: pypinyin") from e return "".join( [p[0] for p in pypinyin.pinyin(text, style=pypinyin.Style.TONE3, heteronym=False, 
neutral_tone_with_five=True)] ) diff --git a/TTS/tts/utils/text/chinese_mandarin/phonemizer.py b/TTS/tts/utils/text/chinese_mandarin/phonemizer.py index 727c881e10..e9d62e9d06 100644 --- a/TTS/tts/utils/text/chinese_mandarin/phonemizer.py +++ b/TTS/tts/utils/text/chinese_mandarin/phonemizer.py @@ -1,7 +1,10 @@ from typing import List -import jieba -import pypinyin +try: + import jieba + import pypinyin +except ImportError as e: + raise ImportError("Chinese requires: jieba, pypinyin") from e from .pinyinToPhonemes import PINYIN_DICT diff --git a/TTS/tts/utils/text/phonemizers/__init__.py b/TTS/tts/utils/text/phonemizers/__init__.py index 0f32197e6f..0f76690612 100644 --- a/TTS/tts/utils/text/phonemizers/__init__.py +++ b/TTS/tts/utils/text/phonemizers/__init__.py @@ -3,7 +3,6 @@ from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut from TTS.tts.utils.text.phonemizers.ko_kr_phonemizer import KO_KR_Phonemizer -from TTS.tts.utils.text.phonemizers.zh_cn_phonemizer import ZH_CN_Phonemizer try: from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer @@ -15,6 +14,11 @@ except ImportError: JA_JP_Phonemizer = None +try: + from TTS.tts.utils.text.phonemizers.zh_cn_phonemizer import ZH_CN_Phonemizer +except ImportError: + ZH_CN_Phonemizer = None + PHONEMIZERS = {b.name(): b for b in (ESpeak, Gruut, KO_KR_Phonemizer)} @@ -36,7 +40,6 @@ # Force default for some languages DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"] -DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name() DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name() DEF_LANG_TO_PHONEMIZER["be"] = BEL_Phonemizer.name() @@ -47,6 +50,9 @@ if JA_JP_Phonemizer is not None: PHONEMIZERS[JA_JP_Phonemizer.name()] = JA_JP_Phonemizer DEF_LANG_TO_PHONEMIZER["ja-jp"] = JA_JP_Phonemizer.name() +if ZH_CN_Phonemizer is not None: + PHONEMIZERS[ZH_CN_Phonemizer.name()] = ZH_CN_Phonemizer + DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name() def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer: @@ -64,6 +70,8 @@ def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer: if name == "gruut": return Gruut(**kwargs) if name == "zh_cn_phonemizer": + if ZH_CN_Phonemizer is None: + raise ValueError("You need to install ZH phonemizer dependencies. 
Try `pip install coqui-tts[zh]`.") return ZH_CN_Phonemizer(**kwargs) if name == "ja_jp_phonemizer": if JA_JP_Phonemizer is None: diff --git a/pyproject.toml b/pyproject.toml index 79511c87b5..28af0eb254 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,9 +65,6 @@ dependencies = [ # Coqui stack "coqui-tts-trainer>=0.1", "coqpit>=0.0.16", - # Chinese - "jieba", - "pypinyin", # Korean "hangul_romanize", "jamo", @@ -123,13 +120,18 @@ ja = [ "unidic-lite==1.0.8", "cutlet", ] +# Chinese +zh = [ + "jieba", + "pypinyin", +] # All language-specific dependencies languages = [ - "coqui-tts[bn,ja]", + "coqui-tts[bn,ja,zh]", ] # Installs all extras (except dev and docs) all = [ - "coqui-tts[notebooks,server,bn,ja]", + "coqui-tts[notebooks,server,bn,ja,zh]", ] [project.urls] From 865a48156dd97c2e71c0ceb3081aa505e8e8f0ae Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 8 May 2024 14:43:00 +0200 Subject: [PATCH 091/255] fix: make korean g2p deps optional --- README.md | 1 + TTS/tts/layers/xtts/tokenizer.py | 7 +++++-- TTS/tts/utils/text/korean/phonemizer.py | 5 ++++- TTS/tts/utils/text/phonemizers/__init__.py | 14 +++++++++++--- pyproject.toml | 14 ++++++++------ 5 files changed, 29 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 0630fd2bc4..1033d16be4 100644 --- a/README.md +++ b/README.md @@ -159,6 +159,7 @@ The following extras allow the installation of optional dependencies: | `server` | Dependencies to run the TTS server | | `bn` | Bangla G2P | | `ja` | Japanese G2P | +| `ko` | Korean G2P | | `zh` | Chinese G2P | | `languages` | All language-specific dependencies | diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py index b1ea5b0d26..5e701c085f 100644 --- a/TTS/tts/layers/xtts/tokenizer.py +++ b/TTS/tts/layers/xtts/tokenizer.py @@ -5,8 +5,6 @@ from functools import cached_property import torch -from hangul_romanize import Transliter -from hangul_romanize.rule import academic from num2words import num2words from spacy.lang.ar import Arabic from spacy.lang.en import English @@ -592,6 +590,11 @@ def japanese_cleaners(text, katsu): def korean_transliterate(text): + try: + from hangul_romanize import Transliter + from hangul_romanize.rule import academic + except ImportError as e: + raise ImportError("Korean requires: hangul_romanize") from e r = Transliter(academic) return r.translit(text) diff --git a/TTS/tts/utils/text/korean/phonemizer.py b/TTS/tts/utils/text/korean/phonemizer.py index 2c69217c40..dde039b0f5 100644 --- a/TTS/tts/utils/text/korean/phonemizer.py +++ b/TTS/tts/utils/text/korean/phonemizer.py @@ -1,4 +1,7 @@ -from jamo import hangul_to_jamo +try: + from jamo import hangul_to_jamo +except ImportError as e: + raise ImportError("Korean requires: g2pkk, jamo") from e from TTS.tts.utils.text.korean.korean import normalize diff --git a/TTS/tts/utils/text/phonemizers/__init__.py b/TTS/tts/utils/text/phonemizers/__init__.py index 0f76690612..fdf62bab3d 100644 --- a/TTS/tts/utils/text/phonemizers/__init__.py +++ b/TTS/tts/utils/text/phonemizers/__init__.py @@ -2,7 +2,6 @@ from TTS.tts.utils.text.phonemizers.belarusian_phonemizer import BEL_Phonemizer from TTS.tts.utils.text.phonemizers.espeak_wrapper import ESpeak from TTS.tts.utils.text.phonemizers.gruut_wrapper import Gruut -from TTS.tts.utils.text.phonemizers.ko_kr_phonemizer import KO_KR_Phonemizer try: from TTS.tts.utils.text.phonemizers.bangla_phonemizer import BN_Phonemizer @@ -14,12 +13,17 @@ except ImportError: JA_JP_Phonemizer = None +try: + from 
TTS.tts.utils.text.phonemizers.ko_kr_phonemizer import KO_KR_Phonemizer +except ImportError: + KO_KR_Phonemizer = None + try: from TTS.tts.utils.text.phonemizers.zh_cn_phonemizer import ZH_CN_Phonemizer except ImportError: ZH_CN_Phonemizer = None -PHONEMIZERS = {b.name(): b for b in (ESpeak, Gruut, KO_KR_Phonemizer)} +PHONEMIZERS = {b.name(): b for b in (ESpeak, Gruut)} ESPEAK_LANGS = list(ESpeak.supported_languages().keys()) @@ -40,7 +44,6 @@ # Force default for some languages DEF_LANG_TO_PHONEMIZER["en"] = DEF_LANG_TO_PHONEMIZER["en-us"] -DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name() DEF_LANG_TO_PHONEMIZER["be"] = BEL_Phonemizer.name() @@ -50,6 +53,9 @@ if JA_JP_Phonemizer is not None: PHONEMIZERS[JA_JP_Phonemizer.name()] = JA_JP_Phonemizer DEF_LANG_TO_PHONEMIZER["ja-jp"] = JA_JP_Phonemizer.name() +if KO_KR_Phonemizer is not None: + PHONEMIZERS[KO_KR_Phonemizer.name()] = KO_KR_Phonemizer + DEF_LANG_TO_PHONEMIZER["ko-kr"] = KO_KR_Phonemizer.name() if ZH_CN_Phonemizer is not None: PHONEMIZERS[ZH_CN_Phonemizer.name()] = ZH_CN_Phonemizer DEF_LANG_TO_PHONEMIZER["zh-cn"] = ZH_CN_Phonemizer.name() @@ -78,6 +84,8 @@ def get_phonemizer_by_name(name: str, **kwargs) -> BasePhonemizer: raise ValueError("You need to install JA phonemizer dependencies. Try `pip install coqui-tts[ja]`.") return JA_JP_Phonemizer(**kwargs) if name == "ko_kr_phonemizer": + if KO_KR_Phonemizer is None: + raise ValueError("You need to install KO phonemizer dependencies. Try `pip install coqui-tts[ko]`.") return KO_KR_Phonemizer(**kwargs) if name == "bn_phonemizer": if BN_Phonemizer is None: diff --git a/pyproject.toml b/pyproject.toml index 28af0eb254..c16b76ee86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,10 +65,6 @@ dependencies = [ # Coqui stack "coqui-tts-trainer>=0.1", "coqpit>=0.0.16", - # Korean - "hangul_romanize", - "jamo", - "g2pkk>=0.1.1", # Gruut + supported languages "gruut[de,es,fr]==2.2.3", # Tortoise @@ -114,6 +110,12 @@ bn = [ "bnnumerizer", "bnunicodenormalizer", ] +# Korean +ko = [ + "hangul_romanize", + "jamo", + "g2pkk>=0.1.1", +] # Japanese ja = [ "mecab-python3", @@ -127,11 +129,11 @@ zh = [ ] # All language-specific dependencies languages = [ - "coqui-tts[bn,ja,zh]", + "coqui-tts[bn,ja,ko,zh]", ] # Installs all extras (except dev and docs) all = [ - "coqui-tts[notebooks,server,bn,ja,zh]", + "coqui-tts[notebooks,server,bn,ja,ko,zh]", ] [project.urls] From 6d563af623e9917800ffe0effff63dabf22c9ccd Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 8 May 2024 15:50:48 +0200 Subject: [PATCH 092/255] chore: remove obsolete code for torch<2 Minimum torch version is 2.1 now. 
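For reference, the orthogonal weight initialisation kept by this cleanup can be reproduced standalone; this is a minimal sketch assuming torch >= 2.1 as stated above, with `num_splits = 4` mirroring the default in `glow.py`:

```python
import torch

num_splits = 4  # default used by the invertible 1x1 convolution in glow.py
# torch.linalg.qr exists in all supported torch versions, so the old
# torch.qr fallback for torch < 1.9 is no longer needed.
w_init = torch.linalg.qr(torch.empty(num_splits, num_splits).normal_(), "complete")[0]
if torch.det(w_init) < 0:
    # flip one column to keep the determinant positive, as the layer does
    w_init[:, 0] = -1 * w_init[:, 0]
```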
--- TTS/tts/layers/glow_tts/glow.py | 6 +----- TTS/tts/layers/xtts/perceiver_encoder.py | 4 ---- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/TTS/tts/layers/glow_tts/glow.py b/TTS/tts/layers/glow_tts/glow.py index b02c311808..77a796473b 100644 --- a/TTS/tts/layers/glow_tts/glow.py +++ b/TTS/tts/layers/glow_tts/glow.py @@ -1,5 +1,4 @@ import torch -from packaging.version import Version from torch import nn from torch.nn import functional as F @@ -90,10 +89,7 @@ def __init__(self, channels, num_splits=4, no_jacobian=False, **kwargs): # pyli self.no_jacobian = no_jacobian self.weight_inv = None - if Version(torch.__version__) < Version("1.9"): - w_init = torch.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_())[0] - else: - w_init = torch.linalg.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_(), "complete")[0] + w_init = torch.linalg.qr(torch.FloatTensor(self.num_splits, self.num_splits).normal_(), "complete")[0] if torch.det(w_init) < 0: w_init[:, 0] = -1 * w_init[:, 0] diff --git a/TTS/tts/layers/xtts/perceiver_encoder.py b/TTS/tts/layers/xtts/perceiver_encoder.py index d1aa16c456..f4b6e84123 100644 --- a/TTS/tts/layers/xtts/perceiver_encoder.py +++ b/TTS/tts/layers/xtts/perceiver_encoder.py @@ -7,7 +7,6 @@ import torch.nn.functional as F from einops import rearrange, repeat from einops.layers.torch import Rearrange -from packaging import version from torch import einsum, nn @@ -44,9 +43,6 @@ def __init__(self, dropout=0.0, causal=False, use_flash=False): self.register_buffer("mask", None, persistent=False) self.use_flash = use_flash - assert not ( - use_flash and version.parse(torch.__version__) < version.parse("2.0.0") - ), "in order to use flash attention, you must be using pytorch 2.0 or above" # determine efficient attention configs for cuda and cpu self.config = namedtuple("EfficientAttentionConfig", ["enable_flash", "enable_math", "enable_mem_efficient"]) From 59a6c9fdf295e71d201efe95fbeeca9718a99bc7 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 15 May 2024 22:56:28 +0200 Subject: [PATCH 093/255] fix(bark): add missing argument for load_voice() Fixes https://github.com/coqui-ai/TTS/issues/2795 --- TTS/tts/models/bark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/tts/models/bark.py b/TTS/tts/models/bark.py index 833a909384..797ebb0805 100644 --- a/TTS/tts/models/bark.py +++ b/TTS/tts/models/bark.py @@ -174,7 +174,7 @@ def generate_voice(self, audio, speaker_id, voice_dir): if voice_dir is not None: voice_dirs = [voice_dir] try: - _ = load_voice(speaker_id, voice_dirs) + _ = load_voice(self, speaker_id, voice_dirs) except (KeyError, FileNotFoundError): output_path = os.path.join(voice_dir, speaker_id + ".npz") os.makedirs(voice_dir, exist_ok=True) From 018f1e6453a88f7c8d26de1e682159c1f0aa446f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 15 May 2024 22:56:55 +0200 Subject: [PATCH 094/255] docs(bark): update docstrings and type hints --- TTS/tts/layers/bark/inference_funcs.py | 14 +++++++++----- TTS/tts/models/bark.py | 2 +- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/TTS/tts/layers/bark/inference_funcs.py b/TTS/tts/layers/bark/inference_funcs.py index f3d3fee937..b2875c7a83 100644 --- a/TTS/tts/layers/bark/inference_funcs.py +++ b/TTS/tts/layers/bark/inference_funcs.py @@ -2,10 +2,11 @@ import os import re from glob import glob -from typing import Dict, List +from typing import Dict, List, Optional, Tuple import librosa import numpy as np +import numpy.typing as npt import torch 
import torchaudio import tqdm @@ -48,7 +49,7 @@ def get_voices(extra_voice_dirs: List[str] = []): # pylint: disable=dangerous-d return voices -def load_npz(npz_file): +def load_npz(npz_file: str) -> Tuple[npt.NDArray[np.int64], npt.NDArray[np.int64], npt.NDArray[np.int64]]: x_history = np.load(npz_file) semantic = x_history["semantic_prompt"] coarse = x_history["coarse_prompt"] @@ -56,7 +57,11 @@ def load_npz(npz_file): return semantic, coarse, fine -def load_voice(model, voice: str, extra_voice_dirs: List[str] = []): # pylint: disable=dangerous-default-value +def load_voice( + model, voice: str, extra_voice_dirs: List[str] = [] +) -> Tuple[ + Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]], Optional[npt.NDArray[np.int64]] +]: # pylint: disable=dangerous-default-value if voice == "random": return None, None, None @@ -107,11 +112,10 @@ def generate_voice( model, output_path, ): - """Generate a new voice from a given audio and text prompt. + """Generate a new voice from a given audio. Args: audio (np.ndarray): The audio to use as a base for the new voice. - text (str): Transcription of the audio you are clonning. model (BarkModel): The BarkModel to use for generating the new voice. output_path (str): The path to save the generated voice to. """ diff --git a/TTS/tts/models/bark.py b/TTS/tts/models/bark.py index 797ebb0805..cdfb5efae4 100644 --- a/TTS/tts/models/bark.py +++ b/TTS/tts/models/bark.py @@ -164,7 +164,7 @@ def generate_audio( return audio_arr, [x_semantic, c, f] def generate_voice(self, audio, speaker_id, voice_dir): - """Generate a voice from the given audio and text. + """Generate a voice from the given audio. Args: audio (str): Path to the audio file. From 924f42e3af8c035f1ff9ef314251d0e687790c04 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 15 May 2024 20:25:37 +0200 Subject: [PATCH 095/255] ci: update release workflow [ci skip] --- .github/workflows/pypi-release.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 14c956fc70..efe4bf71d4 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -42,10 +42,9 @@ jobs: - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install pip requirements + - name: Install build requirements run: | - python -m pip install -U pip setuptools wheel build - python -m pip install -r requirements.txt + python -m pip install -U pip setuptools wheel build numpy cython - name: Setup and install manylinux1_x86_64 wheel run: | python setup.py bdist_wheel --plat-name=manylinux1_x86_64 From 602325021b1d0b9d01c7799f793bd88b7772dadb Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 16 May 2024 18:05:51 +0200 Subject: [PATCH 096/255] chore: update version to 0.24.0 [ci skip] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c16b76ee86..9b2fe41c0e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ include = ["TTS*"] [project] name = "coqui-tts" -version = "0.23.1" +version = "0.24.0" description = "Deep learning for Text to Speech." 
readme = "README.md" requires-python = ">=3.9, <3.13" From 70bd84894db3736836af8f9503728528fd71706c Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 20 May 2024 11:26:34 +0200 Subject: [PATCH 097/255] fix(server): ensure logging output gets actually shown --- TTS/encoder/utils/prepare_voxceleb.py | 4 +++- TTS/server/server.py | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/TTS/encoder/utils/prepare_voxceleb.py b/TTS/encoder/utils/prepare_voxceleb.py index 8f571dd2c7..da7522a512 100644 --- a/TTS/encoder/utils/prepare_voxceleb.py +++ b/TTS/encoder/utils/prepare_voxceleb.py @@ -29,6 +29,8 @@ import soundfile as sf +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger + logger = logging.getLogger(__name__) SUBSETS = { @@ -214,7 +216,7 @@ def processor(directory, subset, force_process): if __name__ == "__main__": - logging.getLogger("TTS").setLevel(logging.INFO) + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) if len(sys.argv) != 4: print("Usage: python prepare_data.py save_directory user password") sys.exit() diff --git a/TTS/server/server.py b/TTS/server/server.py index a8f3a08817..54c1ad45a3 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -16,11 +16,12 @@ raise ImportError("Server requires requires flask, use `pip install coqui-tts[server]`.") from e from TTS.config import load_config +from TTS.utils.generic_utils import ConsoleFormatter, setup_logger from TTS.utils.manage import ModelManager from TTS.utils.synthesizer import Synthesizer logger = logging.getLogger(__name__) -logging.getLogger("TTS").setLevel(logging.INFO) +setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) def create_argparser(): From 8503500d9dca865a12eb33110f4658af92697a4b Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 20 May 2024 11:35:11 +0200 Subject: [PATCH 098/255] chore(server): remove duplicate code --- TTS/server/server.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/TTS/server/server.py b/TTS/server/server.py index 54c1ad45a3..df8e4a7e75 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -74,10 +74,6 @@ def convert_boolean(x): path = Path(__file__).parent / "../.models.json" manager = ModelManager(path) -if args.list_models: - manager.list_models() - sys.exit() - # update in-use models to the specified released models. 
model_path = None config_path = None From ab7d84bf0514a372c6d57e08b813c5b2172cfa8d Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 20 May 2024 11:35:25 +0200 Subject: [PATCH 099/255] refactor(server): address linter issues --- TTS/server/server.py | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/TTS/server/server.py b/TTS/server/server.py index df8e4a7e75..f410fb7539 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -1,4 +1,7 @@ #!flask/bin/python + +"""TTS demo server.""" + import argparse import io import json @@ -13,7 +16,8 @@ try: from flask import Flask, render_template, render_template_string, request, send_file except ImportError as e: - raise ImportError("Server requires requires flask, use `pip install coqui-tts[server]`.") from e + msg = "Server requires requires flask, use `pip install coqui-tts[server]`" + raise ImportError(msg) from e from TTS.config import load_config from TTS.utils.generic_utils import ConsoleFormatter, setup_logger @@ -24,17 +28,11 @@ setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) -def create_argparser(): - def convert_boolean(x): - return x.lower() in ["true", "1", "yes"] - +def create_argparser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser() parser.add_argument( "--list_models", - type=convert_boolean, - nargs="?", - const=True, - default=False, + action="store_true", help="list available pre-trained tts and vocoder models.", ) parser.add_argument( @@ -62,9 +60,13 @@ def convert_boolean(x): parser.add_argument("--vocoder_config_path", type=str, help="Path to vocoder model config file.", default=None) parser.add_argument("--speakers_file_path", type=str, help="JSON file for multi-speaker model.", default=None) parser.add_argument("--port", type=int, default=5002, help="port to listen on.") - parser.add_argument("--use_cuda", type=convert_boolean, default=False, help="true to use CUDA.") - parser.add_argument("--debug", type=convert_boolean, default=False, help="true to enable Flask debug mode.") - parser.add_argument("--show_details", type=convert_boolean, default=False, help="Generate model detail page.") + parser.add_argument("--use_cuda", action=argparse.BooleanOptionalAction, default=False, help="true to use CUDA.") + parser.add_argument( + "--debug", action=argparse.BooleanOptionalAction, default=False, help="true to enable Flask debug mode." + ) + parser.add_argument( + "--show_details", action=argparse.BooleanOptionalAction, default=False, help="Generate model detail page." 
+ ) return parser @@ -168,17 +170,15 @@ def index(): def details(): if args.config_path is not None and os.path.isfile(args.config_path): model_config = load_config(args.config_path) - else: - if args.model_name is not None: - model_config = load_config(config_path) + elif args.model_name is not None: + model_config = load_config(config_path) if args.vocoder_config_path is not None and os.path.isfile(args.vocoder_config_path): vocoder_config = load_config(args.vocoder_config_path) + elif args.vocoder_name is not None: + vocoder_config = load_config(vocoder_config_path) else: - if args.vocoder_name is not None: - vocoder_config = load_config(vocoder_config_path) - else: - vocoder_config = None + vocoder_config = None return render_template( "details.html", From 7bf9033e5382674cbd83d508b437ff89d5d21155 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 25 May 2024 17:33:37 +0200 Subject: [PATCH 100/255] chore: update repo info [ci skip] --- .github/ISSUE_TEMPLATE/bug_report.yaml | 2 +- .github/ISSUE_TEMPLATE/config.yml | 4 ++-- .github/PR_TEMPLATE.md | 8 -------- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml index 34cde7e844..6a50c24562 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -59,7 +59,7 @@ body: You can either run `TTS/bin/collect_env_info.py` ```bash - wget https://raw.githubusercontent.com/coqui-ai/TTS/main/TTS/bin/collect_env_info.py + wget https://raw.githubusercontent.com/idiap/coqui-ai-TTS/main/TTS/bin/collect_env_info.py python collect_env_info.py ``` diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml index 05ca7db6bd..ccaaff7565 100644 --- a/.github/ISSUE_TEMPLATE/config.yml +++ b/.github/ISSUE_TEMPLATE/config.yml @@ -1,8 +1,8 @@ blank_issues_enabled: false contact_links: - name: CoquiTTS GitHub Discussions - url: https://github.com/coqui-ai/TTS/discussions + url: https://github.com/idiap/coqui-ai-TTS/discussions about: Please ask and answer questions here. - name: Coqui Security issue disclosure - url: mailto:info@coqui.ai + url: mailto:enno.hermann@gmail.com about: Please report security vulnerabilities here. diff --git a/.github/PR_TEMPLATE.md b/.github/PR_TEMPLATE.md index 330109c3bc..9e7605a4ef 100644 --- a/.github/PR_TEMPLATE.md +++ b/.github/PR_TEMPLATE.md @@ -5,11 +5,3 @@ Welcome to the 🐸TTS project! We are excited to see your interest, and appreci This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file. In order to make a good pull request, please see our [CONTRIBUTING.md](CONTRIBUTING.md) file. - -Before accepting your pull request, you will be asked to sign a [Contributor License Agreement](https://cla-assistant.io/coqui-ai/TTS). - -This [Contributor License Agreement](https://cla-assistant.io/coqui-ai/TTS): - -- Protects you, Coqui, and the users of the code. -- Does not change your rights to use your contributions for any purpose. -- Does not change the license of the 🐸TTS project. It just makes the terms of your contribution clearer and lets us know you are OK to contribute. From 7df4c2fa475be941e16ff955e86a5da961695bdf Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 28 May 2024 09:35:55 +0200 Subject: [PATCH 101/255] fix: restore TTS.__version__ attribute This is used by the TTS/bin/collect_env_info.py script with which users print version information for bug reports. 
We restore the TTS.__version__ attribute so that old versions of the script still work. --- TTS/__init__.py | 3 +++ docs/source/conf.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/TTS/__init__.py b/TTS/__init__.py index e69de29bb2..9e87bca4be 100644 --- a/TTS/__init__.py +++ b/TTS/__init__.py @@ -0,0 +1,3 @@ +import importlib.metadata + +__version__ = importlib.metadata.version("coqui-tts") diff --git a/docs/source/conf.py b/docs/source/conf.py index 2a9f62b3b0..e7d36c1f43 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -20,7 +20,7 @@ autodoc_mock_imports = ["soundfile"] # -- Project information ----------------------------------------------------- -project = "TTS" +project = "coqui-tts" copyright = "2021 Coqui GmbH, 2020 TTS authors" author = "Coqui GmbH" From dc629f825dad134ea37d72a72c50f7adf40a13ca Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 28 May 2024 12:29:42 +0200 Subject: [PATCH 102/255] build: set upper version limit for transformers transformers>=4.41 break XTTS streaming, see #31 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9b2fe41c0e..e1ea662d87 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ dependencies = [ "gruut[de,es,fr]==2.2.3", # Tortoise "einops>=0.6.0", - "transformers>=4.33.0", + "transformers>=4.33.0,<4.41.0", # Bark "encodec>=0.1.1", # XTTS From df4a1f5ea6c06d1eec3de63995e0777e811170fa Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 28 May 2024 16:19:52 +0200 Subject: [PATCH 103/255] docs: update readme [ci skip] --- README.md | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 1033d16be4..0a3bccca4e 100644 --- a/README.md +++ b/README.md @@ -154,7 +154,7 @@ The following extras allow the installation of optional dependencies: |------|-------------| | `all` | All optional dependencies, except `dev` and `docs` | | `dev` | Development dependencies | -| `dev` | Dependencies for building the documentation | +| `docs` | Dependencies for building the documentation | | `notebooks` | Dependencies only used in notebooks | | `server` | Dependencies to run the TTS server | | `bn` | Bangla G2P | @@ -270,11 +270,10 @@ You can find the language ISO codes [here](https://dl.fbaipublicfiles.com/mms/tt and learn about the Fairseq models [here](https://github.com/facebookresearch/fairseq/tree/main/examples/mms). 
```python -# TTS with on the fly voice conversion +# TTS with fairseq models api = TTS("tts_models/deu/fairseq/vits") -api.tts_with_vc_to_file( +api.tts_to_file( "Wie sage ich auf Italienisch, dass ich dich liebe?", - speaker_wav="target/speaker.wav", file_path="output.wav" ) ``` From 203f60f1e175f4d171a329ad1449df65a6b6bebd Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 28 May 2024 21:30:55 +0200 Subject: [PATCH 104/255] refactor(espeak_wrapper): remove sync argument _espeak_exe is always called with sync=True, so remove code for sync==False --- TTS/tts/utils/text/phonemizers/espeak_wrapper.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index dd74db6fae..13da605c2b 100644 --- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -50,7 +50,7 @@ def get_espeakng_version() -> str: _DEF_ESPEAK_VER = None -def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[bytes]: +def _espeak_exe(espeak_lib: str, args: list) -> list[bytes]: """Run espeak with the given arguments.""" cmd = [ espeak_lib, @@ -70,13 +70,6 @@ def _espeak_exe(espeak_lib: str, args: list, *, sync: bool = False) -> list[byte err = iter(p.stderr.readline, b"") for line in err: logger.warning("espeakng: %s", line.decode("utf-8").strip()) - if not sync: - p.stdout.close() - if p.stderr: - p.stderr.close() - if p.stdin: - p.stdin.close() - return res res2 = list(res) p.stdout.close() if p.stderr: @@ -201,7 +194,7 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False args.append(text) # compute phonemes phonemes = "" - for line in _espeak_exe(self.backend, args, sync=True): + for line in _espeak_exe(self.backend, args): logger.debug("line: %s", repr(line)) ph_decoded = line.decode("utf8").strip() # espeak: @@ -232,7 +225,7 @@ def supported_languages() -> dict[str, str]: return {} args = ["--voices"] langs = {} - for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args, sync=True)): + for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args)): line = line.decode("utf8").strip() if count > 0: cols = line.split() From 49fcbd908b818d8ec6daa3123818906f03fe9868 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 28 May 2024 21:43:35 +0200 Subject: [PATCH 105/255] fix(espeak_wrapper): avoid stuck process on windows Fixes #24 --- .../utils/text/phonemizers/espeak_wrapper.py | 39 +++++++------------ 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index 13da605c2b..91fb93c70e 100644 --- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -50,7 +50,7 @@ def get_espeakng_version() -> str: _DEF_ESPEAK_VER = None -def _espeak_exe(espeak_lib: str, args: list) -> list[bytes]: +def _espeak_exe(espeak_lib: str, args: list) -> list[str]: """Run espeak with the given arguments.""" cmd = [ espeak_lib, @@ -59,25 +59,18 @@ def _espeak_exe(espeak_lib: str, args: list) -> list[bytes]: "1", # UTF8 text encoding ] cmd.extend(args) - logger.debug("espeakng: executing %s", repr(cmd)) - - with subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) as p: - res = iter(p.stdout.readline, b"") - err = iter(p.stderr.readline, b"") - for line in err: - logger.warning("espeakng: %s", line.decode("utf-8").strip()) - 
res2 = list(res) - p.stdout.close() - if p.stderr: - p.stderr.close() - if p.stdin: - p.stdin.close() - p.wait() - return res2 + logger.debug("Executing: %s", repr(cmd)) + + p = subprocess.run(cmd, capture_output=True, encoding="utf8", check=True) + for line in p.stderr.strip().split("\n"): + if line.strip() != "": + logger.warning("%s: %s", espeak_lib, line.strip()) + res = [] + for line in p.stdout.strip().split("\n"): + if line.strip() != "": + logger.debug("%s: %s", espeak_lib, line.strip()) + res.append(line.strip()) + return res class ESpeak(BasePhonemizer): @@ -195,8 +188,6 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False # compute phonemes phonemes = "" for line in _espeak_exe(self.backend, args): - logger.debug("line: %s", repr(line)) - ph_decoded = line.decode("utf8").strip() # espeak: # version 1.48.15: " p_ɹ_ˈaɪ_ɚ t_ə n_oʊ_v_ˈɛ_m_b_ɚ t_w_ˈɛ_n_t_i t_ˈuː\n" # espeak-ng: @@ -206,7 +197,7 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False # "sɛʁtˈɛ̃ mˈo kɔm (en)fˈʊtbɔːl(fr) ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." # phonemize needs to remove the language flags of the returned text: # "sɛʁtˈɛ̃ mˈo kɔm fˈʊtbɔːl ʒenˈɛʁ de- flˈaɡ də- lˈɑ̃ɡ." - ph_decoded = re.sub(r"\(.+?\)", "", ph_decoded) + ph_decoded = re.sub(r"\(.+?\)", "", line) phonemes += ph_decoded.strip() return phonemes.replace("_", separator) @@ -226,13 +217,11 @@ def supported_languages() -> dict[str, str]: args = ["--voices"] langs = {} for count, line in enumerate(_espeak_exe(_DEF_ESPEAK_LIB, args)): - line = line.decode("utf8").strip() if count > 0: cols = line.split() lang_code = cols[1] lang_name = cols[3] langs[lang_code] = lang_name - logger.debug("line: %s", repr(line)) return langs def version(self) -> str: From 03430de88e8ba0fc706394f5d962fe5ff318268f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 29 May 2024 09:57:55 +0200 Subject: [PATCH 106/255] chore: bump version to 0.24.1 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e1ea662d87..ff2ff32dd4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ include = ["TTS*"] [project] name = "coqui-tts" -version = "0.24.0" +version = "0.24.1" description = "Deep learning for Text to Speech." readme = "README.md" requires-python = ">=3.9, <3.13" From 07cbcf825c8837838229bb1b6952bd35191133eb Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 29 May 2024 09:52:18 +0200 Subject: [PATCH 107/255] fix(espeak_wrapper): read phonemize() input from file Avoids utf8 encoding issues on Windows when passing the text directly. 
Fixes https://github.com/coqui-ai/TTS/discussions/3761 --- TTS/tts/utils/text/phonemizers/espeak_wrapper.py | 10 +++++++++- tests/text_tests/test_phonemizer.py | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py index 91fb93c70e..a15df716e7 100644 --- a/TTS/tts/utils/text/phonemizers/espeak_wrapper.py +++ b/TTS/tts/utils/text/phonemizers/espeak_wrapper.py @@ -3,6 +3,8 @@ import logging import re import subprocess +import tempfile +from pathlib import Path from typing import Optional from packaging.version import Version @@ -184,7 +186,12 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False if tie: args.append("--tie=%s" % tie) - args.append(text) + tmp = tempfile.NamedTemporaryFile(mode="w+t", delete=False, encoding="utf8") + tmp.write(text) + tmp.close() + args.append("-f") + args.append(tmp.name) + # compute phonemes phonemes = "" for line in _espeak_exe(self.backend, args): @@ -200,6 +207,7 @@ def phonemize_espeak(self, text: str, separator: str = "|", *, tie: bool = False ph_decoded = re.sub(r"\(.+?\)", "", line) phonemes += ph_decoded.strip() + Path(tmp.name).unlink() return phonemes.replace("_", separator) def _phonemize(self, text: str, separator: str = "") -> str: diff --git a/tests/text_tests/test_phonemizer.py b/tests/text_tests/test_phonemizer.py index ca25b302c5..f9067530e6 100644 --- a/tests/text_tests/test_phonemizer.py +++ b/tests/text_tests/test_phonemizer.py @@ -116,6 +116,12 @@ def setUp(self): output = self.phonemizer.phonemize(text, separator="") self.assertEqual(output, gt) + # UTF8 characters + text = "źrebię" + gt = "ʑrˈɛbjɛ" + output = ESpeak("pl").phonemize(text, separator="") + self.assertEqual(output, gt) + def test_name(self): self.assertEqual(self.phonemizer.name(), "espeak") From 77722cb0dd0c43becfb245051d1bb0629ada8f48 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 30 May 2024 11:12:10 +0200 Subject: [PATCH 108/255] fix(bin.synthesize): correctly handle boolean arguments Previously, e.g. `--use_cuda false` would actually set use_cuda=True: https://github.com/coqui-ai/TTS/discussions/3762 --- TTS/bin/compute_attention_masks.py | 4 +- TTS/bin/compute_embeddings.py | 2 +- TTS/bin/eval_encoder.py | 4 +- TTS/bin/extract_tts_spectrograms.py | 2 +- TTS/bin/remove_silence_using_vad.py | 10 ++-- TTS/bin/synthesize.py | 57 +++++++------------ TTS/encoder/README.md | 2 +- TTS/server/README.md | 2 +- docs/source/docker_images.md | 4 +- docs/source/models/bark.md | 6 +- docs/source/models/tortoise.md | 6 +- docs/source/models/xtts.md | 8 +-- .../ljspeech/fast_pitch/train_fast_pitch.py | 2 +- .../ljspeech/fast_speech/train_fast_speech.py | 2 +- .../ljspeech/fastspeech2/train_fastspeech2.py | 2 +- tests/zoo_tests/test_models.py | 24 ++++---- 16 files changed, 57 insertions(+), 80 deletions(-) diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py index 207b17e9c4..be275baa9c 100644 --- a/TTS/bin/compute_attention_masks.py +++ b/TTS/bin/compute_attention_masks.py @@ -35,7 +35,7 @@ --data_path /root/LJSpeech-1.1/ --batch_size 32 --dataset ljspeech - --use_cuda True + --use_cuda """, formatter_class=RawTextHelpFormatter, ) @@ -62,7 +62,7 @@ help="Dataset metafile inclusing file paths with transcripts.", ) parser.add_argument("--data_path", type=str, default="", help="Defines the data path. 
It overwrites config.json.") - parser.add_argument("--use_cuda", type=bool, default=False, help="enable/disable cuda.") + parser.add_argument("--use_cuda", action=argparse.BooleanOptionalAction, default=False, help="enable/disable cuda.") parser.add_argument( "--batch_size", default=16, type=int, help="Batch size for the model. Use batch_size=1 if you have no CUDA." diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py index 6795241a73..1bdb8d733c 100644 --- a/TTS/bin/compute_embeddings.py +++ b/TTS/bin/compute_embeddings.py @@ -150,7 +150,7 @@ def compute_embeddings( default=False, action="store_true", ) - parser.add_argument("--disable_cuda", type=bool, help="Flag to disable cuda.", default=False) + parser.add_argument("--disable_cuda", action="store_true", help="Flag to disable cuda.", default=False) parser.add_argument("--no_eval", help="Do not compute eval?. Default False", default=False, action="store_true") parser.add_argument( "--formatter_name", diff --git a/TTS/bin/eval_encoder.py b/TTS/bin/eval_encoder.py index 8327851ca7..711c8221db 100644 --- a/TTS/bin/eval_encoder.py +++ b/TTS/bin/eval_encoder.py @@ -75,8 +75,8 @@ def compute_encoder_accuracy(dataset_items, encoder_manager): type=str, help="Path to dataset config file.", ) - parser.add_argument("--use_cuda", type=bool, help="flag to set cuda.", default=True) - parser.add_argument("--eval", type=bool, help="compute eval.", default=True) + parser.add_argument("--use_cuda", action=argparse.BooleanOptionalAction, help="flag to set cuda.", default=True) + parser.add_argument("--eval", action=argparse.BooleanOptionalAction, help="compute eval.", default=True) args = parser.parse_args() diff --git a/TTS/bin/extract_tts_spectrograms.py b/TTS/bin/extract_tts_spectrograms.py index 83f2ca21c4..86a4dce177 100755 --- a/TTS/bin/extract_tts_spectrograms.py +++ b/TTS/bin/extract_tts_spectrograms.py @@ -282,7 +282,7 @@ def main(args): # pylint: disable=redefined-outer-name parser.add_argument("--debug", default=False, action="store_true", help="Save audio files for debug") parser.add_argument("--save_audio", default=False, action="store_true", help="Save audio files") parser.add_argument("--quantize_bits", type=int, default=0, help="Save quantized audio files if non-zero") - parser.add_argument("--eval", type=bool, help="compute eval.", default=True) + parser.add_argument("--eval", action=argparse.BooleanOptionalAction, help="compute eval.", default=True) args = parser.parse_args() c = load_config(args.config_path) diff --git a/TTS/bin/remove_silence_using_vad.py b/TTS/bin/remove_silence_using_vad.py index f6d09d6bf1..edab882db8 100755 --- a/TTS/bin/remove_silence_using_vad.py +++ b/TTS/bin/remove_silence_using_vad.py @@ -80,7 +80,7 @@ def preprocess_audios(): setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) parser = argparse.ArgumentParser( - description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end True" + description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end" ) parser.add_argument("-i", "--input_dir", type=str, help="Dataset root dir", required=True) parser.add_argument("-o", "--output_dir", type=str, help="Output Dataset dir", default="") @@ -95,20 +95,20 @@ def preprocess_audios(): parser.add_argument( "-t", "--trim_just_beginning_and_end", - type=bool, 
+ action=argparse.BooleanOptionalAction, default=True, - help="If True this script will trim just the beginning and end nonspeech parts. If False all nonspeech parts will be trim. Default True", + help="If True this script will trim just the beginning and end nonspeech parts. If False all nonspeech parts will be trimmed.", ) parser.add_argument( "-c", "--use_cuda", - type=bool, + action=argparse.BooleanOptionalAction, default=False, help="If True use cuda", ) parser.add_argument( "--use_onnx", - type=bool, + action=argparse.BooleanOptionalAction, default=False, help="If True use onnx", ) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index 0464cb2943..bc01ffd595 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- + +"""Command line interface.""" import argparse import contextlib @@ -136,19 +137,8 @@ """ -def str2bool(v): - if isinstance(v, bool): - return v - if v.lower() in ("yes", "true", "t", "y", "1"): - return True - if v.lower() in ("no", "false", "f", "n", "0"): - return False - raise argparse.ArgumentTypeError("Boolean value expected.") - - -def main(): - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) - +def parse_args() -> argparse.Namespace: + """Parse arguments.""" parser = argparse.ArgumentParser( description=description.replace(" ```\n", ""), formatter_class=RawTextHelpFormatter, @@ -156,10 +146,7 @@ def main(): parser.add_argument( "--list_models", - type=str2bool, - nargs="?", - const=True, - default=False, + action="store_true", help="list available pre-trained TTS and vocoder models.", ) @@ -207,7 +194,7 @@ def main(): default="tts_output.wav", help="Output wav file path.", ) - parser.add_argument("--use_cuda", type=bool, help="Run model on CUDA.", default=False) + parser.add_argument("--use_cuda", action="store_true", help="Run model on CUDA.") parser.add_argument("--device", type=str, help="Device to run model on.", default="cpu") parser.add_argument( "--vocoder_path", @@ -226,10 +213,7 @@ def main(): parser.add_argument( "--pipe_out", help="stdout the generated TTS wav file for shell pipe.", - type=str2bool, - nargs="?", - const=True, - default=False, + action="store_true", ) # args for multi-speaker synthesis @@ -261,25 +245,18 @@ def main(): parser.add_argument( "--list_speaker_idxs", help="List available speaker ids for the defined multi-speaker model.", - type=str2bool, - nargs="?", - const=True, - default=False, + action="store_true", ) parser.add_argument( "--list_language_idxs", help="List available language ids for the defined multi-lingual model.", - type=str2bool, - nargs="?", - const=True, - default=False, + action="store_true", ) # aux args parser.add_argument( "--save_spectogram", - type=bool, - help="If true save raw spectogram for further (vocoder) processing in out_path.", - default=False, + action="store_true", + help="Save raw spectogram for further (vocoder) processing in out_path.", ) parser.add_argument( "--reference_wav", @@ -295,8 +272,8 @@ def main(): ) parser.add_argument( "--progress_bar", - type=str2bool, - help="If true shows a progress bar for the model download. 
Defaults to True", + action=argparse.BooleanOptionalAction, + help="Show a progress bar for the model download.", default=True, ) @@ -337,19 +314,23 @@ def main(): ] if not any(check_args): parser.parse_args(["-h"]) + return args + + +def main(): + setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + args = parse_args() pipe_out = sys.stdout if args.pipe_out else None with contextlib.redirect_stdout(None if args.pipe_out else sys.stdout): # Late-import to make things load faster - from TTS.api import TTS from TTS.utils.manage import ModelManager from TTS.utils.synthesizer import Synthesizer # load model manager path = Path(__file__).parent / "../.models.json" manager = ModelManager(path, progress_bar=args.progress_bar) - api = TTS() tts_path = None tts_config_path = None diff --git a/TTS/encoder/README.md b/TTS/encoder/README.md index b38b20052b..9f829c9e2a 100644 --- a/TTS/encoder/README.md +++ b/TTS/encoder/README.md @@ -14,5 +14,5 @@ To run the code, you need to follow the same flow as in TTS. - Define 'config.json' for your needs. Note that, audio parameters should match your TTS model. - Example training call ```python speaker_encoder/train.py --config_path speaker_encoder/config.json --data_path ~/Data/Libri-TTS/train-clean-360``` -- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda true /model/path/best_model.pth model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files. +- Generate embedding vectors ```python speaker_encoder/compute_embeddings.py --use_cuda /model/path/best_model.pth model/config/path/config.json dataset/path/ output_path``` . This code parses all .wav files at the given dataset path and generates the same folder structure under the output path with the generated embedding files. - Watch training on Tensorboard as in TTS diff --git a/TTS/server/README.md b/TTS/server/README.md index 3b27575aea..ae8e38a4e3 100644 --- a/TTS/server/README.md +++ b/TTS/server/README.md @@ -15,7 +15,7 @@ Run the server with the official models. ```python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan``` Run the server with the official models on a GPU. -```CUDA_VISIBLE_DEVICES="0" python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan --use_cuda True``` +```CUDA_VISIBLE_DEVICES="0" python TTS/server/server.py --model_name tts_models/en/ljspeech/tacotron2-DCA --vocoder_name vocoder_models/en/ljspeech/multiband-melgan --use_cuda``` Run the server with a custom models. ```python TTS/server/server.py --tts_checkpoint /path/to/tts/model.pth --tts_config /path/to/tts/config.json --vocoder_checkpoint /path/to/vocoder/model.pth --vocoder_config /path/to/vocoder/config.json``` diff --git a/docs/source/docker_images.md b/docs/source/docker_images.md index 8df5185505..58d961203e 100644 --- a/docs/source/docker_images.md +++ b/docs/source/docker_images.md @@ -32,7 +32,7 @@ For the GPU version, you need to have the latest NVIDIA drivers installed. With `nvidia-smi` you can check the CUDA version supported, it must be >= 11.8 ```bash -docker run --rm --gpus all -v ~/tts-output:/root/tts-output ghcr.io/coqui-ai/tts --text "Hello." 
--out_path /root/tts-output/hello.wav --use_cuda true +docker run --rm --gpus all -v ~/tts-output:/root/tts-output ghcr.io/coqui-ai/tts --text "Hello." --out_path /root/tts-output/hello.wav --use_cuda ``` ## Start a server @@ -50,7 +50,7 @@ python3 TTS/server/server.py --model_name tts_models/en/vctk/vits ```bash docker run --rm -it -p 5002:5002 --gpus all --entrypoint /bin/bash ghcr.io/coqui-ai/tts python3 TTS/server/server.py --list_models #To get the list of available models -python3 TTS/server/server.py --model_name tts_models/en/vctk/vits --use_cuda true +python3 TTS/server/server.py --model_name tts_models/en/vctk/vits --use_cuda ``` Click [there](http://[::1]:5002/) and have fun with the server! diff --git a/docs/source/models/bark.md b/docs/source/models/bark.md index c328ae6110..a180afbb91 100644 --- a/docs/source/models/bark.md +++ b/docs/source/models/bark.md @@ -69,14 +69,12 @@ tts --model_name tts_models/multilingual/multi-dataset/bark \ --text "This is an example." \ --out_path "output.wav" \ --voice_dir bark_voices/ \ ---speaker_idx "ljspeech" \ ---progress_bar True +--speaker_idx "ljspeech" # Random voice generation tts --model_name tts_models/multilingual/multi-dataset/bark \ --text "This is an example." \ ---out_path "output.wav" \ ---progress_bar True +--out_path "output.wav" ``` diff --git a/docs/source/models/tortoise.md b/docs/source/models/tortoise.md index 1a8e9ca8e9..30afd1355b 100644 --- a/docs/source/models/tortoise.md +++ b/docs/source/models/tortoise.md @@ -57,14 +57,12 @@ tts --model_name tts_models/en/multi-dataset/tortoise-v2 \ --text "This is an example." \ --out_path "output.wav" \ --voice_dir path/to/tortoise/voices/dir/ \ ---speaker_idx "lj" \ ---progress_bar True +--speaker_idx "lj" # Random voice generation tts --model_name tts_models/en/multi-dataset/tortoise-v2 \ --text "This is an example." \ ---out_path "output.wav" \ ---progress_bar True +--out_path "output.wav" ``` diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index cc7c36b729..c07d879f7c 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -72,7 +72,7 @@ You can do inference using one of the available speakers using the following com --text "It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent." \ --speaker_idx "Ana Florence" \ --language_idx en \ - --use_cuda true + --use_cuda ``` ##### Clone a voice @@ -85,7 +85,7 @@ You can clone a speaker voice using a single or multiple references: --text "Bugün okula gitmek istemiyorum." \ --speaker_wav /path/to/target/speaker.wav \ --language_idx tr \ - --use_cuda true + --use_cuda ``` ###### Multiple references @@ -94,7 +94,7 @@ You can clone a speaker voice using a single or multiple references: --text "Bugün okula gitmek istemiyorum." \ --speaker_wav /path/to/target/speaker.wav /path/to/target/speaker_2.wav /path/to/target/speaker_3.wav \ --language_idx tr \ - --use_cuda true + --use_cuda ``` or for all wav files in a directory you can use: @@ -103,7 +103,7 @@ or for all wav files in a directory you can use: --text "Bugün okula gitmek istemiyorum." 
\ --speaker_wav /path/to/target/*.wav \ --language_idx tr \ - --use_cuda true + --use_cuda ``` #### 🐸TTS API diff --git a/recipes/ljspeech/fast_pitch/train_fast_pitch.py b/recipes/ljspeech/fast_pitch/train_fast_pitch.py index 055526b1bc..64fd737b4e 100644 --- a/recipes/ljspeech/fast_pitch/train_fast_pitch.py +++ b/recipes/ljspeech/fast_pitch/train_fast_pitch.py @@ -65,7 +65,7 @@ model_path, config_path, _ = manager.download_model("tts_models/en/ljspeech/tacotron2-DCA") # TODO: make compute_attention python callable os.system( - f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda true" + f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda" ) # INITIALIZE THE AUDIO PROCESSOR diff --git a/recipes/ljspeech/fast_speech/train_fast_speech.py b/recipes/ljspeech/fast_speech/train_fast_speech.py index 8c9a272e81..9839fcb339 100644 --- a/recipes/ljspeech/fast_speech/train_fast_speech.py +++ b/recipes/ljspeech/fast_speech/train_fast_speech.py @@ -64,7 +64,7 @@ model_path, config_path, _ = manager.download_model("tts_models/en/ljspeech/tacotron2-DCA") # TODO: make compute_attention python callable os.system( - f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda true" + f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda" ) # INITIALIZE THE AUDIO PROCESSOR diff --git a/recipes/ljspeech/fastspeech2/train_fastspeech2.py b/recipes/ljspeech/fastspeech2/train_fastspeech2.py index 93737dba7f..0a7a175605 100644 --- a/recipes/ljspeech/fastspeech2/train_fastspeech2.py +++ b/recipes/ljspeech/fastspeech2/train_fastspeech2.py @@ -67,7 +67,7 @@ model_path, config_path, _ = manager.download_model("tts_models/en/ljspeech/tacotron2-DCA") # TODO: make compute_attention python callable os.system( - f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda true" + f"python TTS/bin/compute_attention_masks.py --model_path {model_path} --config_path {config_path} --dataset ljspeech --dataset_metafile metadata.csv --data_path ./recipes/ljspeech/LJSpeech-1.1/ --use_cuda" ) # INITIALIZE THE AUDIO PROCESSOR diff --git a/tests/zoo_tests/test_models.py b/tests/zoo_tests/test_models.py index 8fa56e287a..1c28e8609c 100644 --- a/tests/zoo_tests/test_models.py +++ b/tests/zoo_tests/test_models.py @@ -50,13 +50,13 @@ def run_models(offset=0, step=1): speaker_id = list(speaker_manager.name_to_id.keys())[0] run_cli( f"tts --model_name {model_name} " - f'--text "This is an example." --out_path "{output_path}" --speaker_idx "{speaker_id}" --language_idx "{language_id}" --progress_bar False' + f'--text "This is an example." --out_path "{output_path}" --speaker_idx "{speaker_id}" --language_idx "{language_id}" --no-progress_bar' ) else: # single-speaker model run_cli( f"tts --model_name {model_name} " - f'--text "This is an example." 
--out_path "{output_path}" --progress_bar False' + f'--text "This is an example." --out_path "{output_path}" --no-progress_bar' ) # remove downloaded models shutil.rmtree(local_download_dir) @@ -66,7 +66,7 @@ def run_models(offset=0, step=1): reference_wav = os.path.join(get_tests_data_path(), "ljspeech", "wavs", "LJ001-0032.wav") run_cli( f"tts --model_name {model_name} " - f'--out_path "{output_path}" --source_wav "{speaker_wav}" --target_wav "{reference_wav}" --progress_bar False' + f'--out_path "{output_path}" --source_wav "{speaker_wav}" --target_wav "{reference_wav}" --no-progress_bar' ) else: # only download the model @@ -83,14 +83,14 @@ def test_xtts(): run_cli( "yes | " f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v1.1 " - f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True ' + f'--text "This is an example." --out_path "{output_path}" --no-progress_bar --use_cuda ' f'--speaker_wav "{speaker_wav}" --language_idx "en"' ) else: run_cli( "yes | " f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v1.1 " - f'--text "This is an example." --out_path "{output_path}" --progress_bar False ' + f'--text "This is an example." --out_path "{output_path}" --no-progress_bar ' f'--speaker_wav "{speaker_wav}" --language_idx "en"' ) @@ -138,14 +138,14 @@ def test_xtts_v2(): run_cli( "yes | " f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 " - f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True ' + f'--text "This is an example." --out_path "{output_path}" --no-progress_bar --use_cuda ' f'--speaker_wav "{speaker_wav}" "{speaker_wav_2}" --language_idx "en"' ) else: run_cli( "yes | " f"tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 " - f'--text "This is an example." --out_path "{output_path}" --progress_bar False ' + f'--text "This is an example." --out_path "{output_path}" --no-progress_bar ' f'--speaker_wav "{speaker_wav}" "{speaker_wav_2}" --language_idx "en"' ) @@ -215,12 +215,12 @@ def test_tortoise(): if use_gpu: run_cli( f" tts --model_name tts_models/en/multi-dataset/tortoise-v2 " - f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True' + f'--text "This is an example." --out_path "{output_path}" --no-progress_bar --use_cuda' ) else: run_cli( f" tts --model_name tts_models/en/multi-dataset/tortoise-v2 " - f'--text "This is an example." --out_path "{output_path}" --progress_bar False' + f'--text "This is an example." --out_path "{output_path}" --no-progress_bar' ) @@ -231,12 +231,12 @@ def test_bark(): if use_gpu: run_cli( f" tts --model_name tts_models/multilingual/multi-dataset/bark " - f'--text "This is an example." --out_path "{output_path}" --progress_bar False --use_cuda True' + f'--text "This is an example." --out_path "{output_path}" --no-progress_bar --use_cuda' ) else: run_cli( f" tts --model_name tts_models/multilingual/multi-dataset/bark " - f'--text "This is an example." --out_path "{output_path}" --progress_bar False' + f'--text "This is an example." 
--out_path "{output_path}" --no-progress_bar' ) @@ -249,7 +249,7 @@ def test_voice_conversion(): output_path = os.path.join(get_tests_output_path(), "output.wav") run_cli( f"tts --model_name {model_name}" - f" --out_path {output_path} --speaker_wav {speaker_wav} --reference_wav {reference_wav} --language_idx {language_id} --progress_bar False" + f" --out_path {output_path} --speaker_wav {speaker_wav} --reference_wav {reference_wav} --language_idx {language_id} --no-progress_bar" ) From 29e91f2e77f60b5cd21010f18831a62488a3f47a Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 30 May 2024 11:40:24 +0200 Subject: [PATCH 109/255] fix(utils.generic_utils): correctly call now() --- TTS/utils/generic_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index 024d50277c..48c090715f 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -126,7 +126,7 @@ def format_aux_input(def_args: Dict, kwargs: Dict) -> Dict: def get_timestamp() -> str: - return datetime.now().strftime("%y%m%d-%H%M%S") + return datetime.datetime.now().strftime("%y%m%d-%H%M%S") class ConsoleFormatter(logging.Formatter): From bdd44cf28a93b1c5690ad7a18dcbe340a80ef3ce Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 30 May 2024 22:13:42 +0200 Subject: [PATCH 110/255] docs: update readme --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 0a3bccca4e..2749a28bda 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,10 @@ Please use our dedicated channels for questions and discussion. Help is much mor [discord]: https://discord.gg/5eXr5seRrv [Tutorials and Examples]: https://github.com/coqui-ai/TTS/wiki/TTS-Notebooks-and-Tutorials +The [issues](https://github.com/coqui-ai/TTS/issues) and +[discussions](https://github.com/coqui-ai/TTS/discussions) in the original +repository are also still a useful source of information. + ## 🔗 Links and Resources | Type | Links | @@ -143,6 +147,7 @@ If you plan to code or train models, clone 🐸TTS and install it locally. ```bash git clone https://github.com/idiap/coqui-ai-TTS +cd coqui-ai-TTS pip install -e . ``` From 03de4b889e04445e29e7b3d5de9e413ee1bf019d Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 13 Jun 2024 22:48:34 +0200 Subject: [PATCH 111/255] docs: fix readthedocs links [ci skip] --- README.md | 6 +++--- TTS/tts/models/xtts.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2749a28bda..c6a1db4fff 100644 --- a/README.md +++ b/README.md @@ -4,10 +4,10 @@ - 📣 ⓍTTSv2 is here with 16 languages and better performance across the board. - 📣 ⓍTTS fine-tuning code is out. Check the [example recipes](https://github.com/idiap/coqui-ai-TTS/tree/dev/recipes/ljspeech). - 📣 ⓍTTS can now stream with <200ms latency. -- 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://coqui-tts.readthedocs.io/en/dev/models/xtts.html) -- 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. 
[Docs](https://coqui-tts.readthedocs.io/en/dev/models/bark.html) +- 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://coqui-tts.readthedocs.io/en/latest/models/xtts.html) +- 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://coqui-tts.readthedocs.io/en/latest/models/bark.html) - 📣 You can use [~1100 Fairseq models](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS. -- 📣 🐸TTS now supports 🐢Tortoise with faster inference. [Docs](https://coqui-tts.readthedocs.io/en/dev/models/tortoise.html) +- 📣 🐸TTS now supports 🐢Tortoise with faster inference. [Docs](https://coqui-tts.readthedocs.io/en/latest/models/tortoise.html)
diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index df49cf54fd..e6d245a041 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -698,12 +698,12 @@ def inference_stream( def forward(self): raise NotImplementedError( - "XTTS has a dedicated trainer, please check the XTTS docs: https://coqui-tts.readthedocs.io/en/dev/models/xtts.html#training" + "XTTS has a dedicated trainer, please check the XTTS docs: https://coqui-tts.readthedocs.io/en/latest/models/xtts.html#training" ) def eval_step(self): raise NotImplementedError( - "XTTS has a dedicated trainer, please check the XTTS docs: https://coqui-tts.readthedocs.io/en/dev/models/xtts.html#training" + "XTTS has a dedicated trainer, please check the XTTS docs: https://coqui-tts.readthedocs.io/en/latest/models/xtts.html#training" ) @staticmethod @@ -792,5 +792,5 @@ def load_checkpoint( def train_step(self): raise NotImplementedError( - "XTTS has a dedicated trainer, please check the XTTS docs: https://coqui-tts.readthedocs.io/en/dev/models/xtts.html#training" + "XTTS has a dedicated trainer, please check the XTTS docs: https://coqui-tts.readthedocs.io/en/latest/models/xtts.html#training" ) From e5c208d2545f7d1248c93c3a8ef8ebdedc2c0672 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 14 Jun 2024 15:06:03 +0200 Subject: [PATCH 112/255] feat(cleaners): add multilingual phoneme cleaner This doesn't convert numbers into English words. --- TTS/tts/utils/text/cleaners.py | 19 ++++++++++++++++--- tests/text_tests/test_text_cleaners.py | 7 ++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py index 794a87c866..f829e4cc1c 100644 --- a/TTS/tts/utils/text/cleaners.py +++ b/TTS/tts/utils/text/cleaners.py @@ -3,6 +3,7 @@ # TODO: pick the cleaner for languages dynamically import re +from typing import Optional from anyascii import anyascii @@ -44,8 +45,8 @@ def remove_aux_symbols(text): return text -def replace_symbols(text, lang="en"): - """Replace symbols based on the lenguage tag. +def replace_symbols(text, lang: Optional[str] = "en"): + """Replace symbols based on the language tag. Args: text: @@ -122,7 +123,11 @@ def english_cleaners(text): def phoneme_cleaners(text): - """Pipeline for phonemes mode, including number and abbreviation expansion.""" + """Pipeline for phonemes mode, including number and abbreviation expansion. + + NB: This cleaner converts numbers into English words, for other languages + use multilingual_phoneme_cleaners(). + """ text = en_normalize_numbers(text) text = expand_abbreviations(text) text = replace_symbols(text) @@ -131,6 +136,14 @@ def phoneme_cleaners(text): return text +def multilingual_phoneme_cleaners(text): + """Pipeline for phonemes mode, including number and abbreviation expansion.""" + text = replace_symbols(text, lang=None) + text = remove_aux_symbols(text) + text = collapse_whitespace(text) + return text + + def french_cleaners(text): """Pipeline for French text. 
There is no need to expand numbers, phonemizer already does that""" text = expand_abbreviations(text, lang="fr") diff --git a/tests/text_tests/test_text_cleaners.py b/tests/text_tests/test_text_cleaners.py index fcfa71e77d..bf0c8d5d8a 100644 --- a/tests/text_tests/test_text_cleaners.py +++ b/tests/text_tests/test_text_cleaners.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from TTS.tts.utils.text.cleaners import english_cleaners, phoneme_cleaners +from TTS.tts.utils.text.cleaners import english_cleaners, multilingual_phoneme_cleaners, phoneme_cleaners def test_time() -> None: @@ -19,3 +19,8 @@ def test_currency() -> None: def test_expand_numbers() -> None: assert phoneme_cleaners("-1") == "minus one" assert phoneme_cleaners("1") == "one" + + +def test_multilingual_phoneme_cleaners() -> None: + assert multilingual_phoneme_cleaners("(Hello)") == "Hello" + assert multilingual_phoneme_cleaners("1:") == "1," From a1495d4bc102e425e948efaf8c7427d47973b607 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 14 Jun 2024 15:09:01 +0200 Subject: [PATCH 113/255] fix(recipes): use multilingual phoneme cleaner in non-english recipes --- recipes/thorsten_DE/align_tts/train_aligntts.py | 2 +- recipes/thorsten_DE/glow_tts/train_glowtts.py | 2 +- recipes/thorsten_DE/speedy_speech/train_speedy_speech.py | 2 +- recipes/thorsten_DE/tacotron2-DDC/train_tacotron_ddc.py | 2 +- recipes/thorsten_DE/vits_tts/train_vits.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/recipes/thorsten_DE/align_tts/train_aligntts.py b/recipes/thorsten_DE/align_tts/train_aligntts.py index 32cfd9967f..42363940f3 100644 --- a/recipes/thorsten_DE/align_tts/train_aligntts.py +++ b/recipes/thorsten_DE/align_tts/train_aligntts.py @@ -30,7 +30,7 @@ run_eval=True, test_delay_epochs=-1, epochs=1000, - text_cleaner="phoneme_cleaners", + text_cleaner="multilingual_phoneme_cleaners", use_phonemes=False, phoneme_language="de", phoneme_cache_path=os.path.join(output_path, "phoneme_cache"), diff --git a/recipes/thorsten_DE/glow_tts/train_glowtts.py b/recipes/thorsten_DE/glow_tts/train_glowtts.py index 00c67fb5d8..f7f4a186a2 100644 --- a/recipes/thorsten_DE/glow_tts/train_glowtts.py +++ b/recipes/thorsten_DE/glow_tts/train_glowtts.py @@ -40,7 +40,7 @@ run_eval=True, test_delay_epochs=-1, epochs=1000, - text_cleaner="phoneme_cleaners", + text_cleaner="multilingual_phoneme_cleaners", use_phonemes=True, phoneme_language="de", phoneme_cache_path=os.path.join(output_path, "phoneme_cache"), diff --git a/recipes/thorsten_DE/speedy_speech/train_speedy_speech.py b/recipes/thorsten_DE/speedy_speech/train_speedy_speech.py index a3d0b9db2b..024dcaa31e 100644 --- a/recipes/thorsten_DE/speedy_speech/train_speedy_speech.py +++ b/recipes/thorsten_DE/speedy_speech/train_speedy_speech.py @@ -45,7 +45,7 @@ test_delay_epochs=-1, epochs=1000, min_audio_len=11050, # need to up min_audio_len to avois speedy speech error - text_cleaner="phoneme_cleaners", + text_cleaner="multilingual_phoneme_cleaners", use_phonemes=True, phoneme_language="de", phoneme_cache_path=os.path.join(output_path, "phoneme_cache"), diff --git a/recipes/thorsten_DE/tacotron2-DDC/train_tacotron_ddc.py b/recipes/thorsten_DE/tacotron2-DDC/train_tacotron_ddc.py index bc0274f5af..a46e27e91b 100644 --- a/recipes/thorsten_DE/tacotron2-DDC/train_tacotron_ddc.py +++ b/recipes/thorsten_DE/tacotron2-DDC/train_tacotron_ddc.py @@ -49,7 +49,7 @@ gradual_training=[[0, 6, 64], [10000, 4, 32], [50000, 3, 32], [100000, 2, 32]], double_decoder_consistency=True, epochs=1000, - 
text_cleaner="phoneme_cleaners", + text_cleaner="multilingual_phoneme_cleaners", use_phonemes=True, phoneme_language="de", phoneme_cache_path=os.path.join(output_path, "phoneme_cache"), diff --git a/recipes/thorsten_DE/vits_tts/train_vits.py b/recipes/thorsten_DE/vits_tts/train_vits.py index 4ffa0f30f6..4b773c3508 100644 --- a/recipes/thorsten_DE/vits_tts/train_vits.py +++ b/recipes/thorsten_DE/vits_tts/train_vits.py @@ -40,7 +40,7 @@ run_eval=True, test_delay_epochs=-1, epochs=1000, - text_cleaner="phoneme_cleaners", + text_cleaner="multilingual_phoneme_cleaners", use_phonemes=True, phoneme_language="de", phoneme_cache_path=os.path.join(output_path, "phoneme_cache"), From 9cfcc0a0f5ced388a3d2c473e64ebf810b0e53dc Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 14 Jun 2024 15:20:04 +0200 Subject: [PATCH 114/255] chore(cleaners): add type hints --- TTS/tts/utils/text/cleaners.py | 37 ++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py index f829e4cc1c..fc87025f00 100644 --- a/TTS/tts/utils/text/cleaners.py +++ b/TTS/tts/utils/text/cleaners.py @@ -18,34 +18,37 @@ _whitespace_re = re.compile(r"\s+") -def expand_abbreviations(text, lang="en"): +def expand_abbreviations(text: str, lang: str = "en") -> str: if lang == "en": _abbreviations = abbreviations_en elif lang == "fr": _abbreviations = abbreviations_fr + else: + msg = f"Language {lang} not supported in expand_abbreviations" + raise ValueError(msg) for regex, replacement in _abbreviations: text = re.sub(regex, replacement, text) return text -def lowercase(text): +def lowercase(text: str) -> str: return text.lower() -def collapse_whitespace(text): +def collapse_whitespace(text: str) -> str: return re.sub(_whitespace_re, " ", text).strip() -def convert_to_ascii(text): +def convert_to_ascii(text: str) -> str: return anyascii(text) -def remove_aux_symbols(text): +def remove_aux_symbols(text: str) -> str: text = re.sub(r"[\<\>\(\)\[\]\"]+", "", text) return text -def replace_symbols(text, lang: Optional[str] = "en"): +def replace_symbols(text: str, lang: Optional[str] = "en") -> str: """Replace symbols based on the language tag. 
Args: @@ -78,14 +81,14 @@ def replace_symbols(text, lang: Optional[str] = "en"): return text -def basic_cleaners(text): +def basic_cleaners(text: str) -> str: """Basic pipeline that lowercases and collapses whitespace without transliteration.""" text = lowercase(text) text = collapse_whitespace(text) return text -def transliteration_cleaners(text): +def transliteration_cleaners(text: str) -> str: """Pipeline for non-English text that transliterates to ASCII.""" # text = convert_to_ascii(text) text = lowercase(text) @@ -93,7 +96,7 @@ def transliteration_cleaners(text): return text -def basic_german_cleaners(text): +def basic_german_cleaners(text: str) -> str: """Pipeline for German text""" text = lowercase(text) text = collapse_whitespace(text) @@ -101,7 +104,7 @@ def basic_german_cleaners(text): # TODO: elaborate it -def basic_turkish_cleaners(text): +def basic_turkish_cleaners(text: str) -> str: """Pipeline for Turkish text""" text = text.replace("I", "ı") text = lowercase(text) @@ -109,7 +112,7 @@ def basic_turkish_cleaners(text): return text -def english_cleaners(text): +def english_cleaners(text: str) -> str: """Pipeline for English text, including number and abbreviation expansion.""" # text = convert_to_ascii(text) text = lowercase(text) @@ -122,7 +125,7 @@ def english_cleaners(text): return text -def phoneme_cleaners(text): +def phoneme_cleaners(text: str) -> str: """Pipeline for phonemes mode, including number and abbreviation expansion. NB: This cleaner converts numbers into English words, for other languages @@ -136,7 +139,7 @@ def phoneme_cleaners(text): return text -def multilingual_phoneme_cleaners(text): +def multilingual_phoneme_cleaners(text: str) -> str: """Pipeline for phonemes mode, including number and abbreviation expansion.""" text = replace_symbols(text, lang=None) text = remove_aux_symbols(text) @@ -144,7 +147,7 @@ def multilingual_phoneme_cleaners(text): return text -def french_cleaners(text): +def french_cleaners(text: str) -> str: """Pipeline for French text. There is no need to expand numbers, phonemizer already does that""" text = expand_abbreviations(text, lang="fr") text = lowercase(text) @@ -154,7 +157,7 @@ def french_cleaners(text): return text -def portuguese_cleaners(text): +def portuguese_cleaners(text: str) -> str: """Basic pipeline for Portuguese text. 
There is no need to expand abbreviation and numbers, phonemizer already does that""" text = lowercase(text) @@ -170,7 +173,7 @@ def chinese_mandarin_cleaners(text: str) -> str: return text -def multilingual_cleaners(text): +def multilingual_cleaners(text: str) -> str: """Pipeline for multilingual text""" text = lowercase(text) text = replace_symbols(text, lang=None) @@ -179,7 +182,7 @@ def multilingual_cleaners(text): return text -def no_cleaners(text): +def no_cleaners(text: str) -> str: # remove newline characters text = text.replace("\n", "") return text From 3a20f4725fadca3dc6efa8a597eee18afd411fb8 Mon Sep 17 00:00:00 2001 From: ChristianRomberg Date: Sun, 16 Jun 2024 21:24:03 +0200 Subject: [PATCH 115/255] fix(freevc): use the specified device for pretrained speaker encoder (#45) Fixes coqui-ai#3787 --- TTS/vc/models/freevc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index f9e691256e..262fb7504f 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -382,7 +382,8 @@ def load_pretrained_speaker_encoder(self): """Load pretrained speaker encoder model as mentioned in the paper.""" logger.info("Loading pretrained speaker encoder model ...") self.enc_spk_ex = SpeakerEncoderEx( - "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/speaker_encoder.pt" + "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/speaker_encoder.pt", + device=self.device ) def init_multispeaker(self, config: Coqpit): From 4bc0e75a08933862e00892566c6e3fd6c3b528b7 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 16 Jun 2024 21:58:34 +0200 Subject: [PATCH 116/255] build: add numpy2 support Identified necessary code changes with the NPY201 ruff rule. Gruut is the only dependency that doesn't support numpy2 yet. NB: At build time numpy>=2.0.0 should be required to be able to build wheels compatible with both numpy1+2: https://numpy.org/devdocs/dev/depending_on_numpy.html#numpy-2-abi-handling --- TTS/vc/models/freevc.py | 3 +-- TTS/vocoder/models/wavernn.py | 4 ++-- pyproject.toml | 5 +++-- requirements.dev.txt | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index 262fb7504f..ec7cc0e0a6 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -382,8 +382,7 @@ def load_pretrained_speaker_encoder(self): """Load pretrained speaker encoder model as mentioned in the paper.""" logger.info("Loading pretrained speaker encoder model ...") self.enc_spk_ex = SpeakerEncoderEx( - "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/speaker_encoder.pt", - device=self.device + "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/speaker_encoder.pt", device=self.device ) def init_multispeaker(self, config: Coqpit): diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py index 62f6ee2d2d..901afdff11 100644 --- a/TTS/vocoder/models/wavernn.py +++ b/TTS/vocoder/models/wavernn.py @@ -91,7 +91,7 @@ def __init__( use_aux_net, ): super().__init__() - self.total_scale = np.cumproduct(upsample_scales)[-1] + self.total_scale = np.cumprod(upsample_scales)[-1] self.indent = pad * self.total_scale self.use_aux_net = use_aux_net if use_aux_net: @@ -239,7 +239,7 @@ class of models has however remained an elusive problem. 
With a focus on text-to if self.args.use_upsample_net: assert ( - np.cumproduct(self.args.upsample_factors)[-1] == config.audio.hop_length + np.cumprod(self.args.upsample_factors)[-1] == config.audio.hop_length ), " [!] upsample scales needs to be equal to hop_length" self.upsample = UpsampleNetwork( self.args.feat_dims, diff --git a/pyproject.toml b/pyproject.toml index ff2ff32dd4..dd4ebaed6f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ requires = [ "setuptools", "cython~=0.29.30", - "numpy>=1.24.3", + "numpy>=2.0.0", ] build-backend = "setuptools.build_meta" @@ -84,7 +84,7 @@ dev = [ "coverage[toml]", "nose2", "pre-commit", - "ruff==0.3.0", + "ruff==0.4.9", "tomli; python_version < '3.11'", ] # Dependencies for building the documentation @@ -169,6 +169,7 @@ lint.extend-select = [ "PLR1711", # useless-return "PLW", "W291", # trailing-whitespace + "NPY201", # NumPy 2.0 deprecation ] lint.ignore = [ diff --git a/requirements.dev.txt b/requirements.dev.txt index 0095dae3c2..1e4a7beff7 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -4,5 +4,5 @@ black==24.2.0 coverage[toml] nose2 pre-commit -ruff==0.3.0 +ruff==0.4.9 tomli; python_version < '3.11' From 4b6da4e7ba0bc842e04d420474db32b0142920bf Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 15 Jun 2024 10:10:40 +0200 Subject: [PATCH 117/255] refactor(stream_generator): update special tokens for transformers>=4.41.1 Fixes #31. The handling of special tokens in `transformers` was changed in https://github.com/huggingface/transformers/pull/30624 and https://github.com/huggingface/transformers/pull/30746. This updates the XTTS streaming code accordingly. --- TTS/tts/layers/xtts/stream_generator.py | 18 +++++------------- pyproject.toml | 2 +- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/TTS/tts/layers/xtts/stream_generator.py b/TTS/tts/layers/xtts/stream_generator.py index b7e07589c5..6a1528565b 100644 --- a/TTS/tts/layers/xtts/stream_generator.py +++ b/TTS/tts/layers/xtts/stream_generator.py @@ -151,18 +151,7 @@ def generate( # noqa: PLR0911 # 2. Set generation parameters if not already defined logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList() stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() - - if generation_config.pad_token_id is None and generation_config.eos_token_id is not None: - if model_kwargs.get("attention_mask", None) is None: - logger.warning( - "The attention mask and the pad token id were not set. As a consequence, you may observe " - "unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results." - ) - eos_token_id = generation_config.eos_token_id - if isinstance(eos_token_id, list): - eos_token_id = eos_token_id[0] - logger.warning(f"Setting `pad_token_id` to `eos_token_id`:{eos_token_id} for open-end generation.") - generation_config.pad_token_id = eos_token_id + kwargs_has_attention_mask = model_kwargs.get("attention_mask", None) is not None # 3. Define model inputs # inputs_tensor has to be defined @@ -174,6 +163,9 @@ def generate( # noqa: PLR0911 ) batch_size = inputs_tensor.shape[0] + device = inputs_tensor.device + self._prepare_special_tokens(generation_config, kwargs_has_attention_mask, device=device) + # 4. 
Define other model kwargs model_kwargs["output_attentions"] = generation_config.output_attentions model_kwargs["output_hidden_states"] = generation_config.output_hidden_states @@ -182,7 +174,7 @@ def generate( # noqa: PLR0911 accepts_attention_mask = "attention_mask" in set(inspect.signature(self.forward).parameters.keys()) requires_attention_mask = "encoder_outputs" not in model_kwargs - if model_kwargs.get("attention_mask", None) is None and requires_attention_mask and accepts_attention_mask: + if not kwargs_has_attention_mask and requires_attention_mask and accepts_attention_mask: model_kwargs["attention_mask"] = self._prepare_attention_mask_for_generation( inputs_tensor, generation_config.pad_token_id, diff --git a/pyproject.toml b/pyproject.toml index dd4ebaed6f..dad0d5ed0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ dependencies = [ "gruut[de,es,fr]==2.2.3", # Tortoise "einops>=0.6.0", - "transformers>=4.33.0,<4.41.0", + "transformers>=4.41.1", # Bark "encodec>=0.1.1", # XTTS From 2a281237d7f97608e88a32056f41a239b5dd6e77 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 15 Jun 2024 20:04:23 +0200 Subject: [PATCH 118/255] refactor(stream_generator): update code for transformers>=4.41.1 In line with https://github.com/huggingface/transformers/blob/eed9ed679878ada2f6d2eefccdbda368cabc88b1/src/transformers/generation/utils.py --- TTS/tts/layers/xtts/stream_generator.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/TTS/tts/layers/xtts/stream_generator.py b/TTS/tts/layers/xtts/stream_generator.py index 6a1528565b..77432480e8 100644 --- a/TTS/tts/layers/xtts/stream_generator.py +++ b/TTS/tts/layers/xtts/stream_generator.py @@ -201,16 +201,15 @@ def generate( # noqa: PLR0911 # 5. Prepare `input_ids` which will be used for auto-regressive generation if self.config.is_encoder_decoder: - input_ids = self._prepare_decoder_input_ids_for_generation( - batch_size, - decoder_start_token_id=generation_config.decoder_start_token_id, - bos_token_id=generation_config.bos_token_id, + input_ids, model_kwargs = self._prepare_decoder_input_ids_for_generation( + batch_size=batch_size, + model_input_name=model_input_name, model_kwargs=model_kwargs, + decoder_start_token_id=generation_config.decoder_start_token_id, device=inputs_tensor.device, ) else: - # if decoder-only then inputs_tensor has to be `input_ids` - input_ids = inputs_tensor + input_ids = inputs_tensor if model_input_name == "input_ids" else model_kwargs.pop("input_ids") # 6. Prepare `max_length` depending on other stopping criteria. 
input_ids_seq_length = input_ids.shape[-1] From 4d9e18ea7d5358b987d06f6030c0d251bb21c0c4 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 15 Jun 2024 20:33:46 +0200 Subject: [PATCH 119/255] chore(stream_generator): address lint issues --- TTS/tts/layers/xtts/__init__.py | 0 TTS/tts/layers/xtts/stream_generator.py | 15 ++++++++------- 2 files changed, 8 insertions(+), 7 deletions(-) create mode 100644 TTS/tts/layers/xtts/__init__.py diff --git a/TTS/tts/layers/xtts/__init__.py b/TTS/tts/layers/xtts/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/TTS/tts/layers/xtts/stream_generator.py b/TTS/tts/layers/xtts/stream_generator.py index 77432480e8..cb09895824 100644 --- a/TTS/tts/layers/xtts/stream_generator.py +++ b/TTS/tts/layers/xtts/stream_generator.py @@ -4,7 +4,7 @@ import inspect import random import warnings -from typing import Callable, List, Optional, Union +from typing import Callable, Optional, Union import numpy as np import torch @@ -21,10 +21,11 @@ PreTrainedModel, StoppingCriteriaList, ) +from transformers.generation.stopping_criteria import validate_stopping_criteria from transformers.generation.utils import GenerateOutput, SampleOutput, logger -def setup_seed(seed): +def setup_seed(seed: int) -> None: if seed == -1: return torch.manual_seed(seed) @@ -49,9 +50,9 @@ def generate( # noqa: PLR0911 generation_config: Optional[StreamGenerationConfig] = None, logits_processor: Optional[LogitsProcessorList] = None, stopping_criteria: Optional[StoppingCriteriaList] = None, - prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None, + prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], list[int]]] = None, synced_gpus: Optional[bool] = False, - seed=0, + seed: int = 0, **kwargs, ) -> Union[GenerateOutput, torch.LongTensor]: r""" @@ -90,7 +91,7 @@ def generate( # noqa: PLR0911 Custom stopping criteria that complement the default stopping criteria built from arguments and a generation config. If a stopping criteria is passed that is already created with the arguments or a generation config an error is thrown. This feature is intended for advanced users. - prefix_allowed_tokens_fn (`Callable[[int, torch.Tensor], List[int]]`, *optional*): + prefix_allowed_tokens_fn (`Callable[[int, torch.Tensor], list[int]]`, *optional*): If provided, this function constraints the beam search to allowed tokens only at each step. If not provided no constraint is applied. This function takes 2 arguments: the batch ID `batch_id` and `input_ids`. It has to return a list with the allowed tokens for the next generation step conditioned @@ -568,7 +569,7 @@ def generate( # noqa: PLR0911 def typeerror(): raise ValueError( - "`force_words_ids` has to either be a `List[List[List[int]]]` or `List[List[int]]`" + "`force_words_ids` has to either be a `list[list[list[int]]]` or `list[list[int]]`" f"of positive integers, but is {generation_config.force_words_ids}." 
) @@ -640,7 +641,7 @@ def sample_stream( logits_warper: Optional[LogitsProcessorList] = None, max_length: Optional[int] = None, pad_token_id: Optional[int] = None, - eos_token_id: Optional[Union[int, List[int]]] = None, + eos_token_id: Optional[Union[int, list[int]]] = None, output_attentions: Optional[bool] = None, output_hidden_states: Optional[bool] = None, output_scores: Optional[bool] = None, From c9f71978627844b7fe0803cec886ffa2129dbe9f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 20 Jun 2024 14:16:45 +0200 Subject: [PATCH 120/255] test(helpers): add test_ prefix so tests actually run --- tests/tts_tests/test_helpers.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/tts_tests/test_helpers.py b/tests/tts_tests/test_helpers.py index 23bb440a0a..a83ec9dd30 100644 --- a/tests/tts_tests/test_helpers.py +++ b/tests/tts_tests/test_helpers.py @@ -3,7 +3,7 @@ from TTS.tts.utils.helpers import average_over_durations, generate_path, rand_segments, segment, sequence_mask -def average_over_durations_test(): # pylint: disable=no-self-use +def test_average_over_durations(): # pylint: disable=no-self-use pitch = T.rand(1, 1, 128) durations = T.randint(1, 5, (1, 21)) @@ -21,7 +21,7 @@ def average_over_durations_test(): # pylint: disable=no-self-use index += dur -def seqeunce_mask_test(): +def test_sequence_mask(): lengths = T.randint(10, 15, (8,)) mask = sequence_mask(lengths) for i in range(8): @@ -30,7 +30,7 @@ def seqeunce_mask_test(): assert mask[i, l:].sum() == 0 -def segment_test(): +def test_segment(): x = T.range(0, 11) x = x.repeat(8, 1).unsqueeze(1) segment_ids = T.randint(0, 7, (8,)) @@ -50,7 +50,7 @@ def segment_test(): assert x[idx, :, start_indx : start_indx + 10].sum() == segments[idx, :, :].sum() -def rand_segments_test(): +def test_rand_segments(): x = T.rand(2, 3, 4) x_lens = T.randint(3, 4, (2,)) segments, seg_idxs = rand_segments(x, x_lens, segment_size=3) @@ -68,7 +68,7 @@ def rand_segments_test(): assert all(x_lens_back == x_lens) -def generate_path_test(): +def test_generate_path(): durations = T.randint(1, 4, (10, 21)) x_length = T.randint(18, 22, (10,)) x_mask = sequence_mask(x_length).unsqueeze(1).long() From 857cd55ce5b63f554bafc797176b08d750e8fc9e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 20 Jun 2024 14:28:44 +0200 Subject: [PATCH 121/255] test(helpers): fix test_rand_segment, test_generate_path --- tests/tts_tests/test_helpers.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/tts_tests/test_helpers.py b/tests/tts_tests/test_helpers.py index a83ec9dd30..dbd7f54eb4 100644 --- a/tests/tts_tests/test_helpers.py +++ b/tests/tts_tests/test_helpers.py @@ -53,8 +53,8 @@ def test_segment(): def test_rand_segments(): x = T.rand(2, 3, 4) x_lens = T.randint(3, 4, (2,)) - segments, seg_idxs = rand_segments(x, x_lens, segment_size=3) - assert segments.shape == (2, 3, 3) + segments, seg_idxs = rand_segments(x, x_lens, segment_size=2) + assert segments.shape == (2, 3, 2) assert all(seg_idxs >= 0), seg_idxs try: segments, _ = rand_segments(x, x_lens, segment_size=5) @@ -71,7 +71,7 @@ def test_rand_segments(): def test_generate_path(): durations = T.randint(1, 4, (10, 21)) x_length = T.randint(18, 22, (10,)) - x_mask = sequence_mask(x_length).unsqueeze(1).long() + x_mask = sequence_mask(x_length, max_len=21).unsqueeze(1).long() durations = durations * x_mask.squeeze(1) y_length = durations.sum(1) y_mask = sequence_mask(y_length).unsqueeze(1).long() From 9f80e043e4746982371125a293d3b7427b043536 Mon Sep 
17 00:00:00 2001 From: Enno Hermann Date: Mon, 24 Jun 2024 13:28:14 +0200 Subject: [PATCH 122/255] refactor(freevc): use existing layernorm --- TTS/vc/modules/freevc/modules.py | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/TTS/vc/modules/freevc/modules.py b/TTS/vc/modules/freevc/modules.py index 9bb5499003..da5bef8ab1 100644 --- a/TTS/vc/modules/freevc/modules.py +++ b/TTS/vc/modules/freevc/modules.py @@ -6,26 +6,12 @@ from torch.nn.utils.parametrize import remove_parametrizations import TTS.vc.modules.freevc.commons as commons +from TTS.tts.layers.generic.normalization import LayerNorm2 from TTS.vc.modules.freevc.commons import get_padding, init_weights LRELU_SLOPE = 0.1 -class LayerNorm(nn.Module): - def __init__(self, channels, eps=1e-5): - super().__init__() - self.channels = channels - self.eps = eps - - self.gamma = nn.Parameter(torch.ones(channels)) - self.beta = nn.Parameter(torch.zeros(channels)) - - def forward(self, x): - x = x.transpose(1, -1) - x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps) - return x.transpose(1, -1) - - class ConvReluNorm(nn.Module): def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, n_layers, p_dropout): super().__init__() @@ -40,11 +26,11 @@ def __init__(self, in_channels, hidden_channels, out_channels, kernel_size, n_la self.conv_layers = nn.ModuleList() self.norm_layers = nn.ModuleList() self.conv_layers.append(nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size // 2)) - self.norm_layers.append(LayerNorm(hidden_channels)) + self.norm_layers.append(LayerNorm2(hidden_channels)) self.relu_drop = nn.Sequential(nn.ReLU(), nn.Dropout(p_dropout)) for _ in range(n_layers - 1): self.conv_layers.append(nn.Conv1d(hidden_channels, hidden_channels, kernel_size, padding=kernel_size // 2)) - self.norm_layers.append(LayerNorm(hidden_channels)) + self.norm_layers.append(LayerNorm2(hidden_channels)) self.proj = nn.Conv1d(hidden_channels, out_channels, 1) self.proj.weight.data.zero_() self.proj.bias.data.zero_() @@ -83,8 +69,8 @@ def __init__(self, channels, kernel_size, n_layers, p_dropout=0.0): nn.Conv1d(channels, channels, kernel_size, groups=channels, dilation=dilation, padding=padding) ) self.convs_1x1.append(nn.Conv1d(channels, channels, 1)) - self.norms_1.append(LayerNorm(channels)) - self.norms_2.append(LayerNorm(channels)) + self.norms_1.append(LayerNorm2(channels)) + self.norms_2.append(LayerNorm2(channels)) def forward(self, x, x_mask, g=None): if g is not None: From d65bcf65bb9d8b48304b8c6ac0fe814e9df3581e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 24 Jun 2024 13:33:36 +0200 Subject: [PATCH 123/255] chore(freevc): remove duplicate DDSConv and ElementwiseAffine Already exist as: TTS.tts.layers.vits.stochastic_duration_predictor.DilatedDepthSeparableConv TTS.tts.layers.vits.stochastic_duration_predictor.ElementwiseAffine --- TTS/vc/modules/freevc/modules.py | 60 -------------------------------- 1 file changed, 60 deletions(-) diff --git a/TTS/vc/modules/freevc/modules.py b/TTS/vc/modules/freevc/modules.py index da5bef8ab1..d6f2acb8e2 100644 --- a/TTS/vc/modules/freevc/modules.py +++ b/TTS/vc/modules/freevc/modules.py @@ -45,48 +45,6 @@ def forward(self, x, x_mask): return x * x_mask -class DDSConv(nn.Module): - """ - Dialted and Depth-Separable Convolution - """ - - def __init__(self, channels, kernel_size, n_layers, p_dropout=0.0): - super().__init__() - self.channels = channels - self.kernel_size = kernel_size - self.n_layers = 
n_layers - self.p_dropout = p_dropout - - self.drop = nn.Dropout(p_dropout) - self.convs_sep = nn.ModuleList() - self.convs_1x1 = nn.ModuleList() - self.norms_1 = nn.ModuleList() - self.norms_2 = nn.ModuleList() - for i in range(n_layers): - dilation = kernel_size**i - padding = (kernel_size * dilation - dilation) // 2 - self.convs_sep.append( - nn.Conv1d(channels, channels, kernel_size, groups=channels, dilation=dilation, padding=padding) - ) - self.convs_1x1.append(nn.Conv1d(channels, channels, 1)) - self.norms_1.append(LayerNorm2(channels)) - self.norms_2.append(LayerNorm2(channels)) - - def forward(self, x, x_mask, g=None): - if g is not None: - x = x + g - for i in range(self.n_layers): - y = self.convs_sep[i](x * x_mask) - y = self.norms_1[i](y) - y = F.gelu(y) - y = self.convs_1x1[i](y) - y = self.norms_2[i](y) - y = F.gelu(y) - y = self.drop(y) - x = x + y - return x * x_mask - - class WN(torch.nn.Module): def __init__(self, hidden_channels, kernel_size, dilation_rate, n_layers, gin_channels=0, p_dropout=0): super(WN, self).__init__() @@ -303,24 +261,6 @@ def forward(self, x, *args, reverse=False, **kwargs): return x -class ElementwiseAffine(nn.Module): - def __init__(self, channels): - super().__init__() - self.channels = channels - self.m = nn.Parameter(torch.zeros(channels, 1)) - self.logs = nn.Parameter(torch.zeros(channels, 1)) - - def forward(self, x, x_mask, reverse=False, **kwargs): - if not reverse: - y = self.m + torch.exp(self.logs) * x - y = y * x_mask - logdet = torch.sum(self.logs * x_mask, [1, 2]) - return y, logdet - else: - x = (x - self.m) * torch.exp(-self.logs) * x_mask - return x - - class ResidualCouplingLayer(nn.Module): def __init__( self, From cd7b6daf460a408c0b21e74ab108989b062ee77e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 25 Jun 2024 22:09:19 +0200 Subject: [PATCH 124/255] fix: clarify types, fix missing functions --- TTS/model.py | 18 ++++++++++----- TTS/tts/models/base_tts.py | 2 +- TTS/vc/models/base_vc.py | 46 ++++++++++++++++++++------------------ 3 files changed, 37 insertions(+), 29 deletions(-) diff --git a/TTS/model.py b/TTS/model.py index ae6be7b444..01dd515d81 100644 --- a/TTS/model.py +++ b/TTS/model.py @@ -1,5 +1,6 @@ +import os from abc import abstractmethod -from typing import Dict +from typing import Any, Union import torch from coqpit import Coqpit @@ -16,7 +17,7 @@ class BaseTrainerModel(TrainerModel): @staticmethod @abstractmethod - def init_from_config(config: Coqpit): + def init_from_config(config: Coqpit) -> "BaseTrainerModel": """Init the model and all its attributes from the given config. Override this depending on your model. @@ -24,7 +25,7 @@ def init_from_config(config: Coqpit): ... @abstractmethod - def inference(self, input: torch.Tensor, aux_input={}) -> Dict: + def inference(self, input: torch.Tensor, aux_input: dict[str, Any] = {}) -> dict[str, Any]: """Forward pass for inference. It must return a dictionary with the main model output and all the auxiliary outputs. The key ```model_outputs``` @@ -45,13 +46,18 @@ def inference(self, input: torch.Tensor, aux_input={}) -> Dict: @abstractmethod def load_checkpoint( - self, config: Coqpit, checkpoint_path: str, eval: bool = False, strict: bool = True, cache=False + self, + config: Coqpit, + checkpoint_path: Union[str, os.PathLike[Any]], + eval: bool = False, + strict: bool = True, + cache: bool = False, ) -> None: - """Load a model checkpoint gile and get ready for training or inference. + """Load a model checkpoint file and get ready for training or inference. 
Args: config (Coqpit): Model configuration. - checkpoint_path (str): Path to the model checkpoint file. + checkpoint_path (str | os.PathLike): Path to the model checkpoint file. eval (bool, optional): If true, init model for inference else for training. Defaults to False. strict (bool, optional): Match all checkpoint keys to model's keys. Defaults to True. cache (bool, optional): If True, cache the file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to False. diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py index 7fbc2a3a78..ccb023ce84 100644 --- a/TTS/tts/models/base_tts.py +++ b/TTS/tts/models/base_tts.py @@ -144,7 +144,7 @@ def get_aux_input_from_test_sentences(self, sentence_info): if speaker_name is None: d_vector = self.speaker_manager.get_random_embedding() else: - d_vector = self.speaker_manager.get_d_vector_by_name(speaker_name) + d_vector = self.speaker_manager.get_mean_embedding(speaker_name) elif config.use_speaker_embedding: if speaker_name is None: speaker_id = self.speaker_manager.get_random_id() diff --git a/TTS/vc/models/base_vc.py b/TTS/vc/models/base_vc.py index c387157f19..22ffd0095c 100644 --- a/TTS/vc/models/base_vc.py +++ b/TTS/vc/models/base_vc.py @@ -1,7 +1,7 @@ import logging import os import random -from typing import Dict, List, Tuple, Union +from typing import Any, Optional, Union import torch import torch.distributed as dist @@ -10,6 +10,7 @@ from torch.utils.data import DataLoader from torch.utils.data.sampler import WeightedRandomSampler from trainer.torch import DistributedSampler, DistributedSamplerWrapper +from trainer.trainer import Trainer from TTS.model import BaseTrainerModel from TTS.tts.datasets.dataset import TTSDataset @@ -18,6 +19,7 @@ from TTS.tts.utils.speakers import SpeakerManager, get_speaker_balancer_weights from TTS.tts.utils.synthesis import synthesis from TTS.tts.utils.visual import plot_alignment, plot_spectrogram +from TTS.utils.audio.processor import AudioProcessor # pylint: skip-file @@ -35,10 +37,10 @@ class BaseVC(BaseTrainerModel): def __init__( self, config: Coqpit, - ap: "AudioProcessor", - speaker_manager: SpeakerManager = None, - language_manager: LanguageManager = None, - ): + ap: AudioProcessor, + speaker_manager: Optional[SpeakerManager] = None, + language_manager: Optional[LanguageManager] = None, + ) -> None: super().__init__() self.config = config self.ap = ap @@ -46,7 +48,7 @@ def __init__( self.language_manager = language_manager self._set_model_args(config) - def _set_model_args(self, config: Coqpit): + def _set_model_args(self, config: Coqpit) -> None: """Setup model args based on the config type (`ModelConfig` or `ModelArgs`). `ModelArgs` has all the fields reuqired to initialize the model architecture. @@ -67,7 +69,7 @@ def _set_model_args(self, config: Coqpit): else: raise ValueError("config must be either a *Config or *Args") - def init_multispeaker(self, config: Coqpit, data: List = None): + def init_multispeaker(self, config: Coqpit, data: Optional[list[Any]] = None) -> None: """Initialize a speaker embedding layer if needen and define expected embedding channel size for defining `in_channels` size of the connected layers. 
@@ -100,11 +102,11 @@ def init_multispeaker(self, config: Coqpit, data: List = None): self.speaker_embedding = nn.Embedding(self.num_speakers, self.embedded_speaker_dim) self.speaker_embedding.weight.data.normal_(0, 0.3) - def get_aux_input(self, **kwargs) -> Dict: + def get_aux_input(self, **kwargs: Any) -> dict[str, Any]: """Prepare and return `aux_input` used by `forward()`""" return {"speaker_id": None, "style_wav": None, "d_vector": None, "language_id": None} - def get_aux_input_from_test_sentences(self, sentence_info): + def get_aux_input_from_test_sentences(self, sentence_info: Union[str, list[str]]) -> dict[str, Any]: if hasattr(self.config, "model_args"): config = self.config.model_args else: @@ -132,7 +134,7 @@ def get_aux_input_from_test_sentences(self, sentence_info): if speaker_name is None: d_vector = self.speaker_manager.get_random_embedding() else: - d_vector = self.speaker_manager.get_d_vector_by_name(speaker_name) + d_vector = self.speaker_manager.get_mean_embedding(speaker_name) elif config.use_speaker_embedding: if speaker_name is None: speaker_id = self.speaker_manager.get_random_id() @@ -151,16 +153,16 @@ def get_aux_input_from_test_sentences(self, sentence_info): "language_id": language_id, } - def format_batch(self, batch: Dict) -> Dict: + def format_batch(self, batch: dict[str, Any]) -> dict[str, Any]: """Generic batch formatting for `VCDataset`. You must override this if you use a custom dataset. Args: - batch (Dict): [description] + batch (dict): [description] Returns: - Dict: [description] + dict: [description] """ # setup input batch text_input = batch["token_id"] @@ -230,7 +232,7 @@ def format_batch(self, batch: Dict) -> Dict: "audio_unique_names": batch["audio_unique_names"], } - def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1): + def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus: int = 1): weights = None data_items = dataset.samples @@ -271,12 +273,12 @@ def get_sampler(self, config: Coqpit, dataset: TTSDataset, num_gpus=1): def get_data_loader( self, config: Coqpit, - assets: Dict, + assets: dict, is_eval: bool, - samples: Union[List[Dict], List[List]], + samples: Union[list[dict], list[list]], verbose: bool, num_gpus: int, - rank: int = None, + rank: Optional[int] = None, ) -> "DataLoader": if is_eval and not config.run_eval: loader = None @@ -352,9 +354,9 @@ def get_data_loader( def _get_test_aux_input( self, - ) -> Dict: + ) -> dict[str, Any]: d_vector = None - if self.config.use_d_vector_file: + if self.speaker_manager is not None and self.config.use_d_vector_file: d_vector = [self.speaker_manager.embeddings[name]["embedding"] for name in self.speaker_manager.embeddings] d_vector = (random.sample(sorted(d_vector), 1),) @@ -369,7 +371,7 @@ def _get_test_aux_input( } return aux_inputs - def test_run(self, assets: Dict) -> Tuple[Dict, Dict]: + def test_run(self, assets: dict) -> tuple[dict, dict]: """Generic test run for `vc` models used by `Trainer`. You can override this for a different behaviour. @@ -378,7 +380,7 @@ def test_run(self, assets: Dict) -> Tuple[Dict, Dict]: assets (dict): A dict of training assets. For `vc` models, it must include `{'audio_processor': ap}`. Returns: - Tuple[Dict, Dict]: Test figures and audios to be projected to Tensorboard. + tuple[dict, dict]: Test figures and audios to be projected to Tensorboard. 
""" logger.info("Synthesizing test sentences.") test_audios = {} @@ -409,7 +411,7 @@ def test_run(self, assets: Dict) -> Tuple[Dict, Dict]: ) return test_figures, test_audios - def on_init_start(self, trainer): + def on_init_start(self, trainer: Trainer) -> None: """Save the speaker.pth and language_ids.json at the beginning of the training. Also update both paths.""" if self.speaker_manager is not None: output_path = os.path.join(trainer.output_path, "speakers.pth") From f8df19a10ced1104f3a20b8e58002db51d02c9f4 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 20 Jun 2024 13:57:06 +0200 Subject: [PATCH 125/255] refactor: remove duplicate convert_pad_shape --- TTS/tts/layers/glow_tts/transformer.py | 11 +++-------- TTS/tts/layers/vits/networks.py | 6 ------ TTS/tts/utils/helpers.py | 5 ++--- TTS/vc/modules/freevc/commons.py | 8 ++------ 4 files changed, 7 insertions(+), 23 deletions(-) diff --git a/TTS/tts/layers/glow_tts/transformer.py b/TTS/tts/layers/glow_tts/transformer.py index 02688d611f..c97d070a95 100644 --- a/TTS/tts/layers/glow_tts/transformer.py +++ b/TTS/tts/layers/glow_tts/transformer.py @@ -5,6 +5,7 @@ from torch.nn import functional as F from TTS.tts.layers.generic.normalization import LayerNorm, LayerNorm2 +from TTS.tts.utils.helpers import convert_pad_shape class RelativePositionMultiHeadAttention(nn.Module): @@ -300,7 +301,7 @@ def _causal_padding(self, x): pad_l = self.kernel_size - 1 pad_r = 0 padding = [[0, 0], [0, 0], [pad_l, pad_r]] - x = F.pad(x, self._pad_shape(padding)) + x = F.pad(x, convert_pad_shape(padding)) return x def _same_padding(self, x): @@ -309,15 +310,9 @@ def _same_padding(self, x): pad_l = (self.kernel_size - 1) // 2 pad_r = self.kernel_size // 2 padding = [[0, 0], [0, 0], [pad_l, pad_r]] - x = F.pad(x, self._pad_shape(padding)) + x = F.pad(x, convert_pad_shape(padding)) return x - @staticmethod - def _pad_shape(padding): - l = padding[::-1] - pad_shape = [item for sublist in l for item in sublist] - return pad_shape - class RelativePositionTransformer(nn.Module): """Transformer with Relative Potional Encoding. 
diff --git a/TTS/tts/layers/vits/networks.py b/TTS/tts/layers/vits/networks.py index f97b584fe6..cb7ff3c80b 100644 --- a/TTS/tts/layers/vits/networks.py +++ b/TTS/tts/layers/vits/networks.py @@ -10,12 +10,6 @@ LRELU_SLOPE = 0.1 -def convert_pad_shape(pad_shape): - l = pad_shape[::-1] - pad_shape = [item for sublist in l for item in sublist] - return pad_shape - - def init_weights(m, mean=0.0, std=0.01): classname = m.__class__.__name__ if classname.find("Conv") != -1: diff --git a/TTS/tts/utils/helpers.py b/TTS/tts/utils/helpers.py index 7b37201f84..7429d0fcc8 100644 --- a/TTS/tts/utils/helpers.py +++ b/TTS/tts/utils/helpers.py @@ -145,10 +145,9 @@ def average_over_durations(values, durs): return avg -def convert_pad_shape(pad_shape): +def convert_pad_shape(pad_shape: list[list]) -> list: l = pad_shape[::-1] - pad_shape = [item for sublist in l for item in sublist] - return pad_shape + return [item for sublist in l for item in sublist] def generate_path(duration, mask): diff --git a/TTS/vc/modules/freevc/commons.py b/TTS/vc/modules/freevc/commons.py index e5fb13c11c..e78135131c 100644 --- a/TTS/vc/modules/freevc/commons.py +++ b/TTS/vc/modules/freevc/commons.py @@ -3,6 +3,8 @@ import torch from torch.nn import functional as F +from TTS.tts.utils.helpers import convert_pad_shape + def init_weights(m, mean=0.0, std=0.01): classname = m.__class__.__name__ @@ -14,12 +16,6 @@ def get_padding(kernel_size, dilation=1): return int((kernel_size * dilation - dilation) / 2) -def convert_pad_shape(pad_shape): - l = pad_shape[::-1] - pad_shape = [item for sublist in l for item in sublist] - return pad_shape - - def intersperse(lst, item): result = [item] * (len(lst) * 2 + 1) result[1::2] = lst From a755328e4965f8b4d9ff033853e1048b25141865 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 20 Jun 2024 14:05:19 +0200 Subject: [PATCH 126/255] refactor(freevc): remove duplicate sequence_mask --- TTS/vc/models/freevc.py | 3 ++- TTS/vc/modules/freevc/commons.py | 11 +---------- 2 files changed, 3 insertions(+), 11 deletions(-) diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index ec7cc0e0a6..36f4017c4e 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -14,6 +14,7 @@ import TTS.vc.modules.freevc.commons as commons import TTS.vc.modules.freevc.modules as modules +from TTS.tts.utils.helpers import sequence_mask from TTS.tts.utils.speakers import SpeakerManager from TTS.utils.io import load_fsspec from TTS.vc.configs.freevc_config import FreeVCConfig @@ -80,7 +81,7 @@ def __init__( self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) def forward(self, x, x_lengths, g=None): - x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) + x_mask = torch.unsqueeze(sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) x = self.pre(x) * x_mask x = self.enc(x, x_mask, g=g) stats = self.proj(x) * x_mask diff --git a/TTS/vc/modules/freevc/commons.py b/TTS/vc/modules/freevc/commons.py index e78135131c..898728009e 100644 --- a/TTS/vc/modules/freevc/commons.py +++ b/TTS/vc/modules/freevc/commons.py @@ -3,7 +3,7 @@ import torch from torch.nn import functional as F -from TTS.tts.utils.helpers import convert_pad_shape +from TTS.tts.utils.helpers import convert_pad_shape, sequence_mask def init_weights(m, mean=0.0, std=0.01): @@ -115,20 +115,11 @@ def shift_1d(x): return x -def sequence_mask(length, max_length=None): - if max_length is None: - max_length = length.max() - x = torch.arange(max_length, dtype=length.dtype, device=length.device) - return 
x.unsqueeze(0) < length.unsqueeze(1) - - def generate_path(duration, mask): """ duration: [b, 1, t_x] mask: [b, 1, t_y, t_x] """ - device = duration.device - b, _, t_y, t_x = mask.shape cum_duration = torch.cumsum(duration, -1) From c5241d71ab0628261acb070cf339b8cd1a52f32e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 26 Jun 2024 00:24:04 +0200 Subject: [PATCH 127/255] chore: address pytorch deprecations torch.range(a, b) == torch.arange(a, b+1) meshgrid indexing: https://github.com/pytorch/pytorch/issues/50276 checkpoint use_reentrant: https://dev-discuss.pytorch.org/t/bc-breaking-update-to-torch-utils-checkpoint-not-passing-in-use-reentrant-flag-will-raise-an-error/1745 optimizer.step() before scheduler.step(): https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate --- TTS/bin/train_encoder.py | 7 ++++--- TTS/encoder/dataset.py | 2 +- TTS/tts/layers/losses.py | 2 +- TTS/tts/layers/overflow/neural_hmm.py | 3 ++- tests/tts_tests/test_helpers.py | 2 +- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index c0292743bf..49b450cf82 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -161,9 +161,6 @@ def train(model, optimizer, scheduler, criterion, data_loader, eval_data_loader, loader_time = time.time() - end_time global_step += 1 - # setup lr - if c.lr_decay: - scheduler.step() optimizer.zero_grad() # dispatch data to GPU @@ -182,6 +179,10 @@ def train(model, optimizer, scheduler, criterion, data_loader, eval_data_loader, grad_norm, _ = check_update(model, c.grad_clip) optimizer.step() + # setup lr + if c.lr_decay: + scheduler.step() + step_time = time.time() - start_time epoch_time += step_time diff --git a/TTS/encoder/dataset.py b/TTS/encoder/dataset.py index 81385c6c1f..bb780e3c1d 100644 --- a/TTS/encoder/dataset.py +++ b/TTS/encoder/dataset.py @@ -55,7 +55,7 @@ def __init__( logger.info(" | Number of instances: %d", len(self.items)) logger.info(" | Sequence length: %d", self.seq_len) logger.info(" | Number of classes: %d", len(self.classes)) - logger.info(" | Classes: %d", self.classes) + logger.info(" | Classes: %s", self.classes) def load_wav(self, filename): audio = self.ap.load_wav(filename, sr=self.ap.sample_rate) diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py index cd6cd0aeb2..5ebed81dda 100644 --- a/TTS/tts/layers/losses.py +++ b/TTS/tts/layers/losses.py @@ -255,7 +255,7 @@ def forward(self, att_ws, ilens, olens): @staticmethod def _make_ga_mask(ilen, olen, sigma): - grid_x, grid_y = torch.meshgrid(torch.arange(olen).to(olen), torch.arange(ilen).to(ilen)) + grid_x, grid_y = torch.meshgrid(torch.arange(olen).to(olen), torch.arange(ilen).to(ilen), indexing="ij") grid_x, grid_y = grid_x.float(), grid_y.float() return 1.0 - torch.exp(-((grid_y / ilen - grid_x / olen) ** 2) / (2 * (sigma**2))) diff --git a/TTS/tts/layers/overflow/neural_hmm.py b/TTS/tts/layers/overflow/neural_hmm.py index 0631ba98c0..a12becef03 100644 --- a/TTS/tts/layers/overflow/neural_hmm.py +++ b/TTS/tts/layers/overflow/neural_hmm.py @@ -128,7 +128,8 @@ def forward(self, inputs, inputs_len, mels, mel_lens): # Get mean, std and transition vector from decoder for this timestep # Note: Gradient checkpointing currently doesn't works with multiple gpus inside a loop if self.use_grad_checkpointing and self.training: - mean, std, transition_vector = checkpoint(self.output_net, h_memory, inputs) + # TODO: use_reentrant=False is recommended + mean, std, transition_vector = 
checkpoint(self.output_net, h_memory, inputs, use_reentrant=True) else: mean, std, transition_vector = self.output_net(h_memory, inputs) diff --git a/tests/tts_tests/test_helpers.py b/tests/tts_tests/test_helpers.py index dbd7f54eb4..d07efa3620 100644 --- a/tests/tts_tests/test_helpers.py +++ b/tests/tts_tests/test_helpers.py @@ -31,7 +31,7 @@ def test_sequence_mask(): def test_segment(): - x = T.range(0, 11) + x = T.arange(0, 12) x = x.repeat(8, 1).unsqueeze(1) segment_ids = T.randint(0, 7, (8,)) From c30fb0f56bfccc9aabec938dcc65e09f6957dccf Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 26 Jun 2024 11:46:37 +0200 Subject: [PATCH 128/255] chore: remove duplicate init_weights --- TTS/tts/layers/vits/networks.py | 6 ------ TTS/tts/models/delightful_tts.py | 6 ------ TTS/vc/modules/freevc/commons.py | 2 +- 3 files changed, 1 insertion(+), 13 deletions(-) diff --git a/TTS/tts/layers/vits/networks.py b/TTS/tts/layers/vits/networks.py index cb7ff3c80b..04a1cd2488 100644 --- a/TTS/tts/layers/vits/networks.py +++ b/TTS/tts/layers/vits/networks.py @@ -10,12 +10,6 @@ LRELU_SLOPE = 0.1 -def init_weights(m, mean=0.0, std=0.01): - classname = m.__class__.__name__ - if classname.find("Conv") != -1: - m.weight.data.normal_(mean, std) - - def get_padding(kernel_size, dilation=1): return int((kernel_size * dilation - dilation) / 2) diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index ed318923e9..4230fcc33d 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -88,12 +88,6 @@ def pad(input_ele: List[torch.Tensor], max_len: int) -> torch.Tensor: return out_padded -def init_weights(m: nn.Module, mean: float = 0.0, std: float = 0.01): - classname = m.__class__.__name__ - if classname.find("Conv") != -1: - m.weight.data.normal_(mean, std) - - def stride_lens(lens: torch.Tensor, stride: int = 2) -> torch.Tensor: return torch.ceil(lens / stride).int() diff --git a/TTS/vc/modules/freevc/commons.py b/TTS/vc/modules/freevc/commons.py index 898728009e..587612f637 100644 --- a/TTS/vc/modules/freevc/commons.py +++ b/TTS/vc/modules/freevc/commons.py @@ -6,7 +6,7 @@ from TTS.tts.utils.helpers import convert_pad_shape, sequence_mask -def init_weights(m, mean=0.0, std=0.01): +def init_weights(m: torch.nn.Module, mean: float = 0.0, std: float = 0.01) -> None: classname = m.__class__.__name__ if classname.find("Conv") != -1: m.weight.data.normal_(mean, std) From 4bd3df26072939bbc2b8b242528d0d4285014156 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 26 Jun 2024 11:54:36 +0200 Subject: [PATCH 129/255] refactor: remove duplicate get_padding --- TTS/tts/layers/vits/networks.py | 4 ---- TTS/tts/layers/xtts/hifigan_decoder.py | 5 +---- TTS/vc/models/freevc.py | 3 ++- TTS/vc/modules/freevc/commons.py | 4 ---- TTS/vc/modules/freevc/modules.py | 3 ++- TTS/vocoder/models/hifigan_discriminator.py | 3 ++- TTS/vocoder/models/hifigan_generator.py | 4 ++-- 7 files changed, 9 insertions(+), 17 deletions(-) diff --git a/TTS/tts/layers/vits/networks.py b/TTS/tts/layers/vits/networks.py index 04a1cd2488..50ed1024de 100644 --- a/TTS/tts/layers/vits/networks.py +++ b/TTS/tts/layers/vits/networks.py @@ -10,10 +10,6 @@ LRELU_SLOPE = 0.1 -def get_padding(kernel_size, dilation=1): - return int((kernel_size * dilation - dilation) / 2) - - class TextEncoder(nn.Module): def __init__( self, diff --git a/TTS/tts/layers/xtts/hifigan_decoder.py b/TTS/tts/layers/xtts/hifigan_decoder.py index 42f64e6807..9160529bf9 100644 --- a/TTS/tts/layers/xtts/hifigan_decoder.py +++ 
b/TTS/tts/layers/xtts/hifigan_decoder.py @@ -9,16 +9,13 @@ from torch.nn.utils.parametrize import remove_parametrizations from TTS.utils.io import load_fsspec +from TTS.vocoder.models.hifigan_generator import get_padding logger = logging.getLogger(__name__) LRELU_SLOPE = 0.1 -def get_padding(k, d): - return int((k * d - d) / 2) - - class ResBlock1(torch.nn.Module): """Residual Block Type 1. It has 3 convolutional layers in each convolutional block. diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index 36f4017c4e..7746572f23 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -19,10 +19,11 @@ from TTS.utils.io import load_fsspec from TTS.vc.configs.freevc_config import FreeVCConfig from TTS.vc.models.base_vc import BaseVC -from TTS.vc.modules.freevc.commons import get_padding, init_weights +from TTS.vc.modules.freevc.commons import init_weights from TTS.vc.modules.freevc.mel_processing import mel_spectrogram_torch from TTS.vc.modules.freevc.speaker_encoder.speaker_encoder import SpeakerEncoder as SpeakerEncoderEx from TTS.vc.modules.freevc.wavlm import get_wavlm +from TTS.vocoder.models.hifigan_generator import get_padding logger = logging.getLogger(__name__) diff --git a/TTS/vc/modules/freevc/commons.py b/TTS/vc/modules/freevc/commons.py index 587612f637..feea7f34dc 100644 --- a/TTS/vc/modules/freevc/commons.py +++ b/TTS/vc/modules/freevc/commons.py @@ -12,10 +12,6 @@ def init_weights(m: torch.nn.Module, mean: float = 0.0, std: float = 0.01) -> No m.weight.data.normal_(mean, std) -def get_padding(kernel_size, dilation=1): - return int((kernel_size * dilation - dilation) / 2) - - def intersperse(lst, item): result = [item] * (len(lst) * 2 + 1) result[1::2] = lst diff --git a/TTS/vc/modules/freevc/modules.py b/TTS/vc/modules/freevc/modules.py index d6f2acb8e2..722444a303 100644 --- a/TTS/vc/modules/freevc/modules.py +++ b/TTS/vc/modules/freevc/modules.py @@ -7,7 +7,8 @@ import TTS.vc.modules.freevc.commons as commons from TTS.tts.layers.generic.normalization import LayerNorm2 -from TTS.vc.modules.freevc.commons import get_padding, init_weights +from TTS.vc.modules.freevc.commons import init_weights +from TTS.vocoder.models.hifigan_generator import get_padding LRELU_SLOPE = 0.1 diff --git a/TTS/vocoder/models/hifigan_discriminator.py b/TTS/vocoder/models/hifigan_discriminator.py index 7447a5fbc4..1cbc6ab357 100644 --- a/TTS/vocoder/models/hifigan_discriminator.py +++ b/TTS/vocoder/models/hifigan_discriminator.py @@ -3,6 +3,8 @@ from torch import nn from torch.nn import functional as F +from TTS.vocoder.models.hifigan_generator import get_padding + LRELU_SLOPE = 0.1 @@ -29,7 +31,6 @@ class DiscriminatorP(torch.nn.Module): def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False): super().__init__() self.period = period - get_padding = lambda k, d: int((k * d - d) / 2) norm_f = nn.utils.spectral_norm if use_spectral_norm else nn.utils.parametrizations.weight_norm self.convs = nn.ModuleList( [ diff --git a/TTS/vocoder/models/hifigan_generator.py b/TTS/vocoder/models/hifigan_generator.py index b9561f6ff6..083ce344fb 100644 --- a/TTS/vocoder/models/hifigan_generator.py +++ b/TTS/vocoder/models/hifigan_generator.py @@ -15,8 +15,8 @@ LRELU_SLOPE = 0.1 -def get_padding(k, d): - return int((k * d - d) / 2) +def get_padding(kernel_size: int, dilation: int = 1) -> int: + return int((kernel_size * dilation - dilation) / 2) class ResBlock1(torch.nn.Module): From 59ef28d70833913cd1163b14b4ef373b0e2725a5 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: 
Wed, 26 Jun 2024 23:53:17 +0200 Subject: [PATCH 130/255] build: move umap-learn into optional notebook dependencies Except for notebooks, it's only used to show embedding plots during speaker encoder training, in which case a warning is now shown to install it. --- TTS/bin/train_encoder.py | 14 +++++++++----- TTS/encoder/utils/visual.py | 5 ++++- pyproject.toml | 3 +-- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index 49b450cf82..ba03c42b6d 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -6,6 +6,7 @@ import sys import time import traceback +import warnings import torch from torch.utils.data import DataLoader @@ -116,11 +117,14 @@ def evaluation(model, criterion, data_loader, global_step): eval_avg_loss = eval_loss / len(data_loader) # save stats dashboard_logger.eval_stats(global_step, {"loss": eval_avg_loss}) - # plot the last batch in the evaluation - figures = { - "UMAP Plot": plot_embeddings(outputs.detach().cpu().numpy(), c.num_classes_in_batch), - } - dashboard_logger.eval_figures(global_step, figures) + try: + # plot the last batch in the evaluation + figures = { + "UMAP Plot": plot_embeddings(outputs.detach().cpu().numpy(), c.num_classes_in_batch), + } + dashboard_logger.eval_figures(global_step, figures) + except ImportError: + warnings.warn("Install the `umap-learn` package to see embedding plots.") return eval_avg_loss diff --git a/TTS/encoder/utils/visual.py b/TTS/encoder/utils/visual.py index 6575b86ec2..bfe40605df 100644 --- a/TTS/encoder/utils/visual.py +++ b/TTS/encoder/utils/visual.py @@ -1,7 +1,6 @@ import matplotlib import matplotlib.pyplot as plt import numpy as np -import umap matplotlib.use("Agg") @@ -30,6 +29,10 @@ def plot_embeddings(embeddings, num_classes_in_batch): + try: + import umap + except ImportError as e: + raise ImportError("Package not installed: umap-learn") from e num_utter_per_class = embeddings.shape[0] // num_classes_in_batch # if necessary get just the first 10 classes diff --git a/pyproject.toml b/pyproject.toml index dad0d5ed0d..93486ff03a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -58,8 +58,6 @@ dependencies = [ "packaging>=23.1", # Inference "pysbd>=0.3.4", - # Notebooks - "umap-learn>=0.5.1", # Training "matplotlib>=3.7.0", # Coqui stack @@ -100,6 +98,7 @@ docs = [ notebooks = [ "bokeh==1.4.0", "pandas>=1.4,<2.0", + "umap-learn>=0.5.1", ] # For running the TTS server server = ["flask>=2.0.1"] From c693b088301aa9f4167ddf4084763e3a5575aef8 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 27 Jun 2024 10:10:34 +0200 Subject: [PATCH 131/255] build: update trainer to 0.1.4 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dad0d5ed0d..424ef52196 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,7 +63,7 @@ dependencies = [ # Training "matplotlib>=3.7.0", # Coqui stack - "coqui-tts-trainer>=0.1", + "coqui-tts-trainer>=0.1.4", "coqpit>=0.0.16", # Gruut + supported languages "gruut[de,es,fr]==2.2.3", From 28296c6458d02c41f7d76e395577ac7c9b1fa62a Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 27 Jun 2024 10:44:59 +0200 Subject: [PATCH 132/255] refactor: use get_git_branch from trainer --- TTS/encoder/utils/training.py | 3 +-- TTS/utils/generic_utils.py | 14 -------------- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/TTS/encoder/utils/training.py b/TTS/encoder/utils/training.py index 7692478d6b..f9088003fc 100644 --- 
a/TTS/encoder/utils/training.py +++ b/TTS/encoder/utils/training.py @@ -3,14 +3,13 @@ from coqpit import Coqpit from trainer import TrainerArgs, get_last_checkpoint -from trainer.generic_utils import get_experiment_folder_path +from trainer.generic_utils import get_experiment_folder_path, get_git_branch from trainer.io import copy_model_files from trainer.logging import logger_factory from trainer.logging.console_logger import ConsoleLogger from TTS.config import load_config, register_config from TTS.tts.utils.text.characters import parse_symbols -from TTS.utils.generic_utils import get_git_branch @dataclass diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index 48c090715f..b956eb119f 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -4,7 +4,6 @@ import logging import os import re -import subprocess import sys from pathlib import Path from typing import Dict, Optional @@ -12,19 +11,6 @@ logger = logging.getLogger(__name__) -# TODO: This method is duplicated in Trainer but out of date there -def get_git_branch(): - try: - out = subprocess.check_output(["git", "branch"]).decode("utf8") - current = next(line for line in out.split("\n") if line.startswith("*")) - current.replace("* ", "") - except subprocess.CalledProcessError: - current = "inside_docker" - except (FileNotFoundError, StopIteration) as e: - current = "unknown" - return current - - def to_camel(text): text = text.capitalize() text = re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), text) From 0fb26f97df3c32fbce50fd48fe2b332b2bdc98b1 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 27 Jun 2024 10:46:15 +0200 Subject: [PATCH 133/255] refactor: use get_user_data_dir from trainer --- TTS/model.py | 3 ++- TTS/tts/configs/bark_config.py | 3 ++- TTS/tts/models/base_tacotron.py | 3 ++- TTS/utils/generic_utils.py | 24 ------------------------ TTS/utils/io.py | 3 +-- TTS/utils/manage.py | 2 +- TTS/vc/modules/freevc/wavlm/__init__.py | 2 +- tests/zoo_tests/test_models.py | 2 +- 8 files changed, 10 insertions(+), 32 deletions(-) diff --git a/TTS/model.py b/TTS/model.py index 01dd515d81..c3707c85ae 100644 --- a/TTS/model.py +++ b/TTS/model.py @@ -60,6 +60,7 @@ def load_checkpoint( checkpoint_path (str | os.PathLike): Path to the model checkpoint file. eval (bool, optional): If true, init model for inference else for training. Defaults to False. strict (bool, optional): Match all checkpoint keys to model's keys. Defaults to True. - cache (bool, optional): If True, cache the file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to False. + cache (bool, optional): If True, cache the file locally for subsequent calls. + It is cached under `trainer.io.get_user_data_dir()/tts_cache`. Defaults to False. """ ... 
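Note: a brief sketch of how the `trainer.io.get_user_data_dir` helper adopted in this commit is typically called. It assumes the trainer version behaves like the TTS implementation removed from `TTS/utils/generic_utils.py` further down: it takes an application name and returns a `pathlib.Path`.

```python
from trainer.io import get_user_data_dir

# Honors TTS_HOME / XDG_DATA_HOME when set, otherwise falls back to the
# platform's per-user data directory, with the app name appended
# (per the removed TTS implementation shown below).
cache_dir = get_user_data_dir("tts_cache")
cache_dir.mkdir(parents=True, exist_ok=True)
print(f"Remote checkpoints are cached under: {cache_dir}")
```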
diff --git a/TTS/tts/configs/bark_config.py b/TTS/tts/configs/bark_config.py index 4d1cd1374a..3b893558aa 100644 --- a/TTS/tts/configs/bark_config.py +++ b/TTS/tts/configs/bark_config.py @@ -2,11 +2,12 @@ from dataclasses import dataclass, field from typing import Dict +from trainer.io import get_user_data_dir + from TTS.tts.configs.shared_configs import BaseTTSConfig from TTS.tts.layers.bark.model import GPTConfig from TTS.tts.layers.bark.model_fine import FineGPTConfig from TTS.tts.models.bark import BarkAudioConfig -from TTS.utils.generic_utils import get_user_data_dir @dataclass diff --git a/TTS/tts/models/base_tacotron.py b/TTS/tts/models/base_tacotron.py index 33e1c11ab7..58b0e2c6fe 100644 --- a/TTS/tts/models/base_tacotron.py +++ b/TTS/tts/models/base_tacotron.py @@ -103,7 +103,8 @@ def load_checkpoint( config (Coqpi): model configuration. checkpoint_path (str): path to checkpoint file. eval (bool, optional): whether to load model for evaluation. - cache (bool, optional): If True, cache the file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to False. + cache (bool, optional): If True, cache the file locally for subsequent calls. + It is cached under `trainer.io.get_user_data_dir()/tts_cache`. Defaults to False. """ state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) self.load_state_dict(state["model"]) diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index b956eb119f..91f8844262 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -2,9 +2,7 @@ import datetime import importlib import logging -import os import re -import sys from pathlib import Path from typing import Dict, Optional @@ -53,28 +51,6 @@ def get_import_path(obj: object) -> str: return ".".join([type(obj).__module__, type(obj).__name__]) -def get_user_data_dir(appname): - TTS_HOME = os.environ.get("TTS_HOME") - XDG_DATA_HOME = os.environ.get("XDG_DATA_HOME") - if TTS_HOME is not None: - ans = Path(TTS_HOME).expanduser().resolve(strict=False) - elif XDG_DATA_HOME is not None: - ans = Path(XDG_DATA_HOME).expanduser().resolve(strict=False) - elif sys.platform == "win32": - import winreg # pylint: disable=import-outside-toplevel - - key = winreg.OpenKey( - winreg.HKEY_CURRENT_USER, r"Software\Microsoft\Windows\CurrentVersion\Explorer\Shell Folders" - ) - dir_, _ = winreg.QueryValueEx(key, "Local AppData") - ans = Path(dir_).resolve(strict=False) - elif sys.platform == "darwin": - ans = Path("~/Library/Application Support/").expanduser() - else: - ans = Path.home().joinpath(".local/share") - return ans.joinpath(appname) - - def set_init_dict(model_dict, checkpoint_state, c): # Partial initialization: if there is a mismatch with new and old layer, it is skipped. 
for k, v in checkpoint_state.items(): diff --git a/TTS/utils/io.py b/TTS/utils/io.py index 3107ba661b..a837f186b8 100644 --- a/TTS/utils/io.py +++ b/TTS/utils/io.py @@ -4,8 +4,7 @@ import fsspec import torch - -from TTS.utils.generic_utils import get_user_data_dir +from trainer.io import get_user_data_dir class RenamingUnpickler(pickle_tts.Unpickler): diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index d4781d54e6..fb5071d9b0 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -11,9 +11,9 @@ import fsspec import requests from tqdm import tqdm +from trainer.io import get_user_data_dir from TTS.config import load_config, read_json_with_comments -from TTS.utils.generic_utils import get_user_data_dir logger = logging.getLogger(__name__) diff --git a/TTS/vc/modules/freevc/wavlm/__init__.py b/TTS/vc/modules/freevc/wavlm/__init__.py index 0033d22c48..03b2f5827b 100644 --- a/TTS/vc/modules/freevc/wavlm/__init__.py +++ b/TTS/vc/modules/freevc/wavlm/__init__.py @@ -3,8 +3,8 @@ import urllib.request import torch +from trainer.io import get_user_data_dir -from TTS.utils.generic_utils import get_user_data_dir from TTS.vc.modules.freevc.wavlm.wavlm import WavLM, WavLMConfig logger = logging.getLogger(__name__) diff --git a/tests/zoo_tests/test_models.py b/tests/zoo_tests/test_models.py index 1c28e8609c..b944423988 100644 --- a/tests/zoo_tests/test_models.py +++ b/tests/zoo_tests/test_models.py @@ -4,11 +4,11 @@ import shutil import torch +from trainer.io import get_user_data_dir from tests import get_tests_data_path, get_tests_output_path, run_cli from TTS.tts.utils.languages import LanguageManager from TTS.tts.utils.speakers import SpeakerManager -from TTS.utils.generic_utils import get_user_data_dir from TTS.utils.manage import ModelManager MODELS_WITH_SEP_TESTS = [ From da82d55329ec287e48d115986f2cc3a724f1275f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 27 Jun 2024 11:10:34 +0200 Subject: [PATCH 134/255] refactor: use load_fsspec from trainer Made automatically with: rg "from TTS.utils.io import load_fsspec" --files-with-matches | xargs sed -i 's/from TTS.utils.io import load_fsspec/from trainer.io import load_fsspec/g' --- TTS/encoder/models/base_encoder.py | 2 +- TTS/tts/layers/xtts/hifigan_decoder.py | 2 +- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 2 +- TTS/tts/models/align_tts.py | 2 +- TTS/tts/models/base_tacotron.py | 2 +- TTS/tts/models/delightful_tts.py | 2 +- TTS/tts/models/forward_tts.py | 2 +- TTS/tts/models/glow_tts.py | 2 +- TTS/tts/models/neuralhmm_tts.py | 2 +- TTS/tts/models/overflow.py | 2 +- TTS/tts/models/vits.py | 2 +- TTS/tts/models/xtts.py | 2 +- TTS/vc/models/freevc.py | 2 +- TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py | 2 +- TTS/vocoder/models/gan.py | 2 +- TTS/vocoder/models/hifigan_generator.py | 3 +-- TTS/vocoder/models/melgan_generator.py | 2 +- TTS/vocoder/models/parallel_wavegan_generator.py | 2 +- TTS/vocoder/models/wavegrad.py | 2 +- TTS/vocoder/models/wavernn.py | 2 +- 20 files changed, 20 insertions(+), 21 deletions(-) diff --git a/TTS/encoder/models/base_encoder.py b/TTS/encoder/models/base_encoder.py index 374062463d..f7137c2186 100644 --- a/TTS/encoder/models/base_encoder.py +++ b/TTS/encoder/models/base_encoder.py @@ -5,10 +5,10 @@ import torchaudio from coqpit import Coqpit from torch import nn +from trainer.io import load_fsspec from TTS.encoder.losses import AngleProtoLoss, GE2ELoss, SoftmaxAngleProtoLoss from TTS.utils.generic_utils import set_init_dict -from TTS.utils.io import load_fsspec logger = 
logging.getLogger(__name__) diff --git a/TTS/tts/layers/xtts/hifigan_decoder.py b/TTS/tts/layers/xtts/hifigan_decoder.py index 9160529bf9..b6032e5584 100644 --- a/TTS/tts/layers/xtts/hifigan_decoder.py +++ b/TTS/tts/layers/xtts/hifigan_decoder.py @@ -7,8 +7,8 @@ from torch.nn import functional as F from torch.nn.utils.parametrizations import weight_norm from torch.nn.utils.parametrize import remove_parametrizations +from trainer.io import load_fsspec -from TTS.utils.io import load_fsspec from TTS.vocoder.models.hifigan_generator import get_padding logger = logging.getLogger(__name__) diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index 0f161324f8..04d123778b 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -7,6 +7,7 @@ import torchaudio from coqpit import Coqpit from torch.utils.data import DataLoader +from trainer.io import load_fsspec from trainer.torch import DistributedSampler from trainer.trainer_utils import get_optimizer, get_scheduler @@ -18,7 +19,6 @@ from TTS.tts.layers.xtts.trainer.dataset import XTTSDataset from TTS.tts.models.base_tts import BaseTTS from TTS.tts.models.xtts import Xtts, XttsArgs, XttsAudioConfig -from TTS.utils.io import load_fsspec logger = logging.getLogger(__name__) diff --git a/TTS/tts/models/align_tts.py b/TTS/tts/models/align_tts.py index 18b9cde385..2d27a57850 100644 --- a/TTS/tts/models/align_tts.py +++ b/TTS/tts/models/align_tts.py @@ -4,6 +4,7 @@ import torch from coqpit import Coqpit from torch import nn +from trainer.io import load_fsspec from TTS.tts.layers.align_tts.mdn import MDNBlock from TTS.tts.layers.feed_forward.decoder import Decoder @@ -15,7 +16,6 @@ from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_spectrogram -from TTS.utils.io import load_fsspec @dataclass diff --git a/TTS/tts/models/base_tacotron.py b/TTS/tts/models/base_tacotron.py index 58b0e2c6fe..79cdf1a7d4 100644 --- a/TTS/tts/models/base_tacotron.py +++ b/TTS/tts/models/base_tacotron.py @@ -6,6 +6,7 @@ import torch from coqpit import Coqpit from torch import nn +from trainer.io import load_fsspec from TTS.tts.layers.losses import TacotronLoss from TTS.tts.models.base_tts import BaseTTS @@ -15,7 +16,6 @@ from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_spectrogram from TTS.utils.generic_utils import format_aux_input -from TTS.utils.io import load_fsspec from TTS.utils.training import gradual_training_scheduler logger = logging.getLogger(__name__) diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index 4230fcc33d..a938a3a4ab 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -16,6 +16,7 @@ from torch.nn import functional as F from torch.utils.data import DataLoader from torch.utils.data.sampler import WeightedRandomSampler +from trainer.io import load_fsspec from trainer.torch import DistributedSampler, DistributedSamplerWrapper from trainer.trainer_utils import get_optimizer, get_scheduler @@ -32,7 +33,6 @@ from TTS.utils.audio.numpy_transforms import db_to_amp as db_to_amp_numpy from TTS.utils.audio.numpy_transforms import mel_to_wav as mel_to_wav_numpy from TTS.utils.audio.processor import AudioProcessor -from TTS.utils.io import load_fsspec from TTS.vocoder.layers.losses import MultiScaleSTFTLoss from 
TTS.vocoder.models.hifigan_generator import HifiganGenerator from TTS.vocoder.utils.generic_utils import plot_results diff --git a/TTS/tts/models/forward_tts.py b/TTS/tts/models/forward_tts.py index b108a554d5..4b74462dd5 100644 --- a/TTS/tts/models/forward_tts.py +++ b/TTS/tts/models/forward_tts.py @@ -6,6 +6,7 @@ from coqpit import Coqpit from torch import nn from torch.cuda.amp.autocast_mode import autocast +from trainer.io import load_fsspec from TTS.tts.layers.feed_forward.decoder import Decoder from TTS.tts.layers.feed_forward.encoder import Encoder @@ -17,7 +18,6 @@ from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_avg_energy, plot_avg_pitch, plot_spectrogram -from TTS.utils.io import load_fsspec logger = logging.getLogger(__name__) diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py index a4ae012166..64954d283c 100644 --- a/TTS/tts/models/glow_tts.py +++ b/TTS/tts/models/glow_tts.py @@ -7,6 +7,7 @@ from torch import nn from torch.cuda.amp.autocast_mode import autocast from torch.nn import functional as F +from trainer.io import load_fsspec from TTS.tts.configs.glow_tts_config import GlowTTSConfig from TTS.tts.layers.glow_tts.decoder import Decoder @@ -17,7 +18,6 @@ from TTS.tts.utils.synthesis import synthesis from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_spectrogram -from TTS.utils.io import load_fsspec logger = logging.getLogger(__name__) diff --git a/TTS/tts/models/neuralhmm_tts.py b/TTS/tts/models/neuralhmm_tts.py index d5bd9d1311..277369e644 100644 --- a/TTS/tts/models/neuralhmm_tts.py +++ b/TTS/tts/models/neuralhmm_tts.py @@ -5,6 +5,7 @@ import torch from coqpit import Coqpit from torch import nn +from trainer.io import load_fsspec from trainer.logging.tensorboard_logger import TensorboardLogger from TTS.tts.layers.overflow.common_layers import Encoder, OverflowUtils @@ -18,7 +19,6 @@ from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_spectrogram from TTS.utils.generic_utils import format_aux_input -from TTS.utils.io import load_fsspec logger = logging.getLogger(__name__) diff --git a/TTS/tts/models/overflow.py b/TTS/tts/models/overflow.py index 0218d0452b..b05b75009b 100644 --- a/TTS/tts/models/overflow.py +++ b/TTS/tts/models/overflow.py @@ -5,6 +5,7 @@ import torch from coqpit import Coqpit from torch import nn +from trainer.io import load_fsspec from trainer.logging.tensorboard_logger import TensorboardLogger from TTS.tts.layers.overflow.common_layers import Encoder, OverflowUtils @@ -19,7 +20,6 @@ from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_spectrogram from TTS.utils.generic_utils import format_aux_input -from TTS.utils.io import load_fsspec logger = logging.getLogger(__name__) diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index 2552133753..b014e4fdde 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -16,6 +16,7 @@ from torch.nn import functional as F from torch.utils.data import DataLoader from torch.utils.data.sampler import WeightedRandomSampler +from trainer.io import load_fsspec from trainer.torch import DistributedSampler, DistributedSamplerWrapper from trainer.trainer_utils import get_optimizer, get_scheduler @@ -34,7 +35,6 @@ from TTS.tts.utils.text.characters import BaseCharacters, BaseVocabulary, _characters, _pad, _phonemes, 
_punctuations from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment -from TTS.utils.io import load_fsspec from TTS.utils.samplers import BucketBatchSampler from TTS.vocoder.models.hifigan_generator import HifiganGenerator from TTS.vocoder.utils.generic_utils import plot_results diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index e6d245a041..fa320aacd0 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -7,6 +7,7 @@ import torch.nn.functional as F import torchaudio from coqpit import Coqpit +from trainer.io import load_fsspec from TTS.tts.layers.xtts.gpt import GPT from TTS.tts.layers.xtts.hifigan_decoder import HifiDecoder @@ -14,7 +15,6 @@ from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer, split_sentence from TTS.tts.layers.xtts.xtts_manager import LanguageManager, SpeakerManager from TTS.tts.models.base_tts import BaseTTS -from TTS.utils.io import load_fsspec logger = logging.getLogger(__name__) diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index 7746572f23..e5cfdc1e61 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -11,12 +11,12 @@ from torch.nn.utils import spectral_norm from torch.nn.utils.parametrizations import weight_norm from torch.nn.utils.parametrize import remove_parametrizations +from trainer.io import load_fsspec import TTS.vc.modules.freevc.commons as commons import TTS.vc.modules.freevc.modules as modules from TTS.tts.utils.helpers import sequence_mask from TTS.tts.utils.speakers import SpeakerManager -from TTS.utils.io import load_fsspec from TTS.vc.configs.freevc_config import FreeVCConfig from TTS.vc.models.base_vc import BaseVC from TTS.vc.modules.freevc.commons import init_weights diff --git a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py index 2636400b90..294bf322cb 100644 --- a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py +++ b/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py @@ -5,8 +5,8 @@ import numpy as np import torch from torch import nn +from trainer.io import load_fsspec -from TTS.utils.io import load_fsspec from TTS.vc.modules.freevc.speaker_encoder import audio from TTS.vc.modules.freevc.speaker_encoder.hparams import ( mel_n_channels, diff --git a/TTS/vocoder/models/gan.py b/TTS/vocoder/models/gan.py index 9b6508d8ba..8792950a56 100644 --- a/TTS/vocoder/models/gan.py +++ b/TTS/vocoder/models/gan.py @@ -7,10 +7,10 @@ from torch import nn from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler +from trainer.io import load_fsspec from trainer.trainer_utils import get_optimizer, get_scheduler from TTS.utils.audio import AudioProcessor -from TTS.utils.io import load_fsspec from TTS.vocoder.datasets.gan_dataset import GANDataset from TTS.vocoder.layers.losses import DiscriminatorLoss, GeneratorLoss from TTS.vocoder.models import setup_discriminator, setup_generator diff --git a/TTS/vocoder/models/hifigan_generator.py b/TTS/vocoder/models/hifigan_generator.py index 083ce344fb..afdd59a859 100644 --- a/TTS/vocoder/models/hifigan_generator.py +++ b/TTS/vocoder/models/hifigan_generator.py @@ -7,8 +7,7 @@ from torch.nn import functional as F from torch.nn.utils.parametrizations import weight_norm from torch.nn.utils.parametrize import remove_parametrizations - -from TTS.utils.io import load_fsspec +from trainer.io import load_fsspec logger = logging.getLogger(__name__) diff --git 
a/TTS/vocoder/models/melgan_generator.py b/TTS/vocoder/models/melgan_generator.py index bb3fee789c..03c971afa4 100644 --- a/TTS/vocoder/models/melgan_generator.py +++ b/TTS/vocoder/models/melgan_generator.py @@ -1,8 +1,8 @@ import torch from torch import nn from torch.nn.utils.parametrizations import weight_norm +from trainer.io import load_fsspec -from TTS.utils.io import load_fsspec from TTS.vocoder.layers.melgan import ResidualStack diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py index 96684d2a0a..6a4d4ca6e7 100644 --- a/TTS/vocoder/models/parallel_wavegan_generator.py +++ b/TTS/vocoder/models/parallel_wavegan_generator.py @@ -4,8 +4,8 @@ import numpy as np import torch from torch.nn.utils.parametrize import remove_parametrizations +from trainer.io import load_fsspec -from TTS.utils.io import load_fsspec from TTS.vocoder.layers.parallel_wavegan import ResidualBlock from TTS.vocoder.layers.upsample import ConvUpsample diff --git a/TTS/vocoder/models/wavegrad.py b/TTS/vocoder/models/wavegrad.py index 70d9edb342..c49abd2201 100644 --- a/TTS/vocoder/models/wavegrad.py +++ b/TTS/vocoder/models/wavegrad.py @@ -9,9 +9,9 @@ from torch.nn.utils.parametrize import remove_parametrizations from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler +from trainer.io import load_fsspec from trainer.trainer_utils import get_optimizer, get_scheduler -from TTS.utils.io import load_fsspec from TTS.vocoder.datasets import WaveGradDataset from TTS.vocoder.layers.wavegrad import Conv1d, DBlock, FiLM, UBlock from TTS.vocoder.models.base_vocoder import BaseVocoder diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py index 901afdff11..723f18dde2 100644 --- a/TTS/vocoder/models/wavernn.py +++ b/TTS/vocoder/models/wavernn.py @@ -10,11 +10,11 @@ from torch import nn from torch.utils.data import DataLoader from torch.utils.data.distributed import DistributedSampler +from trainer.io import load_fsspec from TTS.tts.utils.visual import plot_spectrogram from TTS.utils.audio import AudioProcessor from TTS.utils.audio.numpy_transforms import mulaw_decode -from TTS.utils.io import load_fsspec from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset from TTS.vocoder.layers.losses import WaveRNNLoss from TTS.vocoder.models.base_vocoder import BaseVocoder From e869b9b658f534219272ef2ff089ff5f18227c6e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 27 Jun 2024 11:13:41 +0200 Subject: [PATCH 135/255] refactor: use load_checkpoint from trainer --- TTS/bin/compute_attention_masks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py index be275baa9c..127199186b 100644 --- a/TTS/bin/compute_attention_masks.py +++ b/TTS/bin/compute_attention_masks.py @@ -8,6 +8,7 @@ import torch from torch.utils.data import DataLoader from tqdm import tqdm +from trainer.io import load_checkpoint from TTS.config import load_config from TTS.tts.datasets.TTSDataset import TTSDataset @@ -15,7 +16,6 @@ from TTS.tts.utils.text.characters import make_symbols, phonemes, symbols from TTS.utils.audio import AudioProcessor from TTS.utils.generic_utils import ConsoleFormatter, setup_logger -from TTS.utils.io import load_checkpoint if __name__ == "__main__": setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) From 2d06aeb79b27204f295f2f5a91374acba81bb567 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: 
Thu, 27 Jun 2024 11:15:57 +0200 Subject: [PATCH 136/255] chore: remove unused TTS.utils.io module All uses of these methods were replaced with the equivalents from coqui-tts-trainer --- TTS/encoder/utils/training.py | 2 +- TTS/utils/io.py | 69 ----------------------------------- TTS/vocoder/layers/losses.py | 2 +- 3 files changed, 2 insertions(+), 71 deletions(-) delete mode 100644 TTS/utils/io.py diff --git a/TTS/encoder/utils/training.py b/TTS/encoder/utils/training.py index f9088003fc..cc3a78b084 100644 --- a/TTS/encoder/utils/training.py +++ b/TTS/encoder/utils/training.py @@ -29,7 +29,7 @@ def process_args(args, config=None): args (argparse.Namespace or dict like): Parsed input arguments. config (Coqpit): Model config. If none, it is generated from `args`. Defaults to None. Returns: - c (TTS.utils.io.AttrDict): Config paramaters. + c (Coqpit): Config paramaters. out_path (str): Path to save models and logging. audio_path (str): Path to save generated test audios. c_logger (TTS.utils.console_logger.ConsoleLogger): Class that does diff --git a/TTS/utils/io.py b/TTS/utils/io.py deleted file mode 100644 index a837f186b8..0000000000 --- a/TTS/utils/io.py +++ /dev/null @@ -1,69 +0,0 @@ -import os -import pickle as pickle_tts -from typing import Any, Callable, Dict, Union - -import fsspec -import torch -from trainer.io import get_user_data_dir - - -class RenamingUnpickler(pickle_tts.Unpickler): - """Overload default pickler to solve module renaming problem""" - - def find_class(self, module, name): - return super().find_class(module.replace("mozilla_voice_tts", "TTS"), name) - - -class AttrDict(dict): - """A custom dict which converts dict keys - to class attributes""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self.__dict__ = self - - -def load_fsspec( - path: str, - map_location: Union[str, Callable, torch.device, Dict[Union[str, torch.device], Union[str, torch.device]]] = None, - cache: bool = True, - **kwargs, -) -> Any: - """Like torch.load but can load from other locations (e.g. s3:// , gs://). - - Args: - path: Any path or url supported by fsspec. - map_location: torch.device or str. - cache: If True, cache a remote file locally for subsequent calls. It is cached under `get_user_data_dir()/tts_cache`. Defaults to True. - **kwargs: Keyword arguments forwarded to torch.load. - - Returns: - Object stored in path. 
- """ - is_local = os.path.isdir(path) or os.path.isfile(path) - if cache and not is_local: - with fsspec.open( - f"filecache::{path}", - filecache={"cache_storage": str(get_user_data_dir("tts_cache"))}, - mode="rb", - ) as f: - return torch.load(f, map_location=map_location, **kwargs) - else: - with fsspec.open(path, "rb") as f: - return torch.load(f, map_location=map_location, **kwargs) - - -def load_checkpoint( - model, checkpoint_path, use_cuda=False, eval=False, cache=False -): # pylint: disable=redefined-builtin - try: - state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) - except ModuleNotFoundError: - pickle_tts.Unpickler = RenamingUnpickler - state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), pickle_module=pickle_tts, cache=cache) - model.load_state_dict(state["model"]) - if use_cuda: - model.cuda() - if eval: - model.eval() - return model, state diff --git a/TTS/vocoder/layers/losses.py b/TTS/vocoder/layers/losses.py index 1f977755cc..8d4dd725ef 100644 --- a/TTS/vocoder/layers/losses.py +++ b/TTS/vocoder/layers/losses.py @@ -221,7 +221,7 @@ class GeneratorLoss(nn.Module): changing configurations. Args: - C (AttrDict): model configuration. + C (Coqpit): model configuration. """ def __init__(self, C): From 808a9381712cc90215303faef8aa2f4def725ed7 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 29 Jun 2024 15:12:29 +0200 Subject: [PATCH 137/255] build: specify minimum versions for dependencies --- pyproject.toml | 38 +++++++++++++++++++------------------- requirements.dev.txt | 8 ++++---- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 424ef52196..07aaa42d5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,7 +73,7 @@ dependencies = [ # Bark "encodec>=0.1.1", # XTTS - "num2words", + "num2words>=0.5.11", "spacy[ja]>=3" ] @@ -81,20 +81,20 @@ dependencies = [ # Development dependencies dev = [ "black==24.2.0", - "coverage[toml]", - "nose2", - "pre-commit", + "coverage[toml]>=7", + "nose2>=0.15", + "pre-commit>=3", "ruff==0.4.9", - "tomli; python_version < '3.11'", + "tomli>=2; python_version < '3.11'", ] # Dependencies for building the documentation docs = [ - "furo", + "furo>=2023.5.20", "myst-parser==2.0.0", "sphinx==7.2.5", - "sphinx_inline_tabs", - "sphinx_copybutton", - "linkify-it-py", + "sphinx_inline_tabs>=2023.4.21", + "sphinx_copybutton>=0.1", + "linkify-it-py>=2.0.0", ] # Only used in notebooks notebooks = [ @@ -102,30 +102,30 @@ notebooks = [ "pandas>=1.4,<2.0", ] # For running the TTS server -server = ["flask>=2.0.1"] +server = ["flask>=3.0.0"] # Language-specific dependencies, mainly for G2P # Bangla bn = [ - "bangla", - "bnnumerizer", - "bnunicodenormalizer", + "bangla>=0.0.2", + "bnnumerizer>=0.0.2", + "bnunicodenormalizer>=0.1.0", ] # Korean ko = [ - "hangul_romanize", - "jamo", + "hangul_romanize>=0.1.0", + "jamo>=0.4.1", "g2pkk>=0.1.1", ] # Japanese ja = [ - "mecab-python3", + "mecab-python3>=1.0.2", "unidic-lite==1.0.8", - "cutlet", + "cutlet>=0.2.0", ] # Chinese zh = [ - "jieba", - "pypinyin", + "jieba>=0.42.1", + "pypinyin>=0.40.0", ] # All language-specific dependencies languages = [ diff --git a/requirements.dev.txt b/requirements.dev.txt index 1e4a7beff7..74ec0cd80c 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,8 +1,8 @@ # Generated via scripts/generate_requirements.py and pre-commit hook. # Do not edit this file; modify pyproject.toml instead. 
black==24.2.0 -coverage[toml] -nose2 -pre-commit +coverage[toml]>=7 +nose2>=0.15 +pre-commit>=3 ruff==0.4.9 -tomli; python_version < '3.11' +tomli>=2; python_version < '3.11' From 8cab2e3b4e59d4668427b2262cc5cf8e54cddb2b Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 29 Jun 2024 15:45:34 +0200 Subject: [PATCH 138/255] ci: test lowest and highest compatible versions of dependencies --- .github/workflows/tests.yml | 7 +++++-- pyproject.toml | 1 + 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 43815f2ef0..88cc8e7949 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -45,8 +45,11 @@ jobs: sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Install TTS run: | - python3 -m uv pip install --system "coqui-tts[dev,server,languages] @ ." - python3 setup.py egg_info + resolution=highest + if [ "${{ matrix.python-version }}" == "3.9" ]; then + resolution=lowest-direct + fi + python3 -m uv pip install --resolution=$resolution --system "coqui-tts[dev,server,languages] @ ." - name: Unit tests run: make ${{ matrix.subset }} - name: Upload coverage data diff --git a/pyproject.toml b/pyproject.toml index 07aaa42d5a..07f15d0595 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,7 @@ [build-system] requires = [ "setuptools", + "setuptools-scm", "cython~=0.29.30", "numpy>=2.0.0", ] From 6ea3b75b8466c064cf3a98645de5bab6060a2e43 Mon Sep 17 00:00:00 2001 From: Abraham Mathews <50496762+abrahammathews2000@users.noreply.github.com> Date: Tue, 2 Jul 2024 17:13:52 +0530 Subject: [PATCH 139/255] Update xtts.py (#53) docs(xtts): fix typo in example --- TTS/tts/models/xtts.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index fa320aacd0..8dda180a01 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -200,7 +200,7 @@ class Xtts(BaseTTS): >>> from TTS.tts.configs.xtts_config import XttsConfig >>> from TTS.tts.models.xtts import Xtts >>> config = XttsConfig() - >>> model = Xtts.inif_from_config(config) + >>> model = Xtts.init_from_config(config) >>> model.load_checkpoint(config, checkpoint_dir="paths/to/models_dir/", eval=True) """ From 9192ef1aa6a349bff239ec6dd41de9676d347e55 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 5 Jul 2024 13:52:01 +0200 Subject: [PATCH 140/255] fix(xtts): load tokenizer file based on config as last resort --- TTS/tts/models/xtts.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index fa320aacd0..539a2a2da2 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -1,6 +1,7 @@ import logging import os from dataclasses import dataclass +from pathlib import Path import librosa import torch @@ -760,7 +761,11 @@ def load_checkpoint( """ model_path = checkpoint_path or os.path.join(checkpoint_dir, "model.pth") - vocab_path = vocab_path or os.path.join(checkpoint_dir, "vocab.json") + if vocab_path is None: + if checkpoint_dir is not None and (Path(checkpoint_dir) / "vocab.json").is_file(): + vocab_path = str(Path(checkpoint_dir) / "vocab.json") + else: + vocab_path = config.model_args.tokenizer_file if speaker_file_path is None and checkpoint_dir is not None: speaker_file_path = os.path.join(checkpoint_dir, "speakers_xtts.pth") From 20bbb411c2be5b952ed9e60caf7a197ed3331990 Mon Sep 17 00:00:00 2001 From: Daniel Walmsley Date: Thu, 
25 Jul 2024 07:24:10 -0700 Subject: [PATCH 141/255] fix(xtts): update streaming for transformers>=4.42.0 (#59) * Fix Stream Generator on MacOS * Make it work on mps * Implement custom tensor.isin * Fix for latest TF * Comment out hack for now * Remove unused code * build: increase minimum transformers version * style: fix --------- Co-authored-by: Enno Hermann --- TTS/tts/layers/xtts/stream_generator.py | 6 +++--- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/TTS/tts/layers/xtts/stream_generator.py b/TTS/tts/layers/xtts/stream_generator.py index cb09895824..efc92a04ef 100644 --- a/TTS/tts/layers/xtts/stream_generator.py +++ b/TTS/tts/layers/xtts/stream_generator.py @@ -376,7 +376,7 @@ def generate( # noqa: PLR0911 elif is_sample_gen_mode: # 11. prepare logits warper - logits_warper = self._get_logits_warper(generation_config) + logits_warper = self._get_logits_warper(generation_config, inputs_tensor.device) # 12. expand input_ids with `num_return_sequences` additional sequences per batch input_ids, model_kwargs = self._expand_inputs_for_generation( @@ -401,7 +401,7 @@ def generate( # noqa: PLR0911 ) elif is_sample_gen_stream_mode: # 11. prepare logits warper - logits_warper = self._get_logits_warper(generation_config) + logits_warper = self._get_logits_warper(generation_config, inputs_tensor.device) # 12. expand input_ids with `num_return_sequences` additional sequences per batch input_ids, model_kwargs = self._expand_inputs_for_generation( @@ -463,7 +463,7 @@ def generate( # noqa: PLR0911 elif is_beam_sample_gen_mode: # 11. prepare logits warper - logits_warper = self._get_logits_warper(generation_config) + logits_warper = self._get_logits_warper(generation_config, inputs_tensor.device) if stopping_criteria.max_length is None: raise ValueError("`max_length` needs to be a stopping_criteria for now.") diff --git a/pyproject.toml b/pyproject.toml index fc748ff46b..a8c52fc176 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,7 @@ dependencies = [ "gruut[de,es,fr]==2.2.3", # Tortoise "einops>=0.6.0", - "transformers>=4.41.1", + "transformers>=4.42.0", # Bark "encodec>=0.1.1", # XTTS From 8c460d0cd066b29188dc9be3bb53cbc488545929 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 31 Jul 2024 15:20:56 +0200 Subject: [PATCH 142/255] fix(dataset): skip files where audio length can't be computed Avoids hard failures when the audio can't be decoded. --- TTS/tts/datasets/dataset.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py index 3886a8f8c9..f718f3d4ad 100644 --- a/TTS/tts/datasets/dataset.py +++ b/TTS/tts/datasets/dataset.py @@ -3,7 +3,7 @@ import logging import os import random -from typing import Dict, List, Union +from typing import Any, Dict, List, Union import numpy as np import torch @@ -46,15 +46,21 @@ def string2filename(string): return filename -def get_audio_size(audiopath) -> int: +def get_audio_size(audiopath: Union[str, os.PathLike[Any]]) -> int: """Return the number of samples in the audio file.""" + if not isinstance(audiopath, str): + audiopath = str(audiopath) extension = audiopath.rpartition(".")[-1].lower() if extension not in {"mp3", "wav", "flac"}: raise RuntimeError( f"The audio format {extension} is not supported, please convert the audio files to mp3, flac, or wav format!" 
) - return torchaudio.info(audiopath).num_frames + try: + return torchaudio.info(audiopath).num_frames + except RuntimeError as e: + msg = f"Failed to decode {audiopath}" + raise RuntimeError(msg) from e class TTSDataset(Dataset): @@ -186,7 +192,11 @@ def lengths(self): lens = [] for item in self.samples: _, wav_file, *_ = _parse_sample(item) - audio_len = get_audio_size(wav_file) + try: + audio_len = get_audio_size(wav_file) + except RuntimeError: + logger.warn(f"Failed to compute length for {item['audio_file']}") + audio_len = 0 lens.append(audio_len) return lens @@ -304,7 +314,11 @@ def load_data(self, idx): def _compute_lengths(samples): new_samples = [] for item in samples: - audio_length = get_audio_size(item["audio_file"]) + try: + audio_length = get_audio_size(item["audio_file"]) + except RuntimeError: + logger.warn(f"Failed to compute length, skipping {item['audio_file']}") + continue text_lenght = len(item["text"]) item["audio_length"] = audio_length item["text_length"] = text_lenght From 9c604c1de0af05cc0863f687ec695d9b43864c4c Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 31 Jul 2024 15:40:46 +0200 Subject: [PATCH 143/255] chore(dataset): address lint issues --- TTS/tts/datasets/dataset.py | 201 +++++++++++++++++++----------------- 1 file changed, 109 insertions(+), 92 deletions(-) diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py index f718f3d4ad..37e3a1779d 100644 --- a/TTS/tts/datasets/dataset.py +++ b/TTS/tts/datasets/dataset.py @@ -3,9 +3,10 @@ import logging import os import random -from typing import Any, Dict, List, Union +from typing import Any, Optional, Union import numpy as np +import numpy.typing as npt import torch import torchaudio import tqdm @@ -32,18 +33,18 @@ def _parse_sample(item): elif len(item) == 3: text, wav_file, speaker_name = item else: - raise ValueError(" [!] Dataset cannot parse the sample.") + msg = "Dataset cannot parse the sample." + raise ValueError(msg) return text, wav_file, speaker_name, language_name, attn_file -def noise_augment_audio(wav): +def noise_augment_audio(wav: npt.NDArray) -> npt.NDArray: return wav + (1.0 / 32768.0) * np.random.rand(*wav.shape) -def string2filename(string): +def string2filename(string: str) -> str: # generate a safe and reversible filename based on a string - filename = base64.urlsafe_b64encode(string.encode("utf-8")).decode("utf-8", "ignore") - return filename + return base64.urlsafe_b64encode(string.encode("utf-8")).decode("utf-8", "ignore") def get_audio_size(audiopath: Union[str, os.PathLike[Any]]) -> int: @@ -52,9 +53,8 @@ def get_audio_size(audiopath: Union[str, os.PathLike[Any]]) -> int: audiopath = str(audiopath) extension = audiopath.rpartition(".")[-1].lower() if extension not in {"mp3", "wav", "flac"}: - raise RuntimeError( - f"The audio format {extension} is not supported, please convert the audio files to mp3, flac, or wav format!" - ) + msg = f"The audio format {extension} is not supported, please convert the audio files to mp3, flac, or wav format!" 
+ raise RuntimeError(msg) try: return torchaudio.info(audiopath).num_frames @@ -69,31 +69,32 @@ def __init__( outputs_per_step: int = 1, compute_linear_spec: bool = False, ap: AudioProcessor = None, - samples: List[Dict] = None, + samples: Optional[list[dict]] = None, tokenizer: "TTSTokenizer" = None, compute_f0: bool = False, compute_energy: bool = False, - f0_cache_path: str = None, - energy_cache_path: str = None, + f0_cache_path: Optional[str] = None, + energy_cache_path: Optional[str] = None, return_wav: bool = False, batch_group_size: int = 0, min_text_len: int = 0, max_text_len: int = float("inf"), min_audio_len: int = 0, max_audio_len: int = float("inf"), - phoneme_cache_path: str = None, + phoneme_cache_path: Optional[str] = None, precompute_num_workers: int = 0, - speaker_id_mapping: Dict = None, - d_vector_mapping: Dict = None, - language_id_mapping: Dict = None, + speaker_id_mapping: Optional[dict] = None, + d_vector_mapping: Optional[dict] = None, + language_id_mapping: Optional[dict] = None, use_noise_augment: bool = False, start_by_longest: bool = False, - ): + ) -> None: """Generic 📂 data loader for `tts` models. It is configurable for different outputs and needs. If you need something different, you can subclass and override. Args: + ---- outputs_per_step (int): Number of time frames predicted per step. compute_linear_spec (bool): compute linear spectrogram if True. @@ -145,6 +146,7 @@ def __init__( use_noise_augment (bool): Enable adding random noise to wav for augmentation. Defaults to False. start_by_longest (bool): Start by longest sequence. It is especially useful to check OOM. Defaults to False. + """ super().__init__() self.batch_group_size = batch_group_size @@ -174,28 +176,37 @@ def __init__( if self.tokenizer.use_phonemes: self.phoneme_dataset = PhonemeDataset( - self.samples, self.tokenizer, phoneme_cache_path, precompute_num_workers=precompute_num_workers + self.samples, + self.tokenizer, + phoneme_cache_path, + precompute_num_workers=precompute_num_workers, ) if compute_f0: self.f0_dataset = F0Dataset( - self.samples, self.ap, cache_path=f0_cache_path, precompute_num_workers=precompute_num_workers + self.samples, + self.ap, + cache_path=f0_cache_path, + precompute_num_workers=precompute_num_workers, ) if compute_energy: self.energy_dataset = EnergyDataset( - self.samples, self.ap, cache_path=energy_cache_path, precompute_num_workers=precompute_num_workers + self.samples, + self.ap, + cache_path=energy_cache_path, + precompute_num_workers=precompute_num_workers, ) self.print_logs() @property - def lengths(self): + def lengths(self) -> list[int]: lens = [] for item in self.samples: _, wav_file, *_ = _parse_sample(item) try: audio_len = get_audio_size(wav_file) except RuntimeError: - logger.warn(f"Failed to compute length for {item['audio_file']}") + logger.warning(f"Failed to compute length for {item['audio_file']}") audio_len = 0 lens.append(audio_len) return lens @@ -205,7 +216,7 @@ def samples(self): return self._samples @samples.setter - def samples(self, new_samples): + def samples(self, new_samples) -> None: self._samples = new_samples if hasattr(self, "f0_dataset"): self.f0_dataset.samples = new_samples @@ -214,7 +225,7 @@ def samples(self, new_samples): if hasattr(self, "phoneme_dataset"): self.phoneme_dataset.samples = new_samples - def __len__(self): + def __len__(self) -> int: return len(self.samples) def __getitem__(self, idx): @@ -261,7 +272,7 @@ def get_token_ids(self, idx, text): token_ids = self.tokenizer.text_to_ids(text) return 
np.array(token_ids, dtype=np.int32) - def load_data(self, idx): + def load_data(self, idx) -> dict[str, Any]: item = self.samples[idx] raw_text = item["text"] @@ -295,7 +306,7 @@ def load_data(self, idx): if self.compute_energy: energy = self.get_energy(idx)["energy"] - sample = { + return { "raw_text": raw_text, "token_ids": token_ids, "wav": wav, @@ -308,7 +319,6 @@ def load_data(self, idx): "wav_file_name": os.path.basename(item["audio_file"]), "audio_unique_name": item["audio_unique_name"], } - return sample @staticmethod def _compute_lengths(samples): @@ -317,7 +327,7 @@ def _compute_lengths(samples): try: audio_length = get_audio_size(item["audio_file"]) except RuntimeError: - logger.warn(f"Failed to compute length, skipping {item['audio_file']}") + logger.warning(f"Failed to compute length, skipping {item['audio_file']}") continue text_lenght = len(item["text"]) item["audio_length"] = audio_length @@ -326,7 +336,7 @@ def _compute_lengths(samples): return new_samples @staticmethod - def filter_by_length(lengths: List[int], min_len: int, max_len: int): + def filter_by_length(lengths: list[int], min_len: int, max_len: int): idxs = np.argsort(lengths) # ascending order ignore_idx = [] keep_idx = [] @@ -339,10 +349,9 @@ def filter_by_length(lengths: List[int], min_len: int, max_len: int): return ignore_idx, keep_idx @staticmethod - def sort_by_length(samples: List[List]): + def sort_by_length(samples: list[list]): audio_lengths = [s["audio_length"] for s in samples] - idxs = np.argsort(audio_lengths) # ascending order - return idxs + return np.argsort(audio_lengths) # ascending order @staticmethod def create_buckets(samples, batch_group_size: int): @@ -362,7 +371,7 @@ def _select_samples_by_idx(idxs, samples): samples_new.append(samples[idx]) return samples_new - def preprocess_samples(self): + def preprocess_samples(self) -> None: r"""Sort `items` based on text length or audio length in ascending order. Filter out samples out or the length range. """ @@ -388,7 +397,8 @@ def preprocess_samples(self): samples = self._select_samples_by_idx(sorted_idxs, samples) if len(samples) == 0: - raise RuntimeError(" [!] No samples left") + msg = "No samples left." + raise RuntimeError(msg) # shuffle batch groups # create batches with similar length items @@ -402,36 +412,37 @@ def preprocess_samples(self): self.samples = samples logger.info("Preprocessing samples") - logger.info("Max text length: {}".format(np.max(text_lengths))) - logger.info("Min text length: {}".format(np.min(text_lengths))) - logger.info("Avg text length: {}".format(np.mean(text_lengths))) - logger.info("Max audio length: {}".format(np.max(audio_lengths))) - logger.info("Min audio length: {}".format(np.min(audio_lengths))) - logger.info("Avg audio length: {}".format(np.mean(audio_lengths))) + logger.info(f"Max text length: {np.max(text_lengths)}") + logger.info(f"Min text length: {np.min(text_lengths)}") + logger.info(f"Avg text length: {np.mean(text_lengths)}") + logger.info(f"Max audio length: {np.max(audio_lengths)}") + logger.info(f"Min audio length: {np.min(audio_lengths)}") + logger.info(f"Avg audio length: {np.mean(audio_lengths)}") logger.info("Num. instances discarded samples: %d", len(ignore_idx)) - logger.info("Batch group size: {}.".format(self.batch_group_size)) + logger.info(f"Batch group size: {self.batch_group_size}.") @staticmethod def _sort_batch(batch, text_lengths): """Sort the batch by the input text length for RNN efficiency. Args: + ---- batch (Dict): Batch returned by `__getitem__`. 
text_lengths (List[int]): Lengths of the input character sequences. + """ text_lengths, ids_sorted_decreasing = torch.sort(torch.LongTensor(text_lengths), dim=0, descending=True) batch = [batch[idx] for idx in ids_sorted_decreasing] return batch, text_lengths, ids_sorted_decreasing def collate_fn(self, batch): - r""" - Perform preprocessing and create a final data batch: + """Perform preprocessing and create a final data batch. + 1. Sort batch instances by text-length 2. Convert Audio signal to features. 3. PAD sequences wrt r. 4. Load to Torch. """ - # Puts each data field into a tensor with outer dimension batch size if isinstance(batch[0], collections.abc.Mapping): token_ids_lengths = np.array([len(d["token_ids"]) for d in batch]) @@ -576,23 +587,18 @@ def collate_fn(self, batch): "audio_unique_names": batch["audio_unique_name"], } - raise TypeError( - ( - "batch must contain tensors, numbers, dicts or lists;\ - found {}".format( - type(batch[0]) - ) - ) - ) + msg = f"batch must contain tensors, numbers, dicts or lists; found {type(batch[0])}" + raise TypeError(msg) class PhonemeDataset(Dataset): - """Phoneme Dataset for converting input text to phonemes and then token IDs + """Phoneme Dataset for converting input text to phonemes and then token IDs. At initialization, it pre-computes the phonemes under `cache_path` and loads them in training to reduce data loading latency. If `cache_path` is already present, it skips the pre-computation. Args: + ---- samples (Union[List[List], List[Dict]]): List of samples. Each sample is a list or a dict. @@ -604,15 +610,16 @@ class PhonemeDataset(Dataset): precompute_num_workers (int): Number of workers used for pre-computing the phonemes. Defaults to 0. + """ def __init__( self, - samples: Union[List[Dict], List[List]], + samples: Union[list[dict], list[list]], tokenizer: "TTSTokenizer", cache_path: str, - precompute_num_workers=0, - ): + precompute_num_workers: int = 0, + ) -> None: self.samples = samples self.tokenizer = tokenizer self.cache_path = cache_path @@ -620,16 +627,16 @@ def __init__( os.makedirs(cache_path) self.precompute(precompute_num_workers) - def __getitem__(self, index): + def __getitem__(self, index) -> dict[str, Any]: item = self.samples[index] ids = self.compute_or_load(string2filename(item["audio_unique_name"]), item["text"], item["language"]) ph_hat = self.tokenizer.ids_to_text(ids) return {"text": item["text"], "ph_hat": ph_hat, "token_ids": ids, "token_ids_len": len(ids)} - def __len__(self): + def __len__(self) -> int: return len(self.samples) - def compute_or_load(self, file_name, text, language): + def compute_or_load(self, file_name: str, text: str, language: str) -> list[int]: """Compute phonemes for the given text. If the phonemes are already cached, load them from cache. @@ -643,11 +650,11 @@ def compute_or_load(self, file_name, text, language): np.save(cache_path, ids) return ids - def get_pad_id(self): - """Get pad token ID for sequence padding""" + def get_pad_id(self) -> int: + """Get pad token ID for sequence padding.""" return self.tokenizer.pad_id - def precompute(self, num_workers=1): + def precompute(self, num_workers: int = 1) -> None: """Precompute phonemes for all samples. We use pytorch dataloader because we are lazy. 
@@ -656,7 +663,11 @@ def precompute(self, num_workers=1): with tqdm.tqdm(total=len(self)) as pbar: batch_size = num_workers if num_workers > 0 else 1 dataloder = torch.utils.data.DataLoader( - batch_size=batch_size, dataset=self, shuffle=False, num_workers=num_workers, collate_fn=self.collate_fn + batch_size=batch_size, + dataset=self, + shuffle=False, + num_workers=num_workers, + collate_fn=self.collate_fn, ) for _ in dataloder: pbar.update(batch_size) @@ -681,12 +692,13 @@ def print_logs(self, level: int = 0) -> None: class F0Dataset: - """F0 Dataset for computing F0 from wav files in CPU + """F0 Dataset for computing F0 from wav files in CPU. Pre-compute F0 values for all the samples at initialization if `cache_path` is not None or already present. It also computes the mean and std of F0 values if `normalize_f0` is True. Args: + ---- samples (Union[List[List], List[Dict]]): List of samples. Each sample is a list or a dict. @@ -702,17 +714,18 @@ class F0Dataset: normalize_f0 (bool): Whether to normalize F0 values by mean and std. Defaults to True. + """ def __init__( self, - samples: Union[List[List], List[Dict]], + samples: Union[list[list], list[dict]], ap: "AudioProcessor", audio_config=None, # pylint: disable=unused-argument - cache_path: str = None, - precompute_num_workers=0, - normalize_f0=True, - ): + cache_path: Optional[str] = None, + precompute_num_workers: int = 0, + normalize_f0: bool = True, + ) -> None: self.samples = samples self.ap = ap self.cache_path = cache_path @@ -734,10 +747,10 @@ def __getitem__(self, idx): f0 = self.normalize(f0) return {"audio_unique_name": item["audio_unique_name"], "f0": f0} - def __len__(self): + def __len__(self) -> int: return len(self.samples) - def precompute(self, num_workers=0): + def precompute(self, num_workers: int = 0) -> None: logger.info("Pre-computing F0s...") with tqdm.tqdm(total=len(self)) as pbar: batch_size = num_workers if num_workers > 0 else 1 @@ -745,7 +758,11 @@ def precompute(self, num_workers=0): normalize_f0 = self.normalize_f0 self.normalize_f0 = False dataloder = torch.utils.data.DataLoader( - batch_size=batch_size, dataset=self, shuffle=False, num_workers=num_workers, collate_fn=self.collate_fn + batch_size=batch_size, + dataset=self, + shuffle=False, + num_workers=num_workers, + collate_fn=self.collate_fn, ) computed_data = [] for batch in dataloder: @@ -764,9 +781,8 @@ def get_pad_id(self): return self.pad_id @staticmethod - def create_pitch_file_path(file_name, cache_path): - pitch_file = os.path.join(cache_path, file_name + "_pitch.npy") - return pitch_file + def create_pitch_file_path(file_name: str, cache_path: str) -> str: + return os.path.join(cache_path, file_name + "_pitch.npy") @staticmethod def _compute_and_save_pitch(ap, wav_file, pitch_file=None): @@ -782,7 +798,7 @@ def compute_pitch_stats(pitch_vecs): mean, std = np.mean(nonzeros), np.std(nonzeros) return mean, std - def load_stats(self, cache_path): + def load_stats(self, cache_path) -> None: stats_path = os.path.join(cache_path, "pitch_stats.npy") stats = np.load(stats_path, allow_pickle=True).item() self.mean = stats["mean"].astype(np.float32) @@ -803,9 +819,7 @@ def denormalize(self, pitch): return pitch def compute_or_load(self, wav_file, audio_unique_name): - """ - compute pitch and return a numpy array of pitch values - """ + """Compute pitch and return a numpy array of pitch values.""" pitch_file = self.create_pitch_file_path(audio_unique_name, self.cache_path) if not os.path.exists(pitch_file): pitch = self._compute_and_save_pitch(self.ap, 
wav_file, pitch_file) @@ -830,12 +844,13 @@ def print_logs(self, level: int = 0) -> None: class EnergyDataset: - """Energy Dataset for computing Energy from wav files in CPU + """Energy Dataset for computing Energy from wav files in CPU. Pre-compute Energy values for all the samples at initialization if `cache_path` is not None or already present. It also computes the mean and std of Energy values if `normalize_Energy` is True. Args: + ---- samples (Union[List[List], List[Dict]]): List of samples. Each sample is a list or a dict. @@ -851,16 +866,17 @@ class EnergyDataset: normalize_Energy (bool): Whether to normalize Energy values by mean and std. Defaults to True. + """ def __init__( self, - samples: Union[List[List], List[Dict]], + samples: Union[list[list], list[dict]], ap: "AudioProcessor", - cache_path: str = None, + cache_path: Optional[str] = None, precompute_num_workers=0, normalize_energy=True, - ): + ) -> None: self.samples = samples self.ap = ap self.cache_path = cache_path @@ -882,10 +898,10 @@ def __getitem__(self, idx): energy = self.normalize(energy) return {"audio_unique_name": item["audio_unique_name"], "energy": energy} - def __len__(self): + def __len__(self) -> int: return len(self.samples) - def precompute(self, num_workers=0): + def precompute(self, num_workers=0) -> None: logger.info("Pre-computing energys...") with tqdm.tqdm(total=len(self)) as pbar: batch_size = num_workers if num_workers > 0 else 1 @@ -893,7 +909,11 @@ def precompute(self, num_workers=0): normalize_energy = self.normalize_energy self.normalize_energy = False dataloder = torch.utils.data.DataLoader( - batch_size=batch_size, dataset=self, shuffle=False, num_workers=num_workers, collate_fn=self.collate_fn + batch_size=batch_size, + dataset=self, + shuffle=False, + num_workers=num_workers, + collate_fn=self.collate_fn, ) computed_data = [] for batch in dataloder: @@ -914,8 +934,7 @@ def get_pad_id(self): @staticmethod def create_energy_file_path(wav_file, cache_path): file_name = os.path.splitext(os.path.basename(wav_file))[0] - energy_file = os.path.join(cache_path, file_name + "_energy.npy") - return energy_file + return os.path.join(cache_path, file_name + "_energy.npy") @staticmethod def _compute_and_save_energy(ap, wav_file, energy_file=None): @@ -931,7 +950,7 @@ def compute_energy_stats(energy_vecs): mean, std = np.mean(nonzeros), np.std(nonzeros) return mean, std - def load_stats(self, cache_path): + def load_stats(self, cache_path) -> None: stats_path = os.path.join(cache_path, "energy_stats.npy") stats = np.load(stats_path, allow_pickle=True).item() self.mean = stats["mean"].astype(np.float32) @@ -952,9 +971,7 @@ def denormalize(self, energy): return energy def compute_or_load(self, wav_file, audio_unique_name): - """ - compute energy and return a numpy array of energy values - """ + """Compute energy and return a numpy array of energy values.""" energy_file = self.create_energy_file_path(audio_unique_name, self.cache_path) if not os.path.exists(energy_file): energy = self._compute_and_save_energy(self.ap, wav_file, energy_file) From d304ab2769a35c25ff6638d945053fc63bd46e57 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 3 Jul 2024 19:28:47 +0200 Subject: [PATCH 144/255] build: update gruut version for numpy2 support --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a8c52fc176..570dd1a4b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,7 @@ dependencies = [ "coqui-tts-trainer>=0.1.4", 
"coqpit>=0.0.16", # Gruut + supported languages - "gruut[de,es,fr]==2.2.3", + "gruut[de,es,fr]>=2.4.0", # Tortoise "einops>=0.6.0", "transformers>=4.42.0", From b1558b06d75bb73b4e02518ef8d6a5841eeba5bb Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 3 Jul 2024 19:41:56 +0200 Subject: [PATCH 145/255] build: require numpy<2 because spacy/thinc lack support --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 570dd1a4b9..539556902d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ classifiers = [ ] dependencies = [ # Core - "numpy>=1.24.3", + "numpy>=1.24.3,<2.0.0", # TODO: remove upper bound after spacy/thinc release "cython>=0.29.30", "scipy>=1.11.2", "torch>=2.1", From 7014782ad4fedea1d5097bb3010fe77c3a386267 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 5 Aug 2024 10:28:03 +0200 Subject: [PATCH 146/255] build: add upper bound for transformers 4.43.* broke XTTS streaming again --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 539556902d..94ed3a2c36 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,7 @@ dependencies = [ "gruut[de,es,fr]>=2.4.0", # Tortoise "einops>=0.6.0", - "transformers>=4.42.0", + "transformers>=4.42.0,<4.43.0", # Bark "encodec>=0.1.1", # XTTS From 233dfb54aef870985c737e52ab6599370eb943a4 Mon Sep 17 00:00:00 2001 From: Azalea <22280294+hykilpikonna@users.noreply.github.com> Date: Sun, 25 Aug 2024 06:27:27 -0400 Subject: [PATCH 147/255] docs(tacotron): fix wrong paper links (#74) --- docs/source/models/tacotron1-2.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/models/tacotron1-2.md b/docs/source/models/tacotron1-2.md index f35cfeca4c..285d4f3c55 100644 --- a/docs/source/models/tacotron1-2.md +++ b/docs/source/models/tacotron1-2.md @@ -20,8 +20,8 @@ If you have a limited VRAM, then you can try using the Guided Attention Loss or ## Important resources & papers -- Tacotron: https://arxiv.org/abs/2006.06873 -- Tacotron2: https://arxiv.org/abs/2008.03802 +- Tacotron: [Tacotron: Towards End-to-End Speech Synthesis](https://arxiv.org/abs/1703.10135) +- Tacotron2: [Natural TTS Synthesis by Conditioning WaveNet on Mel Spectrogram Predictions](https://arxiv.org/abs/1712.05884) - Double Decoder Consistency: https://coqui.ai/blog/tts/solving-attention-problems-of-tts-models-with-double-decoder-consistency - Guided Attention Loss: https://arxiv.org/abs/1710.08969 - Forward & Backward Decoder: https://arxiv.org/abs/1907.09006 From 192032882272945bdeb1253b140074ec2bce7737 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 12 Sep 2024 20:29:21 +0100 Subject: [PATCH 148/255] feat(xtts): support hindi in tokenizer (#64) Added proper tokenizer support for Hindi Language which would prevent crash while fine tuning Hindi language. Co-authored-by: Akshat Bhardwaj <157223825+akshatrocky@users.noreply.github.com> --- TTS/tts/layers/xtts/tokenizer.py | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py index 5e701c085f..e87eb0766b 100644 --- a/TTS/tts/layers/xtts/tokenizer.py +++ b/TTS/tts/layers/xtts/tokenizer.py @@ -233,6 +233,12 @@ def split_sentence(text, lang, text_split_length=250): # Korean doesn't typically use abbreviations in the same way as Latin-based scripts. ] ], + "hi": [ + (re.compile("\\b%s\\." 
% x[0], re.IGNORECASE), x[1]) + for x in [ + # Hindi doesn't typically use abbreviations in the same way as Latin-based scripts. + ] + ], } @@ -429,6 +435,18 @@ def expand_abbreviations_multilingual(text, lang="en"): ("°", " 도 "), ] ], + "hi": [ + (re.compile(r"%s" % re.escape(x[0]), re.IGNORECASE), x[1]) + for x in [ + ("&", " और "), + ("@", " ऐट दी रेट "), + ("%", " प्रतिशत "), + ("#", " हैश "), + ("$", " डॉलर "), + ("£", " पाउंड "), + ("°", " डिग्री "), + ] + ], } @@ -454,6 +472,7 @@ def expand_symbols_multilingual(text, lang="en"): "tr": re.compile(r"([0-9]+)(\.|inci|nci|uncu|üncü|\.)"), "hu": re.compile(r"([0-9]+)(\.|adik|edik|odik|edik|ödik|ödike|ik)"), "ko": re.compile(r"([0-9]+)(번째|번|차|째)"), + "hi": re.compile(r"([0-9]+)(st|nd|rd|th)"), # To check } _number_re = re.compile(r"[0-9]+") _currency_re = { @@ -505,6 +524,7 @@ def _expand_currency(m, lang="en", currency="USD"): "tr": ", ", "hu": ", ", "ko": ", ", + "hi": ", ", } if amount.is_integer(): @@ -644,7 +664,7 @@ def check_input_length(self, txt, lang): ) def preprocess_text(self, txt, lang): - if lang in {"ar", "cs", "de", "en", "es", "fr", "hu", "it", "nl", "pl", "pt", "ru", "tr", "zh", "ko"}: + if lang in {"ar", "cs", "de", "en", "es", "fr", "hi", "hu", "it", "nl", "pl", "pt", "ru", "tr", "zh", "ko"}: txt = multilingual_cleaners(txt, lang) if lang == "zh": txt = chinese_transliterate(txt) @@ -652,9 +672,6 @@ def preprocess_text(self, txt, lang): txt = korean_transliterate(txt) elif lang == "ja": txt = japanese_cleaners(txt, self.katsu) - elif lang == "hi": - # @manmay will implement this - txt = basic_cleaners(txt) else: raise NotImplementedError(f"Language '{lang}' is not supported.") return txt @@ -777,6 +794,9 @@ def test_expand_numbers_multilingual(): ("12.5 초 안에.", "십이 점 다섯 초 안에.", "ko"), ("50 명의 병사가 있었다.", "오십 명의 병사가 있었다.", "ko"), ("이것은 1 번째 테스트입니다", "이것은 첫 번째 테스트입니다", "ko"), + # Hindi + ("12.5 सेकंड में।", "साढ़े बारह सेकंड में।", "hi"), + ("50 सैनिक थे।", "पचास सैनिक थे।", "hi"), ] for a, b, lang in test_cases: out = expand_numbers_multilingual(a, lang=lang) @@ -846,6 +866,7 @@ def test_symbols_multilingual(): ("Pilim %14 dolu.", "Pilim yüzde 14 dolu.", "tr"), ("Az akkumulátorom töltöttsége 14%", "Az akkumulátorom töltöttsége 14 százalék", "hu"), ("배터리 잔량이 14%입니다.", "배터리 잔량이 14 퍼센트입니다.", "ko"), + ("मेरे पास 14% बैटरी है।", "मेरे पास चौदह प्रतिशत बैटरी है।", "hi"), ] for a, b, lang in test_cases: From 17ca24c3d6178c01a59acf672c7cda68da8b4823 Mon Sep 17 00:00:00 2001 From: shavit Date: Sat, 31 Aug 2024 06:15:24 -0400 Subject: [PATCH 149/255] fix: load weights only in torch.load --- TTS/tts/layers/bark/load_model.py | 2 +- TTS/tts/layers/tortoise/arch_utils.py | 2 +- TTS/tts/layers/tortoise/audio_utils.py | 2 +- TTS/tts/layers/xtts/dvae.py | 2 +- TTS/tts/layers/xtts/hifigan_decoder.py | 2 +- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 4 ++-- TTS/tts/layers/xtts/xtts_manager.py | 2 +- TTS/tts/models/neuralhmm_tts.py | 4 ++-- TTS/tts/models/overflow.py | 4 ++-- TTS/tts/models/tortoise.py | 13 +++++++++---- TTS/tts/models/xtts.py | 2 +- TTS/tts/utils/fairseq.py | 2 +- TTS/tts/utils/managers.py | 2 +- TTS/vc/modules/freevc/wavlm/__init__.py | 2 +- notebooks/TestAttention.ipynb | 4 ++-- 15 files changed, 27 insertions(+), 22 deletions(-) diff --git a/TTS/tts/layers/bark/load_model.py b/TTS/tts/layers/bark/load_model.py index ce6b757f05..7785aab845 100644 --- a/TTS/tts/layers/bark/load_model.py +++ b/TTS/tts/layers/bark/load_model.py @@ -118,7 +118,7 @@ def load_model(ckpt_path, device, config, model_type="text"): 
logger.info(f"{model_type} model not found, downloading...") _download(config.REMOTE_MODEL_PATHS[model_type]["path"], ckpt_path, config.CACHE_DIR) - checkpoint = torch.load(ckpt_path, map_location=device) + checkpoint = torch.load(ckpt_path, map_location=device, weights_only=True) # this is a hack model_args = checkpoint["model_args"] if "input_vocab_size" not in model_args: diff --git a/TTS/tts/layers/tortoise/arch_utils.py b/TTS/tts/layers/tortoise/arch_utils.py index c79ef31b0c..f4dbcc8054 100644 --- a/TTS/tts/layers/tortoise/arch_utils.py +++ b/TTS/tts/layers/tortoise/arch_utils.py @@ -332,7 +332,7 @@ def __init__( self.mel_norm_file = mel_norm_file if self.mel_norm_file is not None: with fsspec.open(self.mel_norm_file) as f: - self.mel_norms = torch.load(f) + self.mel_norms = torch.load(f, weights_only=True) else: self.mel_norms = None diff --git a/TTS/tts/layers/tortoise/audio_utils.py b/TTS/tts/layers/tortoise/audio_utils.py index 0b8701227b..94c2bae6fa 100644 --- a/TTS/tts/layers/tortoise/audio_utils.py +++ b/TTS/tts/layers/tortoise/audio_utils.py @@ -124,7 +124,7 @@ def load_voice(voice: str, extra_voice_dirs: List[str] = []): voices = get_voices(extra_voice_dirs) paths = voices[voice] if len(paths) == 1 and paths[0].endswith(".pth"): - return None, torch.load(paths[0]) + return None, torch.load(paths[0], weights_only=True) else: conds = [] for cond_path in paths: diff --git a/TTS/tts/layers/xtts/dvae.py b/TTS/tts/layers/xtts/dvae.py index 4a37307e74..58f91785a1 100644 --- a/TTS/tts/layers/xtts/dvae.py +++ b/TTS/tts/layers/xtts/dvae.py @@ -46,7 +46,7 @@ def dvae_wav_to_mel( mel = mel_stft(wav) mel = torch.log(torch.clamp(mel, min=1e-5)) if mel_norms is None: - mel_norms = torch.load(mel_norms_file, map_location=device) + mel_norms = torch.load(mel_norms_file, map_location=device, weights_only=True) mel = mel / mel_norms.unsqueeze(0).unsqueeze(-1) return mel diff --git a/TTS/tts/layers/xtts/hifigan_decoder.py b/TTS/tts/layers/xtts/hifigan_decoder.py index b6032e5584..09bd06dfde 100644 --- a/TTS/tts/layers/xtts/hifigan_decoder.py +++ b/TTS/tts/layers/xtts/hifigan_decoder.py @@ -328,7 +328,7 @@ def remove_weight_norm(self): def load_checkpoint( self, config, checkpoint_path, eval=False, cache=False ): # pylint: disable=unused-argument, redefined-builtin - state = torch.load(checkpoint_path, map_location=torch.device("cpu")) + state = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True) self.load_state_dict(state["model"]) if eval: self.eval() diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index 04d123778b..f1aa6f8cd0 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -91,7 +91,7 @@ def __init__(self, config: Coqpit): # load GPT if available if self.args.gpt_checkpoint: - gpt_checkpoint = torch.load(self.args.gpt_checkpoint, map_location=torch.device("cpu")) + gpt_checkpoint = torch.load(self.args.gpt_checkpoint, map_location=torch.device("cpu"), weights_only=True) # deal with coqui Trainer exported model if "model" in gpt_checkpoint.keys() and "config" in gpt_checkpoint.keys(): logger.info("Coqui Trainer checkpoint detected! 
Converting it!") @@ -184,7 +184,7 @@ def __init__(self, config: Coqpit): self.dvae.eval() if self.args.dvae_checkpoint: - dvae_checkpoint = torch.load(self.args.dvae_checkpoint, map_location=torch.device("cpu")) + dvae_checkpoint = torch.load(self.args.dvae_checkpoint, map_location=torch.device("cpu"), weights_only=True) self.dvae.load_state_dict(dvae_checkpoint, strict=False) logger.info("DVAE weights restored from: %s", self.args.dvae_checkpoint) else: diff --git a/TTS/tts/layers/xtts/xtts_manager.py b/TTS/tts/layers/xtts/xtts_manager.py index 5560e87687..5a3c47aead 100644 --- a/TTS/tts/layers/xtts/xtts_manager.py +++ b/TTS/tts/layers/xtts/xtts_manager.py @@ -3,7 +3,7 @@ class SpeakerManager: def __init__(self, speaker_file_path=None): - self.speakers = torch.load(speaker_file_path) + self.speakers = torch.load(speaker_file_path, weights_only=True) @property def name_to_id(self): diff --git a/TTS/tts/models/neuralhmm_tts.py b/TTS/tts/models/neuralhmm_tts.py index 277369e644..49c48c2bd4 100644 --- a/TTS/tts/models/neuralhmm_tts.py +++ b/TTS/tts/models/neuralhmm_tts.py @@ -107,7 +107,7 @@ def update_mean_std(self, statistics_dict: Dict): def preprocess_batch(self, text, text_len, mels, mel_len): if self.mean.item() == 0 or self.std.item() == 1: - statistics_dict = torch.load(self.mel_statistics_parameter_path) + statistics_dict = torch.load(self.mel_statistics_parameter_path, weights_only=True) self.update_mean_std(statistics_dict) mels = self.normalize(mels) @@ -292,7 +292,7 @@ def on_init_start(self, trainer): "Data parameters found for: %s. Loading mel normalization parameters...", trainer.config.mel_statistics_parameter_path, ) - statistics = torch.load(trainer.config.mel_statistics_parameter_path) + statistics = torch.load(trainer.config.mel_statistics_parameter_path, weights_only=True) data_mean, data_std, init_transition_prob = ( statistics["mean"], statistics["std"], diff --git a/TTS/tts/models/overflow.py b/TTS/tts/models/overflow.py index b05b75009b..4c0f341be3 100644 --- a/TTS/tts/models/overflow.py +++ b/TTS/tts/models/overflow.py @@ -120,7 +120,7 @@ def update_mean_std(self, statistics_dict: Dict): def preprocess_batch(self, text, text_len, mels, mel_len): if self.mean.item() == 0 or self.std.item() == 1: - statistics_dict = torch.load(self.mel_statistics_parameter_path) + statistics_dict = torch.load(self.mel_statistics_parameter_path, weights_only=True) self.update_mean_std(statistics_dict) mels = self.normalize(mels) @@ -308,7 +308,7 @@ def on_init_start(self, trainer): "Data parameters found for: %s. 
Loading mel normalization parameters...", trainer.config.mel_statistics_parameter_path, ) - statistics = torch.load(trainer.config.mel_statistics_parameter_path) + statistics = torch.load(trainer.config.mel_statistics_parameter_path, weights_only=True) data_mean, data_std, init_transition_prob = ( statistics["mean"], statistics["std"], diff --git a/TTS/tts/models/tortoise.py b/TTS/tts/models/tortoise.py index 17303c69f7..98e79d0cf1 100644 --- a/TTS/tts/models/tortoise.py +++ b/TTS/tts/models/tortoise.py @@ -170,7 +170,9 @@ def classify_audio_clip(clip, model_dir): kernel_size=5, distribute_zero_label=False, ) - classifier.load_state_dict(torch.load(os.path.join(model_dir, "classifier.pth"), map_location=torch.device("cpu"))) + classifier.load_state_dict( + torch.load(os.path.join(model_dir, "classifier.pth"), map_location=torch.device("cpu"), weights_only=True) + ) clip = clip.cpu().unsqueeze(0) results = F.softmax(classifier(clip), dim=-1) return results[0][0] @@ -488,6 +490,7 @@ def get_random_conditioning_latents(self): torch.load( os.path.join(self.models_dir, "rlg_auto.pth"), map_location=torch.device("cpu"), + weights_only=True, ) ) self.rlg_diffusion = RandomLatentConverter(2048).eval() @@ -495,6 +498,7 @@ def get_random_conditioning_latents(self): torch.load( os.path.join(self.models_dir, "rlg_diffuser.pth"), map_location=torch.device("cpu"), + weights_only=True, ) ) with torch.no_grad(): @@ -881,17 +885,17 @@ def load_checkpoint( if os.path.exists(ar_path): # remove keys from the checkpoint that are not in the model - checkpoint = torch.load(ar_path, map_location=torch.device("cpu")) + checkpoint = torch.load(ar_path, map_location=torch.device("cpu"), weights_only=True) # strict set False # due to removed `bias` and `masked_bias` changes in Transformers self.autoregressive.load_state_dict(checkpoint, strict=False) if os.path.exists(diff_path): - self.diffusion.load_state_dict(torch.load(diff_path), strict=strict) + self.diffusion.load_state_dict(torch.load(diff_path, weights_only=True), strict=strict) if os.path.exists(clvp_path): - self.clvp.load_state_dict(torch.load(clvp_path), strict=strict) + self.clvp.load_state_dict(torch.load(clvp_path, weights_only=True), strict=strict) if os.path.exists(vocoder_checkpoint_path): self.vocoder.load_state_dict( @@ -899,6 +903,7 @@ def load_checkpoint( torch.load( vocoder_checkpoint_path, map_location=torch.device("cpu"), + weights_only=True, ) ) ) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index ef09344217..0b7652e450 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -65,7 +65,7 @@ def wav_to_mel_cloning( mel = mel_stft(wav) mel = torch.log(torch.clamp(mel, min=1e-5)) if mel_norms is None: - mel_norms = torch.load(mel_norms_file, map_location=device) + mel_norms = torch.load(mel_norms_file, map_location=device, weights_only=True) mel = mel / mel_norms.unsqueeze(0).unsqueeze(-1) return mel diff --git a/TTS/tts/utils/fairseq.py b/TTS/tts/utils/fairseq.py index 3d8eec2b4e..6eb1905d96 100644 --- a/TTS/tts/utils/fairseq.py +++ b/TTS/tts/utils/fairseq.py @@ -2,7 +2,7 @@ def rehash_fairseq_vits_checkpoint(checkpoint_file): - chk = torch.load(checkpoint_file, map_location=torch.device("cpu"))["model"] + chk = torch.load(checkpoint_file, map_location=torch.device("cpu"), weights_only=True)["model"] new_chk = {} for k, v in chk.items(): if "enc_p." 
in k: diff --git a/TTS/tts/utils/managers.py b/TTS/tts/utils/managers.py index 23aa52a8a2..6f72581c08 100644 --- a/TTS/tts/utils/managers.py +++ b/TTS/tts/utils/managers.py @@ -17,7 +17,7 @@ def load_file(path: str): return json.load(f) elif path.endswith(".pth"): with fsspec.open(path, "rb") as f: - return torch.load(f, map_location="cpu") + return torch.load(f, map_location="cpu", weights_only=True) else: raise ValueError("Unsupported file type") diff --git a/TTS/vc/modules/freevc/wavlm/__init__.py b/TTS/vc/modules/freevc/wavlm/__init__.py index 03b2f5827b..528fade772 100644 --- a/TTS/vc/modules/freevc/wavlm/__init__.py +++ b/TTS/vc/modules/freevc/wavlm/__init__.py @@ -26,7 +26,7 @@ def get_wavlm(device="cpu"): logger.info("Downloading WavLM model to %s ...", output_path) urllib.request.urlretrieve(model_uri, output_path) - checkpoint = torch.load(output_path, map_location=torch.device(device)) + checkpoint = torch.load(output_path, map_location=torch.device(device), weights_only=True) cfg = WavLMConfig(checkpoint["cfg"]) wavlm = WavLM(cfg).to(device) wavlm.load_state_dict(checkpoint["model"]) diff --git a/notebooks/TestAttention.ipynb b/notebooks/TestAttention.ipynb index d85ca1035a..f52fa028e5 100644 --- a/notebooks/TestAttention.ipynb +++ b/notebooks/TestAttention.ipynb @@ -119,9 +119,9 @@ "\n", "# load model state\n", "if use_cuda:\n", - " cp = torch.load(MODEL_PATH)\n", + " cp = torch.load(MODEL_PATH, weights_only=True)\n", "else:\n", - " cp = torch.load(MODEL_PATH, map_location=lambda storage, loc: storage)\n", + " cp = torch.load(MODEL_PATH, map_location=lambda storage, loc: storage, weights_only=True)\n", "\n", "# load the model\n", "model.load_state_dict(cp['model'])\n", From 86b58fb6d992a2250313481580be4a3eab4fa28f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 12 Sep 2024 17:04:10 +0200 Subject: [PATCH 150/255] fix: define torch safe globals for torch.load Required for loading some models using torch.load(..., weights_only=True). 
This is only available from Pytorch 2.4 --- TTS/__init__.py | 26 ++++++++++++++++++++++++++ TTS/utils/synthesizer.py | 3 --- pyproject.toml | 4 ++-- 3 files changed, 28 insertions(+), 5 deletions(-) diff --git a/TTS/__init__.py b/TTS/__init__.py index 9e87bca4be..64c7369bc0 100644 --- a/TTS/__init__.py +++ b/TTS/__init__.py @@ -1,3 +1,29 @@ +import _codecs import importlib.metadata +from collections import defaultdict + +import numpy as np +import torch + +from TTS.config.shared_configs import BaseDatasetConfig +from TTS.tts.configs.xtts_config import XttsConfig +from TTS.tts.models.xtts import XttsArgs, XttsAudioConfig +from TTS.utils.radam import RAdam __version__ = importlib.metadata.version("coqui-tts") + + +torch.serialization.add_safe_globals([dict, defaultdict, RAdam]) + +# Bark +torch.serialization.add_safe_globals( + [ + np.core.multiarray.scalar, + np.dtype, + np.dtypes.Float64DType, + _codecs.encode, # TODO: safe by default from Pytorch 2.5 + ] +) + +# XTTS +torch.serialization.add_safe_globals([BaseDatasetConfig, XttsConfig, XttsAudioConfig, XttsArgs]) diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index 50a7893047..90af4f48f9 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -12,9 +12,6 @@ from TTS.tts.configs.vits_config import VitsConfig from TTS.tts.models import setup_model as setup_tts_model from TTS.tts.models.vits import Vits - -# pylint: disable=unused-wildcard-import -# pylint: disable=wildcard-import from TTS.tts.utils.synthesis import synthesis, transfer_voice, trim_silence from TTS.utils.audio import AudioProcessor from TTS.utils.audio.numpy_transforms import save_wav diff --git a/pyproject.toml b/pyproject.toml index 94ed3a2c36..371d0b10dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,10 +44,10 @@ classifiers = [ ] dependencies = [ # Core - "numpy>=1.24.3,<2.0.0", # TODO: remove upper bound after spacy/thinc release + "numpy>=1.25.2,<2.0.0", # TODO: remove upper bound after spacy/thinc release "cython>=0.29.30", "scipy>=1.11.2", - "torch>=2.1", + "torch>=2.4", "torchaudio", "soundfile>=0.12.0", "librosa>=0.10.1", From 659b4852ba87ad21185b67691eddedbeb47316c8 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 12 Sep 2024 20:52:07 +0200 Subject: [PATCH 151/255] chore(bark): remove manual download of hubert model Bark was previously adapted to download Hubert from HuggingFace, so the manual download is superfluous. 
--- TTS/.models.json | 1 - TTS/tts/configs/bark_config.py | 1 - TTS/tts/layers/bark/hubert/kmeans_hubert.py | 2 +- TTS/tts/layers/bark/inference_funcs.py | 3 +-- TTS/tts/models/bark.py | 3 --- 5 files changed, 2 insertions(+), 8 deletions(-) diff --git a/TTS/.models.json b/TTS/.models.json index a77ebea1cf..a5add6e34f 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -48,7 +48,6 @@ "https://coqui.gateway.scarf.sh/hf/bark/fine_2.pt", "https://coqui.gateway.scarf.sh/hf/bark/text_2.pt", "https://coqui.gateway.scarf.sh/hf/bark/config.json", - "https://coqui.gateway.scarf.sh/hf/bark/hubert.pt", "https://coqui.gateway.scarf.sh/hf/bark/tokenizer.pth" ], "default_vocoder": null, diff --git a/TTS/tts/configs/bark_config.py b/TTS/tts/configs/bark_config.py index 3b893558aa..b846febe85 100644 --- a/TTS/tts/configs/bark_config.py +++ b/TTS/tts/configs/bark_config.py @@ -96,7 +96,6 @@ def __post_init__(self): "coarse": os.path.join(self.CACHE_DIR, "coarse_2.pt"), "fine": os.path.join(self.CACHE_DIR, "fine_2.pt"), "hubert_tokenizer": os.path.join(self.CACHE_DIR, "tokenizer.pth"), - "hubert": os.path.join(self.CACHE_DIR, "hubert.pt"), } self.SMALL_REMOTE_MODEL_PATHS = { "text": {"path": os.path.join(self.REMOTE_BASE_URL, "text.pt")}, diff --git a/TTS/tts/layers/bark/hubert/kmeans_hubert.py b/TTS/tts/layers/bark/hubert/kmeans_hubert.py index 9e487b1e9d..58a614cb87 100644 --- a/TTS/tts/layers/bark/hubert/kmeans_hubert.py +++ b/TTS/tts/layers/bark/hubert/kmeans_hubert.py @@ -40,7 +40,7 @@ class CustomHubert(nn.Module): or you can train your own """ - def __init__(self, checkpoint_path, target_sample_hz=16000, seq_len_multiple_of=None, output_layer=9, device=None): + def __init__(self, target_sample_hz=16000, seq_len_multiple_of=None, output_layer=9, device=None): super().__init__() self.target_sample_hz = target_sample_hz self.seq_len_multiple_of = seq_len_multiple_of diff --git a/TTS/tts/layers/bark/inference_funcs.py b/TTS/tts/layers/bark/inference_funcs.py index b2875c7a83..65c7800dcf 100644 --- a/TTS/tts/layers/bark/inference_funcs.py +++ b/TTS/tts/layers/bark/inference_funcs.py @@ -134,10 +134,9 @@ def generate_voice( # generate semantic tokens # Load the HuBERT model hubert_manager = HubertManager() - # hubert_manager.make_sure_hubert_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert"]) hubert_manager.make_sure_tokenizer_installed(model_path=model.config.LOCAL_MODEL_PATHS["hubert_tokenizer"]) - hubert_model = CustomHubert(checkpoint_path=model.config.LOCAL_MODEL_PATHS["hubert"]).to(model.device) + hubert_model = CustomHubert().to(model.device) # Load the CustomTokenizer model tokenizer = HubertTokenizer.load_from_checkpoint( diff --git a/TTS/tts/models/bark.py b/TTS/tts/models/bark.py index cdfb5efae4..ced8f60ed8 100644 --- a/TTS/tts/models/bark.py +++ b/TTS/tts/models/bark.py @@ -243,7 +243,6 @@ def load_checkpoint( text_model_path=None, coarse_model_path=None, fine_model_path=None, - hubert_model_path=None, hubert_tokenizer_path=None, eval=False, strict=True, @@ -266,13 +265,11 @@ def load_checkpoint( text_model_path = text_model_path or os.path.join(checkpoint_dir, "text_2.pt") coarse_model_path = coarse_model_path or os.path.join(checkpoint_dir, "coarse_2.pt") fine_model_path = fine_model_path or os.path.join(checkpoint_dir, "fine_2.pt") - hubert_model_path = hubert_model_path or os.path.join(checkpoint_dir, "hubert.pt") hubert_tokenizer_path = hubert_tokenizer_path or os.path.join(checkpoint_dir, "tokenizer.pth") self.config.LOCAL_MODEL_PATHS["text"] = text_model_path 
self.config.LOCAL_MODEL_PATHS["coarse"] = coarse_model_path self.config.LOCAL_MODEL_PATHS["fine"] = fine_model_path - self.config.LOCAL_MODEL_PATHS["hubert"] = hubert_model_path self.config.LOCAL_MODEL_PATHS["hubert_tokenizer"] = hubert_tokenizer_path self.load_bark_models() From f5e21489e53789a41379c2834829e31f77c67624 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 12 Sep 2024 22:39:32 +0200 Subject: [PATCH 152/255] ci: explicitly upload hidden files for coverage Due to breaking change in upload-artifact action: actions/upload-artifact#602 --- .github/workflows/tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 88cc8e7949..cdb30ea0e0 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -55,6 +55,7 @@ jobs: - name: Upload coverage data uses: actions/upload-artifact@v4 with: + include-hidden-files: true name: coverage-data-${{ matrix.subset }}-${{ matrix.python-version }} path: .coverage.* if-no-files-found: ignore From 0a1841828613b32ff4187f9263e7cc63d430e70a Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 13 Sep 2024 11:12:14 +0100 Subject: [PATCH 153/255] build: allow numpy2, which should be supported in spacy 3.8 now (#81) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 371d0b10dc..3919d1d0f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ classifiers = [ ] dependencies = [ # Core - "numpy>=1.25.2,<2.0.0", # TODO: remove upper bound after spacy/thinc release + "numpy>=1.25.2", "cython>=0.29.30", "scipy>=1.11.2", "torch>=2.4", From 3e8125c99f6f8182b70d855dd216fd87954db757 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 17 Sep 2024 12:00:39 +0200 Subject: [PATCH 154/255] ci: switch to cibuildwheel --- .github/workflows/pypi-release.yml | 50 +++++++++--------------------- pyproject.toml | 4 +++ 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index efe4bf71d4..822990e967 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -8,6 +8,7 @@ defaults: bash jobs: build-sdist: + name: Build source distribution runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -23,37 +24,31 @@ jobs: with: python-version: 3.9 - run: | - python -m pip install -U pip setuptools wheel build + python -m pip install -U pip setuptools build - run: | python -m build - run: | pip install dist/*.tar.gz - uses: actions/upload-artifact@v4 with: - name: sdist + name: build-sdist path: dist/*.tar.gz build-wheels: - runs-on: ubuntu-latest + name: Build wheels on ${{ matrix.os }} + runs-on: ${{ matrix.os }} strategy: matrix: - python-version: ["3.9", "3.10", "3.11", "3.12"] + os: [ubuntu-latest, windows-latest, macos-latest] steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - name: Install build requirements - run: | - python -m pip install -U pip setuptools wheel build numpy cython - - name: Setup and install manylinux1_x86_64 wheel - run: | - python setup.py bdist_wheel --plat-name=manylinux1_x86_64 - python -m pip install dist/*-manylinux*.whl + - name: Build wheels + uses: pypa/cibuildwheel@v2.21.1 - uses: actions/upload-artifact@v4 with: - name: wheel-${{ matrix.python-version }} - path: dist/*-manylinux*.whl + name: build-wheels-${{ matrix.os }} + path: ./wheelhouse/*.whl publish-artifacts: + name: Publish 
to PyPI runs-on: ubuntu-latest needs: [build-sdist, build-wheels] environment: @@ -62,28 +57,11 @@ jobs: permissions: id-token: write steps: - - run: | - mkdir dist - - uses: actions/download-artifact@v4 - with: - name: "sdist" - path: "dist/" - - uses: actions/download-artifact@v4 - with: - name: "wheel-3.9" - path: "dist/" - - uses: actions/download-artifact@v4 - with: - name: "wheel-3.10" - path: "dist/" - - uses: actions/download-artifact@v4 - with: - name: "wheel-3.11" - path: "dist/" - uses: actions/download-artifact@v4 with: - name: "wheel-3.12" - path: "dist/" + path: dist + pattern: build-* + merge-multiple: true - run: | ls -lh dist/ - name: Publish package distributions to PyPI diff --git a/pyproject.toml b/pyproject.toml index 3919d1d0f3..b9258247f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -205,3 +205,7 @@ target-version = ['py39'] [tool.coverage.run] parallel = true source = ["TTS"] + +[tool.cibuildwheel] +build = "cp*" +skip = "*-win32 *i686 *musllinux*" From 36611a7192cdc55e5471835bda6dae9c0aaf80d3 Mon Sep 17 00:00:00 2001 From: Shavit Date: Wed, 2 Oct 2024 11:01:19 -0400 Subject: [PATCH 155/255] feat: normalize unicode characters in text cleaners (#85) * Add normalizer type C to text cleaners * Linter recommendations * Add unicode normalize to every cleaner * Format test_text_cleaners.py --- TTS/tts/utils/text/cleaners.py | 20 ++++++++++++++++-- tests/text_tests/test_text_cleaners.py | 29 +++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/TTS/tts/utils/text/cleaners.py b/TTS/tts/utils/text/cleaners.py index fc87025f00..f496b9f0dd 100644 --- a/TTS/tts/utils/text/cleaners.py +++ b/TTS/tts/utils/text/cleaners.py @@ -1,9 +1,8 @@ """Set of default text cleaners""" -# TODO: pick the cleaner for languages dynamically - import re from typing import Optional +from unicodedata import normalize from anyascii import anyascii @@ -83,6 +82,7 @@ def replace_symbols(text: str, lang: Optional[str] = "en") -> str: def basic_cleaners(text: str) -> str: """Basic pipeline that lowercases and collapses whitespace without transliteration.""" + text = normalize_unicode(text) text = lowercase(text) text = collapse_whitespace(text) return text @@ -90,6 +90,7 @@ def basic_cleaners(text: str) -> str: def transliteration_cleaners(text: str) -> str: """Pipeline for non-English text that transliterates to ASCII.""" + text = normalize_unicode(text) # text = convert_to_ascii(text) text = lowercase(text) text = collapse_whitespace(text) @@ -98,6 +99,7 @@ def transliteration_cleaners(text: str) -> str: def basic_german_cleaners(text: str) -> str: """Pipeline for German text""" + text = normalize_unicode(text) text = lowercase(text) text = collapse_whitespace(text) return text @@ -106,6 +108,7 @@ def basic_german_cleaners(text: str) -> str: # TODO: elaborate it def basic_turkish_cleaners(text: str) -> str: """Pipeline for Turkish text""" + text = normalize_unicode(text) text = text.replace("I", "ı") text = lowercase(text) text = collapse_whitespace(text) @@ -114,6 +117,7 @@ def basic_turkish_cleaners(text: str) -> str: def english_cleaners(text: str) -> str: """Pipeline for English text, including number and abbreviation expansion.""" + text = normalize_unicode(text) # text = convert_to_ascii(text) text = lowercase(text) text = expand_time_english(text) @@ -131,6 +135,7 @@ def phoneme_cleaners(text: str) -> str: NB: This cleaner converts numbers into English words, for other languages use multilingual_phoneme_cleaners(). 
""" + text = normalize_unicode(text) text = en_normalize_numbers(text) text = expand_abbreviations(text) text = replace_symbols(text) @@ -141,6 +146,7 @@ def phoneme_cleaners(text: str) -> str: def multilingual_phoneme_cleaners(text: str) -> str: """Pipeline for phonemes mode, including number and abbreviation expansion.""" + text = normalize_unicode(text) text = replace_symbols(text, lang=None) text = remove_aux_symbols(text) text = collapse_whitespace(text) @@ -149,6 +155,7 @@ def multilingual_phoneme_cleaners(text: str) -> str: def french_cleaners(text: str) -> str: """Pipeline for French text. There is no need to expand numbers, phonemizer already does that""" + text = normalize_unicode(text) text = expand_abbreviations(text, lang="fr") text = lowercase(text) text = replace_symbols(text, lang="fr") @@ -160,6 +167,7 @@ def french_cleaners(text: str) -> str: def portuguese_cleaners(text: str) -> str: """Basic pipeline for Portuguese text. There is no need to expand abbreviation and numbers, phonemizer already does that""" + text = normalize_unicode(text) text = lowercase(text) text = replace_symbols(text, lang="pt") text = remove_aux_symbols(text) @@ -169,12 +177,14 @@ def portuguese_cleaners(text: str) -> str: def chinese_mandarin_cleaners(text: str) -> str: """Basic pipeline for chinese""" + text = normalize_unicode(text) text = replace_numbers_to_characters_in_text(text) return text def multilingual_cleaners(text: str) -> str: """Pipeline for multilingual text""" + text = normalize_unicode(text) text = lowercase(text) text = replace_symbols(text, lang=None) text = remove_aux_symbols(text) @@ -186,3 +196,9 @@ def no_cleaners(text: str) -> str: # remove newline characters text = text.replace("\n", "") return text + + +def normalize_unicode(text: str) -> str: + """Normalize Unicode characters.""" + text = normalize("NFC", text) + return text diff --git a/tests/text_tests/test_text_cleaners.py b/tests/text_tests/test_text_cleaners.py index bf0c8d5d8a..9be1f0bf41 100644 --- a/tests/text_tests/test_text_cleaners.py +++ b/tests/text_tests/test_text_cleaners.py @@ -1,6 +1,11 @@ #!/usr/bin/env python3 -from TTS.tts.utils.text.cleaners import english_cleaners, multilingual_phoneme_cleaners, phoneme_cleaners +from TTS.tts.utils.text.cleaners import ( + english_cleaners, + multilingual_phoneme_cleaners, + normalize_unicode, + phoneme_cleaners, +) def test_time() -> None: @@ -24,3 +29,25 @@ def test_expand_numbers() -> None: def test_multilingual_phoneme_cleaners() -> None: assert multilingual_phoneme_cleaners("(Hello)") == "Hello" assert multilingual_phoneme_cleaners("1:") == "1," + + +def test_normalize_unicode() -> None: + test_cases = [ + ("Häagen-Dazs", "Häagen-Dazs"), + ("你好!", "你好!"), + ("𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼", "𝔄𝔅ℭ⓵⓶⓷︷,︸,i⁹,i₉,㌀,¼"), + ("é", "é"), + ("e\u0301", "é"), + ("a\u0300", "à"), + ("a\u0327", "a̧"), + ("na\u0303", "nã"), + ("o\u0302u", "ôu"), + ("n\u0303", "ñ"), + ("\u4E2D\u56FD", "中国"), + ("niño", "niño"), + ("a\u0308", "ä"), + ("\u3053\u3093\u306b\u3061\u306f", "こんにちは"), + ("\u03B1\u03B2", "αβ"), + ] + for arg, expect in test_cases: + assert normalize_unicode(arg) == expect From f75d0952f1e0a36b811b8f789306a32da76529d5 Mon Sep 17 00:00:00 2001 From: Kolja Beigel Date: Fri, 4 Oct 2024 12:02:59 +0200 Subject: [PATCH 156/255] fix(build): restrict spacy version to unbreak installation (#92) * Update pyproject.toml * Update pyproject.toml * Update pyproject.toml * Update pyproject.toml * build: simplify requirement restrictions --------- Co-authored-by: Enno Hermann --- pyproject.toml 
| 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3919d1d0f3..7c297fbffb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -2,7 +2,7 @@ requires = [ "setuptools", "setuptools-scm", - "cython~=0.29.30", + "cython>=3.0.0", "numpy>=2.0.0", ] build-backend = "setuptools.build_meta" @@ -45,7 +45,7 @@ classifiers = [ dependencies = [ # Core "numpy>=1.25.2", - "cython>=0.29.30", + "cython>=3.0.0", "scipy>=1.11.2", "torch>=2.4", "torchaudio", @@ -73,7 +73,7 @@ dependencies = [ "encodec>=0.1.1", # XTTS "num2words>=0.5.11", - "spacy[ja]>=3" + "spacy[ja]>=3,<3.8", ] [project.optional-dependencies] From 6c2e0be0b6c6ed085544fc01ae238fbee1fe908e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 4 Oct 2024 12:05:27 +0200 Subject: [PATCH 157/255] chore: bump version to 0.24.2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b9258247f4..c0d11a365f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["TTS*"] [project] name = "coqui-tts" -version = "0.24.1" +version = "0.24.2" description = "Deep learning for Text to Speech." readme = "README.md" requires-python = ">=3.9, <3.13" From 018d4ba1db2c6327c595d96f83848a5bd6a98ebd Mon Sep 17 00:00:00 2001 From: Johnny Street Date: Sat, 5 Oct 2024 17:00:12 -0400 Subject: [PATCH 158/255] fix(xtts): support transformers>=4.43.0 in streaming inference --- TTS/tts/layers/xtts/stream_generator.py | 82 +++++++++++++++++++------ pyproject.toml | 2 +- 2 files changed, 64 insertions(+), 20 deletions(-) diff --git a/TTS/tts/layers/xtts/stream_generator.py b/TTS/tts/layers/xtts/stream_generator.py index efc92a04ef..44cf940c69 100644 --- a/TTS/tts/layers/xtts/stream_generator.py +++ b/TTS/tts/layers/xtts/stream_generator.py @@ -20,8 +20,10 @@ PhrasalConstraint, PreTrainedModel, StoppingCriteriaList, + TemperatureLogitsWarper, + TopKLogitsWarper, + TopPLogitsWarper, ) -from transformers.generation.stopping_criteria import validate_stopping_criteria from transformers.generation.utils import GenerateOutput, SampleOutput, logger @@ -152,7 +154,18 @@ def generate( # noqa: PLR0911 # 2. Set generation parameters if not already defined logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList() stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList() - kwargs_has_attention_mask = model_kwargs.get("attention_mask", None) is not None + + if generation_config.pad_token_id is None and generation_config.eos_token_id is not None: + if model_kwargs.get("attention_mask", None) is None: + logger.warning( + "The attention mask and the pad token id were not set. As a consequence, you may observe " + "unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results." + ) + eos_token_id = generation_config.eos_token_id + if isinstance(eos_token_id, list): + eos_token_id = eos_token_id[0] + logger.warning(f"Setting `pad_token_id` to `eos_token_id`:{eos_token_id} for open-end generation.") + generation_config.pad_token_id = eos_token_id # 3. Define model inputs # inputs_tensor has to be defined @@ -164,22 +177,38 @@ def generate( # noqa: PLR0911 ) batch_size = inputs_tensor.shape[0] - device = inputs_tensor.device - self._prepare_special_tokens(generation_config, kwargs_has_attention_mask, device=device) - # 4. 
Define other model kwargs model_kwargs["output_attentions"] = generation_config.output_attentions model_kwargs["output_hidden_states"] = generation_config.output_hidden_states model_kwargs["use_cache"] = generation_config.use_cache + model_kwargs["cache_position"] = torch.Tensor([0]).to(inputs_tensor.device) accepts_attention_mask = "attention_mask" in set(inspect.signature(self.forward).parameters.keys()) requires_attention_mask = "encoder_outputs" not in model_kwargs - if not kwargs_has_attention_mask and requires_attention_mask and accepts_attention_mask: + if model_kwargs.get("attention_mask", None) is None and requires_attention_mask and accepts_attention_mask: + setattr( + generation_config, + "_pad_token_tensor", + torch.full( + (inputs_tensor.shape[0], inputs_tensor.shape[1]), + generation_config.pad_token_id, + device=inputs_tensor.device, + ), + ) + setattr( + generation_config, + "_eos_token_tensor", + torch.full( + (inputs_tensor.shape[0], inputs_tensor.shape[1]), + generation_config.eos_token_id, + device=inputs_tensor.device, + ), + ) model_kwargs["attention_mask"] = self._prepare_attention_mask_for_generation( inputs_tensor, - generation_config.pad_token_id, - generation_config.eos_token_id, + generation_config._pad_token_tensor, + generation_config._eos_token_tensor, ) # decoder-only models should use left-padding for generation @@ -202,15 +231,16 @@ def generate( # noqa: PLR0911 # 5. Prepare `input_ids` which will be used for auto-regressive generation if self.config.is_encoder_decoder: - input_ids, model_kwargs = self._prepare_decoder_input_ids_for_generation( - batch_size=batch_size, - model_input_name=model_input_name, - model_kwargs=model_kwargs, + input_ids = self._prepare_decoder_input_ids_for_generation( + batch_size, decoder_start_token_id=generation_config.decoder_start_token_id, + bos_token_id=generation_config.bos_token_id, + model_kwargs=model_kwargs, device=inputs_tensor.device, ) else: - input_ids = inputs_tensor if model_input_name == "input_ids" else model_kwargs.pop("input_ids") + # if decoder-only then inputs_tensor has to be `input_ids` + input_ids = inputs_tensor # 6. Prepare `max_length` depending on other stopping criteria. input_ids_seq_length = input_ids.shape[-1] @@ -376,7 +406,7 @@ def generate( # noqa: PLR0911 elif is_sample_gen_mode: # 11. prepare logits warper - logits_warper = self._get_logits_warper(generation_config, inputs_tensor.device) + logits_warper = _get_logits_warper(generation_config) # 12. expand input_ids with `num_return_sequences` additional sequences per batch input_ids, model_kwargs = self._expand_inputs_for_generation( @@ -401,7 +431,7 @@ def generate( # noqa: PLR0911 ) elif is_sample_gen_stream_mode: # 11. prepare logits warper - logits_warper = self._get_logits_warper(generation_config, inputs_tensor.device) + logits_warper = _get_logits_warper(generation_config) # 12. expand input_ids with `num_return_sequences` additional sequences per batch input_ids, model_kwargs = self._expand_inputs_for_generation( @@ -463,7 +493,7 @@ def generate( # noqa: PLR0911 elif is_beam_sample_gen_mode: # 11. 
prepare logits warper - logits_warper = self._get_logits_warper(generation_config, inputs_tensor.device) + logits_warper = _get_logits_warper(generation_config) if stopping_criteria.max_length is None: raise ValueError("`max_length` needs to be a stopping_criteria for now.") @@ -877,10 +907,10 @@ def init_stream_support(): if __name__ == "__main__": - from transformers import AutoModelForCausalLM, AutoTokenizer - - init_stream_support() + from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedModel + PreTrainedModel.generate = NewGenerationMixin.generate + PreTrainedModel.sample_stream = NewGenerationMixin.sample_stream model = AutoModelForCausalLM.from_pretrained("bigscience/bloom-560m", torch_dtype=torch.float16) tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m") @@ -920,3 +950,17 @@ def init_stream_support(): chunk = tokenizer.decode(x, skip_special_tokens=True) stream_result += chunk print(stream_result) + + +def _get_logits_warper(generation_config: GenerationConfig) -> LogitsProcessorList: + + warpers = LogitsProcessorList() + + if generation_config.temperature is not None and generation_config.temperature != 1.0: + warpers.append(TemperatureLogitsWarper(generation_config.temperature)) + if generation_config.top_k is not None and generation_config.top_k != 0: + warpers.append(TopKLogitsWarper(top_k=generation_config.top_k, min_tokens_to_keep=1)) + if generation_config.top_p is not None and generation_config.top_p < 1.0: + warpers.append(TopPLogitsWarper(top_p=generation_config.top_p, min_tokens_to_keep=1)) + + return warpers diff --git a/pyproject.toml b/pyproject.toml index 4d01e91b0a..9b2b137d63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,7 +68,7 @@ dependencies = [ "gruut[de,es,fr]>=2.4.0", # Tortoise "einops>=0.6.0", - "transformers>=4.42.0,<4.43.0", + "transformers>=4.43.0", # Bark "encodec>=0.1.1", # XTTS From a510ec3b68541c984c52a32e18cf7266e22e0ca0 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 20 Oct 2024 14:06:44 +0200 Subject: [PATCH 159/255] build(uv): add constraint on numba to avoid resolution error Otherwise it backtracks to an old numba and then llvmlite version that can't be built: https://github.com/astral-sh/uv/issues/6281 --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 9b2b137d63..7bef41aa35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -147,6 +147,9 @@ Discussions = "https://github.com/idiap/coqui-ai-TTS/discussions" tts = "TTS.bin.synthesize:main" tts-server = "TTS.server.server:main" +[tool.uv] +constraint-dependencies = ["numba>0.58.0"] + [tool.ruff] target-version = "py39" line-length = 120 From ad435b5440466e0ef24134aded0cc815e045d115 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 20 Oct 2024 14:16:18 +0200 Subject: [PATCH 160/255] build: again restrict to numpy<2 Some parts of spacy/thinc are still causing issues --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 7bef41aa35..d8aab49417 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,7 +44,7 @@ classifiers = [ ] dependencies = [ # Core - "numpy>=1.25.2", + "numpy>=1.25.2,<2.0", "cython>=3.0.0", "scipy>=1.11.2", "torch>=2.4", From 964b813235e81f6cf5e260cf880e0152c75fa0f4 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 25 Oct 2024 17:50:24 +0200 Subject: [PATCH 161/255] fix(gpt): set attention mask and address other warnings --- TTS/tts/layers/tortoise/autoregressive.py | 38 +++++++++++++++++++++++ 
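Illustrative sketch (not part of any patch in this series): the stream-generator change above replaces calls to the private `self._get_logits_warper` with a local `_get_logits_warper` helper, since the private method's signature has shifted across `transformers` releases. The snippet below shows, with toy tensors and made-up sampling settings, how the returned warper chain (temperature, then top-k, then top-p) is applied to raw logits before the next token is sampled; it assumes the patched `TTS.tts.layers.xtts.stream_generator` module is importable.

```python
import torch
from transformers import GenerationConfig

from TTS.tts.layers.xtts.stream_generator import _get_logits_warper

# Toy example: a batch of 2 partial sequences over a 100-token vocabulary.
input_ids = torch.randint(0, 100, (2, 10))
scores = torch.randn(2, 100)

config = GenerationConfig(do_sample=True, temperature=0.85, top_k=50, top_p=0.9)
warpers = _get_logits_warper(config)  # TemperatureLogitsWarper -> TopKLogitsWarper -> TopPLogitsWarper

# Each warper rescales or masks the scores in turn; sampling then uses the result.
warped_scores = warpers(input_ids, scores)
probs = torch.softmax(warped_scores, dim=-1)
next_token = torch.multinomial(probs, num_samples=1)
```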
TTS/tts/layers/xtts/gpt.py | 4 +++ TTS/tts/layers/xtts/gpt_inference.py | 7 +++-- TTS/tts/models/xtts.py | 1 + 4 files changed, 48 insertions(+), 2 deletions(-) diff --git a/TTS/tts/layers/tortoise/autoregressive.py b/TTS/tts/layers/tortoise/autoregressive.py index 14d881bc10..aaae695516 100644 --- a/TTS/tts/layers/tortoise/autoregressive.py +++ b/TTS/tts/layers/tortoise/autoregressive.py @@ -1,14 +1,22 @@ # AGPL: a notification must be added stating that changes have been made to that file. import functools +from typing import Optional import torch import torch.nn as nn import torch.nn.functional as F +import transformers +from packaging.version import Version from transformers import GPT2Config, GPT2PreTrainedModel, LogitsProcessorList from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions from TTS.tts.layers.tortoise.arch_utils import AttentionBlock, TypicalLogitsWarper +if Version(transformers.__version__) >= Version("4.45"): + isin = transformers.pytorch_utils.isin_mps_friendly +else: + isin = torch.isin + def null_position_embeddings(range, dim): return torch.zeros((range.shape[0], range.shape[1], dim), device=range.device) @@ -596,6 +604,8 @@ def inference_speech( max_length = ( trunc_index + self.max_mel_tokens - 1 if max_generate_length is None else trunc_index + max_generate_length ) + stop_token_tensor = torch.tensor(self.stop_mel_token, device=inputs.device, dtype=torch.long) + attention_mask = _prepare_attention_mask_for_generation(inputs, stop_token_tensor, stop_token_tensor) gen = self.inference_model.generate( inputs, bos_token_id=self.start_mel_token, @@ -604,11 +614,39 @@ def inference_speech( max_length=max_length, logits_processor=logits_processor, num_return_sequences=num_return_sequences, + attention_mask=attention_mask, **hf_generate_kwargs, ) return gen[:, trunc_index:] +def _prepare_attention_mask_for_generation( + inputs: torch.Tensor, + pad_token_id: Optional[torch.Tensor], + eos_token_id: Optional[torch.Tensor], +) -> torch.LongTensor: + # No information for attention mask inference -> return default attention mask + default_attention_mask = torch.ones(inputs.shape[:2], dtype=torch.long, device=inputs.device) + if pad_token_id is None: + return default_attention_mask + + is_input_ids = len(inputs.shape) == 2 and inputs.dtype in [torch.int, torch.long] + if not is_input_ids: + return default_attention_mask + + is_pad_token_in_inputs = (pad_token_id is not None) and (isin(elements=inputs, test_elements=pad_token_id).any()) + is_pad_token_not_equal_to_eos_token_id = (eos_token_id is None) or ~( + isin(elements=eos_token_id, test_elements=pad_token_id).any() + ) + can_infer_attention_mask = is_pad_token_in_inputs * is_pad_token_not_equal_to_eos_token_id + attention_mask_from_padding = inputs.ne(pad_token_id).long() + + attention_mask = ( + attention_mask_from_padding * can_infer_attention_mask + default_attention_mask * ~can_infer_attention_mask + ) + return attention_mask + + if __name__ == "__main__": gpt = UnifiedVoice( model_dim=256, diff --git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py index b55b84d90e..b3c3b31b47 100644 --- a/TTS/tts/layers/xtts/gpt.py +++ b/TTS/tts/layers/xtts/gpt.py @@ -8,6 +8,7 @@ import torch.nn.functional as F from transformers import GPT2Config +from TTS.tts.layers.tortoise.autoregressive import _prepare_attention_mask_for_generation from TTS.tts.layers.xtts.gpt_inference import GPT2InferenceModel from TTS.tts.layers.xtts.latent_encoder import ConditioningEncoder from 
TTS.tts.layers.xtts.perceiver_encoder import PerceiverResampler @@ -586,12 +587,15 @@ def generate( **hf_generate_kwargs, ): gpt_inputs = self.compute_embeddings(cond_latents, text_inputs) + stop_token_tensor = torch.tensor(self.stop_audio_token, device=gpt_inputs.device, dtype=torch.long) + attention_mask = _prepare_attention_mask_for_generation(gpt_inputs, stop_token_tensor, stop_token_tensor) gen = self.gpt_inference.generate( gpt_inputs, bos_token_id=self.start_audio_token, pad_token_id=self.stop_audio_token, eos_token_id=self.stop_audio_token, max_length=self.max_gen_mel_tokens + gpt_inputs.shape[-1], + attention_mask=attention_mask, **hf_generate_kwargs, ) if "return_dict_in_generate" in hf_generate_kwargs: diff --git a/TTS/tts/layers/xtts/gpt_inference.py b/TTS/tts/layers/xtts/gpt_inference.py index 4625ae1ba9..e94683524a 100644 --- a/TTS/tts/layers/xtts/gpt_inference.py +++ b/TTS/tts/layers/xtts/gpt_inference.py @@ -1,10 +1,12 @@ import torch from torch import nn -from transformers import GPT2PreTrainedModel +from transformers import GenerationMixin, GPT2PreTrainedModel from transformers.modeling_outputs import CausalLMOutputWithCrossAttentions +from TTS.tts.layers.xtts.stream_generator import StreamGenerationConfig -class GPT2InferenceModel(GPT2PreTrainedModel): + +class GPT2InferenceModel(GPT2PreTrainedModel, GenerationMixin): """Override GPT2LMHeadModel to allow for prefix conditioning.""" def __init__(self, config, gpt, pos_emb, embeddings, norm, linear, kv_cache): @@ -15,6 +17,7 @@ def __init__(self, config, gpt, pos_emb, embeddings, norm, linear, kv_cache): self.final_norm = norm self.lm_head = nn.Sequential(norm, linear) self.kv_cache = kv_cache + self.generation_config = StreamGenerationConfig.from_model_config(config) if self.can_generate() else None def store_prefix_emb(self, prefix_emb): self.cached_prefix_emb = prefix_emb diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 0b7652e450..c92db9c1d0 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -667,6 +667,7 @@ def inference_stream( repetition_penalty=float(repetition_penalty), output_attentions=False, output_hidden_states=True, + return_dict_in_generate=True, **hf_generate_kwargs, ) From 47ad0bf19094fc60e016f4069620cf8fd975b889 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 4 Nov 2024 17:40:23 +0100 Subject: [PATCH 162/255] fix(text.characters): add nasal diacritic (#127) --- TTS/tts/utils/text/characters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TTS/tts/utils/text/characters.py b/TTS/tts/utils/text/characters.py index c622b93c59..4bf9bf6bd5 100644 --- a/TTS/tts/utils/text/characters.py +++ b/TTS/tts/utils/text/characters.py @@ -34,8 +34,8 @@ def parse_symbols(): _pulmonic_consonants = "pbtdʈɖcɟkɡqɢʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ" _suprasegmentals = "ˈˌːˑ" _other_symbols = "ʍwɥʜʢʡɕʑɺɧʲ" -_diacrilics = "ɚ˞ɫ" -_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacrilics +_diacritics = "̃ɚ˞ɫ" +_phonemes = _vowels + _non_pulmonic_consonants + _pulmonic_consonants + _suprasegmentals + _other_symbols + _diacritics class BaseVocabulary: From 8e66be2c32c57b4b993f553e7052d5d4600ef244 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 25 Oct 2024 11:15:10 +0200 Subject: [PATCH 163/255] fix: only enable load with weights_only in pytorch>=2.4 Allows moving the minimum Pytorch version back to 2.1 --- TTS/__init__.py | 46 ++++++++++++---------- TTS/tts/layers/bark/load_model.py | 3 +- 
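Illustrative sketch (not part of any patch in this series): the change above boils down to requesting `weights_only=True` only when the installed PyTorch is 2.4 or newer, i.e. recent enough to also provide `torch.serialization.add_safe_globals` for allowlisting the custom config classes stored in Coqui checkpoints. The actual patch threads an `is_pytorch_at_least_2_4()` check through every `torch.load` call site, as the hunks below show; the `load_checkpoint` helper name here is made up for the example.

```python
import torch
from packaging.version import Version


def load_checkpoint(path: str, device: str = "cpu"):
    # Opt into safe, weights-only loading only where PyTorch is recent enough
    # for the add_safe_globals() allowlist mechanism; fall back to a normal
    # (unrestricted) load on older releases such as 2.1.
    weights_only = Version(torch.__version__) >= Version("2.4")
    return torch.load(path, map_location=torch.device(device), weights_only=weights_only)
```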
TTS/tts/layers/tortoise/arch_utils.py | 3 +- TTS/tts/layers/tortoise/audio_utils.py | 3 +- TTS/tts/layers/xtts/dvae.py | 4 +- TTS/tts/layers/xtts/hifigan_decoder.py | 3 +- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 9 ++++- TTS/tts/layers/xtts/xtts_manager.py | 4 +- TTS/tts/models/neuralhmm_tts.py | 8 ++-- TTS/tts/models/overflow.py | 8 ++-- TTS/tts/models/tortoise.py | 19 +++++---- TTS/tts/models/xtts.py | 3 +- TTS/tts/utils/fairseq.py | 4 +- TTS/tts/utils/managers.py | 3 +- TTS/utils/generic_utils.py | 8 ++++ TTS/vc/modules/freevc/wavlm/__init__.py | 3 +- pyproject.toml | 2 +- 17 files changed, 86 insertions(+), 47 deletions(-) diff --git a/TTS/__init__.py b/TTS/__init__.py index 64c7369bc0..8e93c9b5db 100644 --- a/TTS/__init__.py +++ b/TTS/__init__.py @@ -1,29 +1,33 @@ -import _codecs import importlib.metadata -from collections import defaultdict -import numpy as np -import torch - -from TTS.config.shared_configs import BaseDatasetConfig -from TTS.tts.configs.xtts_config import XttsConfig -from TTS.tts.models.xtts import XttsArgs, XttsAudioConfig -from TTS.utils.radam import RAdam +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 __version__ = importlib.metadata.version("coqui-tts") -torch.serialization.add_safe_globals([dict, defaultdict, RAdam]) +if is_pytorch_at_least_2_4(): + import _codecs + from collections import defaultdict + + import numpy as np + import torch + + from TTS.config.shared_configs import BaseDatasetConfig + from TTS.tts.configs.xtts_config import XttsConfig + from TTS.tts.models.xtts import XttsArgs, XttsAudioConfig + from TTS.utils.radam import RAdam + + torch.serialization.add_safe_globals([dict, defaultdict, RAdam]) -# Bark -torch.serialization.add_safe_globals( - [ - np.core.multiarray.scalar, - np.dtype, - np.dtypes.Float64DType, - _codecs.encode, # TODO: safe by default from Pytorch 2.5 - ] -) + # Bark + torch.serialization.add_safe_globals( + [ + np.core.multiarray.scalar, + np.dtype, + np.dtypes.Float64DType, + _codecs.encode, # TODO: safe by default from Pytorch 2.5 + ] + ) -# XTTS -torch.serialization.add_safe_globals([BaseDatasetConfig, XttsConfig, XttsAudioConfig, XttsArgs]) + # XTTS + torch.serialization.add_safe_globals([BaseDatasetConfig, XttsConfig, XttsAudioConfig, XttsArgs]) diff --git a/TTS/tts/layers/bark/load_model.py b/TTS/tts/layers/bark/load_model.py index 7785aab845..72eca30ac6 100644 --- a/TTS/tts/layers/bark/load_model.py +++ b/TTS/tts/layers/bark/load_model.py @@ -10,6 +10,7 @@ from TTS.tts.layers.bark.model import GPT, GPTConfig from TTS.tts.layers.bark.model_fine import FineGPT, FineGPTConfig +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 if ( torch.cuda.is_available() @@ -118,7 +119,7 @@ def load_model(ckpt_path, device, config, model_type="text"): logger.info(f"{model_type} model not found, downloading...") _download(config.REMOTE_MODEL_PATHS[model_type]["path"], ckpt_path, config.CACHE_DIR) - checkpoint = torch.load(ckpt_path, map_location=device, weights_only=True) + checkpoint = torch.load(ckpt_path, map_location=device, weights_only=is_pytorch_at_least_2_4()) # this is a hack model_args = checkpoint["model_args"] if "input_vocab_size" not in model_args: diff --git a/TTS/tts/layers/tortoise/arch_utils.py b/TTS/tts/layers/tortoise/arch_utils.py index f4dbcc8054..52c2526695 100644 --- a/TTS/tts/layers/tortoise/arch_utils.py +++ b/TTS/tts/layers/tortoise/arch_utils.py @@ -9,6 +9,7 @@ from transformers import LogitsWarper from TTS.tts.layers.tortoise.xtransformers import ContinuousTransformerWrapper, 
RelativePositionBias +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 def zero_module(module): @@ -332,7 +333,7 @@ def __init__( self.mel_norm_file = mel_norm_file if self.mel_norm_file is not None: with fsspec.open(self.mel_norm_file) as f: - self.mel_norms = torch.load(f, weights_only=True) + self.mel_norms = torch.load(f, weights_only=is_pytorch_at_least_2_4()) else: self.mel_norms = None diff --git a/TTS/tts/layers/tortoise/audio_utils.py b/TTS/tts/layers/tortoise/audio_utils.py index 94c2bae6fa..4f299a8fd9 100644 --- a/TTS/tts/layers/tortoise/audio_utils.py +++ b/TTS/tts/layers/tortoise/audio_utils.py @@ -10,6 +10,7 @@ from scipy.io.wavfile import read from TTS.utils.audio.torch_transforms import TorchSTFT +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 logger = logging.getLogger(__name__) @@ -124,7 +125,7 @@ def load_voice(voice: str, extra_voice_dirs: List[str] = []): voices = get_voices(extra_voice_dirs) paths = voices[voice] if len(paths) == 1 and paths[0].endswith(".pth"): - return None, torch.load(paths[0], weights_only=True) + return None, torch.load(paths[0], weights_only=is_pytorch_at_least_2_4()) else: conds = [] for cond_path in paths: diff --git a/TTS/tts/layers/xtts/dvae.py b/TTS/tts/layers/xtts/dvae.py index 58f91785a1..73970fb0bf 100644 --- a/TTS/tts/layers/xtts/dvae.py +++ b/TTS/tts/layers/xtts/dvae.py @@ -9,6 +9,8 @@ import torchaudio from einops import rearrange +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 + logger = logging.getLogger(__name__) @@ -46,7 +48,7 @@ def dvae_wav_to_mel( mel = mel_stft(wav) mel = torch.log(torch.clamp(mel, min=1e-5)) if mel_norms is None: - mel_norms = torch.load(mel_norms_file, map_location=device, weights_only=True) + mel_norms = torch.load(mel_norms_file, map_location=device, weights_only=is_pytorch_at_least_2_4()) mel = mel / mel_norms.unsqueeze(0).unsqueeze(-1) return mel diff --git a/TTS/tts/layers/xtts/hifigan_decoder.py b/TTS/tts/layers/xtts/hifigan_decoder.py index 09bd06dfde..5ef0030b8b 100644 --- a/TTS/tts/layers/xtts/hifigan_decoder.py +++ b/TTS/tts/layers/xtts/hifigan_decoder.py @@ -9,6 +9,7 @@ from torch.nn.utils.parametrize import remove_parametrizations from trainer.io import load_fsspec +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 from TTS.vocoder.models.hifigan_generator import get_padding logger = logging.getLogger(__name__) @@ -328,7 +329,7 @@ def remove_weight_norm(self): def load_checkpoint( self, config, checkpoint_path, eval=False, cache=False ): # pylint: disable=unused-argument, redefined-builtin - state = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=True) + state = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=is_pytorch_at_least_2_4()) self.load_state_dict(state["model"]) if eval: self.eval() diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index f1aa6f8cd0..9d9edd5758 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -19,6 +19,7 @@ from TTS.tts.layers.xtts.trainer.dataset import XTTSDataset from TTS.tts.models.base_tts import BaseTTS from TTS.tts.models.xtts import Xtts, XttsArgs, XttsAudioConfig +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 logger = logging.getLogger(__name__) @@ -91,7 +92,9 @@ def __init__(self, config: Coqpit): # load GPT if available if self.args.gpt_checkpoint: - gpt_checkpoint = torch.load(self.args.gpt_checkpoint, map_location=torch.device("cpu"), 
weights_only=True) + gpt_checkpoint = torch.load( + self.args.gpt_checkpoint, map_location=torch.device("cpu"), weights_only=is_pytorch_at_least_2_4() + ) # deal with coqui Trainer exported model if "model" in gpt_checkpoint.keys() and "config" in gpt_checkpoint.keys(): logger.info("Coqui Trainer checkpoint detected! Converting it!") @@ -184,7 +187,9 @@ def __init__(self, config: Coqpit): self.dvae.eval() if self.args.dvae_checkpoint: - dvae_checkpoint = torch.load(self.args.dvae_checkpoint, map_location=torch.device("cpu"), weights_only=True) + dvae_checkpoint = torch.load( + self.args.dvae_checkpoint, map_location=torch.device("cpu"), weights_only=is_pytorch_at_least_2_4() + ) self.dvae.load_state_dict(dvae_checkpoint, strict=False) logger.info("DVAE weights restored from: %s", self.args.dvae_checkpoint) else: diff --git a/TTS/tts/layers/xtts/xtts_manager.py b/TTS/tts/layers/xtts/xtts_manager.py index 5a3c47aead..8156b35f0d 100644 --- a/TTS/tts/layers/xtts/xtts_manager.py +++ b/TTS/tts/layers/xtts/xtts_manager.py @@ -1,9 +1,11 @@ import torch +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 + class SpeakerManager: def __init__(self, speaker_file_path=None): - self.speakers = torch.load(speaker_file_path, weights_only=True) + self.speakers = torch.load(speaker_file_path, weights_only=is_pytorch_at_least_2_4()) @property def name_to_id(self): diff --git a/TTS/tts/models/neuralhmm_tts.py b/TTS/tts/models/neuralhmm_tts.py index 49c48c2bd4..de5401aac7 100644 --- a/TTS/tts/models/neuralhmm_tts.py +++ b/TTS/tts/models/neuralhmm_tts.py @@ -18,7 +18,7 @@ from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_spectrogram -from TTS.utils.generic_utils import format_aux_input +from TTS.utils.generic_utils import format_aux_input, is_pytorch_at_least_2_4 logger = logging.getLogger(__name__) @@ -107,7 +107,7 @@ def update_mean_std(self, statistics_dict: Dict): def preprocess_batch(self, text, text_len, mels, mel_len): if self.mean.item() == 0 or self.std.item() == 1: - statistics_dict = torch.load(self.mel_statistics_parameter_path, weights_only=True) + statistics_dict = torch.load(self.mel_statistics_parameter_path, weights_only=is_pytorch_at_least_2_4()) self.update_mean_std(statistics_dict) mels = self.normalize(mels) @@ -292,7 +292,9 @@ def on_init_start(self, trainer): "Data parameters found for: %s. 
Loading mel normalization parameters...", trainer.config.mel_statistics_parameter_path, ) - statistics = torch.load(trainer.config.mel_statistics_parameter_path, weights_only=True) + statistics = torch.load( + trainer.config.mel_statistics_parameter_path, weights_only=is_pytorch_at_least_2_4() + ) data_mean, data_std, init_transition_prob = ( statistics["mean"], statistics["std"], diff --git a/TTS/tts/models/overflow.py b/TTS/tts/models/overflow.py index 4c0f341be3..b72f4877cf 100644 --- a/TTS/tts/models/overflow.py +++ b/TTS/tts/models/overflow.py @@ -19,7 +19,7 @@ from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_spectrogram -from TTS.utils.generic_utils import format_aux_input +from TTS.utils.generic_utils import format_aux_input, is_pytorch_at_least_2_4 logger = logging.getLogger(__name__) @@ -120,7 +120,7 @@ def update_mean_std(self, statistics_dict: Dict): def preprocess_batch(self, text, text_len, mels, mel_len): if self.mean.item() == 0 or self.std.item() == 1: - statistics_dict = torch.load(self.mel_statistics_parameter_path, weights_only=True) + statistics_dict = torch.load(self.mel_statistics_parameter_path, weights_only=is_pytorch_at_least_2_4()) self.update_mean_std(statistics_dict) mels = self.normalize(mels) @@ -308,7 +308,9 @@ def on_init_start(self, trainer): "Data parameters found for: %s. Loading mel normalization parameters...", trainer.config.mel_statistics_parameter_path, ) - statistics = torch.load(trainer.config.mel_statistics_parameter_path, weights_only=True) + statistics = torch.load( + trainer.config.mel_statistics_parameter_path, weights_only=is_pytorch_at_least_2_4() + ) data_mean, data_std, init_transition_prob = ( statistics["mean"], statistics["std"], diff --git a/TTS/tts/models/tortoise.py b/TTS/tts/models/tortoise.py index 98e79d0cf1..01629b5d2a 100644 --- a/TTS/tts/models/tortoise.py +++ b/TTS/tts/models/tortoise.py @@ -23,6 +23,7 @@ from TTS.tts.layers.tortoise.vocoder import VocConf, VocType from TTS.tts.layers.tortoise.wav2vec_alignment import Wav2VecAlignment from TTS.tts.models.base_tts import BaseTTS +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 logger = logging.getLogger(__name__) @@ -171,7 +172,11 @@ def classify_audio_clip(clip, model_dir): distribute_zero_label=False, ) classifier.load_state_dict( - torch.load(os.path.join(model_dir, "classifier.pth"), map_location=torch.device("cpu"), weights_only=True) + torch.load( + os.path.join(model_dir, "classifier.pth"), + map_location=torch.device("cpu"), + weights_only=is_pytorch_at_least_2_4(), + ) ) clip = clip.cpu().unsqueeze(0) results = F.softmax(classifier(clip), dim=-1) @@ -490,7 +495,7 @@ def get_random_conditioning_latents(self): torch.load( os.path.join(self.models_dir, "rlg_auto.pth"), map_location=torch.device("cpu"), - weights_only=True, + weights_only=is_pytorch_at_least_2_4(), ) ) self.rlg_diffusion = RandomLatentConverter(2048).eval() @@ -498,7 +503,7 @@ def get_random_conditioning_latents(self): torch.load( os.path.join(self.models_dir, "rlg_diffuser.pth"), map_location=torch.device("cpu"), - weights_only=True, + weights_only=is_pytorch_at_least_2_4(), ) ) with torch.no_grad(): @@ -885,17 +890,17 @@ def load_checkpoint( if os.path.exists(ar_path): # remove keys from the checkpoint that are not in the model - checkpoint = torch.load(ar_path, map_location=torch.device("cpu"), weights_only=True) + checkpoint = torch.load(ar_path, map_location=torch.device("cpu"), 
weights_only=is_pytorch_at_least_2_4()) # strict set False # due to removed `bias` and `masked_bias` changes in Transformers self.autoregressive.load_state_dict(checkpoint, strict=False) if os.path.exists(diff_path): - self.diffusion.load_state_dict(torch.load(diff_path, weights_only=True), strict=strict) + self.diffusion.load_state_dict(torch.load(diff_path, weights_only=is_pytorch_at_least_2_4()), strict=strict) if os.path.exists(clvp_path): - self.clvp.load_state_dict(torch.load(clvp_path, weights_only=True), strict=strict) + self.clvp.load_state_dict(torch.load(clvp_path, weights_only=is_pytorch_at_least_2_4()), strict=strict) if os.path.exists(vocoder_checkpoint_path): self.vocoder.load_state_dict( @@ -903,7 +908,7 @@ def load_checkpoint( torch.load( vocoder_checkpoint_path, map_location=torch.device("cpu"), - weights_only=True, + weights_only=is_pytorch_at_least_2_4(), ) ) ) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 0b7652e450..ef2cebee3c 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -16,6 +16,7 @@ from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer, split_sentence from TTS.tts.layers.xtts.xtts_manager import LanguageManager, SpeakerManager from TTS.tts.models.base_tts import BaseTTS +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 logger = logging.getLogger(__name__) @@ -65,7 +66,7 @@ def wav_to_mel_cloning( mel = mel_stft(wav) mel = torch.log(torch.clamp(mel, min=1e-5)) if mel_norms is None: - mel_norms = torch.load(mel_norms_file, map_location=device, weights_only=True) + mel_norms = torch.load(mel_norms_file, map_location=device, weights_only=is_pytorch_at_least_2_4()) mel = mel / mel_norms.unsqueeze(0).unsqueeze(-1) return mel diff --git a/TTS/tts/utils/fairseq.py b/TTS/tts/utils/fairseq.py index 6eb1905d96..20907a0532 100644 --- a/TTS/tts/utils/fairseq.py +++ b/TTS/tts/utils/fairseq.py @@ -1,8 +1,10 @@ import torch +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 + def rehash_fairseq_vits_checkpoint(checkpoint_file): - chk = torch.load(checkpoint_file, map_location=torch.device("cpu"), weights_only=True)["model"] + chk = torch.load(checkpoint_file, map_location=torch.device("cpu"), weights_only=is_pytorch_at_least_2_4())["model"] new_chk = {} for k, v in chk.items(): if "enc_p." 
in k: diff --git a/TTS/tts/utils/managers.py b/TTS/tts/utils/managers.py index 6f72581c08..6a2f7df67b 100644 --- a/TTS/tts/utils/managers.py +++ b/TTS/tts/utils/managers.py @@ -9,6 +9,7 @@ from TTS.config import load_config from TTS.encoder.utils.generic_utils import setup_encoder_model from TTS.utils.audio import AudioProcessor +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 def load_file(path: str): @@ -17,7 +18,7 @@ def load_file(path: str): return json.load(f) elif path.endswith(".pth"): with fsspec.open(path, "rb") as f: - return torch.load(f, map_location="cpu", weights_only=True) + return torch.load(f, map_location="cpu", weights_only=is_pytorch_at_least_2_4()) else: raise ValueError("Unsupported file type") diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index 91f8844262..3ee285232f 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -6,6 +6,9 @@ from pathlib import Path from typing import Dict, Optional +import torch +from packaging.version import Version + logger = logging.getLogger(__name__) @@ -131,3 +134,8 @@ def setup_logger( sh = logging.StreamHandler() sh.setFormatter(formatter) lg.addHandler(sh) + + +def is_pytorch_at_least_2_4() -> bool: + """Check if the installed Pytorch version is 2.4 or higher.""" + return Version(torch.__version__) >= Version("2.4") diff --git a/TTS/vc/modules/freevc/wavlm/__init__.py b/TTS/vc/modules/freevc/wavlm/__init__.py index 528fade772..4046e137f5 100644 --- a/TTS/vc/modules/freevc/wavlm/__init__.py +++ b/TTS/vc/modules/freevc/wavlm/__init__.py @@ -5,6 +5,7 @@ import torch from trainer.io import get_user_data_dir +from TTS.utils.generic_utils import is_pytorch_at_least_2_4 from TTS.vc.modules.freevc.wavlm.wavlm import WavLM, WavLMConfig logger = logging.getLogger(__name__) @@ -26,7 +27,7 @@ def get_wavlm(device="cpu"): logger.info("Downloading WavLM model to %s ...", output_path) urllib.request.urlretrieve(model_uri, output_path) - checkpoint = torch.load(output_path, map_location=torch.device(device), weights_only=True) + checkpoint = torch.load(output_path, map_location=torch.device(device), weights_only=is_pytorch_at_least_2_4()) cfg = WavLMConfig(checkpoint["cfg"]) wavlm = WavLM(cfg).to(device) wavlm.load_state_dict(checkpoint["model"]) diff --git a/pyproject.toml b/pyproject.toml index d8aab49417..6299bb0d0a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ dependencies = [ "numpy>=1.25.2,<2.0", "cython>=3.0.0", "scipy>=1.11.2", - "torch>=2.4", + "torch>=2.1", "torchaudio", "soundfile>=0.12.0", "librosa>=0.10.1", From ce5c49251898f47ad82f71f7f96d848b42a3c9c0 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 20 Oct 2024 23:09:11 +0200 Subject: [PATCH 164/255] ci: simplify ci by using uv where possible --- .github/workflows/style_check.yml | 16 ++++------ .github/workflows/tests.yml | 40 ++++++++++------------- .gitignore | 2 ++ .pre-commit-config.yaml | 11 ++----- CONTRIBUTING.md | 53 +++++++++++++++++++------------ Dockerfile | 2 +- Makefile | 17 ++++------ README.md | 28 ++++++---------- dockerfiles/Dockerfile.dev | 2 +- pyproject.toml | 24 +++++++------- requirements.dev.txt | 8 ----- scripts/generate_requirements.py | 39 ----------------------- 12 files changed, 89 insertions(+), 153 deletions(-) delete mode 100644 requirements.dev.txt delete mode 100644 scripts/generate_requirements.py diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml index c913c233d8..a146213f7c 100644 --- a/.github/workflows/style_check.yml +++ 
b/.github/workflows/style_check.yml @@ -13,17 +13,15 @@ jobs: fail-fast: false matrix: python-version: [3.9] - experimental: [false] steps: - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + - name: Install uv + uses: astral-sh/setup-uv@v3 with: - python-version: ${{ matrix.python-version }} - architecture: x64 - cache: 'pip' - cache-dependency-path: 'requirements*' - - name: Install/upgrade dev dependencies - run: python3 -m pip install -r requirements.dev.txt + version: "0.4.27" + enable-cache: true + cache-dependency-glob: "**/pyproject.toml" + - name: Set up Python ${{ matrix.python-version }} + run: uv python install ${{ matrix.python-version }} - name: Lint check run: make lint diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cdb30ea0e0..be3f1b740b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -16,17 +16,14 @@ jobs: subset: ["data_tests", "inference_tests", "test_aux", "test_text", "test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"] steps: - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + - name: Install uv + uses: astral-sh/setup-uv@v3 with: - python-version: ${{ matrix.python-version }} - architecture: x64 - cache: 'pip' - cache-dependency-path: 'requirements*' - - name: check OS - run: cat /etc/os-release - - name: set ENV - run: export TRAINER_TELEMETRY=0 + version: "0.4.27" + enable-cache: true + cache-dependency-glob: "**/pyproject.toml" + - name: Set up Python ${{ matrix.python-version }} + run: uv python install ${{ matrix.python-version }} - name: Install Espeak if: contains(fromJSON('["inference_tests", "test_text", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) run: | @@ -37,21 +34,17 @@ jobs: sudo apt-get update sudo apt-get install -y --no-install-recommends git make gcc make system-deps - - name: Install/upgrade Python setup deps - run: python3 -m pip install --upgrade pip setuptools wheel uv - name: Replace scarf urls if: contains(fromJSON('["data_tests", "inference_tests", "test_aux", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) run: | sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - - name: Install TTS + - name: Unit tests run: | resolution=highest if [ "${{ matrix.python-version }}" == "3.9" ]; then resolution=lowest-direct fi - python3 -m uv pip install --resolution=$resolution --system "coqui-tts[dev,server,languages] @ ." 
- - name: Unit tests - run: make ${{ matrix.subset }} + uv run --resolution=$resolution --extra server --extra languages make ${{ matrix.subset }} - name: Upload coverage data uses: actions/upload-artifact@v4 with: @@ -65,18 +58,17 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - uses: actions/setup-python@v5 + - name: Install uv + uses: astral-sh/setup-uv@v3 with: - python-version: "3.12" + version: "0.4.27" - uses: actions/download-artifact@v4 with: pattern: coverage-data-* merge-multiple: true - name: Combine coverage run: | - python -Im pip install --upgrade coverage[toml] - - python -Im coverage combine - python -Im coverage html --skip-covered --skip-empty - - python -Im coverage report --format=markdown >> $GITHUB_STEP_SUMMARY + uv python install + uvx coverage combine + uvx coverage html --skip-covered --skip-empty + uvx coverage report --format=markdown >> $GITHUB_STEP_SUMMARY diff --git a/.gitignore b/.gitignore index f9708961e2..d9f992275c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +uv.lock + WadaSNR/ .idea/ *.pyc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f96f6f38ac..92f6f3ab3c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: "https://github.com/pre-commit/pre-commit-hooks" - rev: v4.5.0 + rev: v5.0.0 hooks: - id: check-yaml - id: end-of-file-fixer @@ -11,14 +11,7 @@ repos: - id: black language_version: python3 - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.0 + rev: v0.7.0 hooks: - id: ruff args: [--fix, --exit-non-zero-on-fix] - - repo: local - hooks: - - id: generate_requirements.py - name: generate_requirements.py - language: system - entry: python scripts/generate_requirements.py - files: "pyproject.toml|requirements.*\\.txt|tools/generate_requirements.py" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e93858f27d..d4a8cf0090 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -44,29 +44,37 @@ If you have a new feature, a model to implement, or a bug to squash, go ahead an Please use the following steps to send a ✨**PR**✨. Let us know if you encounter a problem along the way. -The following steps are tested on an Ubuntu system. +The following steps are tested on an Ubuntu system and require +[uv](https://docs.astral.sh/uv/) for virtual environment management. Choose your +preferred [installation +method](https://docs.astral.sh/uv/getting-started/installation/), e.g. the +standalone installer: + +```bash +curl -LsSf https://astral.sh/uv/install.sh | sh +``` 1. Fork 🐸TTS[https://github.com/idiap/coqui-ai-TTS] by clicking the fork button at the top right corner of the project page. 2. Clone 🐸TTS and add the main repo as a new remote named ```upstream```. ```bash - $ git clone git@github.com:/coqui-ai-TTS.git - $ cd coqui-ai-TTS - $ git remote add upstream https://github.com/idiap/coqui-ai-TTS.git + git clone git@github.com:/coqui-ai-TTS.git + cd coqui-ai-TTS + git remote add upstream https://github.com/idiap/coqui-ai-TTS.git ``` 3. Install 🐸TTS for development. ```bash - $ make system-deps # intended to be used on Ubuntu (Debian). Let us know if you have a different OS. - $ make install_dev + make system-deps # intended to be used on Ubuntu (Debian). Let us know if you have a different OS. + make install_dev ``` 4. Create a new branch with an informative name for your goal. ```bash - $ git checkout -b an_informative_name_for_my_branch + git checkout -b an_informative_name_for_my_branch ``` 5. Implement your changes on your new branch. 
@@ -75,39 +83,42 @@ The following steps are tested on an Ubuntu system. 7. Add your tests to our test suite under ```tests``` folder. It is important to show that your code works, edge cases are considered, and inform others about the intended use. -8. Run the tests to see how your updates work with the rest of the project. You can repeat this step multiple times as you implement your changes to make sure you are on the right direction. +8. Run the tests to see how your updates work with the rest of the project. You + can repeat this step multiple times as you implement your changes to make + sure you are on the right direction. **NB: running all tests takes a long time, + it is better to leave this to the CI.** ```bash - $ make test # stop at the first error - $ make test_all # run all the tests, report all the errors + uv run make test # stop at the first error + uv run make test_all # run all the tests, report all the errors ``` 9. Format your code. We use ```black``` for code formatting. ```bash - $ make style + make style ``` 10. Run the linter and correct the issues raised. We use ```ruff``` for linting. It helps to enforce a coding standard, offers simple refactoring suggestions. ```bash - $ make lint + make lint ``` 11. When things are good, add new files and commit your changes. ```bash - $ git add my_file1.py my_file2.py ... - $ git commit + git add my_file1.py my_file2.py ... + git commit ``` It's a good practice to regularly sync your local copy of the project with the upstream code to keep up with the recent updates. ```bash - $ git fetch upstream - $ git rebase upstream/main + git fetch upstream + git rebase upstream/main # or for the development version - $ git rebase upstream/dev + git rebase upstream/dev ``` 12. Send a PR to ```dev``` branch. @@ -115,7 +126,7 @@ The following steps are tested on an Ubuntu system. Push your branch to your fork. ```bash - $ git push -u origin an_informative_name_for_my_branch + git push -u origin an_informative_name_for_my_branch ``` Then go to your fork's Github page and click on 'Pull request' to send your ✨**PR**✨. @@ -137,9 +148,9 @@ If you prefer working within a Docker container as your development environment, 2. Clone 🐸TTS and add the main repo as a new remote named ```upsteam```. ```bash - $ git clone git@github.com:/coqui-ai-TTS.git - $ cd coqui-ai-TTS - $ git remote add upstream https://github.com/idiap/coqui-ai-TTS.git + git clone git@github.com:/coqui-ai-TTS.git + cd coqui-ai-TTS + git remote add upstream https://github.com/idiap/coqui-ai-TTS.git ``` 3. Build the Docker Image as your development environment (it installs all of the dependencies for you): diff --git a/Dockerfile b/Dockerfile index 05c37d78fa..e9d331bc41 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ RUN rm -rf /root/.cache/pip WORKDIR /root COPY . /root -RUN make install +RUN pip3 install -e .[all] ENTRYPOINT ["tts"] CMD ["--help"] diff --git a/Makefile b/Makefile index 077b4b23e5..1d6867f5e8 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ .DEFAULT_GOAL := help -.PHONY: test system-deps dev-deps style lint install install_dev help docs +.PHONY: test system-deps style lint install install_dev help docs help: @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' @@ -50,27 +50,24 @@ test_failed: ## only run tests failed the last time. coverage run -m nose2 -F -v -B tests style: ## update code style. 
- black ${target_dirs} + uv run --only-dev black ${target_dirs} lint: ## run linters. - ruff check ${target_dirs} - black ${target_dirs} --check + uv run --only-dev ruff check ${target_dirs} + uv run --only-dev black ${target_dirs} --check system-deps: ## install linux system deps sudo apt-get install -y libsndfile1-dev -dev-deps: ## install development deps - pip install -r requirements.dev.txt - build-docs: ## build the docs cd docs && make clean && make build install: ## install 🐸 TTS - pip install -e .[all] + uv sync --all-extras install_dev: ## install 🐸 TTS for development. - pip install -e .[all,dev] - pre-commit install + uv sync --all-extras + uv run pre-commit install docs: ## build the docs $(MAKE) -C docs clean && $(MAKE) -C docs html diff --git a/README.md b/README.md index c6a1db4fff..507cce9298 100644 --- a/README.md +++ b/README.md @@ -1,16 +1,13 @@ ## 🐸Coqui TTS News - 📣 Fork of the [original, unmaintained repository](https://github.com/coqui-ai/TTS). New PyPI package: [coqui-tts](https://pypi.org/project/coqui-tts) +- 📣 Prebuilt wheels are now also published for Mac and Windows (in addition to Linux as before) for easier installation across platforms. - 📣 ⓍTTSv2 is here with 16 languages and better performance across the board. - 📣 ⓍTTS fine-tuning code is out. Check the [example recipes](https://github.com/idiap/coqui-ai-TTS/tree/dev/recipes/ljspeech). - 📣 ⓍTTS can now stream with <200ms latency. - 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://coqui-tts.readthedocs.io/en/latest/models/xtts.html) - 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://coqui-tts.readthedocs.io/en/latest/models/bark.html) - 📣 You can use [~1100 Fairseq models](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS. -- 📣 🐸TTS now supports 🐢Tortoise with faster inference. [Docs](https://coqui-tts.readthedocs.io/en/latest/models/tortoise.html) - -
- ## @@ -27,7 +24,6 @@ ______________________________________________________________________ [![Discord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) [![License]()](https://opensource.org/licenses/MPL-2.0) [![PyPI version](https://badge.fury.io/py/coqui-tts.svg)](https://badge.fury.io/py/coqui-tts) -[![Covenant](https://camo.githubusercontent.com/7d620efaa3eac1c5b060ece5d6aacfcc8b81a74a04d05cd0398689c01c4463bb/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6e7472696275746f72253230436f76656e616e742d76322e3025323061646f707465642d6666363962342e737667)](https://github.com/idiap/coqui-ai-TTS/blob/main/CODE_OF_CONDUCT.md) [![Downloads](https://pepy.tech/badge/coqui-tts)](https://pepy.tech/project/coqui-tts) [![DOI](https://zenodo.org/badge/265612440.svg)](https://zenodo.org/badge/latestdoi/265612440) @@ -43,12 +39,11 @@ ______________________________________________________________________ ## 💬 Where to ask questions Please use our dedicated channels for questions and discussion. Help is much more valuable if it's shared publicly so that more people can benefit from it. -| Type | Platforms | -| ------------------------------- | --------------------------------------- | -| 🚨 **Bug Reports** | [GitHub Issue Tracker] | -| 🎁 **Feature Requests & Ideas** | [GitHub Issue Tracker] | -| 👩‍💻 **Usage Questions** | [GitHub Discussions] | -| 🗯 **General Discussion** | [GitHub Discussions] or [Discord] | +| Type | Platforms | +| -------------------------------------------- | ----------------------------------- | +| 🚨 **Bug Reports, Feature Requests & Ideas** | [GitHub Issue Tracker] | +| 👩‍💻 **Usage Questions** | [GitHub Discussions] | +| 🗯 **General Discussion** | [GitHub Discussions] or [Discord] | [github issue tracker]: https://github.com/idiap/coqui-ai-TTS/issues [github discussions]: https://github.com/idiap/coqui-ai-TTS/discussions @@ -66,15 +61,10 @@ repository are also still a useful source of information. | 💼 **Documentation** | [ReadTheDocs](https://coqui-tts.readthedocs.io/en/latest/) | 💾 **Installation** | [TTS/README.md](https://github.com/idiap/coqui-ai-TTS/tree/dev#installation)| | 👩‍💻 **Contributing** | [CONTRIBUTING.md](https://github.com/idiap/coqui-ai-TTS/blob/main/CONTRIBUTING.md)| -| 📌 **Road Map** | [Main Development Plans](https://github.com/coqui-ai/TTS/issues/378) | 🚀 **Released Models** | [Standard models](https://github.com/idiap/coqui-ai-TTS/blob/dev/TTS/.models.json) and [Fairseq models in ~1100 languages](https://github.com/idiap/coqui-ai-TTS#example-text-to-speech-using-fairseq-models-in-1100-languages-)| -| 📰 **Papers** | [TTS Papers](https://github.com/erogol/TTS-papers)| ## Features -- High-performance Deep Learning models for Text2Speech tasks. - - Text2Spec models (Tacotron, Tacotron2, Glow-TTS, SpeedySpeech). - - Speaker Encoder to compute speaker embeddings efficiently. - - Vocoder models (MelGAN, Multiband-MelGAN, GAN-TTS, ParallelWaveGAN, WaveGrad, WaveRNN) +- High-performance Deep Learning models for Text2Speech tasks. See lists of models below. - Fast and efficient model training. - Detailed training logs on the terminal and Tensorboard. - Support for Multi-speaker TTS. @@ -180,8 +170,8 @@ pip install -e .[server,ja] If you are on Ubuntu (Debian), you can also run following commands for installation. ```bash -$ make system-deps # intended to be used on Ubuntu (Debian). Let us know if you have a different OS. 
-$ make install +make system-deps # intended to be used on Ubuntu (Debian). Let us know if you have a different OS. +make install ``` If you are on Windows, 👑@GuyPaddock wrote installation instructions diff --git a/dockerfiles/Dockerfile.dev b/dockerfiles/Dockerfile.dev index af0d3fc0cd..b61bc4de94 100644 --- a/dockerfiles/Dockerfile.dev +++ b/dockerfiles/Dockerfile.dev @@ -20,4 +20,4 @@ RUN rm -rf /root/.cache/pip WORKDIR /root COPY . /root -RUN make install +RUN pip3 install -e .[all,dev] diff --git a/pyproject.toml b/pyproject.toml index 6299bb0d0a..c3ec0075d0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,7 @@ dependencies = [ "cython>=3.0.0", "scipy>=1.11.2", "torch>=2.1", - "torchaudio", + "torchaudio>=2.1.0", "soundfile>=0.12.0", "librosa>=0.10.1", "inflect>=5.6.0", @@ -77,15 +77,6 @@ dependencies = [ ] [project.optional-dependencies] -# Development dependencies -dev = [ - "black==24.2.0", - "coverage[toml]>=7", - "nose2>=0.15", - "pre-commit>=3", - "ruff==0.4.9", - "tomli>=2; python_version < '3.11'", -] # Dependencies for building the documentation docs = [ "furo>=2023.5.20", @@ -115,6 +106,7 @@ ko = [ "hangul_romanize>=0.1.0", "jamo>=0.4.1", "g2pkk>=0.1.1", + "pip>=22.2", ] # Japanese ja = [ @@ -136,6 +128,15 @@ all = [ "coqui-tts[notebooks,server,bn,ja,ko,zh]", ] +[dependency-groups] +dev = [ + "black==24.2.0", + "coverage[toml]>=7", + "nose2>=0.15", + "pre-commit>=3", + "ruff==0.7.0", +] + [project.urls] Homepage = "https://github.com/idiap/coqui-ai-TTS" Documentation = "https://coqui-tts.readthedocs.io" @@ -151,13 +152,12 @@ tts-server = "TTS.server.server:main" constraint-dependencies = ["numba>0.58.0"] [tool.ruff] -target-version = "py39" line-length = 120 +extend-exclude = ["*.ipynb"] lint.extend-select = [ "B033", # duplicate-value "C416", # unnecessary-comprehension "D419", # empty-docstring - "E999", # syntax-error "F401", # unused-import "F704", # yield-outside-function "F706", # return-outside-function diff --git a/requirements.dev.txt b/requirements.dev.txt deleted file mode 100644 index 74ec0cd80c..0000000000 --- a/requirements.dev.txt +++ /dev/null @@ -1,8 +0,0 @@ -# Generated via scripts/generate_requirements.py and pre-commit hook. -# Do not edit this file; modify pyproject.toml instead. -black==24.2.0 -coverage[toml]>=7 -nose2>=0.15 -pre-commit>=3 -ruff==0.4.9 -tomli>=2; python_version < '3.11' diff --git a/scripts/generate_requirements.py b/scripts/generate_requirements.py deleted file mode 100644 index bbd32bafd2..0000000000 --- a/scripts/generate_requirements.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python -"""Generate requirements/*.txt files from pyproject.toml. 
- -Adapted from: -https://github.com/numpy/numpydoc/blob/e7c6baf00f5f73a4a8f8318d0cb4e04949c9a5d1/tools/generate_requirements.py -""" - -import sys -from pathlib import Path - -try: # standard module since Python 3.11 - import tomllib as toml -except ImportError: - try: # available for older Python via pip - import tomli as toml - except ImportError: - sys.exit("Please install `tomli` first: `pip install tomli`") - -script_pth = Path(__file__) -repo_dir = script_pth.parent.parent -script_relpth = script_pth.relative_to(repo_dir) -header = [ - f"# Generated via {script_relpth.as_posix()} and pre-commit hook.", - "# Do not edit this file; modify pyproject.toml instead.", -] - - -def generate_requirement_file(name: str, req_list: list[str]) -> None: - req_fname = repo_dir / f"requirements.{name}.txt" - req_fname.write_text("\n".join(header + req_list) + "\n") - - -def main() -> None: - pyproject = toml.loads((repo_dir / "pyproject.toml").read_text()) - generate_requirement_file("dev", pyproject["project"]["optional-dependencies"]["dev"]) - - -if __name__ == "__main__": - main() From f6a4d5e4694deaf44b7fcf28eb943dc990c62e7d Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 4 Nov 2024 19:04:39 +0100 Subject: [PATCH 165/255] chore: bump version to 0.24.3 [ci skip] --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c3ec0075d0..2a3e6f262c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ include = ["TTS*"] [project] name = "coqui-tts" -version = "0.24.2" +version = "0.24.3" description = "Deep learning for Text to Speech." readme = "README.md" requires-python = ">=3.9, <3.13" From 45b8b5b3c30cdbceca2075341f40933541ea74f6 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 5 Nov 2024 23:34:41 +0100 Subject: [PATCH 166/255] build: set upper version limit for trainer (#130) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2a3e6f262c..389c0c66b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,7 +62,7 @@ dependencies = [ # Training "matplotlib>=3.7.0", # Coqui stack - "coqui-tts-trainer>=0.1.4", + "coqui-tts-trainer>=0.1.4,<0.2.0", "coqpit>=0.0.16", # Gruut + supported languages "gruut[de,es,fr]>=2.4.0", From ef8158d2811296e1d0ea6770f65b8a94fd8365cd Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 6 Nov 2024 10:06:20 +0100 Subject: [PATCH 167/255] build: use group not extra for docs dependencies --- README.md | 4 +--- pyproject.toml | 18 +++++++++--------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 507cce9298..5ca825b6ba 100644 --- a/README.md +++ b/README.md @@ -147,9 +147,7 @@ The following extras allow the installation of optional dependencies: | Name | Description | |------|-------------| -| `all` | All optional dependencies, except `dev` and `docs` | -| `dev` | Development dependencies | -| `docs` | Dependencies for building the documentation | +| `all` | All optional dependencies | | `notebooks` | Dependencies only used in notebooks | | `server` | Dependencies to run the TTS server | | `bn` | Bangla G2P | diff --git a/pyproject.toml b/pyproject.toml index 389c0c66b8..23387fd37d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,15 +77,6 @@ dependencies = [ ] [project.optional-dependencies] -# Dependencies for building the documentation -docs = [ - "furo>=2023.5.20", - "myst-parser==2.0.0", - "sphinx==7.2.5", - "sphinx_inline_tabs>=2023.4.21", - 
"sphinx_copybutton>=0.1", - "linkify-it-py>=2.0.0", -] # Only used in notebooks notebooks = [ "bokeh==1.4.0", @@ -136,6 +127,15 @@ dev = [ "pre-commit>=3", "ruff==0.7.0", ] +# Dependencies for building the documentation +docs = [ + "furo>=2023.5.20", + "myst-parser==2.0.0", + "sphinx==7.2.5", + "sphinx_inline_tabs>=2023.4.21", + "sphinx_copybutton>=0.1", + "linkify-it-py>=2.0.0", +] [project.urls] Homepage = "https://github.com/idiap/coqui-ai-TTS" From 020a72434ef451b4088a03524f0cec81d3304114 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 6 Nov 2024 10:06:52 +0100 Subject: [PATCH 168/255] ci(readthedocs): build docs with uv Based on: https://github.com/readthedocs/readthedocs.org/blob/7c5143848f3a7709193918fda3dae1344a4af9df/docs/user/build-customization.rst#install-dependencies-with-uv --- .readthedocs.yml | 15 +++++++-------- docs/README.md | 0 docs/requirements.txt | 6 ------ 3 files changed, 7 insertions(+), 14 deletions(-) delete mode 100644 docs/README.md delete mode 100644 docs/requirements.txt diff --git a/.readthedocs.yml b/.readthedocs.yml index e19a4dccb7..355e3485e7 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -9,14 +9,13 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.11" - -# Optionally set the version of Python and requirements required to build your docs -python: - install: - - path: . - extra_requirements: - - docs + python: "3.12" + commands: + - asdf plugin add uv + - asdf install uv latest + - asdf global uv latest + - uv sync --group docs + - uv run -m sphinx -T -b html -d docs/_build/doctrees -D language=en docs/source $READTHEDOCS_OUTPUT/html # Build documentation in the docs/ directory with Sphinx sphinx: diff --git a/docs/README.md b/docs/README.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index 86ccae9cca..0000000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -furo -myst-parser == 2.0.0 -sphinx == 7.2.5 -sphinx_inline_tabs -sphinx_copybutton -linkify-it-py From 0971bc236ea41e22970764b11dacefcd8f2273b8 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 7 Nov 2024 00:33:54 +0100 Subject: [PATCH 169/255] refactor: use external package for monotonic alignment --- .../layers/delightful_tts/acoustic_model.py | 3 +- TTS/tts/models/align_tts.py | 3 +- TTS/tts/models/forward_tts.py | 3 +- TTS/tts/models/glow_tts.py | 3 +- TTS/tts/models/vits.py | 3 +- TTS/tts/utils/helpers.py | 74 ------------------- TTS/tts/utils/monotonic_align/__init__.py | 0 TTS/tts/utils/monotonic_align/core.pyx | 47 ------------ pyproject.toml | 1 + 9 files changed, 11 insertions(+), 126 deletions(-) delete mode 100644 TTS/tts/utils/monotonic_align/__init__.py delete mode 100644 TTS/tts/utils/monotonic_align/core.pyx diff --git a/TTS/tts/layers/delightful_tts/acoustic_model.py b/TTS/tts/layers/delightful_tts/acoustic_model.py index 83989f9ba4..3c0e3a3a76 100644 --- a/TTS/tts/layers/delightful_tts/acoustic_model.py +++ b/TTS/tts/layers/delightful_tts/acoustic_model.py @@ -5,6 +5,7 @@ import torch import torch.nn.functional as F from coqpit import Coqpit +from monotonic_alignment_search import maximum_path from torch import nn from TTS.tts.layers.delightful_tts.conformer import Conformer @@ -19,7 +20,7 @@ from TTS.tts.layers.delightful_tts.pitch_adaptor import PitchAdaptor from TTS.tts.layers.delightful_tts.variance_predictor import VariancePredictor from TTS.tts.layers.generic.aligner import AlignmentNetwork -from TTS.tts.utils.helpers import generate_path, 
maximum_path, sequence_mask +from TTS.tts.utils.helpers import generate_path, sequence_mask logger = logging.getLogger(__name__) diff --git a/TTS/tts/models/align_tts.py b/TTS/tts/models/align_tts.py index 2d27a57850..1c3d57582e 100644 --- a/TTS/tts/models/align_tts.py +++ b/TTS/tts/models/align_tts.py @@ -3,6 +3,7 @@ import torch from coqpit import Coqpit +from monotonic_alignment_search import maximum_path from torch import nn from trainer.io import load_fsspec @@ -12,7 +13,7 @@ from TTS.tts.layers.feed_forward.encoder import Encoder from TTS.tts.layers.generic.pos_encoding import PositionalEncoding from TTS.tts.models.base_tts import BaseTTS -from TTS.tts.utils.helpers import generate_path, maximum_path, sequence_mask +from TTS.tts.utils.helpers import generate_path, sequence_mask from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_spectrogram diff --git a/TTS/tts/models/forward_tts.py b/TTS/tts/models/forward_tts.py index 4b74462dd5..e7bc86374d 100644 --- a/TTS/tts/models/forward_tts.py +++ b/TTS/tts/models/forward_tts.py @@ -4,6 +4,7 @@ import torch from coqpit import Coqpit +from monotonic_alignment_search import maximum_path from torch import nn from torch.cuda.amp.autocast_mode import autocast from trainer.io import load_fsspec @@ -14,7 +15,7 @@ from TTS.tts.layers.generic.pos_encoding import PositionalEncoding from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor from TTS.tts.models.base_tts import BaseTTS -from TTS.tts.utils.helpers import average_over_durations, generate_path, maximum_path, sequence_mask +from TTS.tts.utils.helpers import average_over_durations, generate_path, sequence_mask from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_avg_energy, plot_avg_pitch, plot_spectrogram diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py index 64954d283c..5ea69865b2 100644 --- a/TTS/tts/models/glow_tts.py +++ b/TTS/tts/models/glow_tts.py @@ -4,6 +4,7 @@ import torch from coqpit import Coqpit +from monotonic_alignment_search import maximum_path from torch import nn from torch.cuda.amp.autocast_mode import autocast from torch.nn import functional as F @@ -13,7 +14,7 @@ from TTS.tts.layers.glow_tts.decoder import Decoder from TTS.tts.layers.glow_tts.encoder import Encoder from TTS.tts.models.base_tts import BaseTTS -from TTS.tts.utils.helpers import generate_path, maximum_path, sequence_mask +from TTS.tts.utils.helpers import generate_path, sequence_mask from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.synthesis import synthesis from TTS.tts.utils.text.tokenizer import TTSTokenizer diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index b014e4fdde..af803a0f76 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -11,6 +11,7 @@ import torchaudio from coqpit import Coqpit from librosa.filters import mel as librosa_mel_fn +from monotonic_alignment_search import maximum_path from torch import nn from torch.cuda.amp.autocast_mode import autocast from torch.nn import functional as F @@ -28,7 +29,7 @@ from TTS.tts.layers.vits.stochastic_duration_predictor import StochasticDurationPredictor from TTS.tts.models.base_tts import BaseTTS from TTS.tts.utils.fairseq import rehash_fairseq_vits_checkpoint -from TTS.tts.utils.helpers import generate_path, maximum_path, rand_segments, segment, sequence_mask +from 
TTS.tts.utils.helpers import generate_path, rand_segments, segment, sequence_mask from TTS.tts.utils.languages import LanguageManager from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.synthesis import synthesis diff --git a/TTS/tts/utils/helpers.py b/TTS/tts/utils/helpers.py index 7429d0fcc8..d1722501f7 100644 --- a/TTS/tts/utils/helpers.py +++ b/TTS/tts/utils/helpers.py @@ -3,13 +3,6 @@ from scipy.stats import betabinom from torch.nn import functional as F -try: - from TTS.tts.utils.monotonic_align.core import maximum_path_c - - CYTHON = True -except ModuleNotFoundError: - CYTHON = False - class StandardScaler: """StandardScaler for mean-scale normalization with the given mean and scale values.""" @@ -168,73 +161,6 @@ def generate_path(duration, mask): return path -def maximum_path(value, mask): - if CYTHON: - return maximum_path_cython(value, mask) - return maximum_path_numpy(value, mask) - - -def maximum_path_cython(value, mask): - """Cython optimised version. - Shapes: - - value: :math:`[B, T_en, T_de]` - - mask: :math:`[B, T_en, T_de]` - """ - value = value * mask - device = value.device - dtype = value.dtype - value = value.data.cpu().numpy().astype(np.float32) - path = np.zeros_like(value).astype(np.int32) - mask = mask.data.cpu().numpy() - - t_x_max = mask.sum(1)[:, 0].astype(np.int32) - t_y_max = mask.sum(2)[:, 0].astype(np.int32) - maximum_path_c(path, value, t_x_max, t_y_max) - return torch.from_numpy(path).to(device=device, dtype=dtype) - - -def maximum_path_numpy(value, mask, max_neg_val=None): - """ - Monotonic alignment search algorithm - Numpy-friendly version. It's about 4 times faster than torch version. - value: [b, t_x, t_y] - mask: [b, t_x, t_y] - """ - if max_neg_val is None: - max_neg_val = -np.inf # Patch for Sphinx complaint - value = value * mask - - device = value.device - dtype = value.dtype - value = value.cpu().detach().numpy() - mask = mask.cpu().detach().numpy().astype(bool) - - b, t_x, t_y = value.shape - direction = np.zeros(value.shape, dtype=np.int64) - v = np.zeros((b, t_x), dtype=np.float32) - x_range = np.arange(t_x, dtype=np.float32).reshape(1, -1) - for j in range(t_y): - v0 = np.pad(v, [[0, 0], [1, 0]], mode="constant", constant_values=max_neg_val)[:, :-1] - v1 = v - max_mask = v1 >= v0 - v_max = np.where(max_mask, v1, v0) - direction[:, :, j] = max_mask - - index_mask = x_range <= j - v = np.where(index_mask, v_max + value[:, :, j], max_neg_val) - direction = np.where(mask, direction, 1) - - path = np.zeros(value.shape, dtype=np.float32) - index = mask[:, :, 0].sum(1).astype(np.int64) - 1 - index_range = np.arange(b) - for j in reversed(range(t_y)): - path[index_range, index, j] = 1 - index = index + direction[index_range, index, j] - 1 - path = path * mask.astype(np.float32) - path = torch.from_numpy(path).to(device=device, dtype=dtype) - return path - - def beta_binomial_prior_distribution(phoneme_count, mel_count, scaling_factor=1.0): P, M = phoneme_count, mel_count x = np.arange(0, P) diff --git a/TTS/tts/utils/monotonic_align/__init__.py b/TTS/tts/utils/monotonic_align/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/TTS/tts/utils/monotonic_align/core.pyx b/TTS/tts/utils/monotonic_align/core.pyx deleted file mode 100644 index 091fcc3a50..0000000000 --- a/TTS/tts/utils/monotonic_align/core.pyx +++ /dev/null @@ -1,47 +0,0 @@ -import numpy as np - -cimport cython -cimport numpy as np - -from cython.parallel import prange - - -@cython.boundscheck(False) -@cython.wraparound(False) -cdef void 
maximum_path_each(int[:,::1] path, float[:,::1] value, int t_x, int t_y, float max_neg_val) nogil: - cdef int x - cdef int y - cdef float v_prev - cdef float v_cur - cdef float tmp - cdef int index = t_x - 1 - - for y in range(t_y): - for x in range(max(0, t_x + y - t_y), min(t_x, y + 1)): - if x == y: - v_cur = max_neg_val - else: - v_cur = value[x, y-1] - if x == 0: - if y == 0: - v_prev = 0. - else: - v_prev = max_neg_val - else: - v_prev = value[x-1, y-1] - value[x, y] = max(v_cur, v_prev) + value[x, y] - - for y in range(t_y - 1, -1, -1): - path[index, y] = 1 - if index != 0 and (index == y or value[index, y-1] < value[index-1, y-1]): - index = index - 1 - - -@cython.boundscheck(False) -@cython.wraparound(False) -cpdef void maximum_path_c(int[:,:,::1] paths, float[:,:,::1] values, int[::1] t_xs, int[::1] t_ys, float max_neg_val=-1e9) nogil: - cdef int b = values.shape[0] - - cdef int i - for i in prange(b, nogil=True): - maximum_path_each(paths[i], values[i], t_xs[i], t_ys[i], max_neg_val) diff --git a/pyproject.toml b/pyproject.toml index 23387fd37d..d13e2145d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ dependencies = [ # Coqui stack "coqui-tts-trainer>=0.1.4,<0.2.0", "coqpit>=0.0.16", + "monotonic-alignment-search>=0.1.0", # Gruut + supported languages "gruut[de,es,fr]>=2.4.0", # Tortoise From 9dd7ae6cca4a7c6db254f6d3c42aebcf34170af5 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 7 Nov 2024 10:32:57 +0100 Subject: [PATCH 170/255] build: switch to hatch Setuptools is not needed anymore because the Cython extension is now built in an external package. --- MANIFEST.in | 10 ---------- pyproject.toml | 49 +++++++++++++++++++++++++++++++++++++++---------- setup.py | 37 ------------------------------------- 3 files changed, 39 insertions(+), 57 deletions(-) delete mode 100644 MANIFEST.in delete mode 100644 setup.py diff --git a/MANIFEST.in b/MANIFEST.in deleted file mode 100644 index 8d092ceff2..0000000000 --- a/MANIFEST.in +++ /dev/null @@ -1,10 +0,0 @@ -include README.md -include LICENSE.txt -include *.cff -recursive-include TTS *.json -recursive-include TTS *.html -recursive-include TTS *.png -recursive-include TTS *.md -recursive-include TTS *.py -recursive-include TTS *.pyx -recursive-include images *.png diff --git a/pyproject.toml b/pyproject.toml index d13e2145d8..379187feed 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,14 +1,27 @@ -[build-system] -requires = [ - "setuptools", - "setuptools-scm", - "cython>=3.0.0", - "numpy>=2.0.0", -] -build-backend = "setuptools.build_meta" +# ,*++++++*, ,*++++++*, +# *++. .+++ *++. .++* +# *+* ,++++* *+* *+* ,++++, *+* +# ,+, .++++++++++* ,++,,,,*+, ,++++++++++. *+, +# *+. .++++++++++++..++ *+.,++++++++++++. .+* +# .+* ++++++++++++.*+, .+*.++++++++++++ *+, +# .++ *++++++++* ++, .++.*++++++++* ++, +# ,+++*. . .*++, ,++*. .*+++* +# *+, .,*++**. .**++**. ,+* +# .+* *+, +# *+. Coqui .+* +# *+* +++ TTS +++ *+* +# .+++*. . . *+++. +# ,+* *+++*... ...*+++* *+, +# .++. .""""+++++++****+++++++"""". ++. +# ,++. .++, +# .++* *++. +# *+++, ,+++* +# .,*++++::::::++++*,. 
+# `````` -[tool.setuptools.packages.find] -include = ["TTS*"] +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" [project] name = "coqui-tts" @@ -152,6 +165,22 @@ tts-server = "TTS.server.server:main" [tool.uv] constraint-dependencies = ["numba>0.58.0"] +[tool.hatch.build] +exclude = [ + "/.github", + "/.gitignore", + "/.pre-commit-config.yaml", + "/.readthedocs.yml", + "/Makefile", + "/dockerfiles", + "/run_bash_tests.sh", + "/scripts", + "/tests", +] + +[tool.hatch.build.targets.wheel] +packages = ["TTS"] + [tool.ruff] line-length = 120 extend-exclude = ["*.ipynb"] diff --git a/setup.py b/setup.py deleted file mode 100644 index 1cf2def1d3..0000000000 --- a/setup.py +++ /dev/null @@ -1,37 +0,0 @@ -#!/usr/bin/env python -# ,*++++++*, ,*++++++*, -# *++. .+++ *++. .++* -# *+* ,++++* *+* *+* ,++++, *+* -# ,+, .++++++++++* ,++,,,,*+, ,++++++++++. *+, -# *+. .++++++++++++..++ *+.,++++++++++++. .+* -# .+* ++++++++++++.*+, .+*.++++++++++++ *+, -# .++ *++++++++* ++, .++.*++++++++* ++, -# ,+++*. . .*++, ,++*. .*+++* -# *+, .,*++**. .**++**. ,+* -# .+* *+, -# *+. Coqui .+* -# *+* +++ TTS +++ *+* -# .+++*. . . *+++. -# ,+* *+++*... ...*+++* *+, -# .++. .""""+++++++****+++++++"""". ++. -# ,++. .++, -# .++* *++. -# *+++, ,+++* -# .,*++++::::::++++*,. -# `````` - -import numpy -from Cython.Build import cythonize -from setuptools import Extension, setup - -exts = [ - Extension( - name="TTS.tts.utils.monotonic_align.core", - sources=["TTS/tts/utils/monotonic_align/core.pyx"], - ) -] -setup( - include_dirs=numpy.get_include(), - ext_modules=cythonize(exts, language_level=3), - zip_safe=False, -) From d30eba573e089ef2770ac574ebff91f59df3e743 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 7 Nov 2024 10:33:51 +0100 Subject: [PATCH 171/255] chore: remove obsolete code owners file --- CODE_OWNERS.rst | 75 ------------------------------------------------- 1 file changed, 75 deletions(-) delete mode 100644 CODE_OWNERS.rst diff --git a/CODE_OWNERS.rst b/CODE_OWNERS.rst deleted file mode 100644 index 768b573911..0000000000 --- a/CODE_OWNERS.rst +++ /dev/null @@ -1,75 +0,0 @@ -TTS code owners / governance system -========================================== - -TTS is run under a governance system inspired (and partially copied from) by the `Mozilla module ownership system `_. The project is roughly divided into modules, and each module has its owners, which are responsible for reviewing pull requests and deciding on technical direction for their modules. Module ownership authority is given to people who have worked extensively on areas of the project. - -Module owners also have the authority of naming other module owners or appointing module peers, which are people with authority to review pull requests in that module. They can also sub-divide their module into sub-modules with their owners. - -Module owners are not tyrants. They are chartered to make decisions with input from the community and in the best interest of the community. Module owners are not required to make code changes or additions solely because the community wants them to do so. (Like anyone else, the module owners may write code because they want to, because their employers want them to, because the community wants them to, or for some other reason.) Module owners do need to pay attention to patches submitted to that module. However “pay attention” does not mean agreeing to every patch. Some patches may not make sense for the WebThings project; some may be poorly implemented. 
Module owners have the authority to decline a patch; this is a necessary part of the role. We ask the module owners to describe in the relevant issue their reasons for wanting changes to a patch, for declining it altogether, or for postponing review for some period. We don’t ask or expect them to rewrite patches to make them acceptable. Similarly, module owners may need to delay review of a promising patch due to an upcoming deadline. For example, a patch may be of interest, but not for the next milestone. In such a case it may make sense for the module owner to postpone review of a patch until after matters needed for a milestone have been finalized. Again, we expect this to be described in the relevant issue. And of course, it shouldn’t go on very often or for very long or escalation and review is likely. - -The work of the various module owners and peers is overseen by the global owners, which are responsible for making final decisions in case there's conflict between owners as well as set the direction for the project as a whole. - -This file describes module owners who are active on the project and which parts of the code they have expertise on (and interest in). If you're making changes to the code and are wondering who's an appropriate person to talk to, this list will tell you who to ping. - -There's overlap in the areas of expertise of each owner, and in particular when looking at which files are covered by each area, there is a lot of overlap. Don't worry about getting it exactly right when requesting review, any code owner will be happy to redirect the request to a more appropriate person. - -Global owners ----------------- - -These are people who have worked on the project extensively and are familiar with all or most parts of it. Their expertise and review guidance is trusted by other code owners to cover their own areas of expertise. In case of conflicting opinions from other owners, global owners will make a final decision. - -- Eren Gölge (@erogol) -- Reuben Morais (@reuben) - -Training, feeding ------------------ - -- Eren Gölge (@erogol) - -Model exporting ---------------- - -- Eren Gölge (@erogol) - -Multi-Speaker TTS ------------------ - -- Eren Gölge (@erogol) -- Edresson Casanova (@edresson) - -TTS ---- - -- Eren Gölge (@erogol) - -Vocoders --------- - -- Eren Gölge (@erogol) - -Speaker Encoder ---------------- - -- Eren Gölge (@erogol) - -Testing & CI ------------- - -- Eren Gölge (@erogol) -- Reuben Morais (@reuben) - -Python bindings ---------------- - -- Eren Gölge (@erogol) -- Reuben Morais (@reuben) - -Documentation -------------- - -- Eren Gölge (@erogol) - -Third party bindings --------------------- - -Owned by the author. 
From 683ee664a8be4fcec3a5f377890dc7f22394476b Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 7 Nov 2024 10:34:12 +0100 Subject: [PATCH 172/255] ci: simplify release, cibuildwheel not needed anymore --- .github/workflows/pypi-release.yml | 48 ++++++++++++------------------ 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 822990e967..4122f69f73 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -7,8 +7,7 @@ defaults: shell: bash jobs: - build-sdist: - name: Build source distribution + build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -20,37 +19,29 @@ jobs: if [[ "v$version" != "$tag" ]]; then exit 1 fi - - uses: actions/setup-python@v5 + - name: Install uv + uses: astral-sh/setup-uv@v3 with: - python-version: 3.9 - - run: | - python -m pip install -U pip setuptools build - - run: | - python -m build - - run: | - pip install dist/*.tar.gz - - uses: actions/upload-artifact@v4 - with: - name: build-sdist - path: dist/*.tar.gz - build-wheels: - name: Build wheels on ${{ matrix.os }} - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest, windows-latest, macos-latest] - steps: - - uses: actions/checkout@v4 - - name: Build wheels - uses: pypa/cibuildwheel@v2.21.1 + version: "0.4.27" + enable-cache: true + cache-dependency-glob: "**/pyproject.toml" + - name: Set up Python + run: uv python install 3.12 + - name: Build sdist and wheel + run: uv build + - name: Test installation of sdist and wheel + run: | + uv venv --no-project + uv pip install dist/*.tar.gz + uv pip install dist/*.whl - uses: actions/upload-artifact@v4 with: - name: build-wheels-${{ matrix.os }} - path: ./wheelhouse/*.whl + name: build + path: dist/* publish-artifacts: name: Publish to PyPI runs-on: ubuntu-latest - needs: [build-sdist, build-wheels] + needs: [build] environment: name: release url: https://pypi.org/p/coqui-tts @@ -60,8 +51,7 @@ jobs: - uses: actions/download-artifact@v4 with: path: dist - pattern: build-* - merge-multiple: true + pattern: build - run: | ls -lh dist/ - name: Publish package distributions to PyPI From 540e8d6cf2636408e1d0c9d0b95594f265add052 Mon Sep 17 00:00:00 2001 From: Shavit Date: Sat, 9 Nov 2024 12:35:54 -0500 Subject: [PATCH 173/255] fix(bin.synthesize): return speakers names only (#147) --- TTS/bin/synthesize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index bc01ffd595..20e429df04 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -429,7 +429,7 @@ def main(): logger.info( "Available speaker ids: (Set --speaker_idx flag to one of these values to use the multi-speaker model." ) - logger.info(synthesizer.tts_model.speaker_manager.name_to_id) + logger.info(list(synthesizer.tts_model.speaker_manager.name_to_id.keys())) return # query langauge ids of a multi-lingual model. From 2df9bfa78eb338d1b0972c25f4d236403b4e032d Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 9 Nov 2024 18:37:08 +0100 Subject: [PATCH 174/255] refactor: handle deprecation of torch.cuda.amp.autocast (#144) torch.cuda.amp.autocast(args...) and torch.cpu.amp.autocast(args...) will be deprecated. Please use torch.autocast("cuda", args...) or torch.autocast("cpu", args...) instead. 
https://pytorch.org/docs/stable/amp.html --- TTS/encoder/models/lstm.py | 2 +- TTS/tts/layers/bark/load_model.py | 9 ++------- TTS/tts/layers/tortoise/diffusion_decoder.py | 3 +-- TTS/tts/models/delightful_tts.py | 7 +++---- TTS/tts/models/forward_tts.py | 3 +-- TTS/tts/models/glow_tts.py | 3 +-- TTS/tts/models/tacotron.py | 3 +-- TTS/tts/models/tacotron2.py | 3 +-- TTS/tts/models/vits.py | 7 +++---- 9 files changed, 14 insertions(+), 26 deletions(-) diff --git a/TTS/encoder/models/lstm.py b/TTS/encoder/models/lstm.py index 51852b5b82..4e0a7523aa 100644 --- a/TTS/encoder/models/lstm.py +++ b/TTS/encoder/models/lstm.py @@ -86,7 +86,7 @@ def forward(self, x, l2_norm=True): - x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})` """ with torch.no_grad(): - with torch.cuda.amp.autocast(enabled=False): + with torch.autocast("cuda", enabled=False): if self.use_torch_spec: x.squeeze_(1) x = self.torch_spec(x) diff --git a/TTS/tts/layers/bark/load_model.py b/TTS/tts/layers/bark/load_model.py index 72eca30ac6..6b7caab916 100644 --- a/TTS/tts/layers/bark/load_model.py +++ b/TTS/tts/layers/bark/load_model.py @@ -12,13 +12,8 @@ from TTS.tts.layers.bark.model_fine import FineGPT, FineGPTConfig from TTS.utils.generic_utils import is_pytorch_at_least_2_4 -if ( - torch.cuda.is_available() - and hasattr(torch.cuda, "amp") - and hasattr(torch.cuda.amp, "autocast") - and torch.cuda.is_bf16_supported() -): - autocast = functools.partial(torch.cuda.amp.autocast, dtype=torch.bfloat16) +if torch.cuda.is_available() and torch.cuda.is_bf16_supported(): + autocast = functools.partial(torch.autocast, device_type="cuda", dtype=torch.bfloat16) else: @contextlib.contextmanager diff --git a/TTS/tts/layers/tortoise/diffusion_decoder.py b/TTS/tts/layers/tortoise/diffusion_decoder.py index 0d3cf7698a..f71eaf1718 100644 --- a/TTS/tts/layers/tortoise/diffusion_decoder.py +++ b/TTS/tts/layers/tortoise/diffusion_decoder.py @@ -5,7 +5,6 @@ import torch import torch.nn as nn import torch.nn.functional as F -from torch import autocast from TTS.tts.layers.tortoise.arch_utils import AttentionBlock, normalization @@ -385,7 +384,7 @@ def forward( unused_params.extend(list(lyr.parameters())) else: # First and last blocks will have autocast disabled for improved precision. 
- with autocast(x.device.type, enabled=self.enable_fp16 and i != 0): + with torch.autocast(x.device.type, enabled=self.enable_fp16 and i != 0): x = lyr(x, time_emb) x = x.float() diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index a938a3a4ab..c6f15a7952 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -12,7 +12,6 @@ from coqpit import Coqpit from librosa.filters import mel as librosa_mel_fn from torch import nn -from torch.cuda.amp.autocast_mode import autocast from torch.nn import functional as F from torch.utils.data import DataLoader from torch.utils.data.sampler import WeightedRandomSampler @@ -952,7 +951,7 @@ def train_step(self, batch: dict, criterion: nn.Module, optimizer_idx: int): ) # compute loss - with autocast(enabled=False): # use float32 for the criterion + with torch.autocast("cuda", enabled=False): # use float32 for the criterion loss_dict = criterion[optimizer_idx]( scores_disc_fake=scores_d_fake, scores_disc_real=scores_d_real, @@ -963,7 +962,7 @@ def train_step(self, batch: dict, criterion: nn.Module, optimizer_idx: int): if optimizer_idx == 1: mel = batch["mel_input"] # compute melspec segment - with autocast(enabled=False): + with torch.autocast("cuda", enabled=False): mel_slice = segment( mel.float(), self.model_outputs_cache["slice_ids"], self.args.spec_segment_size, pad_short=True ) @@ -991,7 +990,7 @@ def train_step(self, batch: dict, criterion: nn.Module, optimizer_idx: int): ) # compute losses - with autocast(enabled=True): # use float32 for the criterion + with torch.autocast("cuda", enabled=True): # use float32 for the criterion loss_dict = criterion[optimizer_idx]( mel_output=self.model_outputs_cache["acoustic_model_outputs"].transpose(1, 2), mel_target=batch["mel_input"], diff --git a/TTS/tts/models/forward_tts.py b/TTS/tts/models/forward_tts.py index e7bc86374d..d449e580da 100644 --- a/TTS/tts/models/forward_tts.py +++ b/TTS/tts/models/forward_tts.py @@ -6,7 +6,6 @@ from coqpit import Coqpit from monotonic_alignment_search import maximum_path from torch import nn -from torch.cuda.amp.autocast_mode import autocast from trainer.io import load_fsspec from TTS.tts.layers.feed_forward.decoder import Decoder @@ -744,7 +743,7 @@ def train_step(self, batch: dict, criterion: nn.Module): if self.use_aligner: durations = outputs["o_alignment_dur"] # use float32 in AMP - with autocast(enabled=False): + with torch.autocast("cuda", enabled=False): # compute loss loss_dict = criterion( decoder_output=outputs["model_outputs"], diff --git a/TTS/tts/models/glow_tts.py b/TTS/tts/models/glow_tts.py index 5ea69865b2..5bf4713140 100644 --- a/TTS/tts/models/glow_tts.py +++ b/TTS/tts/models/glow_tts.py @@ -6,7 +6,6 @@ from coqpit import Coqpit from monotonic_alignment_search import maximum_path from torch import nn -from torch.cuda.amp.autocast_mode import autocast from torch.nn import functional as F from trainer.io import load_fsspec @@ -416,7 +415,7 @@ def train_step(self, batch: dict, criterion: nn.Module): aux_input={"d_vectors": d_vectors, "speaker_ids": speaker_ids}, ) - with autocast(enabled=False): # avoid mixed_precision in criterion + with torch.autocast("cuda", enabled=False): # avoid mixed_precision in criterion loss_dict = criterion( outputs["z"].float(), outputs["y_mean"].float(), diff --git a/TTS/tts/models/tacotron.py b/TTS/tts/models/tacotron.py index 400a86d042..5d3efd2021 100644 --- a/TTS/tts/models/tacotron.py +++ b/TTS/tts/models/tacotron.py @@ -4,7 +4,6 @@ import torch from torch import 
nn -from torch.cuda.amp.autocast_mode import autocast from trainer.trainer_utils import get_optimizer, get_scheduler from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE @@ -310,7 +309,7 @@ def train_step(self, batch: Dict, criterion: torch.nn.Module) -> Tuple[Dict, Dic alignment_lengths = mel_lengths // self.decoder.r # compute loss - with autocast(enabled=False): # use float32 for the criterion + with torch.autocast("cuda", enabled=False): # use float32 for the criterion loss_dict = criterion( outputs["model_outputs"].float(), outputs["decoder_outputs"].float(), diff --git a/TTS/tts/models/tacotron2.py b/TTS/tts/models/tacotron2.py index 4b1317f440..2716a39786 100644 --- a/TTS/tts/models/tacotron2.py +++ b/TTS/tts/models/tacotron2.py @@ -4,7 +4,6 @@ import torch from torch import nn -from torch.cuda.amp.autocast_mode import autocast from trainer.trainer_utils import get_optimizer, get_scheduler from TTS.tts.layers.tacotron.capacitron_layers import CapacitronVAE @@ -338,7 +337,7 @@ def train_step(self, batch: Dict, criterion: torch.nn.Module): alignment_lengths = mel_lengths // self.decoder.r # compute loss - with autocast(enabled=False): # use float32 for the criterion + with torch.autocast("cuda", enabled=False): # use float32 for the criterion loss_dict = criterion( outputs["model_outputs"].float(), outputs["decoder_outputs"].float(), diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index af803a0f76..432b29f5e1 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -13,7 +13,6 @@ from librosa.filters import mel as librosa_mel_fn from monotonic_alignment_search import maximum_path from torch import nn -from torch.cuda.amp.autocast_mode import autocast from torch.nn import functional as F from torch.utils.data import DataLoader from torch.utils.data.sampler import WeightedRandomSampler @@ -1278,7 +1277,7 @@ def train_step(self, batch: dict, criterion: nn.Module, optimizer_idx: int) -> T ) # compute loss - with autocast(enabled=False): # use float32 for the criterion + with torch.autocast("cuda", enabled=False): # use float32 for the criterion loss_dict = criterion[optimizer_idx]( scores_disc_real, scores_disc_fake, @@ -1289,7 +1288,7 @@ def train_step(self, batch: dict, criterion: nn.Module, optimizer_idx: int) -> T mel = batch["mel"] # compute melspec segment - with autocast(enabled=False): + with torch.autocast("cuda", enabled=False): if self.args.encoder_sample_rate: spec_segment_size = self.spec_segment_size * int(self.interpolate_factor) else: @@ -1316,7 +1315,7 @@ def train_step(self, batch: dict, criterion: nn.Module, optimizer_idx: int) -> T ) # compute losses - with autocast(enabled=False): # use float32 for the criterion + with torch.autocast("cuda", enabled=False): # use float32 for the criterion loss_dict = criterion[optimizer_idx]( mel_slice_hat=mel_slice.float(), mel_slice=mel_slice_hat.float(), From 21172ececb09a4cd06292a8cc387c7ac7fc3511f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Nov 2024 16:24:52 +0100 Subject: [PATCH 175/255] ci: update uv and move into composite action --- .github/actions/setup-uv/action.yml | 11 +++++++++++ .github/workflows/pypi-release.yml | 8 ++------ .github/workflows/style_check.yml | 8 ++------ .github/workflows/tests.yml | 14 ++++---------- 4 files changed, 19 insertions(+), 22 deletions(-) create mode 100644 .github/actions/setup-uv/action.yml diff --git a/.github/actions/setup-uv/action.yml b/.github/actions/setup-uv/action.yml new file mode 100644 index 0000000000..619b138fb2 --- 
/dev/null +++ b/.github/actions/setup-uv/action.yml @@ -0,0 +1,11 @@ +name: Setup uv + +runs: + using: 'composite' + steps: + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + version: "0.5.1" + enable-cache: true + cache-dependency-glob: "**/pyproject.toml" diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 4122f69f73..1b7f44654c 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -11,6 +11,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Setup uv + uses: ./.github/actions/setup-uv - name: Verify tag matches version run: | set -ex @@ -19,12 +21,6 @@ jobs: if [[ "v$version" != "$tag" ]]; then exit 1 fi - - name: Install uv - uses: astral-sh/setup-uv@v3 - with: - version: "0.4.27" - enable-cache: true - cache-dependency-glob: "**/pyproject.toml" - name: Set up Python run: uv python install 3.12 - name: Build sdist and wheel diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml index a146213f7c..44f562d07e 100644 --- a/.github/workflows/style_check.yml +++ b/.github/workflows/style_check.yml @@ -15,12 +15,8 @@ jobs: python-version: [3.9] steps: - uses: actions/checkout@v4 - - name: Install uv - uses: astral-sh/setup-uv@v3 - with: - version: "0.4.27" - enable-cache: true - cache-dependency-glob: "**/pyproject.toml" + - name: Setup uv + uses: ./.github/actions/setup-uv - name: Set up Python ${{ matrix.python-version }} run: uv python install ${{ matrix.python-version }} - name: Lint check diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index be3f1b740b..02c6e25abb 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -16,12 +16,8 @@ jobs: subset: ["data_tests", "inference_tests", "test_aux", "test_text", "test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"] steps: - uses: actions/checkout@v4 - - name: Install uv - uses: astral-sh/setup-uv@v3 - with: - version: "0.4.27" - enable-cache: true - cache-dependency-glob: "**/pyproject.toml" + - name: Setup uv + uses: ./.github/actions/setup-uv - name: Set up Python ${{ matrix.python-version }} run: uv python install ${{ matrix.python-version }} - name: Install Espeak @@ -58,10 +54,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - - name: Install uv - uses: astral-sh/setup-uv@v3 - with: - version: "0.4.27" + - name: Setup uv + uses: ./.github/actions/setup-uv - uses: actions/download-artifact@v4 with: pattern: coverage-data-* From 993da778b4bd4eb0408c6b1fc1a40d0c62b1eeae Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Nov 2024 17:22:47 +0100 Subject: [PATCH 176/255] chore: use original instead of scarf urls These allowed Coqui to get download stats, which we don't need anymore --- .github/workflows/tests.yml | 4 - TTS/.models.json | 214 ++++++++++----------- TTS/demos/xtts_ft_demo/utils/gpt_train.py | 10 +- TTS/tts/layers/tortoise/arch_utils.py | 2 +- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 2 +- TTS/utils/manage.py | 16 +- recipes/ljspeech/xtts_v1/train_gpt_xtts.py | 8 +- recipes/ljspeech/xtts_v2/train_gpt_xtts.py | 8 +- 8 files changed, 130 insertions(+), 134 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 02c6e25abb..b485f32fd1 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -30,10 +30,6 @@ jobs: sudo apt-get update sudo apt-get install -y --no-install-recommends git make gcc make system-deps - - name: Replace 
scarf urls - if: contains(fromJSON('["data_tests", "inference_tests", "test_aux", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) - run: | - sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json - name: Unit tests run: | resolution=highest diff --git a/TTS/.models.json b/TTS/.models.json index a5add6e34f..7c3a498bff 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -5,11 +5,11 @@ "xtts_v2": { "description": "XTTS-v2.0.3 by Coqui with 17 languages.", "hf_url": [ - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/model.pth", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/config.json", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/vocab.json", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/hash.md5", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/speakers_xtts.pth" + "https://huggingface.co/coqui/XTTS-v2/resolve/main/model.pth", + "https://huggingface.co/coqui/XTTS-v2/resolve/main/config.json", + "https://huggingface.co/coqui/XTTS-v2/resolve/main/vocab.json", + "https://huggingface.co/coqui/XTTS-v2/resolve/main/hash.md5", + "https://huggingface.co/coqui/XTTS-v2/resolve/main/speakers_xtts.pth" ], "model_hash": "10f92b55c512af7a8d39d650547a15a7", "default_vocoder": null, @@ -21,10 +21,10 @@ "xtts_v1.1": { "description": "XTTS-v1.1 by Coqui with 14 languages, cross-language voice cloning and reference leak fixed.", "hf_url": [ - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/model.pth", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/config.json", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/vocab.json", - "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/hash.md5" + "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/model.pth", + "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/config.json", + "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/vocab.json", + "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/hash.md5" ], "model_hash": "7c62beaf58d39b729de287330dc254e7b515677416839b649a50e7cf74c3df59", "default_vocoder": null, @@ -35,7 +35,7 @@ }, "your_tts": { "description": "Your TTS model accompanying the paper https://arxiv.org/abs/2112.02418", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--multilingual--multi-dataset--your_tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.10.1_models/tts_models--multilingual--multi-dataset--your_tts.zip", "default_vocoder": null, "commit": "e9a1953e", "license": "CC BY-NC-ND 4.0", @@ -44,11 +44,11 @@ "bark": { "description": "🐶 Bark TTS model released by suno-ai. 
You can find the original implementation in https://github.com/suno-ai/bark.", "hf_url": [ - "https://coqui.gateway.scarf.sh/hf/bark/coarse_2.pt", - "https://coqui.gateway.scarf.sh/hf/bark/fine_2.pt", - "https://coqui.gateway.scarf.sh/hf/bark/text_2.pt", - "https://coqui.gateway.scarf.sh/hf/bark/config.json", - "https://coqui.gateway.scarf.sh/hf/bark/tokenizer.pth" + "https://huggingface.co/erogol/bark/resolve/main/coarse_2.pt", + "https://huggingface.co/erogol/bark/resolve/main/fine_2.pt", + "https://huggingface.co/erogol/bark/resolve/main/text_2.pt", + "https://huggingface.co/erogol/bark/resolve/main/config.json", + "https://huggingface.co/erogol/bark/resolve/main/tokenizer.pth" ], "default_vocoder": null, "commit": "e9a1953e", @@ -60,7 +60,7 @@ "bg": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--bg--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--bg--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -71,7 +71,7 @@ "cs": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--cs--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--cs--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -82,7 +82,7 @@ "da": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--da--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--da--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -93,7 +93,7 @@ "et": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--et--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--et--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -104,7 +104,7 @@ "ga": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ga--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--ga--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -116,7 +116,7 @@ "ek1": { "tacotron2": { "description": "EK1 en-rp tacotron2 by NMStoker", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ek1--tacotron2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ek1--tacotron2.zip", "default_vocoder": "vocoder_models/en/ek1/wavegrad", "commit": "c802255", "license": "apache 2.0" @@ -125,7 +125,7 @@ "ljspeech": { "tacotron2-DDC": { "description": "Tacotron2 with Double Decoder Consistency.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC.zip", "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "commit": "bae2ad0f", "author": "Eren Gölge @erogol", @@ -134,7 +134,7 @@ }, "tacotron2-DDC_ph": { "description": "Tacotron2 with Double Decoder Consistency with phonemes.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC_ph.zip", + "github_rls_url": 
"https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC_ph.zip", "default_vocoder": "vocoder_models/en/ljspeech/univnet", "commit": "3900448", "author": "Eren Gölge @erogol", @@ -143,7 +143,7 @@ }, "glow-tts": { "description": "", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--glow-tts.zip", "stats_file": null, "default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan", "commit": "", @@ -153,7 +153,7 @@ }, "speedy-speech": { "description": "Speedy Speech model trained on LJSpeech dataset using the Alignment Network for learning the durations.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--speedy-speech.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--speedy-speech.zip", "stats_file": null, "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "commit": "4581e3d", @@ -163,7 +163,7 @@ }, "tacotron2-DCA": { "description": "", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DCA.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DCA.zip", "default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan", "commit": "", "author": "Eren Gölge @erogol", @@ -172,7 +172,7 @@ }, "vits": { "description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--vits.zip", "default_vocoder": null, "commit": "3900448", "author": "Eren Gölge @erogol", @@ -180,7 +180,7 @@ "contact": "egolge@coqui.com" }, "vits--neon": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--en--ljspeech--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--en--ljspeech--vits.zip", "default_vocoder": null, "author": "@NeonGeckoCom", "license": "bsd-3-clause", @@ -189,7 +189,7 @@ }, "fast_pitch": { "description": "FastPitch model trained on LJSpeech using the Aligner Network", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--fast_pitch.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--ljspeech--fast_pitch.zip", "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "commit": "b27b3ba", "author": "Eren Gölge @erogol", @@ -198,7 +198,7 @@ }, "overflow": { "description": "Overflow model trained on LJSpeech", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.0_models/tts_models--en--ljspeech--overflow.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.10.0_models/tts_models--en--ljspeech--overflow.zip", "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "commit": "3b1a28f", "author": "Eren Gölge @erogol", @@ -207,7 +207,7 @@ }, "neural_hmm": { "description": "Neural HMM model trained on LJSpeech", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.11.0_models/tts_models--en--ljspeech--neural_hmm.zip", + "github_rls_url": 
"https://github.com/coqui-ai/TTS/releases/download/v0.11.0_models/tts_models--en--ljspeech--neural_hmm.zip", "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2", "commit": "3b1a28f", "author": "Shivam Metha @shivammehta25", @@ -218,7 +218,7 @@ "vctk": { "vits": { "description": "VITS End2End TTS model trained on VCTK dataset with 109 different speakers with EN accent.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--vctk--vits.zip", "default_vocoder": null, "commit": "3900448", "author": "Eren @erogol", @@ -227,7 +227,7 @@ }, "fast_pitch": { "description": "FastPitch model trained on VCTK dataseset.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--fast_pitch.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--vctk--fast_pitch.zip", "default_vocoder": null, "commit": "bdab788d", "author": "Eren @erogol", @@ -238,7 +238,7 @@ "sam": { "tacotron-DDC": { "description": "Tacotron2 with Double Decoder Consistency trained with Aceenture's Sam dataset.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--sam--tacotron-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--en--sam--tacotron-DDC.zip", "default_vocoder": "vocoder_models/en/sam/hifigan_v2", "commit": "bae2ad0f", "author": "Eren Gölge @erogol", @@ -249,7 +249,7 @@ "blizzard2013": { "capacitron-t2-c50": { "description": "Capacitron additions to Tacotron 2 with Capacity at 50 as in https://arxiv.org/pdf/1906.03402.pdf", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/tts_models--en--blizzard2013--capacitron-t2-c50.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.7.0_models/tts_models--en--blizzard2013--capacitron-t2-c50.zip", "commit": "d6284e7", "default_vocoder": "vocoder_models/en/blizzard2013/hifigan_v2", "author": "Adam Froghyar @a-froghyar", @@ -258,7 +258,7 @@ }, "capacitron-t2-c150_v2": { "description": "Capacitron additions to Tacotron 2 with Capacity at 150 as in https://arxiv.org/pdf/1906.03402.pdf", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.1_models/tts_models--en--blizzard2013--capacitron-t2-c150_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.7.1_models/tts_models--en--blizzard2013--capacitron-t2-c150_v2.zip", "commit": "a67039d", "default_vocoder": "vocoder_models/en/blizzard2013/hifigan_v2", "author": "Adam Froghyar @a-froghyar", @@ -270,15 +270,15 @@ "tortoise-v2": { "description": "Tortoise tts model https://github.com/neonbjb/tortoise-tts", "github_rls_url": [ - "https://coqui.gateway.scarf.sh/v0.14.1_models/autoregressive.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/clvp2.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/cvvp.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/diffusion_decoder.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/rlg_auto.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/rlg_diffuser.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/vocoder.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/mel_norms.pth", - "https://coqui.gateway.scarf.sh/v0.14.1_models/config.json" + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/autoregressive.pth", + 
"https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/clvp2.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/cvvp.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/diffusion_decoder.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/rlg_auto.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/rlg_diffuser.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/vocoder.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/mel_norms.pth", + "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/config.json" ], "commit": "c1875f6", "default_vocoder": null, @@ -289,7 +289,7 @@ "jenny": { "jenny": { "description": "VITS model trained with Jenny(Dioco) dataset. Named as Jenny as demanded by the license. Original URL for the model https://www.kaggle.com/datasets/noml4u/tts-models--en--jenny-dioco--vits", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.14.0_models/tts_models--en--jenny--jenny.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.14.0_models/tts_models--en--jenny--jenny.zip", "default_vocoder": null, "commit": "ba40a1c", "license": "custom - see https://github.com/dioco-group/jenny-tts-dataset#important", @@ -300,7 +300,7 @@ "es": { "mai": { "tacotron2-DDC": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--es--mai--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--es--mai--tacotron2-DDC.zip", "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan", "commit": "", "author": "Eren Gölge @erogol", @@ -310,7 +310,7 @@ }, "css10": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--es--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--es--css10--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -321,7 +321,7 @@ "fr": { "mai": { "tacotron2-DDC": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--fr--mai--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--fr--mai--tacotron2-DDC.zip", "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan", "commit": null, "author": "Eren Gölge @erogol", @@ -331,7 +331,7 @@ }, "css10": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fr--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--fr--css10--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -342,7 +342,7 @@ "uk": { "mai": { "glow-tts": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--uk--mai--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--uk--mai--glow-tts.zip", "author": "@robinhad", "commit": "bdab788d", "license": "MIT", @@ -350,7 +350,7 @@ "default_vocoder": "vocoder_models/uk/mai/multiband-melgan" }, "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--uk--mai--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--uk--mai--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -361,7 +361,7 @@ "zh-CN": { "baker": { 
"tacotron2-DDC-GST": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip", "commit": "unknown", "author": "@kirianguiller", "license": "apache 2.0", @@ -372,7 +372,7 @@ "nl": { "mai": { "tacotron2-DDC": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--nl--mai--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--nl--mai--tacotron2-DDC.zip", "author": "@r-dh", "license": "apache 2.0", "default_vocoder": "vocoder_models/nl/mai/parallel-wavegan", @@ -382,7 +382,7 @@ }, "css10": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--nl--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--nl--css10--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -393,21 +393,21 @@ "de": { "thorsten": { "tacotron2-DCA": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--de--thorsten--tacotron2-DCA.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--de--thorsten--tacotron2-DCA.zip", "default_vocoder": "vocoder_models/de/thorsten/fullband-melgan", "author": "@thorstenMueller", "license": "apache 2.0", "commit": "unknown" }, "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/tts_models--de--thorsten--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.7.0_models/tts_models--de--thorsten--vits.zip", "default_vocoder": null, "author": "@thorstenMueller", "license": "apache 2.0", "commit": "unknown" }, "tacotron2-DDC": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--thorsten--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--de--thorsten--tacotron2-DDC.zip", "default_vocoder": "vocoder_models/de/thorsten/hifigan_v1", "description": "Thorsten-Dec2021-22k-DDC", "author": "@thorstenMueller", @@ -417,7 +417,7 @@ }, "css10": { "vits-neon": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--de--css10--vits.zip", "default_vocoder": null, "author": "@NeonGeckoCom", "license": "bsd-3-clause", @@ -428,7 +428,7 @@ "ja": { "kokoro": { "tacotron2-DDC": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--ja--kokoro--tacotron2-DDC.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--ja--kokoro--tacotron2-DDC.zip", "default_vocoder": "vocoder_models/ja/kokoro/hifigan_v1", "description": "Tacotron2 with Double Decoder Consistency trained with Kokoro Speech Dataset.", "author": "@kaiidams", @@ -440,7 +440,7 @@ "tr": { "common-voice": { "glow-tts": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--tr--common-voice--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--tr--common-voice--glow-tts.zip", "default_vocoder": "vocoder_models/tr/common-voice/hifigan", "license": "MIT", "description": "Turkish GlowTTS model using an unknown speaker from the Common-Voice dataset.", 
@@ -452,7 +452,7 @@ "it": { "mai_female": { "glow-tts": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--it--mai_female--glow-tts.zip", "default_vocoder": null, "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "author": "@nicolalandro", @@ -460,7 +460,7 @@ "commit": null }, "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--it--mai_female--vits.zip", "default_vocoder": null, "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "author": "@nicolalandro", @@ -470,7 +470,7 @@ }, "mai_male": { "glow-tts": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--it--mai_male--glow-tts.zip", "default_vocoder": null, "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "author": "@nicolalandro", @@ -478,7 +478,7 @@ "commit": null }, "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/tts_models--it--mai_male--vits.zip", "default_vocoder": null, "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.", "author": "@nicolalandro", @@ -490,7 +490,7 @@ "ewe": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--ewe--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--ewe--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -502,7 +502,7 @@ "hau": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--hau--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--hau--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -514,7 +514,7 @@ "lin": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--lin--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--lin--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -526,7 +526,7 @@ "tw_akuapem": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_akuapem--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--tw_akuapem--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -538,7 +538,7 @@ 
"tw_asante": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_asante--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--tw_asante--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -550,7 +550,7 @@ "yor": { "openbible": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--yor--openbible--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.2_models/tts_models--yor--openbible--vits.zip", "default_vocoder": null, "license": "CC-BY-SA 4.0", "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.", @@ -562,7 +562,7 @@ "hu": { "css10": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hu--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--hu--css10--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -573,7 +573,7 @@ "el": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--el--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--el--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -584,7 +584,7 @@ "fi": { "css10": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fi--css10--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--fi--css10--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -595,7 +595,7 @@ "hr": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hr--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--hr--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -606,7 +606,7 @@ "lt": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lt--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--lt--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -617,7 +617,7 @@ "lv": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lv--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--lv--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -628,7 +628,7 @@ "mt": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--mt--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--mt--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -639,7 +639,7 @@ "pl": { "mai_female": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pl--mai_female--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--pl--mai_female--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", 
@@ -650,7 +650,7 @@ "pt": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pt--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--pt--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -661,7 +661,7 @@ "ro": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ro--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--ro--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -672,7 +672,7 @@ "sk": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sk--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--sk--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -683,7 +683,7 @@ "sl": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sl--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--sl--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -694,7 +694,7 @@ "sv": { "cv": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sv--cv--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/tts_models--sv--cv--vits.zip", "default_vocoder": null, "commit": null, "author": "@NeonGeckoCom", @@ -705,7 +705,7 @@ "ca": { "custom": { "vits": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--ca--custom--vits.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.10.1_models/tts_models--ca--custom--vits.zip", "default_vocoder": null, "commit": null, "description": " It is trained from zero with 101460 utterances consisting of 257 speakers, approx 138 hours of speech. We used three datasets;\nFestcat and Google Catalan TTS (both TTS datasets) and also a part of Common Voice 8. It is trained with TTS v0.8.0.\nhttps://github.com/coqui-ai/TTS/discussions/930#discussioncomment-4466345", @@ -717,7 +717,7 @@ "fa": { "custom": { "glow-tts": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--fa--custom--glow-tts.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.10.1_models/tts_models--fa--custom--glow-tts.zip", "default_vocoder": null, "commit": null, "description": "persian-tts-female-glow_tts model for text to speech purposes. Single-speaker female voice Trained on persian-tts-dataset-famale. \nThis model has no compatible vocoder thus the output quality is not very good. \nDataset: https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale.", @@ -729,7 +729,7 @@ "bn": { "custom": { "vits-male": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_male.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.13.3_models/tts_models--bn--custom--vits_male.zip", "default_vocoder": null, "commit": null, "description": "Single speaker Bangla male model. 
For more information -> https://github.com/mobassir94/comprehensive-bangla-tts", @@ -737,7 +737,7 @@ "license": "Apache 2.0" }, "vits-female": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_female.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.13.3_models/tts_models--bn--custom--vits_female.zip", "default_vocoder": null, "commit": null, "description": "Single speaker Bangla female model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts", @@ -750,7 +750,7 @@ "common-voice": { "glow-tts":{ "description": "Belarusian GlowTTS model created by @alex73 (Github).", - "github_rls_url":"https://coqui.gateway.scarf.sh/v0.16.6/tts_models--be--common-voice--glow-tts.zip", + "github_rls_url":"https://github.com/coqui-ai/TTS/releases/download/v0.16.6/tts_models--be--common-voice--glow-tts.zip", "default_vocoder": "vocoder_models/be/common-voice/hifigan", "commit": "c0aabb85", "license": "CC-BY-SA 4.0", @@ -763,14 +763,14 @@ "universal": { "libri-tts": { "wavegrad": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--universal--libri-tts--wavegrad.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--universal--libri-tts--wavegrad.zip", "commit": "ea976b0", "author": "Eren Gölge @erogol", "license": "MPL", "contact": "egolge@coqui.com" }, "fullband-melgan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--universal--libri-tts--fullband-melgan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--universal--libri-tts--fullband-melgan.zip", "commit": "4132240", "author": "Eren Gölge @erogol", "license": "MPL", @@ -782,14 +782,14 @@ "ek1": { "wavegrad": { "description": "EK1 en-rp wavegrad by NMStoker", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ek1--wavegrad.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--ek1--wavegrad.zip", "commit": "c802255", "license": "apache 2.0" } }, "ljspeech": { "multiband-melgan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--multiband-melgan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--ljspeech--multiband-melgan.zip", "commit": "ea976b0", "author": "Eren Gölge @erogol", "license": "MPL", @@ -797,7 +797,7 @@ }, "hifigan_v2": { "description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--hifigan_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--ljspeech--hifigan_v2.zip", "commit": "bae2ad0f", "author": "@erogol", "license": "apache 2.0", @@ -805,7 +805,7 @@ }, "univnet": { "description": "UnivNet model finetuned on TacotronDDC_ph spectrograms for better compatibility.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--univnet_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--ljspeech--univnet_v2.zip", "commit": "4581e3d", "author": "Eren @erogol", "license": "apache 2.0", @@ -815,7 +815,7 @@ "blizzard2013": { "hifigan_v2": { "description": "HiFiGAN_v2 LJSpeech vocoder from 
https://arxiv.org/abs/2010.05646.", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/vocoder_models--en--blizzard2013--hifigan_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.7.0_models/vocoder_models--en--blizzard2013--hifigan_v2.zip", "commit": "d6284e7", "author": "Adam Froghyar @a-froghyar", "license": "apache 2.0", @@ -825,7 +825,7 @@ "vctk": { "hifigan_v2": { "description": "Finetuned and intended to be used with tts_models/en/vctk/sc-glow-tts", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--vctk--hifigan_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--vctk--hifigan_v2.zip", "commit": "2f07160", "author": "Edresson Casanova", "license": "apache 2.0", @@ -835,7 +835,7 @@ "sam": { "hifigan_v2": { "description": "Finetuned and intended to be used with tts_models/en/sam/tacotron_DDC", - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--sam--hifigan_v2.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--en--sam--hifigan_v2.zip", "commit": "2f07160", "author": "Eren Gölge @erogol", "license": "apache 2.0", @@ -846,7 +846,7 @@ "nl": { "mai": { "parallel-wavegan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--nl--mai--parallel-wavegan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--nl--mai--parallel-wavegan.zip", "author": "@r-dh", "license": "apache 2.0", "commit": "unknown" @@ -856,19 +856,19 @@ "de": { "thorsten": { "wavegrad": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--de--thorsten--wavegrad.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--de--thorsten--wavegrad.zip", "author": "@thorstenMueller", "license": "apache 2.0", "commit": "unknown" }, "fullband-melgan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--de--thorsten--fullband-melgan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--de--thorsten--fullband-melgan.zip", "author": "@thorstenMueller", "license": "apache 2.0", "commit": "unknown" }, "hifigan_v1": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/vocoder_models--de--thorsten--hifigan_v1.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.8.0_models/vocoder_models--de--thorsten--hifigan_v1.zip", "description": "HifiGAN vocoder model for Thorsten Neutral Dec2021 22k Samplerate Tacotron2 DDC model", "author": "@thorstenMueller", "license": "apache 2.0", @@ -879,7 +879,7 @@ "ja": { "kokoro": { "hifigan_v1": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--ja--kokoro--hifigan_v1.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--ja--kokoro--hifigan_v1.zip", "description": "HifiGAN model trained for kokoro dataset by @kaiidams", "author": "@kaiidams", "license": "apache 2.0", @@ -890,7 +890,7 @@ "uk": { "mai": { "multiband-melgan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--uk--mai--multiband-melgan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--uk--mai--multiband-melgan.zip", "author": "@robinhad", "commit": 
"bdab788d", "license": "MIT", @@ -901,7 +901,7 @@ "tr": { "common-voice": { "hifigan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--tr--common-voice--hifigan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.6.1_models/vocoder_models--tr--common-voice--hifigan.zip", "description": "HifiGAN model using an unknown speaker from the Common-Voice dataset.", "author": "Fatih Akademi", "license": "MIT", @@ -912,7 +912,7 @@ "be": { "common-voice": { "hifigan": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.16.6/vocoder_models--be--common-voice--hifigan.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.16.6/vocoder_models--be--common-voice--hifigan.zip", "description": "Belarusian HiFiGAN model created by @alex73 (Github).", "author": "@alex73", "license": "CC-BY-SA 4.0", @@ -925,7 +925,7 @@ "multilingual": { "vctk": { "freevc24": { - "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.0_models/voice_conversion_models--multilingual--vctk--freevc24.zip", + "github_rls_url": "https://github.com/coqui-ai/TTS/releases/download/v0.13.0_models/voice_conversion_models--multilingual--vctk--freevc24.zip", "description": "FreeVC model trained on VCTK dataset from https://github.com/OlaWod/FreeVC", "author": "Jing-Yi Li @OlaWod", "license": "MIT", diff --git a/TTS/demos/xtts_ft_demo/utils/gpt_train.py b/TTS/demos/xtts_ft_demo/utils/gpt_train.py index 7b41966b8f..f838297af3 100644 --- a/TTS/demos/xtts_ft_demo/utils/gpt_train.py +++ b/TTS/demos/xtts_ft_demo/utils/gpt_train.py @@ -43,8 +43,8 @@ def train_gpt(language, num_epochs, batch_size, grad_acumm, train_csv, eval_csv, os.makedirs(CHECKPOINTS_OUT_PATH, exist_ok=True) # DVAE files - DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/dvae.pth" - MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/mel_stats.pth" + DVAE_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/dvae.pth" + MEL_NORM_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/mel_stats.pth" # Set the path to the downloaded files DVAE_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(DVAE_CHECKPOINT_LINK)) @@ -58,9 +58,9 @@ def train_gpt(language, num_epochs, batch_size, grad_acumm, train_csv, eval_csv, ) # Download XTTS v2.0 checkpoint if needed - TOKENIZER_FILE_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/vocab.json" - XTTS_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/model.pth" - XTTS_CONFIG_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/config.json" + TOKENIZER_FILE_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/vocab.json" + XTTS_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/model.pth" + XTTS_CONFIG_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/config.json" # XTTS transfer learning parameters: You we need to provide the paths of XTTS model checkpoint that you want to do the fine tuning. 
TOKENIZER_FILE = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(TOKENIZER_FILE_LINK)) # vocab.json file diff --git a/TTS/tts/layers/tortoise/arch_utils.py b/TTS/tts/layers/tortoise/arch_utils.py index 52c2526695..8eda251f93 100644 --- a/TTS/tts/layers/tortoise/arch_utils.py +++ b/TTS/tts/layers/tortoise/arch_utils.py @@ -293,7 +293,7 @@ def forward(self, x): return h[:, :, 0] -DEFAULT_MEL_NORM_FILE = "https://coqui.gateway.scarf.sh/v0.14.1_models/mel_norms.pth" +DEFAULT_MEL_NORM_FILE = "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/mel_norms.pth" class TorchMelSpectrogram(nn.Module): diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index 9d9edd5758..0253d65ddd 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -50,7 +50,7 @@ class GPTArgs(XttsArgs): max_wav_length: int = 255995 # ~11.6 seconds max_text_length: int = 200 tokenizer_file: str = "" - mel_norm_file: str = "https://coqui.gateway.scarf.sh/v0.14.0_models/mel_norms.pth" + mel_norm_file: str = "https://github.com/coqui-ai/TTS/releases/download/v0.14.0_models/mel_norms.pth" dvae_checkpoint: str = "" xtts_checkpoint: str = "" gpt_checkpoint: str = "" # if defined it will replace the gpt weights on xtts model diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index fb5071d9b0..bd445b3a2f 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -230,7 +230,7 @@ def _download_hf_model(self, model_item: Dict, output_path: str): self._download_zip_file(model_item["hf_url"], output_path, self.progress_bar) def download_fairseq_model(self, model_name, output_path): - URI_PREFIX = "https://coqui.gateway.scarf.sh/fairseq/" + URI_PREFIX = "https://dl.fbaipublicfiles.com/mms/tts/" _, lang, _, _ = model_name.split("/") model_download_uri = os.path.join(URI_PREFIX, f"{lang}.tar.gz") self._download_tar_file(model_download_uri, output_path, self.progress_bar) @@ -243,9 +243,9 @@ def set_model_url(model_item: Dict): elif "hf_url" in model_item: model_item["model_url"] = model_item["hf_url"] elif "fairseq" in model_item["model_name"]: - model_item["model_url"] = "https://coqui.gateway.scarf.sh/fairseq/" + model_item["model_url"] = "https://dl.fbaipublicfiles.com/mms/tts/" elif "xtts" in model_item["model_name"]: - model_item["model_url"] = "https://coqui.gateway.scarf.sh/xtts/" + model_item["model_url"] = "https://huggingface.co/coqui/" return model_item def _set_model_item(self, model_name): @@ -278,11 +278,11 @@ def _set_model_item(self, model_name): "contact": "info@coqui.ai", "tos_required": True, "hf_url": [ - f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/model.pth", - f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/config.json", - f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/vocab.json", - f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/hash.md5", - f"https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/{model_version}/speakers_xtts.pth", + f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_version}/model.pth", + f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_version}/config.json", + f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_version}/vocab.json", + f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_version}/hash.md5", + f"https://huggingface.co/coqui/XTTS-v2/resolve/{model_version}/speakers_xtts.pth", ], } else: diff --git a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py 
index 7d8f4064c5..d31ec8f1ed 100644 --- a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py +++ b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py @@ -41,8 +41,8 @@ # DVAE files -DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/dvae.pth" -MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/mel_stats.pth" +DVAE_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/dvae.pth" +MEL_NORM_LINK = "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/mel_stats.pth" # Set the path to the downloaded files DVAE_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, DVAE_CHECKPOINT_LINK.split("/")[-1]) @@ -55,8 +55,8 @@ # Download XTTS v1.1 checkpoint if needed -TOKENIZER_FILE_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/vocab.json" -XTTS_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/model.pth" +TOKENIZER_FILE_LINK = "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/vocab.json" +XTTS_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v1/resolve/v1.1.2/model.pth" # XTTS transfer learning parameters: You we need to provide the paths of XTTS model checkpoint that you want to do the fine tuning. TOKENIZER_FILE = os.path.join(CHECKPOINTS_OUT_PATH, TOKENIZER_FILE_LINK.split("/")[-1]) # vocab.json file diff --git a/recipes/ljspeech/xtts_v2/train_gpt_xtts.py b/recipes/ljspeech/xtts_v2/train_gpt_xtts.py index 626917381a..ccaa97f1e4 100644 --- a/recipes/ljspeech/xtts_v2/train_gpt_xtts.py +++ b/recipes/ljspeech/xtts_v2/train_gpt_xtts.py @@ -41,8 +41,8 @@ # DVAE files -DVAE_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/dvae.pth" -MEL_NORM_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/mel_stats.pth" +DVAE_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/dvae.pth" +MEL_NORM_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/mel_stats.pth" # Set the path to the downloaded files DVAE_CHECKPOINT = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(DVAE_CHECKPOINT_LINK)) @@ -55,8 +55,8 @@ # Download XTTS v2.0 checkpoint if needed -TOKENIZER_FILE_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/vocab.json" -XTTS_CHECKPOINT_LINK = "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/model.pth" +TOKENIZER_FILE_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/vocab.json" +XTTS_CHECKPOINT_LINK = "https://huggingface.co/coqui/XTTS-v2/resolve/main/model.pth" # XTTS transfer learning parameters: You we need to provide the paths of XTTS model checkpoint that you want to do the fine tuning. 
TOKENIZER_FILE = os.path.join(CHECKPOINTS_OUT_PATH, os.path.basename(TOKENIZER_FILE_LINK)) # vocab.json file From 5de47e9a14e3f1df377af2a643e8d45e6a8093d7 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Nov 2024 22:39:58 +0100 Subject: [PATCH 177/255] ci: run integration tests only on lowest and highest python --- .github/workflows/integration-tests.yml | 64 +++++++++++++++++++++++++ .github/workflows/style_check.yml | 2 +- .github/workflows/tests.yml | 6 +-- 3 files changed, 68 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/integration-tests.yml diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml new file mode 100644 index 0000000000..7af0836248 --- /dev/null +++ b/.github/workflows/integration-tests.yml @@ -0,0 +1,64 @@ +name: integration + +on: + push: + branches: + - main + pull_request: + types: [opened, synchronize, reopened] +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.12"] + subset: ["test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"] + steps: + - uses: actions/checkout@v4 + - name: Setup uv + uses: ./.github/actions/setup-uv + - name: Set up Python ${{ matrix.python-version }} + run: uv python install ${{ matrix.python-version }} + - name: Install Espeak + if: contains(fromJSON('["test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) + run: | + sudo apt-get update + sudo apt-get install espeak espeak-ng + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends git make gcc + make system-deps + - name: Integration tests + run: | + resolution=highest + if [ "${{ matrix.python-version }}" == "3.9" ]; then + resolution=lowest-direct + fi + uv run --resolution=$resolution --extra server --extra languages make ${{ matrix.subset }} + - name: Upload coverage data + uses: actions/upload-artifact@v4 + with: + include-hidden-files: true + name: coverage-data-${{ matrix.subset }}-${{ matrix.python-version }} + path: .coverage.* + if-no-files-found: ignore + coverage: + if: always() + needs: test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Setup uv + uses: ./.github/actions/setup-uv + - uses: actions/download-artifact@v4 + with: + pattern: coverage-data-* + merge-multiple: true + - name: Combine coverage + run: | + uv python install + uvx coverage combine + uvx coverage html --skip-covered --skip-empty + uvx coverage report --format=markdown >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/style_check.yml b/.github/workflows/style_check.yml index 44f562d07e..d1060f6be2 100644 --- a/.github/workflows/style_check.yml +++ b/.github/workflows/style_check.yml @@ -7,7 +7,7 @@ on: pull_request: types: [opened, synchronize, reopened] jobs: - test: + lint: runs-on: ubuntu-latest strategy: fail-fast: false diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b485f32fd1..16b680a93c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: tests +name: unit on: push: @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: python-version: [3.9, "3.10", "3.11", "3.12"] - subset: ["data_tests", "inference_tests", "test_aux", "test_text", "test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"] + subset: ["data_tests", "inference_tests", "test_aux", "test_text"] steps: - uses: actions/checkout@v4 - name: Setup uv 
@@ -21,7 +21,7 @@ jobs: - name: Set up Python ${{ matrix.python-version }} run: uv python install ${{ matrix.python-version }} - name: Install Espeak - if: contains(fromJSON('["inference_tests", "test_text", "test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) + if: contains(fromJSON('["inference_tests", "test_text"]'), matrix.subset) run: | sudo apt-get update sudo apt-get install espeak espeak-ng From d3c3ba3d565d61296dcea253b77896fb4d183f82 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 10 Nov 2024 22:58:37 +0100 Subject: [PATCH 178/255] build: set upper limit on transformers More breaking changes affecting the XTTS streaming code --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 379187feed..d66f33d602 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,7 @@ dependencies = [ "gruut[de,es,fr]>=2.4.0", # Tortoise "einops>=0.6.0", - "transformers>=4.43.0", + "transformers>=4.43.0,<=4.46.2", # Bark "encodec>=0.1.1", # XTTS From 75d082563e22ab68649b0e321a28129ce0768229 Mon Sep 17 00:00:00 2001 From: Andy Date: Thu, 14 Nov 2024 23:06:56 +0800 Subject: [PATCH 179/255] fix(docker): add Support for building Docker on Mac/arm64 (#159) * Add Support for building Docker on arm64 * fixup! Add Support for building Docker on arm64 --------- Co-authored-by: Enno Hermann --- Dockerfile | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index e9d331bc41..9ce5c63989 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,9 +1,20 @@ ARG BASE=nvidia/cuda:11.8.0-base-ubuntu22.04 FROM ${BASE} -RUN apt-get update && apt-get upgrade -y -RUN apt-get install -y --no-install-recommends gcc g++ make python3 python3-dev python3-pip python3-venv python3-wheel espeak-ng libsndfile1-dev && rm -rf /var/lib/apt/lists/* -RUN pip3 install -U pip setuptools +RUN apt-get update && \ + apt-get upgrade -y +RUN apt-get install -y --no-install-recommends \ + gcc g++ make python3 python3-dev python3-pip \ + python3-venv python3-wheel espeak-ng \ + libsndfile1-dev libc-dev curl && \ + rm -rf /var/lib/apt/lists/* + +# Install Rust compiler (to build sudachipy for Mac) +RUN curl --proto '=https' --tlsv1.2 -sSf "https://sh.rustup.rs" | sh -s -- -y +ENV PATH="/root/.cargo/bin:${PATH}" + +RUN pip3 install -U pip setuptools wheel +RUN pip3 install -U "spacy[ja]<3.8" RUN pip3 install llvmlite --ignore-installed # Install Dependencies: From e81f8d079c055c740171558f5b8ceb1cd4cfd6f7 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 17 Nov 2024 00:29:49 +0100 Subject: [PATCH 180/255] fix: more helpful error message when formatter is not found --- TTS/tts/datasets/__init__.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/TTS/tts/datasets/__init__.py b/TTS/tts/datasets/__init__.py index f9f2cb2e37..d1a37da4c1 100644 --- a/TTS/tts/datasets/__init__.py +++ b/TTS/tts/datasets/__init__.py @@ -166,6 +166,11 @@ def load_attention_mask_meta_data(metafile_path): def _get_formatter_by_name(name): """Returns the respective preprocessing function.""" thismodule = sys.modules[__name__] + if not hasattr(thismodule, name.lower()): + msg = ( + f"{name} formatter not found. If it is a custom formatter, pass the function to load_tts_samples() instead." 
+ ) + raise ValueError(msg) return getattr(thismodule, name.lower()) From 627bbe4150a92f7aa29d82785ed82ca34433578b Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 17 Nov 2024 00:30:32 +0100 Subject: [PATCH 181/255] fix(xtts): more helpful error message when vocab.json not found --- TTS/tts/models/xtts.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 7c4a76ad7d..22d2720efa 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -779,6 +779,12 @@ def load_checkpoint( if os.path.exists(vocab_path): self.tokenizer = VoiceBpeTokenizer(vocab_file=vocab_path) + else: + msg = ( + f"`vocab.json` file not found in `{checkpoint_dir}`. Move the file there or " + "specify alternative path in `model_args.tokenizer_file` in `config.json`" + ) + raise FileNotFoundError(msg) self.init_models() From 48f5be2ccbdb9032f9aece5993f8be25fc600bb8 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 17 Nov 2024 01:29:30 +0100 Subject: [PATCH 182/255] feat(audio): automatically convert audio to mono --- TTS/utils/audio/numpy_transforms.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/TTS/utils/audio/numpy_transforms.py b/TTS/utils/audio/numpy_transforms.py index 4a8972480c..abc9eaa093 100644 --- a/TTS/utils/audio/numpy_transforms.py +++ b/TTS/utils/audio/numpy_transforms.py @@ -427,6 +427,9 @@ def load_wav(*, filename: str, sample_rate: int = None, resample: bool = False, else: # SF is faster than librosa for loading files x, _ = sf.read(filename) + if x.ndim != 1: + logger.warning("Found multi-channel audio. Converting to mono: %s", filename) + x = librosa.to_mono(x) return x From 5784f6705ad782eb482977568e27e938270378f4 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 17 Nov 2024 16:40:43 +0100 Subject: [PATCH 183/255] refactor(audio): improve type hints, address lint issues --- TTS/utils/audio/numpy_transforms.py | 123 ++++++++++++++-------------- TTS/utils/audio/processor.py | 67 +++++++-------- 2 files changed, 89 insertions(+), 101 deletions(-) diff --git a/TTS/utils/audio/numpy_transforms.py b/TTS/utils/audio/numpy_transforms.py index abc9eaa093..cf717c7a1a 100644 --- a/TTS/utils/audio/numpy_transforms.py +++ b/TTS/utils/audio/numpy_transforms.py @@ -1,6 +1,6 @@ import logging from io import BytesIO -from typing import Tuple +from typing import Optional import librosa import numpy as np @@ -16,11 +16,11 @@ def build_mel_basis( *, - sample_rate: int = None, - fft_size: int = None, - num_mels: int = None, - mel_fmax: int = None, - mel_fmin: int = None, + sample_rate: int, + fft_size: int, + num_mels: int, + mel_fmin: int, + mel_fmax: Optional[int] = None, **kwargs, ) -> np.ndarray: """Build melspectrogram basis. @@ -34,9 +34,7 @@ def build_mel_basis( return librosa.filters.mel(sr=sample_rate, n_fft=fft_size, n_mels=num_mels, fmin=mel_fmin, fmax=mel_fmax) -def millisec_to_length( - *, frame_length_ms: int = None, frame_shift_ms: int = None, sample_rate: int = None, **kwargs -) -> Tuple[int, int]: +def millisec_to_length(*, frame_length_ms: float, frame_shift_ms: float, sample_rate: int, **kwargs) -> tuple[int, int]: """Compute hop and window length from milliseconds. Returns: @@ -61,7 +59,7 @@ def _exp(x, base): return np.exp(x) -def amp_to_db(*, x: np.ndarray = None, gain: float = 1, base: int = 10, **kwargs) -> np.ndarray: +def amp_to_db(*, x: np.ndarray, gain: float = 1, base: int = 10, **kwargs) -> np.ndarray: """Convert amplitude values to decibels. 
Args: @@ -77,7 +75,7 @@ def amp_to_db(*, x: np.ndarray = None, gain: float = 1, base: int = 10, **kwargs # pylint: disable=no-self-use -def db_to_amp(*, x: np.ndarray = None, gain: float = 1, base: int = 10, **kwargs) -> np.ndarray: +def db_to_amp(*, x: np.ndarray, gain: float = 1, base: float = 10, **kwargs) -> np.ndarray: """Convert decibels spectrogram to amplitude spectrogram. Args: @@ -104,18 +102,20 @@ def preemphasis(*, x: np.ndarray, coef: float = 0.97, **kwargs) -> np.ndarray: np.ndarray: Decorrelated audio signal. """ if coef == 0: - raise RuntimeError(" [!] Preemphasis is set 0.0.") + msg = " [!] Preemphasis is set 0.0." + raise RuntimeError(msg) return scipy.signal.lfilter([1, -coef], [1], x) -def deemphasis(*, x: np.ndarray = None, coef: float = 0.97, **kwargs) -> np.ndarray: +def deemphasis(*, x: np.ndarray, coef: float = 0.97, **kwargs) -> np.ndarray: """Reverse pre-emphasis.""" if coef == 0: - raise RuntimeError(" [!] Preemphasis is set 0.0.") + msg = " [!] Preemphasis is set 0.0." + raise ValueError(msg) return scipy.signal.lfilter([1], [1, -coef], x) -def spec_to_mel(*, spec: np.ndarray, mel_basis: np.ndarray = None, **kwargs) -> np.ndarray: +def spec_to_mel(*, spec: np.ndarray, mel_basis: np.ndarray, **kwargs) -> np.ndarray: """Convert a full scale linear spectrogram output of a network to a melspectrogram. Args: @@ -130,14 +130,14 @@ def spec_to_mel(*, spec: np.ndarray, mel_basis: np.ndarray = None, **kwargs) -> return np.dot(mel_basis, spec) -def mel_to_spec(*, mel: np.ndarray = None, mel_basis: np.ndarray = None, **kwargs) -> np.ndarray: +def mel_to_spec(*, mel: np.ndarray, mel_basis: np.ndarray, **kwargs) -> np.ndarray: """Convert a melspectrogram to full scale spectrogram.""" assert (mel < 0).sum() == 0, " [!] Input values must be non-negative." inv_mel_basis = np.linalg.pinv(mel_basis) return np.maximum(1e-10, np.dot(inv_mel_basis, mel)) -def wav_to_spec(*, wav: np.ndarray = None, **kwargs) -> np.ndarray: +def wav_to_spec(*, wav: np.ndarray, **kwargs) -> np.ndarray: """Compute a spectrogram from a waveform. 
Args: @@ -151,7 +151,7 @@ def wav_to_spec(*, wav: np.ndarray = None, **kwargs) -> np.ndarray: return S.astype(np.float32) -def wav_to_mel(*, wav: np.ndarray = None, mel_basis=None, **kwargs) -> np.ndarray: +def wav_to_mel(*, wav: np.ndarray, mel_basis: np.ndarray, **kwargs) -> np.ndarray: """Compute a melspectrogram from a waveform.""" D = stft(y=wav, **kwargs) S = spec_to_mel(spec=np.abs(D), mel_basis=mel_basis, **kwargs) @@ -164,20 +164,20 @@ def spec_to_wav(*, spec: np.ndarray, power: float = 1.5, **kwargs) -> np.ndarray return griffin_lim(spec=S**power, **kwargs) -def mel_to_wav(*, mel: np.ndarray = None, power: float = 1.5, **kwargs) -> np.ndarray: +def mel_to_wav(*, mel: np.ndarray, mel_basis: np.ndarray, power: float = 1.5, **kwargs) -> np.ndarray: """Convert a melspectrogram to a waveform using Griffi-Lim vocoder.""" S = mel.copy() - S = mel_to_spec(mel=S, mel_basis=kwargs["mel_basis"]) # Convert back to linear + S = mel_to_spec(mel=S, mel_basis=mel_basis) # Convert back to linear return griffin_lim(spec=S**power, **kwargs) ### STFT and ISTFT ### def stft( *, - y: np.ndarray = None, - fft_size: int = None, - hop_length: int = None, - win_length: int = None, + y: np.ndarray, + fft_size: int, + hop_length: Optional[int] = None, + win_length: Optional[int] = None, pad_mode: str = "reflect", window: str = "hann", center: bool = True, @@ -203,9 +203,9 @@ def stft( def istft( *, - y: np.ndarray = None, - hop_length: int = None, - win_length: int = None, + y: np.ndarray, + hop_length: Optional[int] = None, + win_length: Optional[int] = None, window: str = "hann", center: bool = True, **kwargs, @@ -220,7 +220,7 @@ def istft( return librosa.istft(y, hop_length=hop_length, win_length=win_length, center=center, window=window) -def griffin_lim(*, spec: np.ndarray = None, num_iter=60, **kwargs) -> np.ndarray: +def griffin_lim(*, spec: np.ndarray, num_iter=60, **kwargs) -> np.ndarray: angles = np.exp(2j * np.pi * np.random.rand(*spec.shape)) S_complex = np.abs(spec).astype(complex) y = istft(y=S_complex * angles, **kwargs) @@ -233,11 +233,11 @@ def griffin_lim(*, spec: np.ndarray = None, num_iter=60, **kwargs) -> np.ndarray return y -def compute_stft_paddings( - *, x: np.ndarray = None, hop_length: int = None, pad_two_sides: bool = False, **kwargs -) -> Tuple[int, int]: - """Compute paddings used by Librosa's STFT. Compute right padding (final frame) or both sides padding - (first and final frames)""" +def compute_stft_paddings(*, x: np.ndarray, hop_length: int, pad_two_sides: bool = False, **kwargs) -> tuple[int, int]: + """Compute paddings used by Librosa's STFT. + + Compute right padding (final frame) or both sides padding (first and final frames). 
+ """ pad = (x.shape[0] // hop_length + 1) * hop_length - x.shape[0] if not pad_two_sides: return 0, pad @@ -246,12 +246,12 @@ def compute_stft_paddings( def compute_f0( *, - x: np.ndarray = None, - pitch_fmax: float = None, - pitch_fmin: float = None, - hop_length: int = None, - win_length: int = None, - sample_rate: int = None, + x: np.ndarray, + pitch_fmax: Optional[float] = None, + pitch_fmin: Optional[float] = None, + hop_length: int, + win_length: int, + sample_rate: int, stft_pad_mode: str = "reflect", center: bool = True, **kwargs, @@ -323,19 +323,18 @@ def compute_energy(y: np.ndarray, **kwargs) -> np.ndarray: """ x = stft(y=y, **kwargs) mag, _ = magphase(x) - energy = np.sqrt(np.sum(mag**2, axis=0)) - return energy + return np.sqrt(np.sum(mag**2, axis=0)) ### Audio Processing ### def find_endpoint( *, - wav: np.ndarray = None, + wav: np.ndarray, trim_db: float = -40, - sample_rate: int = None, - min_silence_sec=0.8, - gain: float = None, - base: int = None, + sample_rate: int, + min_silence_sec: float = 0.8, + gain: float = 1, + base: float = 10, **kwargs, ) -> int: """Find the last point without silence at the end of a audio signal. @@ -344,8 +343,8 @@ def find_endpoint( wav (np.ndarray): Audio signal. threshold_db (int, optional): Silence threshold in decibels. Defaults to -40. min_silence_sec (float, optional): Ignore silences that are shorter then this in secs. Defaults to 0.8. - gian (float, optional): Gain to be used to convert trim_db to trim_amp. Defaults to None. - base (int, optional): Base of the logarithm used to convert trim_db to trim_amp. Defaults to 10. + gain (float, optional): Gain factor to be used to convert trim_db to trim_amp. Defaults to 1. + base (float, optional): Base of the logarithm used to convert trim_db to trim_amp. Defaults to 10. Returns: int: Last point without silence. @@ -361,20 +360,20 @@ def find_endpoint( def trim_silence( *, - wav: np.ndarray = None, - sample_rate: int = None, - trim_db: float = None, - win_length: int = None, - hop_length: int = None, + wav: np.ndarray, + sample_rate: int, + trim_db: float = 60, + win_length: int, + hop_length: int, **kwargs, ) -> np.ndarray: - """Trim silent parts with a threshold and 0.01 sec margin""" + """Trim silent parts with a threshold and 0.01 sec margin.""" margin = int(sample_rate * 0.01) wav = wav[margin:-margin] return librosa.effects.trim(wav, top_db=trim_db, frame_length=win_length, hop_length=hop_length)[0] -def volume_norm(*, x: np.ndarray = None, coef: float = 0.95, **kwargs) -> np.ndarray: +def volume_norm(*, x: np.ndarray, coef: float = 0.95, **kwargs) -> np.ndarray: """Normalize the volume of an audio signal. Args: @@ -387,7 +386,7 @@ def volume_norm(*, x: np.ndarray = None, coef: float = 0.95, **kwargs) -> np.nda return x / abs(x).max() * coef -def rms_norm(*, wav: np.ndarray = None, db_level: float = -27.0, **kwargs) -> np.ndarray: +def rms_norm(*, wav: np.ndarray, db_level: float = -27.0, **kwargs) -> np.ndarray: r = 10 ** (db_level / 20) a = np.sqrt((len(wav) * (r**2)) / np.sum(wav**2)) return wav * a @@ -404,11 +403,10 @@ def rms_volume_norm(*, x: np.ndarray, db_level: float = -27.0, **kwargs) -> np.n np.ndarray: RMS normalized waveform. """ assert -99 <= db_level <= 0, " [!] 
db_level should be between -99 and 0" - wav = rms_norm(wav=x, db_level=db_level) - return wav + return rms_norm(wav=x, db_level=db_level) -def load_wav(*, filename: str, sample_rate: int = None, resample: bool = False, **kwargs) -> np.ndarray: +def load_wav(*, filename: str, sample_rate: Optional[int] = None, resample: bool = False, **kwargs) -> np.ndarray: """Read a wav file using Librosa and optionally resample, silence trim, volume normalize. Resampling slows down loading the file significantly. Therefore it is recommended to resample the file before. @@ -433,13 +431,13 @@ def load_wav(*, filename: str, sample_rate: int = None, resample: bool = False, return x -def save_wav(*, wav: np.ndarray, path: str, sample_rate: int = None, pipe_out=None, **kwargs) -> None: +def save_wav(*, wav: np.ndarray, path: str, sample_rate: int, pipe_out=None, **kwargs) -> None: """Save float waveform to a file using Scipy. Args: wav (np.ndarray): Waveform with float values in range [-1, 1] to save. path (str): Path to a output file. - sr (int, optional): Sampling rate used for saving to the file. Defaults to None. + sr (int): Sampling rate used for saving to the file. Defaults to None. pipe_out (BytesIO, optional): Flag to stdout the generated TTS wav file for shell pipe. """ wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav)))) @@ -465,8 +463,7 @@ def mulaw_encode(*, wav: np.ndarray, mulaw_qc: int, **kwargs) -> np.ndarray: def mulaw_decode(*, wav, mulaw_qc: int, **kwargs) -> np.ndarray: """Recovers waveform from quantized values.""" mu = 2**mulaw_qc - 1 - x = np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1) - return x + return np.sign(wav) / mu * ((1 + mu) ** np.abs(wav) - 1) def encode_16bits(*, x: np.ndarray, **kwargs) -> np.ndarray: diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py index 680e29debc..90d0d7550d 100644 --- a/TTS/utils/audio/processor.py +++ b/TTS/utils/audio/processor.py @@ -1,6 +1,6 @@ import logging from io import BytesIO -from typing import Dict, Tuple +from typing import Optional import librosa import numpy as np @@ -32,7 +32,7 @@ # pylint: disable=too-many-public-methods -class AudioProcessor(object): +class AudioProcessor: """Audio Processor for TTS. Note: @@ -172,7 +172,7 @@ def __init__( db_level=None, stats_path=None, **_, - ): + ) -> None: # setup class attributed self.sample_rate = sample_rate self.resample = resample @@ -210,7 +210,8 @@ def __init__( elif log_func == "np.log10": self.base = 10 else: - raise ValueError(" [!] unknown `log_func` value.") + msg = " [!] unknown `log_func` value." + raise ValueError(msg) # setup stft parameters if hop_length is None: # compute stft parameters from given time values @@ -254,7 +255,7 @@ def init_from_config(config: "Coqpit"): ### normalization ### def normalize(self, S: np.ndarray) -> np.ndarray: - """Normalize values into `[0, self.max_norm]` or `[-self.max_norm, self.max_norm]` + """Normalize values into `[0, self.max_norm]` or `[-self.max_norm, self.max_norm]`. Args: S (np.ndarray): Spectrogram to normalize. @@ -272,10 +273,10 @@ def normalize(self, S: np.ndarray) -> np.ndarray: if hasattr(self, "mel_scaler"): if S.shape[0] == self.num_mels: return self.mel_scaler.transform(S.T).T - elif S.shape[0] == self.fft_size / 2: + if S.shape[0] == self.fft_size / 2: return self.linear_scaler.transform(S.T).T - else: - raise RuntimeError(" [!] Mean-Var stats does not match the given feature dimensions.") + msg = " [!] Mean-Var stats does not match the given feature dimensions." 
+ raise RuntimeError(msg) # range normalization S -= self.ref_level_db # discard certain range of DB assuming it is air noise S_norm = (S - self.min_level_db) / (-self.min_level_db) @@ -286,13 +287,11 @@ def normalize(self, S: np.ndarray) -> np.ndarray: S_norm, -self.max_norm, self.max_norm # pylint: disable=invalid-unary-operand-type ) return S_norm - else: - S_norm = self.max_norm * S_norm - if self.clip_norm: - S_norm = np.clip(S_norm, 0, self.max_norm) - return S_norm - else: - return S + S_norm = self.max_norm * S_norm + if self.clip_norm: + S_norm = np.clip(S_norm, 0, self.max_norm) + return S_norm + return S def denormalize(self, S: np.ndarray) -> np.ndarray: """Denormalize spectrogram values. @@ -313,10 +312,10 @@ def denormalize(self, S: np.ndarray) -> np.ndarray: if hasattr(self, "mel_scaler"): if S_denorm.shape[0] == self.num_mels: return self.mel_scaler.inverse_transform(S_denorm.T).T - elif S_denorm.shape[0] == self.fft_size / 2: + if S_denorm.shape[0] == self.fft_size / 2: return self.linear_scaler.inverse_transform(S_denorm.T).T - else: - raise RuntimeError(" [!] Mean-Var stats does not match the given feature dimensions.") + msg = " [!] Mean-Var stats does not match the given feature dimensions." + raise RuntimeError(msg) if self.symmetric_norm: if self.clip_norm: S_denorm = np.clip( @@ -324,16 +323,14 @@ def denormalize(self, S: np.ndarray) -> np.ndarray: ) S_denorm = ((S_denorm + self.max_norm) * -self.min_level_db / (2 * self.max_norm)) + self.min_level_db return S_denorm + self.ref_level_db - else: - if self.clip_norm: - S_denorm = np.clip(S_denorm, 0, self.max_norm) - S_denorm = (S_denorm * -self.min_level_db / self.max_norm) + self.min_level_db - return S_denorm + self.ref_level_db - else: - return S_denorm + if self.clip_norm: + S_denorm = np.clip(S_denorm, 0, self.max_norm) + S_denorm = (S_denorm * -self.min_level_db / self.max_norm) + self.min_level_db + return S_denorm + self.ref_level_db + return S_denorm ### Mean-STD scaling ### - def load_stats(self, stats_path: str) -> Tuple[np.array, np.array, np.array, np.array, Dict]: + def load_stats(self, stats_path: str) -> tuple[np.array, np.array, np.array, np.array, dict]: """Loading mean and variance statistics from a `npy` file. Args: @@ -351,7 +348,7 @@ def load_stats(self, stats_path: str) -> Tuple[np.array, np.array, np.array, np. 
stats_config = stats["audio_config"] # check all audio parameters used for computing stats skip_parameters = ["griffin_lim_iters", "stats_path", "do_trim_silence", "ref_level_db", "power"] - for key in stats_config.keys(): + for key in stats_config: if key in skip_parameters: continue if key not in ["sample_rate", "trim_db"]: @@ -415,10 +412,7 @@ def spectrogram(self, y: np.ndarray) -> np.ndarray: win_length=self.win_length, pad_mode=self.stft_pad_mode, ) - if self.do_amp_to_db_linear: - S = amp_to_db(x=np.abs(D), gain=self.spec_gain, base=self.base) - else: - S = np.abs(D) + S = amp_to_db(x=np.abs(D), gain=self.spec_gain, base=self.base) if self.do_amp_to_db_linear else np.abs(D) return self.normalize(S).astype(np.float32) def melspectrogram(self, y: np.ndarray) -> np.ndarray: @@ -467,8 +461,7 @@ def out_linear_to_mel(self, linear_spec: np.ndarray) -> np.ndarray: S = db_to_amp(x=S, gain=self.spec_gain, base=self.base) S = spec_to_mel(spec=np.abs(S), mel_basis=self.mel_basis) S = amp_to_db(x=S, gain=self.spec_gain, base=self.base) - mel = self.normalize(S) - return mel + return self.normalize(S) def _griffin_lim(self, S): return griffin_lim( @@ -502,7 +495,7 @@ def compute_f0(self, x: np.ndarray) -> np.ndarray: if len(x) % self.hop_length == 0: x = np.pad(x, (0, self.hop_length // 2), mode=self.stft_pad_mode) - f0 = compute_f0( + return compute_f0( x=x, pitch_fmax=self.pitch_fmax, pitch_fmin=self.pitch_fmin, @@ -513,8 +506,6 @@ def compute_f0(self, x: np.ndarray) -> np.ndarray: center=True, ) - return f0 - ### Audio Processing ### def find_endpoint(self, wav: np.ndarray, min_silence_sec=0.8) -> int: """Find the last point without silence at the end of a audio signal. @@ -537,7 +528,7 @@ def find_endpoint(self, wav: np.ndarray, min_silence_sec=0.8) -> int: ) def trim_silence(self, wav): - """Trim silent parts with a threshold and 0.01 sec margin""" + """Trim silent parts with a threshold and 0.01 sec margin.""" return trim_silence( wav=wav, sample_rate=self.sample_rate, @@ -572,7 +563,7 @@ def rms_volume_norm(self, x: np.ndarray, db_level: float = None) -> np.ndarray: return rms_volume_norm(x=x, db_level=db_level) ### save and load ### - def load_wav(self, filename: str, sr: int = None) -> np.ndarray: + def load_wav(self, filename: str, sr: Optional[int] = None) -> np.ndarray: """Read a wav file using Librosa and optionally resample, silence trim, volume normalize. Resampling slows down loading the file significantly. Therefore it is recommended to resample the file before. From 8ba3233ec607c648e3720086ff75994c1b39677f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 18 Nov 2024 00:54:26 +0100 Subject: [PATCH 184/255] refactor(audio): remove duplicate save_wav code --- TTS/utils/audio/numpy_transforms.py | 21 +++++++++++++++++++-- TTS/utils/audio/processor.py | 26 ++++++++++---------------- 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/TTS/utils/audio/numpy_transforms.py b/TTS/utils/audio/numpy_transforms.py index cf717c7a1a..203091ea88 100644 --- a/TTS/utils/audio/numpy_transforms.py +++ b/TTS/utils/audio/numpy_transforms.py @@ -431,7 +431,16 @@ def load_wav(*, filename: str, sample_rate: Optional[int] = None, resample: bool return x -def save_wav(*, wav: np.ndarray, path: str, sample_rate: int, pipe_out=None, **kwargs) -> None: +def save_wav( + *, + wav: np.ndarray, + path: str, + sample_rate: int, + pipe_out=None, + do_rms_norm: bool = False, + db_level: float = -27.0, + **kwargs, +) -> None: """Save float waveform to a file using Scipy. 
Args: @@ -439,8 +448,16 @@ def save_wav(*, wav: np.ndarray, path: str, sample_rate: int, pipe_out=None, **k path (str): Path to a output file. sr (int): Sampling rate used for saving to the file. Defaults to None. pipe_out (BytesIO, optional): Flag to stdout the generated TTS wav file for shell pipe. + do_rms_norm (bool): Whether to apply RMS normalization + db_level (float): Target dB level in RMS. """ - wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav)))) + if do_rms_norm: + if db_level is None: + msg = "`db_level` cannot be None with `do_rms_norm=True`" + raise ValueError(msg) + wav_norm = rms_volume_norm(x=wav, db_level=db_level) + else: + wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav)))) wav_norm = wav_norm.astype(np.int16) if pipe_out: diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py index 90d0d7550d..fe125acecf 100644 --- a/TTS/utils/audio/processor.py +++ b/TTS/utils/audio/processor.py @@ -1,11 +1,8 @@ import logging -from io import BytesIO from typing import Optional import librosa import numpy as np -import scipy.io.wavfile -import scipy.signal from TTS.tts.utils.helpers import StandardScaler from TTS.utils.audio.numpy_transforms import ( @@ -21,6 +18,7 @@ millisec_to_length, preemphasis, rms_volume_norm, + save_wav, spec_to_mel, stft, trim_silence, @@ -590,7 +588,7 @@ def load_wav(self, filename: str, sr: Optional[int] = None) -> np.ndarray: x = self.rms_volume_norm(x, self.db_level) return x - def save_wav(self, wav: np.ndarray, path: str, sr: int = None, pipe_out=None) -> None: + def save_wav(self, wav: np.ndarray, path: str, sr: Optional[int] = None, pipe_out=None) -> None: """Save a waveform to a file using Scipy. Args: @@ -599,18 +597,14 @@ def save_wav(self, wav: np.ndarray, path: str, sr: int = None, pipe_out=None) -> sr (int, optional): Sampling rate used for saving to the file. Defaults to None. pipe_out (BytesIO, optional): Flag to stdout the generated TTS wav file for shell pipe. """ - if self.do_rms_norm: - wav_norm = self.rms_volume_norm(wav, self.db_level) * 32767 - else: - wav_norm = wav * (32767 / max(0.01, np.max(np.abs(wav)))) - - wav_norm = wav_norm.astype(np.int16) - if pipe_out: - wav_buffer = BytesIO() - scipy.io.wavfile.write(wav_buffer, sr if sr else self.sample_rate, wav_norm) - wav_buffer.seek(0) - pipe_out.buffer.write(wav_buffer.read()) - scipy.io.wavfile.write(path, sr if sr else self.sample_rate, wav_norm) + save_wav( + wav=wav, + path=path, + sample_rate=sr if sr else self.sample_rate, + pipe_out=pipe_out, + do_rms_norm=self.do_rms_norm, + db_level=self.db_level, + ) def get_duration(self, filename: str) -> float: """Get the duration of a wav file using Librosa. From fbbae5ac6a1f2f69bdf7ad89349fdaf7e4fd8da2 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 18 Nov 2024 00:56:13 +0100 Subject: [PATCH 185/255] refactor(audio): remove duplicate rms_volume_norm function --- TTS/utils/audio/processor.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py index fe125acecf..1d8fed8e39 100644 --- a/TTS/utils/audio/processor.py +++ b/TTS/utils/audio/processor.py @@ -547,19 +547,6 @@ def sound_norm(x: np.ndarray) -> np.ndarray: """ return volume_norm(x=x) - def rms_volume_norm(self, x: np.ndarray, db_level: float = None) -> np.ndarray: - """Normalize the volume based on RMS of the signal. - - Args: - x (np.ndarray): Raw waveform. - - Returns: - np.ndarray: RMS normalized waveform. 
- """ - if db_level is None: - db_level = self.db_level - return rms_volume_norm(x=x, db_level=db_level) - ### save and load ### def load_wav(self, filename: str, sr: Optional[int] = None) -> np.ndarray: """Read a wav file using Librosa and optionally resample, silence trim, volume normalize. @@ -585,7 +572,7 @@ def load_wav(self, filename: str, sr: Optional[int] = None) -> np.ndarray: if self.do_sound_norm: x = self.sound_norm(x) if self.do_rms_norm: - x = self.rms_volume_norm(x, self.db_level) + x = rms_volume_norm(x=x, db_level=self.db_level) return x def save_wav(self, wav: np.ndarray, path: str, sr: Optional[int] = None, pipe_out=None) -> None: From 9035e36b1f4d59576c749c8f43fc2ad3e7881c0b Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 20 Nov 2024 16:34:58 +0100 Subject: [PATCH 186/255] ci: allow testing out trainer/coqpit branches before release (#168) --- .github/workflows/integration-tests.yml | 18 ++++++++++++++++++ .github/workflows/tests.yml | 18 ++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index 7af0836248..4dc8c76c1a 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -6,6 +6,16 @@ on: - main pull_request: types: [opened, synchronize, reopened] + workflow_dispatch: + inputs: + trainer_branch: + description: "Branch of Trainer to test" + required: false + default: "main" + coqpit_branch: + description: "Branch of Coqpit to test" + required: false + default: "main" jobs: test: runs-on: ubuntu-latest @@ -30,6 +40,14 @@ jobs: sudo apt-get update sudo apt-get install -y --no-install-recommends git make gcc make system-deps + - name: Install custom Trainer and/or Coqpit if requested + run: | + if [[ -n "${{ github.event.inputs.trainer_branch }}" ]]; then + uv add git+https://github.com/idiap/coqui-ai-Trainer --branch ${{ github.event.inputs.trainer_branch }} + fi + if [[ -n "${{ github.event.inputs.coqpit_branch }}" ]]; then + uv add git+https://github.com/idiap/coqui-ai-coqpit --branch ${{ github.event.inputs.coqpit_branch }} + fi - name: Integration tests run: | resolution=highest diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 16b680a93c..576de150fd 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -6,6 +6,16 @@ on: - main pull_request: types: [opened, synchronize, reopened] + workflow_dispatch: + inputs: + trainer_branch: + description: "Branch of Trainer to test" + required: false + default: "main" + coqpit_branch: + description: "Branch of Coqpit to test" + required: false + default: "main" jobs: test: runs-on: ubuntu-latest @@ -30,6 +40,14 @@ jobs: sudo apt-get update sudo apt-get install -y --no-install-recommends git make gcc make system-deps + - name: Install custom Trainer and/or Coqpit if requested + run: | + if [[ -n "${{ github.event.inputs.trainer_branch }}" ]]; then + uv add git+https://github.com/idiap/coqui-ai-Trainer --branch ${{ github.event.inputs.trainer_branch }} + fi + if [[ -n "${{ github.event.inputs.coqpit_branch }}" ]]; then + uv add git+https://github.com/idiap/coqui-ai-coqpit --branch ${{ github.event.inputs.coqpit_branch }} + fi - name: Unit tests run: | resolution=highest From 1b6d3ebd3317f1d4017104b5208cbc8210780e0e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 20 Nov 2024 18:39:55 +0100 Subject: [PATCH 187/255] refactor(xtts): remove duplicate hifigan generator --- TTS/tts/layers/xtts/hifigan_decoder.py | 609 
+----------------------- TTS/vocoder/models/hifigan_generator.py | 13 + 2 files changed, 15 insertions(+), 607 deletions(-) diff --git a/TTS/tts/layers/xtts/hifigan_decoder.py b/TTS/tts/layers/xtts/hifigan_decoder.py index 5ef0030b8b..2e6ac01a87 100644 --- a/TTS/tts/layers/xtts/hifigan_decoder.py +++ b/TTS/tts/layers/xtts/hifigan_decoder.py @@ -1,618 +1,13 @@ import logging import torch -import torchaudio -from torch import nn -from torch.nn import Conv1d, ConvTranspose1d -from torch.nn import functional as F -from torch.nn.utils.parametrizations import weight_norm -from torch.nn.utils.parametrize import remove_parametrizations from trainer.io import load_fsspec -from TTS.utils.generic_utils import is_pytorch_at_least_2_4 -from TTS.vocoder.models.hifigan_generator import get_padding +from TTS.encoder.models.resnet import ResNetSpeakerEncoder +from TTS.vocoder.models.hifigan_generator import HifiganGenerator logger = logging.getLogger(__name__) -LRELU_SLOPE = 0.1 - - -class ResBlock1(torch.nn.Module): - """Residual Block Type 1. It has 3 convolutional layers in each convolutional block. - - Network:: - - x -> lrelu -> conv1_1 -> conv1_2 -> conv1_3 -> z -> lrelu -> conv2_1 -> conv2_2 -> conv2_3 -> o -> + -> o - |--------------------------------------------------------------------------------------------------| - - - Args: - channels (int): number of hidden channels for the convolutional layers. - kernel_size (int): size of the convolution filter in each layer. - dilations (list): list of dilation value for each conv layer in a block. - """ - - def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)): - super().__init__() - self.convs1 = nn.ModuleList( - [ - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=dilation[0], - padding=get_padding(kernel_size, dilation[0]), - ) - ), - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=dilation[1], - padding=get_padding(kernel_size, dilation[1]), - ) - ), - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=dilation[2], - padding=get_padding(kernel_size, dilation[2]), - ) - ), - ] - ) - - self.convs2 = nn.ModuleList( - [ - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=1, - padding=get_padding(kernel_size, 1), - ) - ), - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=1, - padding=get_padding(kernel_size, 1), - ) - ), - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=1, - padding=get_padding(kernel_size, 1), - ) - ), - ] - ) - - def forward(self, x): - """ - Args: - x (Tensor): input tensor. - Returns: - Tensor: output tensor. - Shapes: - x: [B, C, T] - """ - for c1, c2 in zip(self.convs1, self.convs2): - xt = F.leaky_relu(x, LRELU_SLOPE) - xt = c1(xt) - xt = F.leaky_relu(xt, LRELU_SLOPE) - xt = c2(xt) - x = xt + x - return x - - def remove_weight_norm(self): - for l in self.convs1: - remove_parametrizations(l, "weight") - for l in self.convs2: - remove_parametrizations(l, "weight") - - -class ResBlock2(torch.nn.Module): - """Residual Block Type 2. It has 1 convolutional layers in each convolutional block. - - Network:: - - x -> lrelu -> conv1-> -> z -> lrelu -> conv2-> o -> + -> o - |---------------------------------------------------| - - - Args: - channels (int): number of hidden channels for the convolutional layers. - kernel_size (int): size of the convolution filter in each layer. - dilations (list): list of dilation value for each conv layer in a block. 
- """ - - def __init__(self, channels, kernel_size=3, dilation=(1, 3)): - super().__init__() - self.convs = nn.ModuleList( - [ - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=dilation[0], - padding=get_padding(kernel_size, dilation[0]), - ) - ), - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=dilation[1], - padding=get_padding(kernel_size, dilation[1]), - ) - ), - ] - ) - - def forward(self, x): - for c in self.convs: - xt = F.leaky_relu(x, LRELU_SLOPE) - xt = c(xt) - x = xt + x - return x - - def remove_weight_norm(self): - for l in self.convs: - remove_parametrizations(l, "weight") - - -class HifiganGenerator(torch.nn.Module): - def __init__( - self, - in_channels, - out_channels, - resblock_type, - resblock_dilation_sizes, - resblock_kernel_sizes, - upsample_kernel_sizes, - upsample_initial_channel, - upsample_factors, - inference_padding=5, - cond_channels=0, - conv_pre_weight_norm=True, - conv_post_weight_norm=True, - conv_post_bias=True, - cond_in_each_up_layer=False, - ): - r"""HiFiGAN Generator with Multi-Receptive Field Fusion (MRF) - - Network: - x -> lrelu -> upsampling_layer -> resblock1_k1x1 -> z1 -> + -> z_sum / #resblocks -> lrelu -> conv_post_7x1 -> tanh -> o - .. -> zI ---| - resblockN_kNx1 -> zN ---' - - Args: - in_channels (int): number of input tensor channels. - out_channels (int): number of output tensor channels. - resblock_type (str): type of the `ResBlock`. '1' or '2'. - resblock_dilation_sizes (List[List[int]]): list of dilation values in each layer of a `ResBlock`. - resblock_kernel_sizes (List[int]): list of kernel sizes for each `ResBlock`. - upsample_kernel_sizes (List[int]): list of kernel sizes for each transposed convolution. - upsample_initial_channel (int): number of channels for the first upsampling layer. This is divided by 2 - for each consecutive upsampling layer. - upsample_factors (List[int]): upsampling factors (stride) for each upsampling layer. - inference_padding (int): constant padding applied to the input at inference time. Defaults to 5. 
- """ - super().__init__() - self.inference_padding = inference_padding - self.num_kernels = len(resblock_kernel_sizes) - self.num_upsamples = len(upsample_factors) - self.cond_in_each_up_layer = cond_in_each_up_layer - - # initial upsampling layers - self.conv_pre = weight_norm(Conv1d(in_channels, upsample_initial_channel, 7, 1, padding=3)) - resblock = ResBlock1 if resblock_type == "1" else ResBlock2 - # upsampling layers - self.ups = nn.ModuleList() - for i, (u, k) in enumerate(zip(upsample_factors, upsample_kernel_sizes)): - self.ups.append( - weight_norm( - ConvTranspose1d( - upsample_initial_channel // (2**i), - upsample_initial_channel // (2 ** (i + 1)), - k, - u, - padding=(k - u) // 2, - ) - ) - ) - # MRF blocks - self.resblocks = nn.ModuleList() - for i in range(len(self.ups)): - ch = upsample_initial_channel // (2 ** (i + 1)) - for _, (k, d) in enumerate(zip(resblock_kernel_sizes, resblock_dilation_sizes)): - self.resblocks.append(resblock(ch, k, d)) - # post convolution layer - self.conv_post = weight_norm(Conv1d(ch, out_channels, 7, 1, padding=3, bias=conv_post_bias)) - if cond_channels > 0: - self.cond_layer = nn.Conv1d(cond_channels, upsample_initial_channel, 1) - - if not conv_pre_weight_norm: - remove_parametrizations(self.conv_pre, "weight") - - if not conv_post_weight_norm: - remove_parametrizations(self.conv_post, "weight") - - if self.cond_in_each_up_layer: - self.conds = nn.ModuleList() - for i in range(len(self.ups)): - ch = upsample_initial_channel // (2 ** (i + 1)) - self.conds.append(nn.Conv1d(cond_channels, ch, 1)) - - def forward(self, x, g=None): - """ - Args: - x (Tensor): feature input tensor. - g (Tensor): global conditioning input tensor. - - Returns: - Tensor: output waveform. - - Shapes: - x: [B, C, T] - Tensor: [B, 1, T] - """ - o = self.conv_pre(x) - if hasattr(self, "cond_layer"): - o = o + self.cond_layer(g) - for i in range(self.num_upsamples): - o = F.leaky_relu(o, LRELU_SLOPE) - o = self.ups[i](o) - - if self.cond_in_each_up_layer: - o = o + self.conds[i](g) - - z_sum = None - for j in range(self.num_kernels): - if z_sum is None: - z_sum = self.resblocks[i * self.num_kernels + j](o) - else: - z_sum += self.resblocks[i * self.num_kernels + j](o) - o = z_sum / self.num_kernels - o = F.leaky_relu(o) - o = self.conv_post(o) - o = torch.tanh(o) - return o - - @torch.no_grad() - def inference(self, c): - """ - Args: - x (Tensor): conditioning input tensor. - - Returns: - Tensor: output waveform. 
- - Shapes: - x: [B, C, T] - Tensor: [B, 1, T] - """ - c = c.to(self.conv_pre.weight.device) - c = torch.nn.functional.pad(c, (self.inference_padding, self.inference_padding), "replicate") - return self.forward(c) - - def remove_weight_norm(self): - logger.info("Removing weight norm...") - for l in self.ups: - remove_parametrizations(l, "weight") - for l in self.resblocks: - l.remove_weight_norm() - remove_parametrizations(self.conv_pre, "weight") - remove_parametrizations(self.conv_post, "weight") - - def load_checkpoint( - self, config, checkpoint_path, eval=False, cache=False - ): # pylint: disable=unused-argument, redefined-builtin - state = torch.load(checkpoint_path, map_location=torch.device("cpu"), weights_only=is_pytorch_at_least_2_4()) - self.load_state_dict(state["model"]) - if eval: - self.eval() - assert not self.training - self.remove_weight_norm() - - -class SELayer(nn.Module): - def __init__(self, channel, reduction=8): - super(SELayer, self).__init__() - self.avg_pool = nn.AdaptiveAvgPool2d(1) - self.fc = nn.Sequential( - nn.Linear(channel, channel // reduction), - nn.ReLU(inplace=True), - nn.Linear(channel // reduction, channel), - nn.Sigmoid(), - ) - - def forward(self, x): - b, c, _, _ = x.size() - y = self.avg_pool(x).view(b, c) - y = self.fc(y).view(b, c, 1, 1) - return x * y - - -class SEBasicBlock(nn.Module): - expansion = 1 - - def __init__(self, inplanes, planes, stride=1, downsample=None, reduction=8): - super(SEBasicBlock, self).__init__() - self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=3, stride=stride, padding=1, bias=False) - self.bn1 = nn.BatchNorm2d(planes) - self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, padding=1, bias=False) - self.bn2 = nn.BatchNorm2d(planes) - self.relu = nn.ReLU(inplace=True) - self.se = SELayer(planes, reduction) - self.downsample = downsample - self.stride = stride - - def forward(self, x): - residual = x - - out = self.conv1(x) - out = self.relu(out) - out = self.bn1(out) - - out = self.conv2(out) - out = self.bn2(out) - out = self.se(out) - - if self.downsample is not None: - residual = self.downsample(x) - - out += residual - out = self.relu(out) - return out - - -def set_init_dict(model_dict, checkpoint_state, c): - # Partial initialization: if there is a mismatch with new and old layer, it is skipped. - for k, v in checkpoint_state.items(): - if k not in model_dict: - logger.warning("Layer missing in the model definition: %s", k) - # 1. filter out unnecessary keys - pretrained_dict = {k: v for k, v in checkpoint_state.items() if k in model_dict} - # 2. filter out different size layers - pretrained_dict = {k: v for k, v in pretrained_dict.items() if v.numel() == model_dict[k].numel()} - # 3. skip reinit layers - if c.has("reinit_layers") and c.reinit_layers is not None: - for reinit_layer_name in c.reinit_layers: - pretrained_dict = {k: v for k, v in pretrained_dict.items() if reinit_layer_name not in k} - # 4. 
overwrite entries in the existing state dict - model_dict.update(pretrained_dict) - logger.info("%d / %d layers are restored.", len(pretrained_dict), len(model_dict)) - return model_dict - - -class PreEmphasis(nn.Module): - def __init__(self, coefficient=0.97): - super().__init__() - self.coefficient = coefficient - self.register_buffer("filter", torch.FloatTensor([-self.coefficient, 1.0]).unsqueeze(0).unsqueeze(0)) - - def forward(self, x): - assert len(x.size()) == 2 - - x = torch.nn.functional.pad(x.unsqueeze(1), (1, 0), "reflect") - return torch.nn.functional.conv1d(x, self.filter).squeeze(1) - - -class ResNetSpeakerEncoder(nn.Module): - """This is copied from 🐸TTS to remove it from the dependencies.""" - - # pylint: disable=W0102 - def __init__( - self, - input_dim=64, - proj_dim=512, - layers=[3, 4, 6, 3], - num_filters=[32, 64, 128, 256], - encoder_type="ASP", - log_input=False, - use_torch_spec=False, - audio_config=None, - ): - super(ResNetSpeakerEncoder, self).__init__() - - self.encoder_type = encoder_type - self.input_dim = input_dim - self.log_input = log_input - self.use_torch_spec = use_torch_spec - self.audio_config = audio_config - self.proj_dim = proj_dim - - self.conv1 = nn.Conv2d(1, num_filters[0], kernel_size=3, stride=1, padding=1) - self.relu = nn.ReLU(inplace=True) - self.bn1 = nn.BatchNorm2d(num_filters[0]) - - self.inplanes = num_filters[0] - self.layer1 = self.create_layer(SEBasicBlock, num_filters[0], layers[0]) - self.layer2 = self.create_layer(SEBasicBlock, num_filters[1], layers[1], stride=(2, 2)) - self.layer3 = self.create_layer(SEBasicBlock, num_filters[2], layers[2], stride=(2, 2)) - self.layer4 = self.create_layer(SEBasicBlock, num_filters[3], layers[3], stride=(2, 2)) - - self.instancenorm = nn.InstanceNorm1d(input_dim) - - if self.use_torch_spec: - self.torch_spec = torch.nn.Sequential( - PreEmphasis(audio_config["preemphasis"]), - torchaudio.transforms.MelSpectrogram( - sample_rate=audio_config["sample_rate"], - n_fft=audio_config["fft_size"], - win_length=audio_config["win_length"], - hop_length=audio_config["hop_length"], - window_fn=torch.hamming_window, - n_mels=audio_config["num_mels"], - ), - ) - - else: - self.torch_spec = None - - outmap_size = int(self.input_dim / 8) - - self.attention = nn.Sequential( - nn.Conv1d(num_filters[3] * outmap_size, 128, kernel_size=1), - nn.ReLU(), - nn.BatchNorm1d(128), - nn.Conv1d(128, num_filters[3] * outmap_size, kernel_size=1), - nn.Softmax(dim=2), - ) - - if self.encoder_type == "SAP": - out_dim = num_filters[3] * outmap_size - elif self.encoder_type == "ASP": - out_dim = num_filters[3] * outmap_size * 2 - else: - raise ValueError("Undefined encoder") - - self.fc = nn.Linear(out_dim, proj_dim) - - self._init_layers() - - def _init_layers(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu") - elif isinstance(m, nn.BatchNorm2d): - nn.init.constant_(m.weight, 1) - nn.init.constant_(m.bias, 0) - - def create_layer(self, block, planes, blocks, stride=1): - downsample = None - if stride != 1 or self.inplanes != planes * block.expansion: - downsample = nn.Sequential( - nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride, bias=False), - nn.BatchNorm2d(planes * block.expansion), - ) - - layers = [] - layers.append(block(self.inplanes, planes, stride, downsample)) - self.inplanes = planes * block.expansion - for _ in range(1, blocks): - layers.append(block(self.inplanes, planes)) - - return 
nn.Sequential(*layers) - - # pylint: disable=R0201 - def new_parameter(self, *size): - out = nn.Parameter(torch.FloatTensor(*size)) - nn.init.xavier_normal_(out) - return out - - def forward(self, x, l2_norm=False): - """Forward pass of the model. - - Args: - x (Tensor): Raw waveform signal or spectrogram frames. If input is a waveform, `torch_spec` must be `True` - to compute the spectrogram on-the-fly. - l2_norm (bool): Whether to L2-normalize the outputs. - - Shapes: - - x: :math:`(N, 1, T_{in})` or :math:`(N, D_{spec}, T_{in})` - """ - x.squeeze_(1) - # if you torch spec compute it otherwise use the mel spec computed by the AP - if self.use_torch_spec: - x = self.torch_spec(x) - - if self.log_input: - x = (x + 1e-6).log() - x = self.instancenorm(x).unsqueeze(1) - - x = self.conv1(x) - x = self.relu(x) - x = self.bn1(x) - - x = self.layer1(x) - x = self.layer2(x) - x = self.layer3(x) - x = self.layer4(x) - - x = x.reshape(x.size()[0], -1, x.size()[-1]) - - w = self.attention(x) - - if self.encoder_type == "SAP": - x = torch.sum(x * w, dim=2) - elif self.encoder_type == "ASP": - mu = torch.sum(x * w, dim=2) - sg = torch.sqrt((torch.sum((x**2) * w, dim=2) - mu**2).clamp(min=1e-5)) - x = torch.cat((mu, sg), 1) - - x = x.view(x.size()[0], -1) - x = self.fc(x) - - if l2_norm: - x = torch.nn.functional.normalize(x, p=2, dim=1) - return x - - def load_checkpoint( - self, - checkpoint_path: str, - eval: bool = False, - use_cuda: bool = False, - criterion=None, - cache=False, - ): - state = load_fsspec(checkpoint_path, map_location=torch.device("cpu"), cache=cache) - try: - self.load_state_dict(state["model"]) - logger.info("Model fully restored.") - except (KeyError, RuntimeError) as error: - # If eval raise the error - if eval: - raise error - - logger.info("Partial model initialization.") - model_dict = self.state_dict() - model_dict = set_init_dict(model_dict, state["model"]) - self.load_state_dict(model_dict) - del model_dict - - # load the criterion for restore_path - if criterion is not None and "criterion" in state: - try: - criterion.load_state_dict(state["criterion"]) - except (KeyError, RuntimeError) as error: - logger.exception("Criterion load ignored because of: %s", error) - - if use_cuda: - self.cuda() - if criterion is not None: - criterion = criterion.cuda() - - if eval: - self.eval() - assert not self.training - - if not eval: - return criterion, state["step"] - return criterion - class HifiDecoder(torch.nn.Module): def __init__( diff --git a/TTS/vocoder/models/hifigan_generator.py b/TTS/vocoder/models/hifigan_generator.py index afdd59a859..8273d02037 100644 --- a/TTS/vocoder/models/hifigan_generator.py +++ b/TTS/vocoder/models/hifigan_generator.py @@ -178,6 +178,7 @@ def __init__( conv_pre_weight_norm=True, conv_post_weight_norm=True, conv_post_bias=True, + cond_in_each_up_layer=False, ): r"""HiFiGAN Generator with Multi-Receptive Field Fusion (MRF) @@ -202,6 +203,8 @@ def __init__( self.inference_padding = inference_padding self.num_kernels = len(resblock_kernel_sizes) self.num_upsamples = len(upsample_factors) + self.cond_in_each_up_layer = cond_in_each_up_layer + # initial upsampling layers self.conv_pre = weight_norm(Conv1d(in_channels, upsample_initial_channel, 7, 1, padding=3)) resblock = ResBlock1 if resblock_type == "1" else ResBlock2 @@ -236,6 +239,12 @@ def __init__( if not conv_post_weight_norm: remove_parametrizations(self.conv_post, "weight") + if self.cond_in_each_up_layer: + self.conds = nn.ModuleList() + for i in range(len(self.ups)): + ch = 
upsample_initial_channel // (2 ** (i + 1)) + self.conds.append(nn.Conv1d(cond_channels, ch, 1)) + def forward(self, x, g=None): """ Args: @@ -255,6 +264,10 @@ def forward(self, x, g=None): for i in range(self.num_upsamples): o = F.leaky_relu(o, LRELU_SLOPE) o = self.ups[i](o) + + if self.cond_in_each_up_layer: + o = o + self.conds[i](g) + z_sum = None for j in range(self.num_kernels): if z_sum is None: From 1f27f994a1d7f30400172c341b0256a350749055 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 20 Nov 2024 18:40:28 +0100 Subject: [PATCH 188/255] refactor(utils): remove duplicate set_partial_state_dict --- TTS/encoder/models/base_encoder.py | 4 ++-- TTS/utils/generic_utils.py | 19 ------------------- 2 files changed, 2 insertions(+), 21 deletions(-) diff --git a/TTS/encoder/models/base_encoder.py b/TTS/encoder/models/base_encoder.py index f7137c2186..2082019aad 100644 --- a/TTS/encoder/models/base_encoder.py +++ b/TTS/encoder/models/base_encoder.py @@ -5,10 +5,10 @@ import torchaudio from coqpit import Coqpit from torch import nn +from trainer.generic_utils import set_partial_state_dict from trainer.io import load_fsspec from TTS.encoder.losses import AngleProtoLoss, GE2ELoss, SoftmaxAngleProtoLoss -from TTS.utils.generic_utils import set_init_dict logger = logging.getLogger(__name__) @@ -130,7 +130,7 @@ def load_checkpoint( logger.info("Partial model initialization.") model_dict = self.state_dict() - model_dict = set_init_dict(model_dict, state["model"], c) + model_dict = set_partial_state_dict(model_dict, state["model"], config) self.load_state_dict(model_dict) del model_dict diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index 3ee285232f..c38282248d 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -54,25 +54,6 @@ def get_import_path(obj: object) -> str: return ".".join([type(obj).__module__, type(obj).__name__]) -def set_init_dict(model_dict, checkpoint_state, c): - # Partial initialization: if there is a mismatch with new and old layer, it is skipped. - for k, v in checkpoint_state.items(): - if k not in model_dict: - logger.warning("Layer missing in the model finition %s", k) - # 1. filter out unnecessary keys - pretrained_dict = {k: v for k, v in checkpoint_state.items() if k in model_dict} - # 2. filter out different size layers - pretrained_dict = {k: v for k, v in pretrained_dict.items() if v.numel() == model_dict[k].numel()} - # 3. skip reinit layers - if c.has("reinit_layers") and c.reinit_layers is not None: - for reinit_layer_name in c.reinit_layers: - pretrained_dict = {k: v for k, v in pretrained_dict.items() if reinit_layer_name not in k} - # 4. overwrite entries in the existing state dict - model_dict.update(pretrained_dict) - logger.info("%d / %d layers are restored.", len(pretrained_dict), len(model_dict)) - return model_dict - - def format_aux_input(def_args: Dict, kwargs: Dict) -> Dict: """Format kwargs to hande auxilary inputs to models. 
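
For reference, a minimal sketch of how a caller restores a partially matching checkpoint after this change. The import path and the three-argument call are taken from the hunk above; the helper name restore_partial, the checkpoint path argument, and the assumption that set_partial_state_dict filters the same way the removed set_init_dict did (keep keys present in the model with matching sizes, skip config.reinit_layers) are illustrative only, not part of this series.

    import torch
    from trainer.generic_utils import set_partial_state_dict

    def restore_partial(model, config, checkpoint_path):
        """Load only the checkpoint tensors that still fit the current model.

        `model` is any torch.nn.Module and `config` its Coqpit config; both are
        assumed to exist in the caller. The checkpoint is expected to store the
        weights under the "model" key, as in the encoder code above.
        """
        state = torch.load(checkpoint_path, map_location="cpu")
        model_dict = model.state_dict()
        # Merge the compatible checkpoint entries into the model's own state dict,
        # then load the merged dict back into the model.
        model_dict = set_partial_state_dict(model_dict, state["model"], config)
        model.load_state_dict(model_dict)
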
From 66701e1e5120d1db44023de09c5b46f0aa56eba3 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 21 Nov 2024 12:21:38 +0100 Subject: [PATCH 189/255] refactor(xtts): reuse functions/classes from tortoise --- TTS/tts/layers/xtts/latent_encoder.py | 23 +---------------------- TTS/tts/layers/xtts/perceiver_encoder.py | 8 ++------ 2 files changed, 3 insertions(+), 28 deletions(-) diff --git a/TTS/tts/layers/xtts/latent_encoder.py b/TTS/tts/layers/xtts/latent_encoder.py index f9d62a36f1..7d385ec46a 100644 --- a/TTS/tts/layers/xtts/latent_encoder.py +++ b/TTS/tts/layers/xtts/latent_encoder.py @@ -6,10 +6,7 @@ from torch import nn from torch.nn import functional as F - -class GroupNorm32(nn.GroupNorm): - def forward(self, x): - return super().forward(x.float()).type(x.dtype) +from TTS.tts.layers.tortoise.arch_utils import normalization, zero_module def conv_nd(dims, *args, **kwargs): @@ -22,24 +19,6 @@ def conv_nd(dims, *args, **kwargs): raise ValueError(f"unsupported dimensions: {dims}") -def normalization(channels): - groups = 32 - if channels <= 16: - groups = 8 - elif channels <= 64: - groups = 16 - while channels % groups != 0: - groups = int(groups / 2) - assert groups > 2 - return GroupNorm32(groups, channels) - - -def zero_module(module): - for p in module.parameters(): - p.detach().zero_() - return module - - class QKVAttention(nn.Module): def __init__(self, n_heads): super().__init__() diff --git a/TTS/tts/layers/xtts/perceiver_encoder.py b/TTS/tts/layers/xtts/perceiver_encoder.py index f4b6e84123..4b42a0e467 100644 --- a/TTS/tts/layers/xtts/perceiver_encoder.py +++ b/TTS/tts/layers/xtts/perceiver_encoder.py @@ -9,6 +9,8 @@ from einops.layers.torch import Rearrange from torch import einsum, nn +from TTS.tts.layers.tortoise.transformer import GEGLU + def exists(val): return val is not None @@ -194,12 +196,6 @@ def forward(self, x): return super().forward(causal_padded_x) -class GEGLU(nn.Module): - def forward(self, x): - x, gate = x.chunk(2, dim=-1) - return F.gelu(gate) * x - - def FeedForward(dim, mult=4, causal_conv=False): dim_inner = int(dim * mult * 2 / 3) From 4ba83f42ab4287430f47f9f17031222e7bbb3086 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 21 Nov 2024 12:28:03 +0100 Subject: [PATCH 190/255] chore(tortoise): remove unused AudioMiniEncoder There's one in tortoise.classifier that's actually used --- TTS/tts/layers/tortoise/arch_utils.py | 108 -------------------------- 1 file changed, 108 deletions(-) diff --git a/TTS/tts/layers/tortoise/arch_utils.py b/TTS/tts/layers/tortoise/arch_utils.py index 8eda251f93..c9abcf6094 100644 --- a/TTS/tts/layers/tortoise/arch_utils.py +++ b/TTS/tts/layers/tortoise/arch_utils.py @@ -185,114 +185,6 @@ def forward(self, x): return self.op(x) -class ResBlock(nn.Module): - def __init__( - self, - channels, - dropout, - out_channels=None, - use_conv=False, - use_scale_shift_norm=False, - up=False, - down=False, - kernel_size=3, - ): - super().__init__() - self.channels = channels - self.dropout = dropout - self.out_channels = out_channels or channels - self.use_conv = use_conv - self.use_scale_shift_norm = use_scale_shift_norm - padding = 1 if kernel_size == 3 else 2 - - self.in_layers = nn.Sequential( - normalization(channels), - nn.SiLU(), - nn.Conv1d(channels, self.out_channels, kernel_size, padding=padding), - ) - - self.updown = up or down - - if up: - self.h_upd = Upsample(channels, False) - self.x_upd = Upsample(channels, False) - elif down: - self.h_upd = Downsample(channels, False) - self.x_upd = Downsample(channels, False) - 
else: - self.h_upd = self.x_upd = nn.Identity() - - self.out_layers = nn.Sequential( - normalization(self.out_channels), - nn.SiLU(), - nn.Dropout(p=dropout), - zero_module(nn.Conv1d(self.out_channels, self.out_channels, kernel_size, padding=padding)), - ) - - if self.out_channels == channels: - self.skip_connection = nn.Identity() - elif use_conv: - self.skip_connection = nn.Conv1d(channels, self.out_channels, kernel_size, padding=padding) - else: - self.skip_connection = nn.Conv1d(channels, self.out_channels, 1) - - def forward(self, x): - if self.updown: - in_rest, in_conv = self.in_layers[:-1], self.in_layers[-1] - h = in_rest(x) - h = self.h_upd(h) - x = self.x_upd(x) - h = in_conv(h) - else: - h = self.in_layers(x) - h = self.out_layers(h) - return self.skip_connection(x) + h - - -class AudioMiniEncoder(nn.Module): - def __init__( - self, - spec_dim, - embedding_dim, - base_channels=128, - depth=2, - resnet_blocks=2, - attn_blocks=4, - num_attn_heads=4, - dropout=0, - downsample_factor=2, - kernel_size=3, - ): - super().__init__() - self.init = nn.Sequential(nn.Conv1d(spec_dim, base_channels, 3, padding=1)) - ch = base_channels - res = [] - for l in range(depth): - for r in range(resnet_blocks): - res.append(ResBlock(ch, dropout, kernel_size=kernel_size)) - res.append(Downsample(ch, use_conv=True, out_channels=ch * 2, factor=downsample_factor)) - ch *= 2 - self.res = nn.Sequential(*res) - self.final = nn.Sequential(normalization(ch), nn.SiLU(), nn.Conv1d(ch, embedding_dim, 1)) - attn = [] - for a in range(attn_blocks): - attn.append( - AttentionBlock( - embedding_dim, - num_attn_heads, - ) - ) - self.attn = nn.Sequential(*attn) - self.dim = embedding_dim - - def forward(self, x): - h = self.init(x) - h = self.res(h) - h = self.final(h) - h = self.attn(h) - return h[:, :, 0] - - DEFAULT_MEL_NORM_FILE = "https://github.com/coqui-ai/TTS/releases/download/v0.14.1_models/mel_norms.pth" From 705551c60c84ff8856efc3cf428ecf817a4f7f72 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 21 Nov 2024 12:40:12 +0100 Subject: [PATCH 191/255] refactor(tortoise): remove unused do_checkpoint arguments These are assigned but not used for anything. 
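
Illustration only, not part of the diff below: the removed flags were write-only attributes (assigned in __init__, never read again), so dropping them cannot change behaviour. A trimmed, hypothetical version of the pattern being cleaned up:

    import torch
    from torch import nn

    class AttentionBlock(nn.Module):
        """Cut-down sketch of the store-but-never-read pattern."""

        def __init__(self, channels, num_heads=1, do_checkpoint=True):
            super().__init__()
            self.channels = channels
            self.num_heads = num_heads
            self.do_checkpoint = do_checkpoint  # assigned here ...

        def forward(self, x):
            # ... but never consulted here or anywhere else, so the argument is dead.
            return x

Call sites such as AttentionBlock(model_channels * 2, num_heads, relative_pos_embeddings=True) simply stop passing the keyword.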
--- TTS/tts/layers/tortoise/arch_utils.py | 2 -- TTS/tts/layers/tortoise/autoregressive.py | 2 -- TTS/tts/layers/tortoise/classifier.py | 6 ++---- TTS/tts/layers/tortoise/diffusion_decoder.py | 5 ----- 4 files changed, 2 insertions(+), 13 deletions(-) diff --git a/TTS/tts/layers/tortoise/arch_utils.py b/TTS/tts/layers/tortoise/arch_utils.py index c9abcf6094..4c3733e691 100644 --- a/TTS/tts/layers/tortoise/arch_utils.py +++ b/TTS/tts/layers/tortoise/arch_utils.py @@ -93,12 +93,10 @@ def __init__( channels, num_heads=1, num_head_channels=-1, - do_checkpoint=True, relative_pos_embeddings=False, ): super().__init__() self.channels = channels - self.do_checkpoint = do_checkpoint if num_head_channels == -1: self.num_heads = num_heads else: diff --git a/TTS/tts/layers/tortoise/autoregressive.py b/TTS/tts/layers/tortoise/autoregressive.py index aaae695516..e3ffd4d1f6 100644 --- a/TTS/tts/layers/tortoise/autoregressive.py +++ b/TTS/tts/layers/tortoise/autoregressive.py @@ -175,7 +175,6 @@ def __init__( embedding_dim, attn_blocks=6, num_attn_heads=4, - do_checkpointing=False, mean=False, ): super().__init__() @@ -185,7 +184,6 @@ def __init__( attn.append(AttentionBlock(embedding_dim, num_attn_heads)) self.attn = nn.Sequential(*attn) self.dim = embedding_dim - self.do_checkpointing = do_checkpointing self.mean = mean def forward(self, x): diff --git a/TTS/tts/layers/tortoise/classifier.py b/TTS/tts/layers/tortoise/classifier.py index 8764bb070b..c72834e9a8 100644 --- a/TTS/tts/layers/tortoise/classifier.py +++ b/TTS/tts/layers/tortoise/classifier.py @@ -16,7 +16,6 @@ def __init__( up=False, down=False, kernel_size=3, - do_checkpoint=True, ): super().__init__() self.channels = channels @@ -24,7 +23,6 @@ def __init__( self.out_channels = out_channels or channels self.use_conv = use_conv self.use_scale_shift_norm = use_scale_shift_norm - self.do_checkpoint = do_checkpoint padding = 1 if kernel_size == 3 else 2 self.in_layers = nn.Sequential( @@ -92,14 +90,14 @@ def __init__( self.layers = depth for l in range(depth): for r in range(resnet_blocks): - res.append(ResBlock(ch, dropout, do_checkpoint=False, kernel_size=kernel_size)) + res.append(ResBlock(ch, dropout, kernel_size=kernel_size)) res.append(Downsample(ch, use_conv=True, out_channels=ch * 2, factor=downsample_factor)) ch *= 2 self.res = nn.Sequential(*res) self.final = nn.Sequential(normalization(ch), nn.SiLU(), nn.Conv1d(ch, embedding_dim, 1)) attn = [] for a in range(attn_blocks): - attn.append(AttentionBlock(embedding_dim, num_attn_heads, do_checkpoint=False)) + attn.append(AttentionBlock(embedding_dim, num_attn_heads)) self.attn = nn.Sequential(*attn) self.dim = embedding_dim diff --git a/TTS/tts/layers/tortoise/diffusion_decoder.py b/TTS/tts/layers/tortoise/diffusion_decoder.py index f71eaf1718..15bbfb7121 100644 --- a/TTS/tts/layers/tortoise/diffusion_decoder.py +++ b/TTS/tts/layers/tortoise/diffusion_decoder.py @@ -196,31 +196,26 @@ def __init__( model_channels * 2, num_heads, relative_pos_embeddings=True, - do_checkpoint=False, ), AttentionBlock( model_channels * 2, num_heads, relative_pos_embeddings=True, - do_checkpoint=False, ), AttentionBlock( model_channels * 2, num_heads, relative_pos_embeddings=True, - do_checkpoint=False, ), AttentionBlock( model_channels * 2, num_heads, relative_pos_embeddings=True, - do_checkpoint=False, ), AttentionBlock( model_channels * 2, num_heads, relative_pos_embeddings=True, - do_checkpoint=False, ), ) self.unconditioned_embedding = nn.Parameter(torch.randn(1, model_channels, 1)) From 
5ffc0543b76858ee5a25fc60c3de9d0369e43dd5 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 21 Nov 2024 13:06:20 +0100 Subject: [PATCH 192/255] refactor(bark): remove custom layer norm Pytorch LayerNorm supports bias=False since version 2.1 --- TTS/tts/layers/bark/model.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/TTS/tts/layers/bark/model.py b/TTS/tts/layers/bark/model.py index 68c50dbdbd..54a9cecec0 100644 --- a/TTS/tts/layers/bark/model.py +++ b/TTS/tts/layers/bark/model.py @@ -12,18 +12,6 @@ from torch.nn import functional as F -class LayerNorm(nn.Module): - """LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False""" - - def __init__(self, ndim, bias): - super().__init__() - self.weight = nn.Parameter(torch.ones(ndim)) - self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None - - def forward(self, x): - return F.layer_norm(x, self.weight.shape, self.weight, self.bias, 1e-5) - - class CausalSelfAttention(nn.Module): def __init__(self, config): super().__init__() @@ -119,9 +107,9 @@ def forward(self, x): class Block(nn.Module): def __init__(self, config, layer_idx): super().__init__() - self.ln_1 = LayerNorm(config.n_embd, bias=config.bias) + self.ln_1 = nn.LayerNorm(config.n_embd, bias=config.bias) self.attn = CausalSelfAttention(config) - self.ln_2 = LayerNorm(config.n_embd, bias=config.bias) + self.ln_2 = nn.LayerNorm(config.n_embd, bias=config.bias) self.mlp = MLP(config) self.layer_idx = layer_idx @@ -158,7 +146,7 @@ def __init__(self, config): wpe=nn.Embedding(config.block_size, config.n_embd), drop=nn.Dropout(config.dropout), h=nn.ModuleList([Block(config, idx) for idx in range(config.n_layer)]), - ln_f=LayerNorm(config.n_embd, bias=config.bias), + ln_f=nn.LayerNorm(config.n_embd, bias=config.bias), ) ) self.lm_head = nn.Linear(config.n_embd, config.output_vocab_size, bias=False) From 490c973371c4a5ae345982325324efd0ece7f4af Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 21 Nov 2024 15:05:37 +0100 Subject: [PATCH 193/255] refactor(xtts): use position embedding from tortoise --- TTS/tts/layers/tortoise/autoregressive.py | 15 ++++++++++---- TTS/tts/layers/xtts/gpt.py | 24 +---------------------- 2 files changed, 12 insertions(+), 27 deletions(-) diff --git a/TTS/tts/layers/tortoise/autoregressive.py b/TTS/tts/layers/tortoise/autoregressive.py index e3ffd4d1f6..3463e63b39 100644 --- a/TTS/tts/layers/tortoise/autoregressive.py +++ b/TTS/tts/layers/tortoise/autoregressive.py @@ -1,5 +1,6 @@ # AGPL: a notification must be added stating that changes have been made to that file. 
import functools +import random from typing import Optional import torch @@ -123,7 +124,7 @@ def forward( else: emb = self.embeddings(input_ids) emb = emb + self.text_pos_embedding.get_fixed_embedding( - attention_mask.shape[1] - mel_len, attention_mask.device + attention_mask.shape[1] - (mel_len + 1), attention_mask.device ) transformer_outputs = self.transformer( @@ -196,18 +197,24 @@ def forward(self, x): class LearnedPositionEmbeddings(nn.Module): - def __init__(self, seq_len, model_dim, init=0.02): + def __init__(self, seq_len, model_dim, init=0.02, relative=False): super().__init__() self.emb = nn.Embedding(seq_len, model_dim) # Initializing this way is standard for GPT-2 self.emb.weight.data.normal_(mean=0.0, std=init) + self.relative = relative + self.seq_len = seq_len def forward(self, x): sl = x.shape[1] - return self.emb(torch.arange(0, sl, device=x.device)) + if self.relative: + start = random.randint(sl, self.seq_len) - sl + return self.emb(torch.arange(start, start + sl, device=x.device)) + else: + return self.emb(torch.arange(0, sl, device=x.device)) def get_fixed_embedding(self, ind, dev): - return self.emb(torch.arange(0, ind, device=dev))[ind - 1 : ind] + return self.emb(torch.tensor([ind], device=dev)).unsqueeze(0) def build_hf_gpt_transformer(layers, model_dim, heads, max_mel_seq_len, max_text_seq_len, checkpointing): diff --git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py index b3c3b31b47..f93287619e 100644 --- a/TTS/tts/layers/xtts/gpt.py +++ b/TTS/tts/layers/xtts/gpt.py @@ -8,7 +8,7 @@ import torch.nn.functional as F from transformers import GPT2Config -from TTS.tts.layers.tortoise.autoregressive import _prepare_attention_mask_for_generation +from TTS.tts.layers.tortoise.autoregressive import LearnedPositionEmbeddings, _prepare_attention_mask_for_generation from TTS.tts.layers.xtts.gpt_inference import GPT2InferenceModel from TTS.tts.layers.xtts.latent_encoder import ConditioningEncoder from TTS.tts.layers.xtts.perceiver_encoder import PerceiverResampler @@ -18,28 +18,6 @@ def null_position_embeddings(range, dim): return torch.zeros((range.shape[0], range.shape[1], dim), device=range.device) -class LearnedPositionEmbeddings(nn.Module): - def __init__(self, seq_len, model_dim, init=0.02, relative=False): - super().__init__() - # nn.Embedding - self.emb = torch.nn.Embedding(seq_len, model_dim) - # Initializing this way is standard for GPT-2 - self.emb.weight.data.normal_(mean=0.0, std=init) - self.relative = relative - self.seq_len = seq_len - - def forward(self, x): - sl = x.shape[1] - if self.relative: - start = random.randint(sl, self.seq_len) - sl - return self.emb(torch.arange(start, start + sl, device=x.device)) - else: - return self.emb(torch.arange(0, sl, device=x.device)) - - def get_fixed_embedding(self, ind, dev): - return self.emb(torch.tensor([ind], device=dev)).unsqueeze(0) - - def build_hf_gpt_transformer( layers, model_dim, From 33ac0d6ee179b9959d86130bdfbff2abad30c587 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 21 Nov 2024 15:33:36 +0100 Subject: [PATCH 194/255] refactor(xtts): use build_hf_gpt_transformer from tortoise --- TTS/tts/layers/tortoise/autoregressive.py | 43 +++++++++----- TTS/tts/layers/xtts/gpt.py | 70 ++++------------------- 2 files changed, 40 insertions(+), 73 deletions(-) diff --git a/TTS/tts/layers/tortoise/autoregressive.py b/TTS/tts/layers/tortoise/autoregressive.py index 3463e63b39..19c1adc0a6 100644 --- a/TTS/tts/layers/tortoise/autoregressive.py +++ b/TTS/tts/layers/tortoise/autoregressive.py @@ -217,7 
+217,15 @@ def get_fixed_embedding(self, ind, dev): return self.emb(torch.tensor([ind], device=dev)).unsqueeze(0) -def build_hf_gpt_transformer(layers, model_dim, heads, max_mel_seq_len, max_text_seq_len, checkpointing): +def build_hf_gpt_transformer( + layers: int, + model_dim: int, + heads: int, + max_mel_seq_len: int, + max_text_seq_len: int, + checkpointing: bool, + max_prompt_len: int = 0, +): """ GPT-2 implemented by the HuggingFace library. """ @@ -225,8 +233,8 @@ def build_hf_gpt_transformer(layers, model_dim, heads, max_mel_seq_len, max_text gpt_config = GPT2Config( vocab_size=256, # Unused. - n_positions=max_mel_seq_len + max_text_seq_len, - n_ctx=max_mel_seq_len + max_text_seq_len, + n_positions=max_mel_seq_len + max_text_seq_len + max_prompt_len, + n_ctx=max_mel_seq_len + max_text_seq_len + max_prompt_len, n_embd=model_dim, n_layer=layers, n_head=heads, @@ -239,13 +247,18 @@ def build_hf_gpt_transformer(layers, model_dim, heads, max_mel_seq_len, max_text gpt.wpe = functools.partial(null_position_embeddings, dim=model_dim) # Built-in token embeddings are unused. del gpt.wte - return ( - gpt, - LearnedPositionEmbeddings(max_mel_seq_len, model_dim), - LearnedPositionEmbeddings(max_text_seq_len, model_dim), - None, - None, + + mel_pos_emb = ( + LearnedPositionEmbeddings(max_mel_seq_len, model_dim) + if max_mel_seq_len != -1 + else functools.partial(null_position_embeddings, dim=model_dim) + ) + text_pos_emb = ( + LearnedPositionEmbeddings(max_text_seq_len, model_dim) + if max_mel_seq_len != -1 + else functools.partial(null_position_embeddings, dim=model_dim) ) + return gpt, mel_pos_emb, text_pos_emb, None, None class MelEncoder(nn.Module): @@ -339,12 +352,12 @@ def __init__( self.mel_layer_pos_embedding, self.text_layer_pos_embedding, ) = build_hf_gpt_transformer( - layers, - model_dim, - heads, - self.max_mel_tokens + 2 + self.max_conditioning_inputs, - self.max_text_tokens + 2, - checkpointing, + layers=layers, + model_dim=model_dim, + heads=heads, + max_mel_seq_len=self.max_mel_tokens + 2 + self.max_conditioning_inputs, + max_text_seq_len=self.max_text_tokens + 2, + checkpointing=checkpointing, ) if train_solo_embeddings: self.mel_solo_embedding = nn.Parameter(torch.randn(1, 1, model_dim) * 0.02, requires_grad=True) diff --git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py index f93287619e..899522e091 100644 --- a/TTS/tts/layers/xtts/gpt.py +++ b/TTS/tts/layers/xtts/gpt.py @@ -1,6 +1,5 @@ # ported from: https://github.com/neonbjb/tortoise-tts -import functools import random import torch @@ -8,61 +7,16 @@ import torch.nn.functional as F from transformers import GPT2Config -from TTS.tts.layers.tortoise.autoregressive import LearnedPositionEmbeddings, _prepare_attention_mask_for_generation +from TTS.tts.layers.tortoise.autoregressive import ( + LearnedPositionEmbeddings, + _prepare_attention_mask_for_generation, + build_hf_gpt_transformer, +) from TTS.tts.layers.xtts.gpt_inference import GPT2InferenceModel from TTS.tts.layers.xtts.latent_encoder import ConditioningEncoder from TTS.tts.layers.xtts.perceiver_encoder import PerceiverResampler -def null_position_embeddings(range, dim): - return torch.zeros((range.shape[0], range.shape[1], dim), device=range.device) - - -def build_hf_gpt_transformer( - layers, - model_dim, - heads, - max_mel_seq_len, - max_text_seq_len, - max_prompt_len, - checkpointing, -): - """ - GPT-2 implemented by the HuggingFace library. - """ - from transformers import GPT2Config, GPT2Model - - gpt_config = GPT2Config( - vocab_size=256, # Unused. 
- n_positions=max_mel_seq_len + max_text_seq_len + max_prompt_len, - n_ctx=max_mel_seq_len + max_text_seq_len + max_prompt_len, - n_embd=model_dim, - n_layer=layers, - n_head=heads, - gradient_checkpointing=checkpointing, - use_cache=not checkpointing, - ) - gpt = GPT2Model(gpt_config) - # Override the built in positional embeddings - del gpt.wpe - gpt.wpe = functools.partial(null_position_embeddings, dim=model_dim) - # Built-in token embeddings are unused. - del gpt.wte - - mel_pos_emb = ( - LearnedPositionEmbeddings(max_mel_seq_len, model_dim) - if max_mel_seq_len != -1 - else functools.partial(null_position_embeddings, dim=model_dim) - ) - text_pos_emb = ( - LearnedPositionEmbeddings(max_text_seq_len, model_dim) - if max_mel_seq_len != -1 - else functools.partial(null_position_embeddings, dim=model_dim) - ) - # gpt = torch.compile(gpt, mode="reduce-overhead", fullgraph=True) - return gpt, mel_pos_emb, text_pos_emb, None, None - - class GPT(nn.Module): def __init__( self, @@ -127,13 +81,13 @@ def __init__( self.mel_layer_pos_embedding, self.text_layer_pos_embedding, ) = build_hf_gpt_transformer( - layers, - model_dim, - heads, - self.max_mel_tokens, - self.max_text_tokens, - self.max_prompt_tokens, - checkpointing, + layers=layers, + model_dim=model_dim, + heads=heads, + max_mel_seq_len=self.max_mel_tokens, + max_text_seq_len=self.max_text_tokens, + max_prompt_len=self.max_prompt_tokens, + checkpointing=checkpointing, ) if train_solo_embeddings: self.mel_solo_embedding = nn.Parameter(torch.randn(1, 1, model_dim) * 0.02, requires_grad=True) From 7cdfde226bc03cc792424c4f3a93741150213cfc Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 22 Nov 2024 21:30:21 +0100 Subject: [PATCH 195/255] refactor: move amp_to_db/db_to_amp into torch_transforms --- TTS/tts/layers/tortoise/audio_utils.py | 22 ++------------- TTS/tts/models/delightful_tts.py | 19 +------------ TTS/tts/models/vits.py | 19 +------------ TTS/utils/audio/numpy_transforms.py | 2 +- TTS/utils/audio/torch_transforms.py | 18 ++++++------ TTS/vc/modules/freevc/mel_processing.py | 35 +++--------------------- tests/aux_tests/test_stft_torch.py | 0 tests/aux_tests/test_torch_transforms.py | 16 +++++++++++ tests/tts_tests/test_vits.py | 3 +- 9 files changed, 36 insertions(+), 98 deletions(-) delete mode 100644 tests/aux_tests/test_stft_torch.py create mode 100644 tests/aux_tests/test_torch_transforms.py diff --git a/TTS/tts/layers/tortoise/audio_utils.py b/TTS/tts/layers/tortoise/audio_utils.py index 4f299a8fd9..c67ee6c44b 100644 --- a/TTS/tts/layers/tortoise/audio_utils.py +++ b/TTS/tts/layers/tortoise/audio_utils.py @@ -9,7 +9,7 @@ import torchaudio from scipy.io.wavfile import read -from TTS.utils.audio.torch_transforms import TorchSTFT +from TTS.utils.audio.torch_transforms import TorchSTFT, amp_to_db from TTS.utils.generic_utils import is_pytorch_at_least_2_4 logger = logging.getLogger(__name__) @@ -88,24 +88,6 @@ def normalize_tacotron_mel(mel): return 2 * ((mel - TACOTRON_MEL_MIN) / (TACOTRON_MEL_MAX - TACOTRON_MEL_MIN)) - 1 -def dynamic_range_compression(x, C=1, clip_val=1e-5): - """ - PARAMS - ------ - C: compression factor - """ - return torch.log(torch.clamp(x, min=clip_val) * C) - - -def dynamic_range_decompression(x, C=1): - """ - PARAMS - ------ - C: compression factor used to compress - """ - return torch.exp(x) / C - - def get_voices(extra_voice_dirs: List[str] = []): dirs = extra_voice_dirs voices: Dict[str, List[str]] = {} @@ -175,7 +157,7 @@ def wav_to_univnet_mel(wav, do_normalization=False, device="cuda"): ) stft 
= stft.to(device) mel = stft(wav) - mel = dynamic_range_compression(mel) + mel = amp_to_db(mel) if do_normalization: mel = normalize_tacotron_mel(mel) return mel diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index c6f15a7952..880ea4ae26 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -32,6 +32,7 @@ from TTS.utils.audio.numpy_transforms import db_to_amp as db_to_amp_numpy from TTS.utils.audio.numpy_transforms import mel_to_wav as mel_to_wav_numpy from TTS.utils.audio.processor import AudioProcessor +from TTS.utils.audio.torch_transforms import amp_to_db from TTS.vocoder.layers.losses import MultiScaleSTFTLoss from TTS.vocoder.models.hifigan_generator import HifiganGenerator from TTS.vocoder.utils.generic_utils import plot_results @@ -136,24 +137,6 @@ def load_audio(file_path: str): return x, sr -def _amp_to_db(x, C=1, clip_val=1e-5): - return torch.log(torch.clamp(x, min=clip_val) * C) - - -def _db_to_amp(x, C=1): - return torch.exp(x) / C - - -def amp_to_db(magnitudes): - output = _amp_to_db(magnitudes) - return output - - -def db_to_amp(magnitudes): - output = _db_to_amp(magnitudes) - return output - - def _wav_to_spec(y, n_fft, hop_length, win_length, center=False): y = y.squeeze(1) diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index 432b29f5e1..aea0f4e4f8 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -35,6 +35,7 @@ from TTS.tts.utils.text.characters import BaseCharacters, BaseVocabulary, _characters, _pad, _phonemes, _punctuations from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment +from TTS.utils.audio.torch_transforms import amp_to_db from TTS.utils.samplers import BucketBatchSampler from TTS.vocoder.models.hifigan_generator import HifiganGenerator from TTS.vocoder.utils.generic_utils import plot_results @@ -78,24 +79,6 @@ def load_audio(file_path): return x, sr -def _amp_to_db(x, C=1, clip_val=1e-5): - return torch.log(torch.clamp(x, min=clip_val) * C) - - -def _db_to_amp(x, C=1): - return torch.exp(x) / C - - -def amp_to_db(magnitudes): - output = _amp_to_db(magnitudes) - return output - - -def db_to_amp(magnitudes): - output = _db_to_amp(magnitudes) - return output - - def wav_to_spec(y, n_fft, hop_length, win_length, center=False): """ Args Shapes: diff --git a/TTS/utils/audio/numpy_transforms.py b/TTS/utils/audio/numpy_transforms.py index 203091ea88..9c83009b0f 100644 --- a/TTS/utils/audio/numpy_transforms.py +++ b/TTS/utils/audio/numpy_transforms.py @@ -59,7 +59,7 @@ def _exp(x, base): return np.exp(x) -def amp_to_db(*, x: np.ndarray, gain: float = 1, base: int = 10, **kwargs) -> np.ndarray: +def amp_to_db(*, x: np.ndarray, gain: float = 1, base: float = 10, **kwargs) -> np.ndarray: """Convert amplitude values to decibels. 
Args: diff --git a/TTS/utils/audio/torch_transforms.py b/TTS/utils/audio/torch_transforms.py index 632969c51a..dda4c0a419 100644 --- a/TTS/utils/audio/torch_transforms.py +++ b/TTS/utils/audio/torch_transforms.py @@ -3,6 +3,16 @@ from torch import nn +def amp_to_db(x: torch.Tensor, *, spec_gain: float = 1.0, clip_val: float = 1e-5) -> torch.Tensor: + """Spectral normalization / dynamic range compression.""" + return torch.log(torch.clamp(x, min=clip_val) * spec_gain) + + +def db_to_amp(x: torch.Tensor, *, spec_gain: float = 1.0) -> torch.Tensor: + """Spectral denormalization / dynamic range decompression.""" + return torch.exp(x) / spec_gain + + class TorchSTFT(nn.Module): # pylint: disable=abstract-method """Some of the audio processing funtions using Torch for faster batch processing. @@ -157,11 +167,3 @@ def _build_mel_basis(self): norm=self.mel_norm, ) self.mel_basis = torch.from_numpy(mel_basis).float() - - @staticmethod - def _amp_to_db(x, spec_gain=1.0): - return torch.log(torch.clamp(x, min=1e-5) * spec_gain) - - @staticmethod - def _db_to_amp(x, spec_gain=1.0): - return torch.exp(x) / spec_gain diff --git a/TTS/vc/modules/freevc/mel_processing.py b/TTS/vc/modules/freevc/mel_processing.py index a3e251891a..4da5e27c83 100644 --- a/TTS/vc/modules/freevc/mel_processing.py +++ b/TTS/vc/modules/freevc/mel_processing.py @@ -4,39 +4,12 @@ import torch.utils.data from librosa.filters import mel as librosa_mel_fn +from TTS.utils.audio.torch_transforms import amp_to_db + logger = logging.getLogger(__name__) MAX_WAV_VALUE = 32768.0 - -def dynamic_range_compression_torch(x, C=1, clip_val=1e-5): - """ - PARAMS - ------ - C: compression factor - """ - return torch.log(torch.clamp(x, min=clip_val) * C) - - -def dynamic_range_decompression_torch(x, C=1): - """ - PARAMS - ------ - C: compression factor used to compress - """ - return torch.exp(x) / C - - -def spectral_normalize_torch(magnitudes): - output = dynamic_range_compression_torch(magnitudes) - return output - - -def spectral_de_normalize_torch(magnitudes): - output = dynamic_range_decompression_torch(magnitudes) - return output - - mel_basis = {} hann_window = {} @@ -85,7 +58,7 @@ def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax): mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device) spec = torch.matmul(mel_basis[fmax_dtype_device], spec) - spec = spectral_normalize_torch(spec) + spec = amp_to_db(spec) return spec @@ -128,6 +101,6 @@ def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) spec = torch.matmul(mel_basis[fmax_dtype_device], spec) - spec = spectral_normalize_torch(spec) + spec = amp_to_db(spec) return spec diff --git a/tests/aux_tests/test_stft_torch.py b/tests/aux_tests/test_stft_torch.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/tests/aux_tests/test_torch_transforms.py b/tests/aux_tests/test_torch_transforms.py new file mode 100644 index 0000000000..2da5a359c1 --- /dev/null +++ b/tests/aux_tests/test_torch_transforms.py @@ -0,0 +1,16 @@ +import numpy as np +import torch + +from TTS.utils.audio import numpy_transforms as np_transforms +from TTS.utils.audio.torch_transforms import amp_to_db, db_to_amp + + +def test_amplitude_db_conversion(): + x = torch.rand(11) + o1 = amp_to_db(x=x, spec_gain=1.0) + o2 = db_to_amp(x=o1, spec_gain=1.0) + np_o1 = np_transforms.amp_to_db(x=x, 
base=np.e) + np_o2 = np_transforms.db_to_amp(x=np_o1, base=np.e) + assert torch.allclose(x, o2) + assert torch.allclose(o1, np_o1) + assert torch.allclose(o2, np_o2) diff --git a/tests/tts_tests/test_vits.py b/tests/tts_tests/test_vits.py index 17992773ad..a27bdfe5b5 100644 --- a/tests/tts_tests/test_vits.py +++ b/tests/tts_tests/test_vits.py @@ -13,14 +13,13 @@ Vits, VitsArgs, VitsAudioConfig, - amp_to_db, - db_to_amp, load_audio, spec_to_mel, wav_to_mel, wav_to_spec, ) from TTS.tts.utils.speakers import SpeakerManager +from TTS.utils.audio.torch_transforms import amp_to_db, db_to_amp LANG_FILE = os.path.join(get_tests_input_path(), "language_ids.json") SPEAKER_ENCODER_CONFIG = os.path.join(get_tests_input_path(), "test_speaker_encoder_config.json") From 6f25c2b90463dec6afe8c9c788a0a3a717030429 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 22 Nov 2024 00:38:37 +0100 Subject: [PATCH 196/255] refactor(delightful_tts): remove unused classes --- TTS/tts/layers/delightful_tts/conformer.py | 49 +----- TTS/tts/layers/delightful_tts/conv_layers.py | 142 ------------------ .../layers/delightful_tts/kernel_predictor.py | 128 ---------------- TTS/tts/layers/tacotron/gst_layers.py | 10 +- TTS/tts/models/delightful_tts.py | 113 ++------------ TTS/tts/utils/synthesis.py | 19 +-- 6 files changed, 24 insertions(+), 437 deletions(-) delete mode 100644 TTS/tts/layers/delightful_tts/kernel_predictor.py diff --git a/TTS/tts/layers/delightful_tts/conformer.py b/TTS/tts/layers/delightful_tts/conformer.py index b2175b3b96..227a871c69 100644 --- a/TTS/tts/layers/delightful_tts/conformer.py +++ b/TTS/tts/layers/delightful_tts/conformer.py @@ -1,20 +1,14 @@ ### credit: https://github.com/dunky11/voicesmith import math -from typing import Tuple import torch import torch.nn as nn # pylint: disable=consider-using-from-import import torch.nn.functional as F -from TTS.tts.layers.delightful_tts.conv_layers import Conv1dGLU, DepthWiseConv1d, PointwiseConv1d +from TTS.tts.layers.delightful_tts.conv_layers import Conv1dGLU, DepthWiseConv1d, PointwiseConv1d, calc_same_padding from TTS.tts.layers.delightful_tts.networks import GLUActivation -def calc_same_padding(kernel_size: int) -> Tuple[int, int]: - pad = kernel_size // 2 - return (pad, pad - (kernel_size + 1) % 2) - - class Conformer(nn.Module): def __init__( self, @@ -322,7 +316,7 @@ def forward( value: torch.Tensor, mask: torch.Tensor, encoding: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: + ) -> tuple[torch.Tensor, torch.Tensor]: batch_size, seq_length, _ = key.size() # pylint: disable=unused-variable encoding = encoding[:, : key.shape[1]] encoding = encoding.repeat(batch_size, 1, 1) @@ -378,7 +372,7 @@ def forward( value: torch.Tensor, pos_embedding: torch.Tensor, mask: torch.Tensor, - ) -> Tuple[torch.Tensor, torch.Tensor]: + ) -> tuple[torch.Tensor, torch.Tensor]: batch_size = query.shape[0] query = self.query_proj(query).view(batch_size, -1, self.num_heads, self.d_head) key = self.key_proj(key).view(batch_size, -1, self.num_heads, self.d_head).permute(0, 2, 1, 3) @@ -411,40 +405,3 @@ def _relative_shift(self, pos_score: torch.Tensor) -> torch.Tensor: # pylint: d padded_pos_score = padded_pos_score.view(batch_size, num_heads, seq_length2 + 1, seq_length1) pos_score = padded_pos_score[:, :, 1:].view_as(pos_score) return pos_score - - -class MultiHeadAttention(nn.Module): - """ - input: - query --- [N, T_q, query_dim] - key --- [N, T_k, key_dim] - output: - out --- [N, T_q, num_units] - """ - - def __init__(self, query_dim: int, key_dim: int, 
num_units: int, num_heads: int): - super().__init__() - self.num_units = num_units - self.num_heads = num_heads - self.key_dim = key_dim - - self.W_query = nn.Linear(in_features=query_dim, out_features=num_units, bias=False) - self.W_key = nn.Linear(in_features=key_dim, out_features=num_units, bias=False) - self.W_value = nn.Linear(in_features=key_dim, out_features=num_units, bias=False) - - def forward(self, query: torch.Tensor, key: torch.Tensor) -> torch.Tensor: - querys = self.W_query(query) # [N, T_q, num_units] - keys = self.W_key(key) # [N, T_k, num_units] - values = self.W_value(key) - split_size = self.num_units // self.num_heads - querys = torch.stack(torch.split(querys, split_size, dim=2), dim=0) # [h, N, T_q, num_units/h] - keys = torch.stack(torch.split(keys, split_size, dim=2), dim=0) # [h, N, T_k, num_units/h] - values = torch.stack(torch.split(values, split_size, dim=2), dim=0) # [h, N, T_k, num_units/h] - # score = softmax(QK^T / (d_k ** 0.5)) - scores = torch.matmul(querys, keys.transpose(2, 3)) # [h, N, T_q, T_k] - scores = scores / (self.key_dim**0.5) - scores = F.softmax(scores, dim=3) - # out = score * V - out = torch.matmul(scores, values) # [h, N, T_q, num_units/h] - out = torch.cat(torch.split(out, 1, dim=0), dim=3).squeeze(0) # [N, T_q, num_units] - return out diff --git a/TTS/tts/layers/delightful_tts/conv_layers.py b/TTS/tts/layers/delightful_tts/conv_layers.py index fb9aa4495f..1d5139571e 100644 --- a/TTS/tts/layers/delightful_tts/conv_layers.py +++ b/TTS/tts/layers/delightful_tts/conv_layers.py @@ -3,9 +3,6 @@ import torch import torch.nn as nn # pylint: disable=consider-using-from-import import torch.nn.functional as F -from torch.nn.utils import parametrize - -from TTS.tts.layers.delightful_tts.kernel_predictor import KernelPredictor def calc_same_padding(kernel_size: int) -> Tuple[int, int]: @@ -530,142 +527,3 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: x = self.addcoords(x) x = self.conv(x) return x - - -class LVCBlock(torch.nn.Module): - """the location-variable convolutions""" - - def __init__( # pylint: disable=dangerous-default-value - self, - in_channels, - cond_channels, - stride, - dilations=[1, 3, 9, 27], - lReLU_slope=0.2, - conv_kernel_size=3, - cond_hop_length=256, - kpnet_hidden_channels=64, - kpnet_conv_size=3, - kpnet_dropout=0.0, - ): - super().__init__() - - self.cond_hop_length = cond_hop_length - self.conv_layers = len(dilations) - self.conv_kernel_size = conv_kernel_size - - self.kernel_predictor = KernelPredictor( - cond_channels=cond_channels, - conv_in_channels=in_channels, - conv_out_channels=2 * in_channels, - conv_layers=len(dilations), - conv_kernel_size=conv_kernel_size, - kpnet_hidden_channels=kpnet_hidden_channels, - kpnet_conv_size=kpnet_conv_size, - kpnet_dropout=kpnet_dropout, - kpnet_nonlinear_activation_params={"negative_slope": lReLU_slope}, - ) - - self.convt_pre = nn.Sequential( - nn.LeakyReLU(lReLU_slope), - nn.utils.parametrizations.weight_norm( - nn.ConvTranspose1d( - in_channels, - in_channels, - 2 * stride, - stride=stride, - padding=stride // 2 + stride % 2, - output_padding=stride % 2, - ) - ), - ) - - self.conv_blocks = nn.ModuleList() - for dilation in dilations: - self.conv_blocks.append( - nn.Sequential( - nn.LeakyReLU(lReLU_slope), - nn.utils.parametrizations.weight_norm( - nn.Conv1d( - in_channels, - in_channels, - conv_kernel_size, - padding=dilation * (conv_kernel_size - 1) // 2, - dilation=dilation, - ) - ), - nn.LeakyReLU(lReLU_slope), - ) - ) - - def forward(self, x, c): - """forward 
propagation of the location-variable convolutions. - Args: - x (Tensor): the input sequence (batch, in_channels, in_length) - c (Tensor): the conditioning sequence (batch, cond_channels, cond_length) - - Returns: - Tensor: the output sequence (batch, in_channels, in_length) - """ - _, in_channels, _ = x.shape # (B, c_g, L') - - x = self.convt_pre(x) # (B, c_g, stride * L') - kernels, bias = self.kernel_predictor(c) - - for i, conv in enumerate(self.conv_blocks): - output = conv(x) # (B, c_g, stride * L') - - k = kernels[:, i, :, :, :, :] # (B, 2 * c_g, c_g, kernel_size, cond_length) - b = bias[:, i, :, :] # (B, 2 * c_g, cond_length) - - output = self.location_variable_convolution( - output, k, b, hop_size=self.cond_hop_length - ) # (B, 2 * c_g, stride * L'): LVC - x = x + torch.sigmoid(output[:, :in_channels, :]) * torch.tanh( - output[:, in_channels:, :] - ) # (B, c_g, stride * L'): GAU - - return x - - def location_variable_convolution(self, x, kernel, bias, dilation=1, hop_size=256): # pylint: disable=no-self-use - """perform location-variable convolution operation on the input sequence (x) using the local convolution kernl. - Time: 414 μs ± 309 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each), test on NVIDIA V100. - Args: - x (Tensor): the input sequence (batch, in_channels, in_length). - kernel (Tensor): the local convolution kernel (batch, in_channel, out_channels, kernel_size, kernel_length) - bias (Tensor): the bias for the local convolution (batch, out_channels, kernel_length) - dilation (int): the dilation of convolution. - hop_size (int): the hop_size of the conditioning sequence. - Returns: - (Tensor): the output sequence after performing local convolution. (batch, out_channels, in_length). - """ - batch, _, in_length = x.shape - batch, _, out_channels, kernel_size, kernel_length = kernel.shape - assert in_length == (kernel_length * hop_size), "length of (x, kernel) is not matched" - - padding = dilation * int((kernel_size - 1) / 2) - x = F.pad(x, (padding, padding), "constant", 0) # (batch, in_channels, in_length + 2*padding) - x = x.unfold(2, hop_size + 2 * padding, hop_size) # (batch, in_channels, kernel_length, hop_size + 2*padding) - - if hop_size < dilation: - x = F.pad(x, (0, dilation), "constant", 0) - x = x.unfold( - 3, dilation, dilation - ) # (batch, in_channels, kernel_length, (hop_size + 2*padding)/dilation, dilation) - x = x[:, :, :, :, :hop_size] - x = x.transpose(3, 4) # (batch, in_channels, kernel_length, dilation, (hop_size + 2*padding)/dilation) - x = x.unfold(4, kernel_size, 1) # (batch, in_channels, kernel_length, dilation, _, kernel_size) - - o = torch.einsum("bildsk,biokl->bolsd", x, kernel) - o = o.to(memory_format=torch.channels_last_3d) - bias = bias.unsqueeze(-1).unsqueeze(-1).to(memory_format=torch.channels_last_3d) - o = o + bias - o = o.contiguous().view(batch, out_channels, -1) - - return o - - def remove_weight_norm(self): - self.kernel_predictor.remove_weight_norm() - parametrize.remove_parametrizations(self.convt_pre[1], "weight") - for block in self.conv_blocks: - parametrize.remove_parametrizations(block[1], "weight") diff --git a/TTS/tts/layers/delightful_tts/kernel_predictor.py b/TTS/tts/layers/delightful_tts/kernel_predictor.py deleted file mode 100644 index 96c550b6c2..0000000000 --- a/TTS/tts/layers/delightful_tts/kernel_predictor.py +++ /dev/null @@ -1,128 +0,0 @@ -import torch.nn as nn # pylint: disable=consider-using-from-import -from torch.nn.utils import parametrize - - -class KernelPredictor(nn.Module): - """Kernel 
predictor for the location-variable convolutions - - Args: - cond_channels (int): number of channel for the conditioning sequence, - conv_in_channels (int): number of channel for the input sequence, - conv_out_channels (int): number of channel for the output sequence, - conv_layers (int): number of layers - - """ - - def __init__( # pylint: disable=dangerous-default-value - self, - cond_channels, - conv_in_channels, - conv_out_channels, - conv_layers, - conv_kernel_size=3, - kpnet_hidden_channels=64, - kpnet_conv_size=3, - kpnet_dropout=0.0, - kpnet_nonlinear_activation="LeakyReLU", - kpnet_nonlinear_activation_params={"negative_slope": 0.1}, - ): - super().__init__() - - self.conv_in_channels = conv_in_channels - self.conv_out_channels = conv_out_channels - self.conv_kernel_size = conv_kernel_size - self.conv_layers = conv_layers - - kpnet_kernel_channels = conv_in_channels * conv_out_channels * conv_kernel_size * conv_layers # l_w - kpnet_bias_channels = conv_out_channels * conv_layers # l_b - - self.input_conv = nn.Sequential( - nn.utils.parametrizations.weight_norm( - nn.Conv1d(cond_channels, kpnet_hidden_channels, 5, padding=2, bias=True) - ), - getattr(nn, kpnet_nonlinear_activation)(**kpnet_nonlinear_activation_params), - ) - - self.residual_convs = nn.ModuleList() - padding = (kpnet_conv_size - 1) // 2 - for _ in range(3): - self.residual_convs.append( - nn.Sequential( - nn.Dropout(kpnet_dropout), - nn.utils.parametrizations.weight_norm( - nn.Conv1d( - kpnet_hidden_channels, - kpnet_hidden_channels, - kpnet_conv_size, - padding=padding, - bias=True, - ) - ), - getattr(nn, kpnet_nonlinear_activation)(**kpnet_nonlinear_activation_params), - nn.utils.parametrizations.weight_norm( - nn.Conv1d( - kpnet_hidden_channels, - kpnet_hidden_channels, - kpnet_conv_size, - padding=padding, - bias=True, - ) - ), - getattr(nn, kpnet_nonlinear_activation)(**kpnet_nonlinear_activation_params), - ) - ) - self.kernel_conv = nn.utils.parametrizations.weight_norm( - nn.Conv1d( - kpnet_hidden_channels, - kpnet_kernel_channels, - kpnet_conv_size, - padding=padding, - bias=True, - ) - ) - self.bias_conv = nn.utils.parametrizations.weight_norm( - nn.Conv1d( - kpnet_hidden_channels, - kpnet_bias_channels, - kpnet_conv_size, - padding=padding, - bias=True, - ) - ) - - def forward(self, c): - """ - Args: - c (Tensor): the conditioning sequence (batch, cond_channels, cond_length) - """ - batch, _, cond_length = c.shape - c = self.input_conv(c) - for residual_conv in self.residual_convs: - residual_conv.to(c.device) - c = c + residual_conv(c) - k = self.kernel_conv(c) - b = self.bias_conv(c) - kernels = k.contiguous().view( - batch, - self.conv_layers, - self.conv_in_channels, - self.conv_out_channels, - self.conv_kernel_size, - cond_length, - ) - bias = b.contiguous().view( - batch, - self.conv_layers, - self.conv_out_channels, - cond_length, - ) - - return kernels, bias - - def remove_weight_norm(self): - parametrize.remove_parametrizations(self.input_conv[0], "weight") - parametrize.remove_parametrizations(self.kernel_conv, "weight") - parametrize.remove_parametrizations(self.bias_conv, "weight") - for block in self.residual_convs: - parametrize.remove_parametrizations(block[1], "weight") - parametrize.remove_parametrizations(block[3], "weight") diff --git a/TTS/tts/layers/tacotron/gst_layers.py b/TTS/tts/layers/tacotron/gst_layers.py index 05dba7084f..ac3d7d4aae 100644 --- a/TTS/tts/layers/tacotron/gst_layers.py +++ b/TTS/tts/layers/tacotron/gst_layers.py @@ -117,7 +117,7 @@ class 
MultiHeadAttention(nn.Module): out --- [N, T_q, num_units] """ - def __init__(self, query_dim, key_dim, num_units, num_heads): + def __init__(self, query_dim: int, key_dim: int, num_units: int, num_heads: int): super().__init__() self.num_units = num_units self.num_heads = num_heads @@ -127,7 +127,7 @@ def __init__(self, query_dim, key_dim, num_units, num_heads): self.W_key = nn.Linear(in_features=key_dim, out_features=num_units, bias=False) self.W_value = nn.Linear(in_features=key_dim, out_features=num_units, bias=False) - def forward(self, query, key): + def forward(self, query: torch.Tensor, key: torch.Tensor) -> torch.Tensor: queries = self.W_query(query) # [N, T_q, num_units] keys = self.W_key(key) # [N, T_k, num_units] values = self.W_value(key) @@ -137,13 +137,11 @@ def forward(self, query, key): keys = torch.stack(torch.split(keys, split_size, dim=2), dim=0) # [h, N, T_k, num_units/h] values = torch.stack(torch.split(values, split_size, dim=2), dim=0) # [h, N, T_k, num_units/h] - # score = softmax(QK^T / (d_k**0.5)) + # score = softmax(QK^T / (d_k ** 0.5)) scores = torch.matmul(queries, keys.transpose(2, 3)) # [h, N, T_q, T_k] scores = scores / (self.key_dim**0.5) scores = F.softmax(scores, dim=3) # out = score * V out = torch.matmul(scores, values) # [h, N, T_q, num_units/h] - out = torch.cat(torch.split(out, 1, dim=0), dim=3).squeeze(0) # [N, T_q, num_units] - - return out + return torch.cat(torch.split(out, 1, dim=0), dim=3).squeeze(0) # [N, T_q, num_units] diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index 880ea4ae26..2f34e4323b 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -8,11 +8,9 @@ import numpy as np import torch import torch.distributed as dist -import torchaudio from coqpit import Coqpit from librosa.filters import mel as librosa_mel_fn from torch import nn -from torch.nn import functional as F from torch.utils.data import DataLoader from torch.utils.data.sampler import WeightedRandomSampler from trainer.io import load_fsspec @@ -24,8 +22,10 @@ from TTS.tts.layers.losses import ForwardSumLoss, VitsDiscriminatorLoss from TTS.tts.layers.vits.discriminator import VitsDiscriminator from TTS.tts.models.base_tts import BaseTTSE2E +from TTS.tts.models.vits import load_audio from TTS.tts.utils.helpers import average_over_durations, compute_attn_prior, rand_segments, segment, sequence_mask from TTS.tts.utils.speakers import SpeakerManager +from TTS.tts.utils.synthesis import embedding_to_torch, id_to_torch, numpy_to_torch from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_avg_pitch, plot_pitch, plot_spectrogram from TTS.utils.audio.numpy_transforms import build_mel_basis, compute_f0 @@ -40,103 +40,10 @@ logger = logging.getLogger(__name__) -def id_to_torch(aux_id, cuda=False): - if aux_id is not None: - aux_id = np.asarray(aux_id) - aux_id = torch.from_numpy(aux_id) - if cuda: - return aux_id.cuda() - return aux_id - - -def embedding_to_torch(d_vector, cuda=False): - if d_vector is not None: - d_vector = np.asarray(d_vector) - d_vector = torch.from_numpy(d_vector).float() - d_vector = d_vector.squeeze().unsqueeze(0) - if cuda: - return d_vector.cuda() - return d_vector - - -def numpy_to_torch(np_array, dtype, cuda=False): - if np_array is None: - return None - tensor = torch.as_tensor(np_array, dtype=dtype) - if cuda: - return tensor.cuda() - return tensor - - -def get_mask_from_lengths(lengths: torch.Tensor) -> torch.Tensor: - batch_size = 
lengths.shape[0] - max_len = torch.max(lengths).item() - ids = torch.arange(0, max_len, device=lengths.device).unsqueeze(0).expand(batch_size, -1) - mask = ids >= lengths.unsqueeze(1).expand(-1, max_len) - return mask - - -def pad(input_ele: List[torch.Tensor], max_len: int) -> torch.Tensor: - out_list = torch.jit.annotate(List[torch.Tensor], []) - for batch in input_ele: - if len(batch.shape) == 1: - one_batch_padded = F.pad(batch, (0, max_len - batch.size(0)), "constant", 0.0) - else: - one_batch_padded = F.pad(batch, (0, 0, 0, max_len - batch.size(0)), "constant", 0.0) - out_list.append(one_batch_padded) - out_padded = torch.stack(out_list) - return out_padded - - -def stride_lens(lens: torch.Tensor, stride: int = 2) -> torch.Tensor: - return torch.ceil(lens / stride).int() - - -def initialize_embeddings(shape: Tuple[int]) -> torch.Tensor: - assert len(shape) == 2, "Can only initialize 2-D embedding matrices ..." - return torch.randn(shape) * np.sqrt(2 / shape[1]) - - -# pylint: disable=redefined-outer-name -def calc_same_padding(kernel_size: int) -> Tuple[int, int]: - pad = kernel_size // 2 - return (pad, pad - (kernel_size + 1) % 2) - - hann_window = {} mel_basis = {} -@torch.no_grad() -def weights_reset(m: nn.Module): - # check if the current module has reset_parameters and if it is reset the weight - reset_parameters = getattr(m, "reset_parameters", None) - if callable(reset_parameters): - m.reset_parameters() - - -def get_module_weights_sum(mdl: nn.Module): - dict_sums = {} - for name, w in mdl.named_parameters(): - if "weight" in name: - value = w.data.sum().item() - dict_sums[name] = value - return dict_sums - - -def load_audio(file_path: str): - """Load the audio file normalized in [-1, 1] - - Return Shapes: - - x: :math:`[1, T]` - """ - x, sr = torchaudio.load( - file_path, - ) - assert (x > 1).sum() + (x < -1).sum() == 0 - return x, sr - - def _wav_to_spec(y, n_fft, hop_length, win_length, center=False): y = y.squeeze(1) @@ -1179,7 +1086,7 @@ def synthesize( **kwargs, ): # pylint: disable=unused-argument # TODO: add cloning support with ref_waveform - is_cuda = next(self.parameters()).is_cuda + device = next(self.parameters()).device # convert text to sequence of token IDs text_inputs = np.asarray( @@ -1193,14 +1100,14 @@ def synthesize( if isinstance(speaker_id, str) and self.args.use_speaker_embedding: # get the speaker id for the speaker embedding layer _speaker_id = self.speaker_manager.name_to_id[speaker_id] - _speaker_id = id_to_torch(_speaker_id, cuda=is_cuda) + _speaker_id = id_to_torch(_speaker_id, device=device) if speaker_id is not None and self.args.use_d_vector_file: # get the average d_vector for the speaker d_vector = self.speaker_manager.get_mean_embedding(speaker_id, num_samples=None, randomize=False) - d_vector = embedding_to_torch(d_vector, cuda=is_cuda) + d_vector = embedding_to_torch(d_vector, device=device) - text_inputs = numpy_to_torch(text_inputs, torch.long, cuda=is_cuda) + text_inputs = numpy_to_torch(text_inputs, torch.long, device=device) text_inputs = text_inputs.unsqueeze(0) # synthesize voice @@ -1223,7 +1130,7 @@ def synthesize( return return_dict def synthesize_with_gl(self, text: str, speaker_id, d_vector): - is_cuda = next(self.parameters()).is_cuda + device = next(self.parameters()).device # convert text to sequence of token IDs text_inputs = np.asarray( @@ -1232,12 +1139,12 @@ def synthesize_with_gl(self, text: str, speaker_id, d_vector): ) # pass tensors to backend if speaker_id is not None: - speaker_id = id_to_torch(speaker_id, 
cuda=is_cuda) + speaker_id = id_to_torch(speaker_id, device=device) if d_vector is not None: - d_vector = embedding_to_torch(d_vector, cuda=is_cuda) + d_vector = embedding_to_torch(d_vector, device=device) - text_inputs = numpy_to_torch(text_inputs, torch.long, cuda=is_cuda) + text_inputs = numpy_to_torch(text_inputs, torch.long, device=device) text_inputs = text_inputs.unsqueeze(0) # synthesize voice diff --git a/TTS/tts/utils/synthesis.py b/TTS/tts/utils/synthesis.py index 797151c254..5dc4cc569f 100644 --- a/TTS/tts/utils/synthesis.py +++ b/TTS/tts/utils/synthesis.py @@ -1,17 +1,16 @@ -from typing import Dict +from typing import Dict, Optional, Union import numpy as np import torch from torch import nn -def numpy_to_torch(np_array, dtype, cuda=False, device="cpu"): - if cuda: - device = "cuda" +def numpy_to_torch( + np_array: np.ndarray, dtype: torch.dtype, device: Union[str, torch.device] = "cpu" +) -> Optional[torch.Tensor]: if np_array is None: return None - tensor = torch.as_tensor(np_array, dtype=dtype, device=device) - return tensor + return torch.as_tensor(np_array, dtype=dtype, device=device) def compute_style_mel(style_wav, ap, cuda=False, device="cpu"): @@ -76,18 +75,14 @@ def inv_spectrogram(postnet_output, ap, CONFIG): return wav -def id_to_torch(aux_id, cuda=False, device="cpu"): - if cuda: - device = "cuda" +def id_to_torch(aux_id, device: Union[str, torch.device] = "cpu") -> Optional[torch.Tensor]: if aux_id is not None: aux_id = np.asarray(aux_id) aux_id = torch.from_numpy(aux_id).to(device) return aux_id -def embedding_to_torch(d_vector, cuda=False, device="cpu"): - if cuda: - device = "cuda" +def embedding_to_torch(d_vector, device: Union[str, torch.device] = "cpu") -> Optional[torch.Tensor]: if d_vector is not None: d_vector = np.asarray(d_vector) d_vector = torch.from_numpy(d_vector).type(torch.FloatTensor) From e63962c22662d76c0765fdb35fd0b30fce8888c8 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 22 Nov 2024 00:45:33 +0100 Subject: [PATCH 197/255] refactor(losses): move shared losses into losses.py --- TTS/tts/layers/losses.py | 87 +++++++++++++++++++------------- TTS/tts/models/delightful_tts.py | 44 ++++------------ TTS/tts/models/neuralhmm_tts.py | 19 +------ TTS/tts/models/overflow.py | 19 +------ 4 files changed, 64 insertions(+), 105 deletions(-) diff --git a/TTS/tts/layers/losses.py b/TTS/tts/layers/losses.py index 5ebed81dda..db62430c9d 100644 --- a/TTS/tts/layers/losses.py +++ b/TTS/tts/layers/losses.py @@ -309,6 +309,24 @@ def forward(self, attn_logprob, in_lens, out_lens): return total_loss +class NLLLoss(nn.Module): + """Negative log likelihood loss.""" + + def forward(self, log_prob: torch.Tensor) -> dict: # pylint: disable=no-self-use + """Compute the loss. 
+ + Args: + logits (Tensor): [B, T, D] + + Returns: + Tensor: [1] + + """ + return_dict = {} + return_dict["loss"] = -log_prob.mean() + return return_dict + + ######################## # MODEL LOSS LAYERS ######################## @@ -619,6 +637,28 @@ def forward( return {"loss": loss, "loss_l1": spec_loss, "loss_ssim": ssim_loss, "loss_dur": dur_loss, "mdn_loss": mdn_loss} +def feature_loss(feats_real, feats_generated): + loss = 0 + for dr, dg in zip(feats_real, feats_generated): + for rl, gl in zip(dr, dg): + rl = rl.float().detach() + gl = gl.float() + loss += torch.mean(torch.abs(rl - gl)) + return loss * 2 + + +def generator_loss(scores_fake): + loss = 0 + gen_losses = [] + for dg in scores_fake: + dg = dg.float() + l = torch.mean((1 - dg) ** 2) + gen_losses.append(l) + loss += l + + return loss, gen_losses + + class VitsGeneratorLoss(nn.Module): def __init__(self, c: Coqpit): super().__init__() @@ -640,28 +680,6 @@ def __init__(self, c: Coqpit): do_amp_to_db=True, ) - @staticmethod - def feature_loss(feats_real, feats_generated): - loss = 0 - for dr, dg in zip(feats_real, feats_generated): - for rl, gl in zip(dr, dg): - rl = rl.float().detach() - gl = gl.float() - loss += torch.mean(torch.abs(rl - gl)) - return loss * 2 - - @staticmethod - def generator_loss(scores_fake): - loss = 0 - gen_losses = [] - for dg in scores_fake: - dg = dg.float() - l = torch.mean((1 - dg) ** 2) - gen_losses.append(l) - loss += l - - return loss, gen_losses - @staticmethod def kl_loss(z_p, logs_q, m_p, logs_p, z_mask): """ @@ -722,10 +740,8 @@ def forward( self.kl_loss(z_p=z_p, logs_q=logs_q, m_p=m_p, logs_p=logs_p, z_mask=z_mask.unsqueeze(1)) * self.kl_loss_alpha ) - loss_feat = ( - self.feature_loss(feats_real=feats_disc_real, feats_generated=feats_disc_fake) * self.feat_loss_alpha - ) - loss_gen = self.generator_loss(scores_fake=scores_disc_fake)[0] * self.gen_loss_alpha + loss_feat = feature_loss(feats_real=feats_disc_real, feats_generated=feats_disc_fake) * self.feat_loss_alpha + loss_gen = generator_loss(scores_fake=scores_disc_fake)[0] * self.gen_loss_alpha loss_mel = torch.nn.functional.l1_loss(mel_slice, mel_slice_hat) * self.mel_loss_alpha loss_duration = torch.sum(loss_duration.float()) * self.dur_loss_alpha loss = loss_kl + loss_feat + loss_mel + loss_gen + loss_duration @@ -779,6 +795,15 @@ def forward(self, scores_disc_real, scores_disc_fake): return return_dict +def _binary_alignment_loss(alignment_hard, alignment_soft): + """Binary loss that forces soft alignments to match the hard alignments. + + Explained in `https://arxiv.org/pdf/2108.10447.pdf`. + """ + log_sum = torch.log(torch.clamp(alignment_soft[alignment_hard == 1], min=1e-12)).sum() + return -log_sum / alignment_hard.sum() + + class ForwardTTSLoss(nn.Module): """Generic configurable ForwardTTS loss.""" @@ -820,14 +845,6 @@ def __init__(self, c): self.dur_loss_alpha = c.dur_loss_alpha self.binary_alignment_loss_alpha = c.binary_align_loss_alpha - @staticmethod - def _binary_alignment_loss(alignment_hard, alignment_soft): - """Binary loss that forces soft alignments to match the hard alignments as - explained in `https://arxiv.org/pdf/2108.10447.pdf`. 
- """ - log_sum = torch.log(torch.clamp(alignment_soft[alignment_hard == 1], min=1e-12)).sum() - return -log_sum / alignment_hard.sum() - def forward( self, decoder_output, @@ -879,7 +896,7 @@ def forward( return_dict["loss_aligner"] = self.aligner_loss_alpha * aligner_loss if self.binary_alignment_loss_alpha > 0 and alignment_hard is not None: - binary_alignment_loss = self._binary_alignment_loss(alignment_hard, alignment_soft) + binary_alignment_loss = _binary_alignment_loss(alignment_hard, alignment_soft) loss = loss + self.binary_alignment_loss_alpha * binary_alignment_loss if binary_loss_weight: return_dict["loss_binary_alignment"] = ( diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index 2f34e4323b..7216e8143a 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -19,7 +19,13 @@ from TTS.tts.datasets.dataset import F0Dataset, TTSDataset, _parse_sample from TTS.tts.layers.delightful_tts.acoustic_model import AcousticModel -from TTS.tts.layers.losses import ForwardSumLoss, VitsDiscriminatorLoss +from TTS.tts.layers.losses import ( + ForwardSumLoss, + VitsDiscriminatorLoss, + _binary_alignment_loss, + feature_loss, + generator_loss, +) from TTS.tts.layers.vits.discriminator import VitsDiscriminator from TTS.tts.models.base_tts import BaseTTSE2E from TTS.tts.models.vits import load_audio @@ -1491,36 +1497,6 @@ def __init__(self, config): self.gen_loss_alpha = config.gen_loss_alpha self.multi_scale_stft_loss_alpha = config.multi_scale_stft_loss_alpha - @staticmethod - def _binary_alignment_loss(alignment_hard, alignment_soft): - """Binary loss that forces soft alignments to match the hard alignments as - explained in `https://arxiv.org/pdf/2108.10447.pdf`. - """ - log_sum = torch.log(torch.clamp(alignment_soft[alignment_hard == 1], min=1e-12)).sum() - return -log_sum / alignment_hard.sum() - - @staticmethod - def feature_loss(feats_real, feats_generated): - loss = 0 - for dr, dg in zip(feats_real, feats_generated): - for rl, gl in zip(dr, dg): - rl = rl.float().detach() - gl = gl.float() - loss += torch.mean(torch.abs(rl - gl)) - return loss * 2 - - @staticmethod - def generator_loss(scores_fake): - loss = 0 - gen_losses = [] - for dg in scores_fake: - dg = dg.float() - l = torch.mean((1 - dg) ** 2) - gen_losses.append(l) - loss += l - - return loss, gen_losses - def forward( self, mel_output, @@ -1618,7 +1594,7 @@ def forward( ) if self.binary_alignment_loss_alpha > 0 and aligner_hard is not None: - binary_alignment_loss = self._binary_alignment_loss(aligner_hard, aligner_soft) + binary_alignment_loss = _binary_alignment_loss(aligner_hard, aligner_soft) total_loss = total_loss + self.binary_alignment_loss_alpha * binary_alignment_loss * binary_loss_weight if binary_loss_weight: loss_dict["loss_binary_alignment"] = ( @@ -1638,8 +1614,8 @@ def forward( # vocoder losses if not skip_disc: - loss_feat = self.feature_loss(feats_real=feats_real, feats_generated=feats_fake) * self.feat_loss_alpha - loss_gen = self.generator_loss(scores_fake=scores_fake)[0] * self.gen_loss_alpha + loss_feat = feature_loss(feats_real=feats_real, feats_generated=feats_fake) * self.feat_loss_alpha + loss_gen = generator_loss(scores_fake=scores_fake)[0] * self.gen_loss_alpha loss_dict["vocoder_loss_feat"] = loss_feat loss_dict["vocoder_loss_gen"] = loss_gen loss_dict["loss"] = loss_dict["loss"] + loss_feat + loss_gen diff --git a/TTS/tts/models/neuralhmm_tts.py b/TTS/tts/models/neuralhmm_tts.py index de5401aac7..0b3fadafbf 100644 --- 
a/TTS/tts/models/neuralhmm_tts.py +++ b/TTS/tts/models/neuralhmm_tts.py @@ -8,6 +8,7 @@ from trainer.io import load_fsspec from trainer.logging.tensorboard_logger import TensorboardLogger +from TTS.tts.layers.losses import NLLLoss from TTS.tts.layers.overflow.common_layers import Encoder, OverflowUtils from TTS.tts.layers.overflow.neural_hmm import NeuralHMM from TTS.tts.layers.overflow.plotting_utils import ( @@ -373,21 +374,3 @@ def test_log( ) -> None: logger.test_audios(steps, outputs[1], self.ap.sample_rate) logger.test_figures(steps, outputs[0]) - - -class NLLLoss(nn.Module): - """Negative log likelihood loss.""" - - def forward(self, log_prob: torch.Tensor) -> dict: # pylint: disable=no-self-use - """Compute the loss. - - Args: - logits (Tensor): [B, T, D] - - Returns: - Tensor: [1] - - """ - return_dict = {} - return_dict["loss"] = -log_prob.mean() - return return_dict diff --git a/TTS/tts/models/overflow.py b/TTS/tts/models/overflow.py index b72f4877cf..ac09e406ad 100644 --- a/TTS/tts/models/overflow.py +++ b/TTS/tts/models/overflow.py @@ -8,6 +8,7 @@ from trainer.io import load_fsspec from trainer.logging.tensorboard_logger import TensorboardLogger +from TTS.tts.layers.losses import NLLLoss from TTS.tts.layers.overflow.common_layers import Encoder, OverflowUtils from TTS.tts.layers.overflow.decoder import Decoder from TTS.tts.layers.overflow.neural_hmm import NeuralHMM @@ -389,21 +390,3 @@ def test_log( ) -> None: logger.test_audios(steps, outputs[1], self.ap.sample_rate) logger.test_figures(steps, outputs[0]) - - -class NLLLoss(nn.Module): - """Negative log likelihood loss.""" - - def forward(self, log_prob: torch.Tensor) -> dict: # pylint: disable=no-self-use - """Compute the loss. - - Args: - logits (Tensor): [B, T, D] - - Returns: - Tensor: [1] - - """ - return_dict = {} - return_dict["loss"] = -log_prob.mean() - return return_dict From 2e5f68df6a72cca1f25867cca3d38f585be14df6 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 22 Nov 2024 01:16:42 +0100 Subject: [PATCH 198/255] refactor(wavernn): remove duplicate Stretch2d I checked that the implementations are the same --- TTS/vocoder/models/wavernn.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/TTS/vocoder/models/wavernn.py b/TTS/vocoder/models/wavernn.py index 723f18dde2..1847679890 100644 --- a/TTS/vocoder/models/wavernn.py +++ b/TTS/vocoder/models/wavernn.py @@ -17,6 +17,7 @@ from TTS.utils.audio.numpy_transforms import mulaw_decode from TTS.vocoder.datasets.wavernn_dataset import WaveRNNDataset from TTS.vocoder.layers.losses import WaveRNNLoss +from TTS.vocoder.layers.upsample import Stretch2d from TTS.vocoder.models.base_vocoder import BaseVocoder from TTS.vocoder.utils.distribution import sample_from_discretized_mix_logistic, sample_from_gaussian @@ -66,19 +67,6 @@ def forward(self, x): return x -class Stretch2d(nn.Module): - def __init__(self, x_scale, y_scale): - super().__init__() - self.x_scale = x_scale - self.y_scale = y_scale - - def forward(self, x): - b, c, h, w = x.size() - x = x.unsqueeze(-1).unsqueeze(3) - x = x.repeat(1, 1, 1, self.y_scale, 1, self.x_scale) - return x.view(b, c, h * self.y_scale, w * self.x_scale) - - class UpsampleNetwork(nn.Module): def __init__( self, From 69a599d403eb44140f8a640241faee4c551fda00 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 22 Nov 2024 12:12:50 +0100 Subject: [PATCH 199/255] refactor(freevc): remove duplicate code --- TTS/tts/layers/vits/discriminator.py | 4 +- TTS/vc/models/freevc.py | 75 ++-------------------------- 
TTS/vc/modules/freevc/commons.py | 28 +---------- TTS/vc/modules/freevc/modules.py | 4 +- 4 files changed, 8 insertions(+), 103 deletions(-) diff --git a/TTS/tts/layers/vits/discriminator.py b/TTS/tts/layers/vits/discriminator.py index 3449739fdc..49f7a0d074 100644 --- a/TTS/tts/layers/vits/discriminator.py +++ b/TTS/tts/layers/vits/discriminator.py @@ -2,7 +2,7 @@ from torch import nn from torch.nn.modules.conv import Conv1d -from TTS.vocoder.models.hifigan_discriminator import DiscriminatorP +from TTS.vocoder.models.hifigan_discriminator import LRELU_SLOPE, DiscriminatorP class DiscriminatorS(torch.nn.Module): @@ -39,7 +39,7 @@ def forward(self, x): feat = [] for l in self.convs: x = l(x) - x = torch.nn.functional.leaky_relu(x, 0.1) + x = torch.nn.functional.leaky_relu(x, LRELU_SLOPE) feat.append(x) x = self.conv_post(x) feat.append(x) diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index e5cfdc1e61..62559de534 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -6,15 +6,15 @@ import torch from coqpit import Coqpit from torch import nn -from torch.nn import Conv1d, Conv2d, ConvTranspose1d +from torch.nn import Conv1d, ConvTranspose1d from torch.nn import functional as F -from torch.nn.utils import spectral_norm from torch.nn.utils.parametrizations import weight_norm from torch.nn.utils.parametrize import remove_parametrizations from trainer.io import load_fsspec import TTS.vc.modules.freevc.commons as commons import TTS.vc.modules.freevc.modules as modules +from TTS.tts.layers.vits.discriminator import DiscriminatorS from TTS.tts.utils.helpers import sequence_mask from TTS.tts.utils.speakers import SpeakerManager from TTS.vc.configs.freevc_config import FreeVCConfig @@ -23,7 +23,7 @@ from TTS.vc.modules.freevc.mel_processing import mel_spectrogram_torch from TTS.vc.modules.freevc.speaker_encoder.speaker_encoder import SpeakerEncoder as SpeakerEncoderEx from TTS.vc.modules.freevc.wavlm import get_wavlm -from TTS.vocoder.models.hifigan_generator import get_padding +from TTS.vocoder.models.hifigan_discriminator import DiscriminatorP logger = logging.getLogger(__name__) @@ -164,75 +164,6 @@ def remove_weight_norm(self): remove_parametrizations(l, "weight") -class DiscriminatorP(torch.nn.Module): - def __init__(self, period, kernel_size=5, stride=3, use_spectral_norm=False): - super(DiscriminatorP, self).__init__() - self.period = period - self.use_spectral_norm = use_spectral_norm - norm_f = weight_norm if use_spectral_norm is False else spectral_norm - self.convs = nn.ModuleList( - [ - norm_f(Conv2d(1, 32, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), - norm_f(Conv2d(32, 128, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), - norm_f(Conv2d(128, 512, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), - norm_f(Conv2d(512, 1024, (kernel_size, 1), (stride, 1), padding=(get_padding(kernel_size, 1), 0))), - norm_f(Conv2d(1024, 1024, (kernel_size, 1), 1, padding=(get_padding(kernel_size, 1), 0))), - ] - ) - self.conv_post = norm_f(Conv2d(1024, 1, (3, 1), 1, padding=(1, 0))) - - def forward(self, x): - fmap = [] - - # 1d to 2d - b, c, t = x.shape - if t % self.period != 0: # pad first - n_pad = self.period - (t % self.period) - x = F.pad(x, (0, n_pad), "reflect") - t = t + n_pad - x = x.view(b, c, t // self.period, self.period) - - for l in self.convs: - x = l(x) - x = F.leaky_relu(x, modules.LRELU_SLOPE) - fmap.append(x) - x = self.conv_post(x) - fmap.append(x) - x = torch.flatten(x, 1, 
-1) - - return x, fmap - - -class DiscriminatorS(torch.nn.Module): - def __init__(self, use_spectral_norm=False): - super(DiscriminatorS, self).__init__() - norm_f = weight_norm if use_spectral_norm is False else spectral_norm - self.convs = nn.ModuleList( - [ - norm_f(Conv1d(1, 16, 15, 1, padding=7)), - norm_f(Conv1d(16, 64, 41, 4, groups=4, padding=20)), - norm_f(Conv1d(64, 256, 41, 4, groups=16, padding=20)), - norm_f(Conv1d(256, 1024, 41, 4, groups=64, padding=20)), - norm_f(Conv1d(1024, 1024, 41, 4, groups=256, padding=20)), - norm_f(Conv1d(1024, 1024, 5, 1, padding=2)), - ] - ) - self.conv_post = norm_f(Conv1d(1024, 1, 3, 1, padding=1)) - - def forward(self, x): - fmap = [] - - for l in self.convs: - x = l(x) - x = F.leaky_relu(x, modules.LRELU_SLOPE) - fmap.append(x) - x = self.conv_post(x) - fmap.append(x) - x = torch.flatten(x, 1, -1) - - return x, fmap - - class MultiPeriodDiscriminator(torch.nn.Module): def __init__(self, use_spectral_norm=False): super(MultiPeriodDiscriminator, self).__init__() diff --git a/TTS/vc/modules/freevc/commons.py b/TTS/vc/modules/freevc/commons.py index feea7f34dc..49889e4816 100644 --- a/TTS/vc/modules/freevc/commons.py +++ b/TTS/vc/modules/freevc/commons.py @@ -3,7 +3,7 @@ import torch from torch.nn import functional as F -from TTS.tts.utils.helpers import convert_pad_shape, sequence_mask +from TTS.tts.utils.helpers import convert_pad_shape def init_weights(m: torch.nn.Module, mean: float = 0.0, std: float = 0.01) -> None: @@ -96,37 +96,11 @@ def subsequent_mask(length): return mask -@torch.jit.script -def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): - n_channels_int = n_channels[0] - in_act = input_a + input_b - t_act = torch.tanh(in_act[:, :n_channels_int, :]) - s_act = torch.sigmoid(in_act[:, n_channels_int:, :]) - acts = t_act * s_act - return acts - - def shift_1d(x): x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [1, 0]]))[:, :, :-1] return x -def generate_path(duration, mask): - """ - duration: [b, 1, t_x] - mask: [b, 1, t_y, t_x] - """ - b, _, t_y, t_x = mask.shape - cum_duration = torch.cumsum(duration, -1) - - cum_duration_flat = cum_duration.view(b * t_x) - path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype) - path = path.view(b, t_x, t_y) - path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1] - path = path.unsqueeze(1).transpose(2, 3) * mask - return path - - def clip_grad_value_(parameters, clip_value, norm_type=2): if isinstance(parameters, torch.Tensor): parameters = [parameters] diff --git a/TTS/vc/modules/freevc/modules.py b/TTS/vc/modules/freevc/modules.py index 722444a303..ea17be24d6 100644 --- a/TTS/vc/modules/freevc/modules.py +++ b/TTS/vc/modules/freevc/modules.py @@ -5,8 +5,8 @@ from torch.nn.utils.parametrizations import weight_norm from torch.nn.utils.parametrize import remove_parametrizations -import TTS.vc.modules.freevc.commons as commons from TTS.tts.layers.generic.normalization import LayerNorm2 +from TTS.tts.layers.generic.wavenet import fused_add_tanh_sigmoid_multiply from TTS.vc.modules.freevc.commons import init_weights from TTS.vocoder.models.hifigan_generator import get_padding @@ -99,7 +99,7 @@ def forward(self, x, x_mask, g=None, **kwargs): else: g_l = torch.zeros_like(x_in) - acts = commons.fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor) + acts = fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor) acts = self.drop(acts) res_skip_acts = self.res_skip_layers[i](acts) From 6ecf47312c1cbe30dd47bd70c8ae30dbc9d2d407 Mon Sep 17 00:00:00 2001 
From: Enno Hermann Date: Fri, 22 Nov 2024 15:38:35 +0100 Subject: [PATCH 200/255] refactor(xtts): use tortoise conditioning encoder --- TTS/tts/layers/tortoise/autoregressive.py | 12 +++++------ TTS/tts/layers/xtts/gpt.py | 16 ++------------- TTS/tts/layers/xtts/latent_encoder.py | 25 ----------------------- TTS/tts/models/xtts.py | 19 ----------------- 4 files changed, 7 insertions(+), 65 deletions(-) diff --git a/TTS/tts/layers/tortoise/autoregressive.py b/TTS/tts/layers/tortoise/autoregressive.py index 19c1adc0a6..07cf3d542b 100644 --- a/TTS/tts/layers/tortoise/autoregressive.py +++ b/TTS/tts/layers/tortoise/autoregressive.py @@ -176,7 +176,6 @@ def __init__( embedding_dim, attn_blocks=6, num_attn_heads=4, - mean=False, ): super().__init__() attn = [] @@ -185,15 +184,14 @@ def __init__( attn.append(AttentionBlock(embedding_dim, num_attn_heads)) self.attn = nn.Sequential(*attn) self.dim = embedding_dim - self.mean = mean def forward(self, x): + """ + x: (b, 80, s) + """ h = self.init(x) h = self.attn(h) - if self.mean: - return h.mean(dim=2) - else: - return h[:, :, 0] + return h class LearnedPositionEmbeddings(nn.Module): @@ -473,7 +471,7 @@ def get_conditioning(self, speech_conditioning_input): ) conds = [] for j in range(speech_conditioning_input.shape[1]): - conds.append(self.conditioning_encoder(speech_conditioning_input[:, j])) + conds.append(self.conditioning_encoder(speech_conditioning_input[:, j])[:, :, 0]) conds = torch.stack(conds, dim=1) conds = conds.mean(dim=1) return conds diff --git a/TTS/tts/layers/xtts/gpt.py b/TTS/tts/layers/xtts/gpt.py index 899522e091..20eff26ecc 100644 --- a/TTS/tts/layers/xtts/gpt.py +++ b/TTS/tts/layers/xtts/gpt.py @@ -8,12 +8,12 @@ from transformers import GPT2Config from TTS.tts.layers.tortoise.autoregressive import ( + ConditioningEncoder, LearnedPositionEmbeddings, _prepare_attention_mask_for_generation, build_hf_gpt_transformer, ) from TTS.tts.layers.xtts.gpt_inference import GPT2InferenceModel -from TTS.tts.layers.xtts.latent_encoder import ConditioningEncoder from TTS.tts.layers.xtts.perceiver_encoder import PerceiverResampler @@ -235,19 +235,6 @@ def get_logits( else: return first_logits - def get_conditioning(self, speech_conditioning_input): - speech_conditioning_input = ( - speech_conditioning_input.unsqueeze(1) - if len(speech_conditioning_input.shape) == 3 - else speech_conditioning_input - ) - conds = [] - for j in range(speech_conditioning_input.shape[1]): - conds.append(self.conditioning_encoder(speech_conditioning_input[:, j])) - conds = torch.stack(conds, dim=1) - conds = conds.mean(dim=1) - return conds - def get_prompts(self, prompt_codes): """ Create a prompt from the mel codes. This is used to condition the model on the mel codes. 
@@ -286,6 +273,7 @@ def get_style_emb(self, cond_input, return_latent=False): """ cond_input: (b, 80, s) or (b, 1, 80, s) conds: (b, 1024, s) + output: (b, 1024, 32) """ conds = None if not return_latent: diff --git a/TTS/tts/layers/xtts/latent_encoder.py b/TTS/tts/layers/xtts/latent_encoder.py index 7d385ec46a..6becffb8b7 100644 --- a/TTS/tts/layers/xtts/latent_encoder.py +++ b/TTS/tts/layers/xtts/latent_encoder.py @@ -93,28 +93,3 @@ def forward(self, x, mask=None, qk_bias=0): h = self.proj_out(h) xp = self.x_proj(x) return (xp + h).reshape(b, xp.shape[1], *spatial) - - -class ConditioningEncoder(nn.Module): - def __init__( - self, - spec_dim, - embedding_dim, - attn_blocks=6, - num_attn_heads=4, - ): - super().__init__() - attn = [] - self.init = nn.Conv1d(spec_dim, embedding_dim, kernel_size=1) - for a in range(attn_blocks): - attn.append(AttentionBlock(embedding_dim, num_attn_heads)) - self.attn = nn.Sequential(*attn) - self.dim = embedding_dim - - def forward(self, x): - """ - x: (b, 80, s) - """ - h = self.init(x) - h = self.attn(h) - return h diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 22d2720efa..35de91e359 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -93,25 +93,6 @@ def load_audio(audiopath, sampling_rate): return audio -def pad_or_truncate(t, length): - """ - Ensure a given tensor t has a specified sequence length by either padding it with zeros or clipping it. - - Args: - t (torch.Tensor): The input tensor to be padded or truncated. - length (int): The desired length of the tensor. - - Returns: - torch.Tensor: The padded or truncated tensor. - """ - tp = t[..., :length] - if t.shape[-1] == length: - tp = t - elif t.shape[-1] < length: - tp = F.pad(t, (0, length - t.shape[-1])) - return tp - - @dataclass class XttsAudioConfig(Coqpit): """ From 0f69d31f705dd28c132ea86f4dc1ab47d1d9efb0 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 22 Nov 2024 17:28:30 +0100 Subject: [PATCH 201/255] refactor(vocoder): remove duplicate function --- TTS/vocoder/models/parallel_wavegan_generator.py | 16 ++++++++-------- TTS/vocoder/models/univnet_generator.py | 10 ++-------- 2 files changed, 10 insertions(+), 16 deletions(-) diff --git a/TTS/vocoder/models/parallel_wavegan_generator.py b/TTS/vocoder/models/parallel_wavegan_generator.py index 6a4d4ca6e7..e60c8781f0 100644 --- a/TTS/vocoder/models/parallel_wavegan_generator.py +++ b/TTS/vocoder/models/parallel_wavegan_generator.py @@ -12,6 +12,13 @@ logger = logging.getLogger(__name__) +def _get_receptive_field_size(layers, stacks, kernel_size, dilation=lambda x: 2**x): + assert layers % stacks == 0 + layers_per_cycle = layers // stacks + dilations = [dilation(i % layers_per_cycle) for i in range(layers)] + return (kernel_size - 1) * sum(dilations) + 1 + + class ParallelWaveganGenerator(torch.nn.Module): """PWGAN generator as in https://arxiv.org/pdf/1910.11480.pdf. It is similar to WaveNet with no causal convolution. 
@@ -144,16 +151,9 @@ def _apply_weight_norm(m): self.apply(_apply_weight_norm) - @staticmethod - def _get_receptive_field_size(layers, stacks, kernel_size, dilation=lambda x: 2**x): - assert layers % stacks == 0 - layers_per_cycle = layers // stacks - dilations = [dilation(i % layers_per_cycle) for i in range(layers)] - return (kernel_size - 1) * sum(dilations) + 1 - @property def receptive_field_size(self): - return self._get_receptive_field_size(self.layers, self.stacks, self.kernel_size) + return _get_receptive_field_size(self.layers, self.stacks, self.kernel_size) def load_checkpoint( self, config, checkpoint_path, eval=False, cache=False diff --git a/TTS/vocoder/models/univnet_generator.py b/TTS/vocoder/models/univnet_generator.py index 72e57a9c39..5d1f817927 100644 --- a/TTS/vocoder/models/univnet_generator.py +++ b/TTS/vocoder/models/univnet_generator.py @@ -7,6 +7,7 @@ from torch.nn.utils import parametrize from TTS.vocoder.layers.lvc_block import LVCBlock +from TTS.vocoder.models.parallel_wavegan_generator import _get_receptive_field_size logger = logging.getLogger(__name__) @@ -133,17 +134,10 @@ def _apply_weight_norm(m): self.apply(_apply_weight_norm) - @staticmethod - def _get_receptive_field_size(layers, stacks, kernel_size, dilation=lambda x: 2**x): - assert layers % stacks == 0 - layers_per_cycle = layers // stacks - dilations = [dilation(i % layers_per_cycle) for i in range(layers)] - return (kernel_size - 1) * sum(dilations) + 1 - @property def receptive_field_size(self): """Return receptive field size.""" - return self._get_receptive_field_size(self.layers, self.stacks, self.kernel_size) + return _get_receptive_field_size(self.layers, self.stacks, self.kernel_size) @torch.no_grad() def inference(self, c): From fa844e0fb7ea84a26cea1fc5ae3b3c3b7f811f55 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 22 Nov 2024 21:35:26 +0100 Subject: [PATCH 202/255] refactor(tacotron): remove duplicate function --- TTS/tts/layers/tacotron/capacitron_layers.py | 11 +++-------- TTS/tts/layers/tacotron/common_layers.py | 7 +++++++ TTS/tts/layers/tacotron/gst_layers.py | 11 +++-------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/TTS/tts/layers/tacotron/capacitron_layers.py b/TTS/tts/layers/tacotron/capacitron_layers.py index 2181ffa7ec..817f42771b 100644 --- a/TTS/tts/layers/tacotron/capacitron_layers.py +++ b/TTS/tts/layers/tacotron/capacitron_layers.py @@ -3,6 +3,8 @@ from torch.distributions.multivariate_normal import MultivariateNormal as MVN from torch.nn import functional as F +from TTS.tts.layers.tacotron.common_layers import calculate_post_conv_height + class CapacitronVAE(nn.Module): """Effective Use of Variational Embedding Capacity for prosody transfer. 
@@ -97,7 +99,7 @@ def __init__(self, num_mel, out_dim): self.training = False self.bns = nn.ModuleList([nn.BatchNorm2d(num_features=filter_size) for filter_size in filters[1:]]) - post_conv_height = self.calculate_post_conv_height(num_mel, 3, 2, 2, num_layers) + post_conv_height = calculate_post_conv_height(num_mel, 3, 2, 2, num_layers) self.recurrence = nn.LSTM( input_size=filters[-1] * post_conv_height, hidden_size=out_dim, batch_first=True, bidirectional=False ) @@ -155,13 +157,6 @@ def forward(self, inputs, input_lengths): return last_output.to(inputs.device) # [B, 128] - @staticmethod - def calculate_post_conv_height(height, kernel_size, stride, pad, n_convs): - """Height of spec after n convolutions with fixed kernel/stride/pad.""" - for _ in range(n_convs): - height = (height - kernel_size + 2 * pad) // stride + 1 - return height - class TextSummary(nn.Module): def __init__(self, embedding_dim, encoder_output_dim): diff --git a/TTS/tts/layers/tacotron/common_layers.py b/TTS/tts/layers/tacotron/common_layers.py index f78ff1e75f..16e517fdca 100644 --- a/TTS/tts/layers/tacotron/common_layers.py +++ b/TTS/tts/layers/tacotron/common_layers.py @@ -3,6 +3,13 @@ from torch.nn import functional as F +def calculate_post_conv_height(height: int, kernel_size: int, stride: int, pad: int, n_convs: int) -> int: + """Height of spec after n convolutions with fixed kernel/stride/pad.""" + for _ in range(n_convs): + height = (height - kernel_size + 2 * pad) // stride + 1 + return height + + class Linear(nn.Module): """Linear layer with a specific initialization. diff --git a/TTS/tts/layers/tacotron/gst_layers.py b/TTS/tts/layers/tacotron/gst_layers.py index ac3d7d4aae..4a83fb1c83 100644 --- a/TTS/tts/layers/tacotron/gst_layers.py +++ b/TTS/tts/layers/tacotron/gst_layers.py @@ -2,6 +2,8 @@ import torch.nn.functional as F from torch import nn +from TTS.tts.layers.tacotron.common_layers import calculate_post_conv_height + class GST(nn.Module): """Global Style Token Module for factorizing prosody in speech. 
@@ -44,7 +46,7 @@ def __init__(self, num_mel, embedding_dim): self.convs = nn.ModuleList(convs) self.bns = nn.ModuleList([nn.BatchNorm2d(num_features=filter_size) for filter_size in filters[1:]]) - post_conv_height = self.calculate_post_conv_height(num_mel, 3, 2, 1, num_layers) + post_conv_height = calculate_post_conv_height(num_mel, 3, 2, 1, num_layers) self.recurrence = nn.GRU( input_size=filters[-1] * post_conv_height, hidden_size=embedding_dim // 2, batch_first=True ) @@ -71,13 +73,6 @@ def forward(self, inputs): return out.squeeze(0) - @staticmethod - def calculate_post_conv_height(height, kernel_size, stride, pad, n_convs): - """Height of spec after n convolutions with fixed kernel/stride/pad.""" - for _ in range(n_convs): - height = (height - kernel_size + 2 * pad) // stride + 1 - return height - class StyleTokenLayer(nn.Module): """NN Module attending to style tokens based on prosody encodings.""" From b45a7a4220e21eea4825d24ba4498afb37591c64 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 22 Nov 2024 22:02:26 +0100 Subject: [PATCH 203/255] refactor: move exists() and default() into generic_utils --- TTS/tts/layers/bark/hubert/kmeans_hubert.py | 10 ++-------- TTS/tts/layers/tortoise/clvp.py | 4 ---- TTS/tts/layers/tortoise/transformer.py | 17 +++++++--------- TTS/tts/layers/tortoise/xtransformers.py | 22 +++------------------ TTS/tts/layers/xtts/dvae.py | 4 ---- TTS/tts/layers/xtts/perceiver_encoder.py | 11 +---------- TTS/utils/generic_utils.py | 15 +++++++++++++- 7 files changed, 27 insertions(+), 56 deletions(-) diff --git a/TTS/tts/layers/bark/hubert/kmeans_hubert.py b/TTS/tts/layers/bark/hubert/kmeans_hubert.py index 58a614cb87..ade84794eb 100644 --- a/TTS/tts/layers/bark/hubert/kmeans_hubert.py +++ b/TTS/tts/layers/bark/hubert/kmeans_hubert.py @@ -14,6 +14,8 @@ from torchaudio.functional import resample from transformers import HubertModel +from TTS.utils.generic_utils import exists + def round_down_nearest_multiple(num, divisor): return num // divisor * divisor @@ -26,14 +28,6 @@ def curtail_to_multiple(t, mult, from_left=False): return t[..., seq_slice] -def exists(val): - return val is not None - - -def default(val, d): - return val if exists(val) else d - - class CustomHubert(nn.Module): """ checkpoint and kmeans can be downloaded at https://github.com/facebookresearch/fairseq/tree/main/examples/hubert diff --git a/TTS/tts/layers/tortoise/clvp.py b/TTS/tts/layers/tortoise/clvp.py index 241dfdd4f4..44da1324e7 100644 --- a/TTS/tts/layers/tortoise/clvp.py +++ b/TTS/tts/layers/tortoise/clvp.py @@ -8,10 +8,6 @@ from TTS.tts.layers.tortoise.xtransformers import Encoder -def exists(val): - return val is not None - - def masked_mean(t, mask, dim=1): t = t.masked_fill(~mask[:, :, None], 0.0) return t.sum(dim=1) / mask.sum(dim=1)[..., None] diff --git a/TTS/tts/layers/tortoise/transformer.py b/TTS/tts/layers/tortoise/transformer.py index 6cb1bab96a..ed4d79d4ab 100644 --- a/TTS/tts/layers/tortoise/transformer.py +++ b/TTS/tts/layers/tortoise/transformer.py @@ -1,22 +1,19 @@ +from typing import TypeVar, Union + import torch import torch.nn.functional as F from einops import rearrange from torch import nn -# helpers - +from TTS.utils.generic_utils import exists -def exists(val): - return val is not None - - -def default(val, d): - return val if exists(val) else d +# helpers +_T = TypeVar("_T") -def cast_tuple(val, depth=1): +def cast_tuple(val: Union[tuple[_T], list[_T], _T], depth: int = 1) -> tuple[_T]: if isinstance(val, list): - val = tuple(val) + return tuple(val) return 
val if isinstance(val, tuple) else (val,) * depth diff --git a/TTS/tts/layers/tortoise/xtransformers.py b/TTS/tts/layers/tortoise/xtransformers.py index 9325b8c720..0892fee19d 100644 --- a/TTS/tts/layers/tortoise/xtransformers.py +++ b/TTS/tts/layers/tortoise/xtransformers.py @@ -1,13 +1,15 @@ import math from collections import namedtuple from functools import partial -from inspect import isfunction import torch import torch.nn.functional as F from einops import rearrange, repeat from torch import einsum, nn +from TTS.tts.layers.tortoise.transformer import cast_tuple, max_neg_value +from TTS.utils.generic_utils import default, exists + DEFAULT_DIM_HEAD = 64 Intermediates = namedtuple("Intermediates", ["pre_softmax_attn", "post_softmax_attn"]) @@ -25,20 +27,6 @@ # helpers -def exists(val): - return val is not None - - -def default(val, d): - if exists(val): - return val - return d() if isfunction(d) else d - - -def cast_tuple(val, depth): - return val if isinstance(val, tuple) else (val,) * depth - - class always: def __init__(self, val): self.val = val @@ -63,10 +51,6 @@ def __call__(self, x, *args, **kwargs): return x == self.val -def max_neg_value(tensor): - return -torch.finfo(tensor.dtype).max - - def l2norm(t): return F.normalize(t, p=2, dim=-1) diff --git a/TTS/tts/layers/xtts/dvae.py b/TTS/tts/layers/xtts/dvae.py index 73970fb0bf..4f806f82cb 100644 --- a/TTS/tts/layers/xtts/dvae.py +++ b/TTS/tts/layers/xtts/dvae.py @@ -14,10 +14,6 @@ logger = logging.getLogger(__name__) -def default(val, d): - return val if val is not None else d - - def eval_decorator(fn): def inner(model, *args, **kwargs): was_training = model.training diff --git a/TTS/tts/layers/xtts/perceiver_encoder.py b/TTS/tts/layers/xtts/perceiver_encoder.py index 4b42a0e467..7477087283 100644 --- a/TTS/tts/layers/xtts/perceiver_encoder.py +++ b/TTS/tts/layers/xtts/perceiver_encoder.py @@ -10,10 +10,7 @@ from torch import einsum, nn from TTS.tts.layers.tortoise.transformer import GEGLU - - -def exists(val): - return val is not None +from TTS.utils.generic_utils import default, exists def once(fn): @@ -153,12 +150,6 @@ def Sequential(*mods): return nn.Sequential(*filter(exists, mods)) -def default(val, d): - if exists(val): - return val - return d() if callable(d) else d - - class RMSNorm(nn.Module): def __init__(self, dim, scale=True, dim_cond=None): super().__init__() diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index c38282248d..087ae7d0e1 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -4,13 +4,26 @@ import logging import re from pathlib import Path -from typing import Dict, Optional +from typing import Callable, Dict, Optional, TypeVar, Union import torch from packaging.version import Version +from typing_extensions import TypeIs logger = logging.getLogger(__name__) +_T = TypeVar("_T") + + +def exists(val: Union[_T, None]) -> TypeIs[_T]: + return val is not None + + +def default(val: Union[_T, None], d: Union[_T, Callable[[], _T]]) -> _T: + if exists(val): + return val + return d() if callable(d) else d + def to_camel(text): text = text.capitalize() From 54f4228a466bf3042e7d6d56b00767559d66b942 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 22 Nov 2024 22:08:05 +0100 Subject: [PATCH 204/255] refactor(xtts): use existing cleaners --- TTS/tts/layers/xtts/tokenizer.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py index e87eb0766b..076727239c 100644 --- 
a/TTS/tts/layers/xtts/tokenizer.py +++ b/TTS/tts/layers/xtts/tokenizer.py @@ -15,6 +15,7 @@ from tokenizers import Tokenizer from TTS.tts.layers.xtts.zh_num2words import TextNorm as zh_num2words +from TTS.tts.utils.text.cleaners import collapse_whitespace, lowercase logger = logging.getLogger(__name__) @@ -72,8 +73,6 @@ def split_sentence(text, lang, text_split_length=250): return text_splits -_whitespace_re = re.compile(r"\s+") - # List of (regular expression, replacement) pairs for abbreviations: _abbreviations = { "en": [ @@ -564,14 +563,6 @@ def expand_numbers_multilingual(text, lang="en"): return text -def lowercase(text): - return text.lower() - - -def collapse_whitespace(text): - return re.sub(_whitespace_re, " ", text) - - def multilingual_cleaners(text, lang): text = text.replace('"', "") if lang == "tr": @@ -586,13 +577,6 @@ def multilingual_cleaners(text, lang): return text -def basic_cleaners(text): - """Basic pipeline that lowercases and collapses whitespace without transliteration.""" - text = lowercase(text) - text = collapse_whitespace(text) - return text - - def chinese_transliterate(text): try: import pypinyin From b1ac884e077e243015c59421c8385e55a61d0899 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 22 Nov 2024 22:33:25 +0100 Subject: [PATCH 205/255] refactor: move shared function into dataset.py --- TTS/tts/datasets/dataset.py | 25 +++++++++++++++++++++++++ TTS/tts/models/delightful_tts.py | 21 +-------------------- TTS/tts/models/vits.py | 26 +------------------------- 3 files changed, 27 insertions(+), 45 deletions(-) diff --git a/TTS/tts/datasets/dataset.py b/TTS/tts/datasets/dataset.py index 37e3a1779d..5f629f32a9 100644 --- a/TTS/tts/datasets/dataset.py +++ b/TTS/tts/datasets/dataset.py @@ -63,6 +63,31 @@ def get_audio_size(audiopath: Union[str, os.PathLike[Any]]) -> int: raise RuntimeError(msg) from e +def get_attribute_balancer_weights(items: list, attr_name: str, multi_dict: Optional[dict] = None): + """Create inverse frequency weights for balancing the dataset. 
+ + Use `multi_dict` to scale relative weights.""" + attr_names_samples = np.array([item[attr_name] for item in items]) + unique_attr_names = np.unique(attr_names_samples).tolist() + attr_idx = [unique_attr_names.index(l) for l in attr_names_samples] + attr_count = np.array([len(np.where(attr_names_samples == l)[0]) for l in unique_attr_names]) + weight_attr = 1.0 / attr_count + dataset_samples_weight = np.array([weight_attr[l] for l in attr_idx]) + dataset_samples_weight = dataset_samples_weight / np.linalg.norm(dataset_samples_weight) + if multi_dict is not None: + # check if all keys are in the multi_dict + for k in multi_dict: + assert k in unique_attr_names, f"{k} not in {unique_attr_names}" + # scale weights + multiplier_samples = np.array([multi_dict.get(item[attr_name], 1.0) for item in items]) + dataset_samples_weight *= multiplier_samples + return ( + torch.from_numpy(dataset_samples_weight).float(), + unique_attr_names, + np.unique(dataset_samples_weight).tolist(), + ) + + class TTSDataset(Dataset): def __init__( self, diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index 7216e8143a..8857004725 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -17,7 +17,7 @@ from trainer.torch import DistributedSampler, DistributedSamplerWrapper from trainer.trainer_utils import get_optimizer, get_scheduler -from TTS.tts.datasets.dataset import F0Dataset, TTSDataset, _parse_sample +from TTS.tts.datasets.dataset import F0Dataset, TTSDataset, _parse_sample, get_attribute_balancer_weights from TTS.tts.layers.delightful_tts.acoustic_model import AcousticModel from TTS.tts.layers.losses import ( ForwardSumLoss, @@ -194,25 +194,6 @@ def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fm ############################## -def get_attribute_balancer_weights(items: list, attr_name: str, multi_dict: dict = None): - """Create balancer weight for torch WeightedSampler""" - attr_names_samples = np.array([item[attr_name] for item in items]) - unique_attr_names = np.unique(attr_names_samples).tolist() - attr_idx = [unique_attr_names.index(l) for l in attr_names_samples] - attr_count = np.array([len(np.where(attr_names_samples == l)[0]) for l in unique_attr_names]) - weight_attr = 1.0 / attr_count - dataset_samples_weight = np.array([weight_attr[l] for l in attr_idx]) - dataset_samples_weight = dataset_samples_weight / np.linalg.norm(dataset_samples_weight) - if multi_dict is not None: - multiplier_samples = np.array([multi_dict.get(item[attr_name], 1.0) for item in items]) - dataset_samples_weight *= multiplier_samples - return ( - torch.from_numpy(dataset_samples_weight).float(), - unique_attr_names, - np.unique(dataset_samples_weight).tolist(), - ) - - class ForwardTTSE2eF0Dataset(F0Dataset): """Override F0Dataset to avoid slow computing of pitches""" diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index aea0f4e4f8..30d9caff02 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -21,7 +21,7 @@ from trainer.trainer_utils import get_optimizer, get_scheduler from TTS.tts.configs.shared_configs import CharactersConfig -from TTS.tts.datasets.dataset import TTSDataset, _parse_sample +from TTS.tts.datasets.dataset import TTSDataset, _parse_sample, get_attribute_balancer_weights from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor from TTS.tts.layers.vits.discriminator import VitsDiscriminator from TTS.tts.layers.vits.networks import PosteriorEncoder, ResidualCouplingBlocks, 
TextEncoder @@ -219,30 +219,6 @@ class VitsAudioConfig(Coqpit): ############################## -def get_attribute_balancer_weights(items: list, attr_name: str, multi_dict: dict = None): - """Create inverse frequency weights for balancing the dataset. - Use `multi_dict` to scale relative weights.""" - attr_names_samples = np.array([item[attr_name] for item in items]) - unique_attr_names = np.unique(attr_names_samples).tolist() - attr_idx = [unique_attr_names.index(l) for l in attr_names_samples] - attr_count = np.array([len(np.where(attr_names_samples == l)[0]) for l in unique_attr_names]) - weight_attr = 1.0 / attr_count - dataset_samples_weight = np.array([weight_attr[l] for l in attr_idx]) - dataset_samples_weight = dataset_samples_weight / np.linalg.norm(dataset_samples_weight) - if multi_dict is not None: - # check if all keys are in the multi_dict - for k in multi_dict: - assert k in unique_attr_names, f"{k} not in {unique_attr_names}" - # scale weights - multiplier_samples = np.array([multi_dict.get(item[attr_name], 1.0) for item in items]) - dataset_samples_weight *= multiplier_samples - return ( - torch.from_numpy(dataset_samples_weight).float(), - unique_attr_names, - np.unique(dataset_samples_weight).tolist(), - ) - - class VitsDataset(TTSDataset): def __init__(self, model_args, *args, **kwargs): super().__init__(*args, **kwargs) From 2c82477a785dad0abe178453af70c0554dc7982f Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 22 Nov 2024 23:44:40 +0100 Subject: [PATCH 206/255] ci: merge integration tests back into unit tests --- .github/actions/setup-uv/action.yml | 4 +- .github/workflows/integration-tests.yml | 82 ------------------------- .github/workflows/tests.yml | 51 ++++++++++++++- 3 files changed, 50 insertions(+), 87 deletions(-) delete mode 100644 .github/workflows/integration-tests.yml diff --git a/.github/actions/setup-uv/action.yml b/.github/actions/setup-uv/action.yml index 619b138fb2..c7dd4f5f99 100644 --- a/.github/actions/setup-uv/action.yml +++ b/.github/actions/setup-uv/action.yml @@ -4,8 +4,8 @@ runs: using: 'composite' steps: - name: Install uv - uses: astral-sh/setup-uv@v3 + uses: astral-sh/setup-uv@v4 with: - version: "0.5.1" + version: "0.5.4" enable-cache: true cache-dependency-glob: "**/pyproject.toml" diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml deleted file mode 100644 index 4dc8c76c1a..0000000000 --- a/.github/workflows/integration-tests.yml +++ /dev/null @@ -1,82 +0,0 @@ -name: integration - -on: - push: - branches: - - main - pull_request: - types: [opened, synchronize, reopened] - workflow_dispatch: - inputs: - trainer_branch: - description: "Branch of Trainer to test" - required: false - default: "main" - coqpit_branch: - description: "Branch of Coqpit to test" - required: false - default: "main" -jobs: - test: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ["3.9", "3.12"] - subset: ["test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"] - steps: - - uses: actions/checkout@v4 - - name: Setup uv - uses: ./.github/actions/setup-uv - - name: Set up Python ${{ matrix.python-version }} - run: uv python install ${{ matrix.python-version }} - - name: Install Espeak - if: contains(fromJSON('["test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) - run: | - sudo apt-get update - sudo apt-get install espeak espeak-ng - - name: Install dependencies - run: | - sudo apt-get update - sudo apt-get 
install -y --no-install-recommends git make gcc - make system-deps - - name: Install custom Trainer and/or Coqpit if requested - run: | - if [[ -n "${{ github.event.inputs.trainer_branch }}" ]]; then - uv add git+https://github.com/idiap/coqui-ai-Trainer --branch ${{ github.event.inputs.trainer_branch }} - fi - if [[ -n "${{ github.event.inputs.coqpit_branch }}" ]]; then - uv add git+https://github.com/idiap/coqui-ai-coqpit --branch ${{ github.event.inputs.coqpit_branch }} - fi - - name: Integration tests - run: | - resolution=highest - if [ "${{ matrix.python-version }}" == "3.9" ]; then - resolution=lowest-direct - fi - uv run --resolution=$resolution --extra server --extra languages make ${{ matrix.subset }} - - name: Upload coverage data - uses: actions/upload-artifact@v4 - with: - include-hidden-files: true - name: coverage-data-${{ matrix.subset }}-${{ matrix.python-version }} - path: .coverage.* - if-no-files-found: ignore - coverage: - if: always() - needs: test - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - name: Setup uv - uses: ./.github/actions/setup-uv - - uses: actions/download-artifact@v4 - with: - pattern: coverage-data-* - merge-multiple: true - - name: Combine coverage - run: | - uv python install - uvx coverage combine - uvx coverage html --skip-covered --skip-empty - uvx coverage report --format=markdown >> $GITHUB_STEP_SUMMARY diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 576de150fd..8d639d5dee 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -1,4 +1,4 @@ -name: unit +name: test on: push: @@ -17,7 +17,7 @@ on: required: false default: "main" jobs: - test: + unit: runs-on: ubuntu-latest strategy: fail-fast: false @@ -62,9 +62,54 @@ jobs: name: coverage-data-${{ matrix.subset }}-${{ matrix.python-version }} path: .coverage.* if-no-files-found: ignore + integration: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ["3.9", "3.12"] + subset: ["test_tts", "test_tts2", "test_vocoder", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"] + steps: + - uses: actions/checkout@v4 + - name: Setup uv + uses: ./.github/actions/setup-uv + - name: Set up Python ${{ matrix.python-version }} + run: uv python install ${{ matrix.python-version }} + - name: Install Espeak + if: contains(fromJSON('["test_tts", "test_tts2", "test_xtts", "test_zoo0", "test_zoo1", "test_zoo2"]'), matrix.subset) + run: | + sudo apt-get update + sudo apt-get install espeak espeak-ng + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y --no-install-recommends git make gcc + make system-deps + - name: Install custom Trainer and/or Coqpit if requested + run: | + if [[ -n "${{ github.event.inputs.trainer_branch }}" ]]; then + uv add git+https://github.com/idiap/coqui-ai-Trainer --branch ${{ github.event.inputs.trainer_branch }} + fi + if [[ -n "${{ github.event.inputs.coqpit_branch }}" ]]; then + uv add git+https://github.com/idiap/coqui-ai-coqpit --branch ${{ github.event.inputs.coqpit_branch }} + fi + - name: Integration tests + run: | + resolution=highest + if [ "${{ matrix.python-version }}" == "3.9" ]; then + resolution=lowest-direct + fi + uv run --resolution=$resolution --extra server --extra languages make ${{ matrix.subset }} + - name: Upload coverage data + uses: actions/upload-artifact@v4 + with: + include-hidden-files: true + name: coverage-data-${{ matrix.subset }}-${{ matrix.python-version }} + path: .coverage.* + if-no-files-found: ignore coverage: if: 
always() - needs: test + needs: [unit, integration] runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 From 76df6421dead004a40b1ded1b12916282f013132 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 23 Nov 2024 01:16:50 +0100 Subject: [PATCH 207/255] refactor: move more audio processing into torch_transforms --- TTS/tts/models/delightful_tts.py | 139 +----------------------- TTS/tts/models/vits.py | 126 +-------------------- TTS/utils/audio/torch_transforms.py | 96 ++++++++++++++++ TTS/vc/modules/freevc/mel_processing.py | 48 -------- tests/tts_tests/test_vits.py | 5 +- 5 files changed, 100 insertions(+), 314 deletions(-) diff --git a/TTS/tts/models/delightful_tts.py b/TTS/tts/models/delightful_tts.py index 8857004725..e6db116081 100644 --- a/TTS/tts/models/delightful_tts.py +++ b/TTS/tts/models/delightful_tts.py @@ -9,7 +9,6 @@ import torch import torch.distributed as dist from coqpit import Coqpit -from librosa.filters import mel as librosa_mel_fn from torch import nn from torch.utils.data import DataLoader from torch.utils.data.sampler import WeightedRandomSampler @@ -38,7 +37,7 @@ from TTS.utils.audio.numpy_transforms import db_to_amp as db_to_amp_numpy from TTS.utils.audio.numpy_transforms import mel_to_wav as mel_to_wav_numpy from TTS.utils.audio.processor import AudioProcessor -from TTS.utils.audio.torch_transforms import amp_to_db +from TTS.utils.audio.torch_transforms import wav_to_mel, wav_to_spec from TTS.vocoder.layers.losses import MultiScaleSTFTLoss from TTS.vocoder.models.hifigan_generator import HifiganGenerator from TTS.vocoder.utils.generic_utils import plot_results @@ -50,145 +49,11 @@ mel_basis = {} -def _wav_to_spec(y, n_fft, hop_length, win_length, center=False): - y = y.squeeze(1) - - if torch.min(y) < -1.0: - logger.info("min value is %.3f", torch.min(y)) - if torch.max(y) > 1.0: - logger.info("max value is %.3f", torch.max(y)) - - global hann_window # pylint: disable=global-statement - dtype_device = str(y.dtype) + "_" + str(y.device) - wnsize_dtype_device = str(win_length) + "_" + dtype_device - if wnsize_dtype_device not in hann_window: - hann_window[wnsize_dtype_device] = torch.hann_window(win_length).to(dtype=y.dtype, device=y.device) - - y = torch.nn.functional.pad( - y.unsqueeze(1), - (int((n_fft - hop_length) / 2), int((n_fft - hop_length) / 2)), - mode="reflect", - ) - y = y.squeeze(1) - - spec = torch.view_as_real( - torch.stft( - y, - n_fft, - hop_length=hop_length, - win_length=win_length, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=True, - ) - ) - - return spec - - -def wav_to_spec(y, n_fft, hop_length, win_length, center=False): - """ - Args Shapes: - - y : :math:`[B, 1, T]` - - Return Shapes: - - spec : :math:`[B,C,T]` - """ - spec = _wav_to_spec(y, n_fft, hop_length, win_length, center=center) - spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) - return spec - - def wav_to_energy(y, n_fft, hop_length, win_length, center=False): - spec = _wav_to_spec(y, n_fft, hop_length, win_length, center=center) - - spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) + spec = wav_to_spec(y, n_fft, hop_length, win_length, center=center) return torch.norm(spec, dim=1, keepdim=True) -def name_mel_basis(spec, n_fft, fmax): - n_fft_len = f"{n_fft}_{fmax}_{spec.dtype}_{spec.device}" - return n_fft_len - - -def spec_to_mel(spec, n_fft, num_mels, sample_rate, fmin, fmax): - """ - Args Shapes: - - spec : :math:`[B,C,T]` - - Return Shapes: - - mel : :math:`[B,C,T]` - """ - global 
mel_basis # pylint: disable=global-statement - mel_basis_key = name_mel_basis(spec, n_fft, fmax) - # pylint: disable=too-many-function-args - if mel_basis_key not in mel_basis: - # pylint: disable=missing-kwoa - mel = librosa_mel_fn(sample_rate, n_fft, num_mels, fmin, fmax) - mel_basis[mel_basis_key] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device) - mel = torch.matmul(mel_basis[mel_basis_key], spec) - mel = amp_to_db(mel) - return mel - - -def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fmax, center=False): - """ - Args Shapes: - - y : :math:`[B, 1, T_y]` - - Return Shapes: - - spec : :math:`[B,C,T_spec]` - """ - y = y.squeeze(1) - - if torch.min(y) < -1.0: - logger.info("min value is %.3f", torch.min(y)) - if torch.max(y) > 1.0: - logger.info("max value is %.3f", torch.max(y)) - - global mel_basis, hann_window # pylint: disable=global-statement - mel_basis_key = name_mel_basis(y, n_fft, fmax) - wnsize_dtype_device = str(win_length) + "_" + str(y.dtype) + "_" + str(y.device) - if mel_basis_key not in mel_basis: - # pylint: disable=missing-kwoa - mel = librosa_mel_fn( - sr=sample_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax - ) # pylint: disable=too-many-function-args - mel_basis[mel_basis_key] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device) - if wnsize_dtype_device not in hann_window: - hann_window[wnsize_dtype_device] = torch.hann_window(win_length).to(dtype=y.dtype, device=y.device) - - y = torch.nn.functional.pad( - y.unsqueeze(1), - (int((n_fft - hop_length) / 2), int((n_fft - hop_length) / 2)), - mode="reflect", - ) - y = y.squeeze(1) - - spec = torch.view_as_real( - torch.stft( - y, - n_fft, - hop_length=hop_length, - win_length=win_length, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=True, - ) - ) - - spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) - spec = torch.matmul(mel_basis[mel_basis_key], spec) - spec = amp_to_db(spec) - return spec - - ############################## # DATASET ############################## diff --git a/TTS/tts/models/vits.py b/TTS/tts/models/vits.py index 30d9caff02..7ec2519236 100644 --- a/TTS/tts/models/vits.py +++ b/TTS/tts/models/vits.py @@ -10,7 +10,6 @@ import torch.distributed as dist import torchaudio from coqpit import Coqpit -from librosa.filters import mel as librosa_mel_fn from monotonic_alignment_search import maximum_path from torch import nn from torch.nn import functional as F @@ -35,7 +34,7 @@ from TTS.tts.utils.text.characters import BaseCharacters, BaseVocabulary, _characters, _pad, _phonemes, _punctuations from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment -from TTS.utils.audio.torch_transforms import amp_to_db +from TTS.utils.audio.torch_transforms import spec_to_mel, wav_to_mel, wav_to_spec from TTS.utils.samplers import BucketBatchSampler from TTS.vocoder.models.hifigan_generator import HifiganGenerator from TTS.vocoder.utils.generic_utils import plot_results @@ -46,10 +45,6 @@ # IO / Feature extraction ############################## -# pylint: disable=global-statement -hann_window = {} -mel_basis = {} - @torch.no_grad() def weights_reset(m: nn.Module): @@ -79,125 +74,6 @@ def load_audio(file_path): return x, sr -def wav_to_spec(y, n_fft, hop_length, win_length, center=False): - """ - Args Shapes: - - y : :math:`[B, 1, T]` - - Return Shapes: - - spec : :math:`[B,C,T]` - """ - y = y.squeeze(1) - - if torch.min(y) < -1.0: - 
logger.info("min value is %.3f", torch.min(y)) - if torch.max(y) > 1.0: - logger.info("max value is %.3f", torch.max(y)) - - global hann_window - dtype_device = str(y.dtype) + "_" + str(y.device) - wnsize_dtype_device = str(win_length) + "_" + dtype_device - if wnsize_dtype_device not in hann_window: - hann_window[wnsize_dtype_device] = torch.hann_window(win_length).to(dtype=y.dtype, device=y.device) - - y = torch.nn.functional.pad( - y.unsqueeze(1), - (int((n_fft - hop_length) / 2), int((n_fft - hop_length) / 2)), - mode="reflect", - ) - y = y.squeeze(1) - - spec = torch.view_as_real( - torch.stft( - y, - n_fft, - hop_length=hop_length, - win_length=win_length, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=True, - ) - ) - - spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) - return spec - - -def spec_to_mel(spec, n_fft, num_mels, sample_rate, fmin, fmax): - """ - Args Shapes: - - spec : :math:`[B,C,T]` - - Return Shapes: - - mel : :math:`[B,C,T]` - """ - global mel_basis - dtype_device = str(spec.dtype) + "_" + str(spec.device) - fmax_dtype_device = str(fmax) + "_" + dtype_device - if fmax_dtype_device not in mel_basis: - mel = librosa_mel_fn(sr=sample_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) - mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device) - mel = torch.matmul(mel_basis[fmax_dtype_device], spec) - mel = amp_to_db(mel) - return mel - - -def wav_to_mel(y, n_fft, num_mels, sample_rate, hop_length, win_length, fmin, fmax, center=False): - """ - Args Shapes: - - y : :math:`[B, 1, T]` - - Return Shapes: - - spec : :math:`[B,C,T]` - """ - y = y.squeeze(1) - - if torch.min(y) < -1.0: - logger.info("min value is %.3f", torch.min(y)) - if torch.max(y) > 1.0: - logger.info("max value is %.3f", torch.max(y)) - - global mel_basis, hann_window - dtype_device = str(y.dtype) + "_" + str(y.device) - fmax_dtype_device = str(fmax) + "_" + dtype_device - wnsize_dtype_device = str(win_length) + "_" + dtype_device - if fmax_dtype_device not in mel_basis: - mel = librosa_mel_fn(sr=sample_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) - mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device) - if wnsize_dtype_device not in hann_window: - hann_window[wnsize_dtype_device] = torch.hann_window(win_length).to(dtype=y.dtype, device=y.device) - - y = torch.nn.functional.pad( - y.unsqueeze(1), - (int((n_fft - hop_length) / 2), int((n_fft - hop_length) / 2)), - mode="reflect", - ) - y = y.squeeze(1) - - spec = torch.view_as_real( - torch.stft( - y, - n_fft, - hop_length=hop_length, - win_length=win_length, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=True, - ) - ) - - spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) - spec = torch.matmul(mel_basis[fmax_dtype_device], spec) - spec = amp_to_db(spec) - return spec - - ############################# # CONFIGS ############################# diff --git a/TTS/utils/audio/torch_transforms.py b/TTS/utils/audio/torch_transforms.py index dda4c0a419..59bb23cc4f 100644 --- a/TTS/utils/audio/torch_transforms.py +++ b/TTS/utils/audio/torch_transforms.py @@ -1,7 +1,15 @@ +import logging + import librosa import torch from torch import nn +logger = logging.getLogger(__name__) + + +hann_window = {} +mel_basis = {} + def amp_to_db(x: torch.Tensor, *, spec_gain: float = 1.0, clip_val: float = 1e-5) -> torch.Tensor: 
"""Spectral normalization / dynamic range compression.""" @@ -13,6 +21,94 @@ def db_to_amp(x: torch.Tensor, *, spec_gain: float = 1.0) -> torch.Tensor: return torch.exp(x) / spec_gain +def wav_to_spec(y: torch.Tensor, n_fft: int, hop_length: int, win_length: int, *, center: bool = False) -> torch.Tensor: + """ + Args Shapes: + - y : :math:`[B, 1, T]` + + Return Shapes: + - spec : :math:`[B,C,T]` + """ + y = y.squeeze(1) + + if torch.min(y) < -1.0: + logger.info("min value is %.3f", torch.min(y)) + if torch.max(y) > 1.0: + logger.info("max value is %.3f", torch.max(y)) + + global hann_window + wnsize_dtype_device = f"{win_length}_{y.dtype}_{y.device}" + if wnsize_dtype_device not in hann_window: + hann_window[wnsize_dtype_device] = torch.hann_window(win_length).to(dtype=y.dtype, device=y.device) + + y = torch.nn.functional.pad( + y.unsqueeze(1), + (int((n_fft - hop_length) / 2), int((n_fft - hop_length) / 2)), + mode="reflect", + ) + y = y.squeeze(1) + + spec = torch.view_as_real( + torch.stft( + y, + n_fft, + hop_length=hop_length, + win_length=win_length, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=True, + ) + ) + + return torch.sqrt(spec.pow(2).sum(-1) + 1e-6) + + +def spec_to_mel( + spec: torch.Tensor, n_fft: int, num_mels: int, sample_rate: int, fmin: float, fmax: float +) -> torch.Tensor: + """ + Args Shapes: + - spec : :math:`[B,C,T]` + + Return Shapes: + - mel : :math:`[B,C,T]` + """ + global mel_basis + fmax_dtype_device = f"{n_fft}_{fmax}_{spec.dtype}_{spec.device}" + if fmax_dtype_device not in mel_basis: + # TODO: switch librosa to torchaudio + mel = librosa.filters.mel(sr=sample_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) + mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device) + mel = torch.matmul(mel_basis[fmax_dtype_device], spec) + return amp_to_db(mel) + + +def wav_to_mel( + y: torch.Tensor, + n_fft: int, + num_mels: int, + sample_rate: int, + hop_length: int, + win_length: int, + fmin: float, + fmax: float, + *, + center: bool = False, +) -> torch.Tensor: + """ + Args Shapes: + - y : :math:`[B, 1, T]` + + Return Shapes: + - spec : :math:`[B,C,T]` + """ + spec = wav_to_spec(y, n_fft, hop_length, win_length, center=center) + return spec_to_mel(spec, n_fft, num_mels, sample_rate, fmin, fmax) + + class TorchSTFT(nn.Module): # pylint: disable=abstract-method """Some of the audio processing funtions using Torch for faster batch processing. 
diff --git a/TTS/vc/modules/freevc/mel_processing.py b/TTS/vc/modules/freevc/mel_processing.py index 4da5e27c83..017d900284 100644 --- a/TTS/vc/modules/freevc/mel_processing.py +++ b/TTS/vc/modules/freevc/mel_processing.py @@ -14,54 +14,6 @@ hann_window = {} -def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False): - if torch.min(y) < -1.0: - logger.info("Min value is: %.3f", torch.min(y)) - if torch.max(y) > 1.0: - logger.info("Max value is: %.3f", torch.max(y)) - - global hann_window - dtype_device = str(y.dtype) + "_" + str(y.device) - wnsize_dtype_device = str(win_size) + "_" + dtype_device - if wnsize_dtype_device not in hann_window: - hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) - - y = torch.nn.functional.pad( - y.unsqueeze(1), (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), mode="reflect" - ) - y = y.squeeze(1) - - spec = torch.view_as_real( - torch.stft( - y, - n_fft, - hop_length=hop_size, - win_length=win_size, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=True, - ) - ) - - spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) - return spec - - -def spec_to_mel_torch(spec, n_fft, num_mels, sampling_rate, fmin, fmax): - global mel_basis - dtype_device = str(spec.dtype) + "_" + str(spec.device) - fmax_dtype_device = str(fmax) + "_" + dtype_device - if fmax_dtype_device not in mel_basis: - mel = librosa_mel_fn(sr=sampling_rate, n_fft=n_fft, n_mels=num_mels, fmin=fmin, fmax=fmax) - mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device) - spec = torch.matmul(mel_basis[fmax_dtype_device], spec) - spec = amp_to_db(spec) - return spec - - def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False): if torch.min(y) < -1.0: logger.info("Min value is: %.3f", torch.min(y)) diff --git a/tests/tts_tests/test_vits.py b/tests/tts_tests/test_vits.py index a27bdfe5b5..c8a52e1c1b 100644 --- a/tests/tts_tests/test_vits.py +++ b/tests/tts_tests/test_vits.py @@ -14,12 +14,9 @@ VitsArgs, VitsAudioConfig, load_audio, - spec_to_mel, - wav_to_mel, - wav_to_spec, ) from TTS.tts.utils.speakers import SpeakerManager -from TTS.utils.audio.torch_transforms import amp_to_db, db_to_amp +from TTS.utils.audio.torch_transforms import amp_to_db, db_to_amp, spec_to_mel, wav_to_mel, wav_to_spec LANG_FILE = os.path.join(get_tests_input_path(), "language_ids.json") SPEAKER_ENCODER_CONFIG = os.path.join(get_tests_input_path(), "test_speaker_encoder_config.json") From 8bf288eeab63adcf26d32125615628cc1abdaf31 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 24 Nov 2024 15:37:04 +0100 Subject: [PATCH 208/255] test: move test_helpers.py to fast unit tests --- tests/{tts_tests => aux_tests}/test_helpers.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/{tts_tests => aux_tests}/test_helpers.py (100%) diff --git a/tests/tts_tests/test_helpers.py b/tests/aux_tests/test_helpers.py similarity index 100% rename from tests/tts_tests/test_helpers.py rename to tests/aux_tests/test_helpers.py From 7330ad8854f2e72cee5b66e481188cb3750e28df Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 24 Nov 2024 17:46:51 +0100 Subject: [PATCH 209/255] refactor: move duplicate alignment functions into helpers --- .../layers/delightful_tts/acoustic_model.py | 61 ++++------------ TTS/tts/layers/delightful_tts/encoders.py | 13 +--- TTS/tts/models/align_tts.py | 36 
+--------- TTS/tts/models/forward_tts.py | 50 +------------- TTS/tts/utils/helpers.py | 69 +++++++++++++++++-- tests/aux_tests/test_helpers.py | 31 ++++++++- tests/tts_tests2/test_forward_tts.py | 24 +------ 7 files changed, 114 insertions(+), 170 deletions(-) diff --git a/TTS/tts/layers/delightful_tts/acoustic_model.py b/TTS/tts/layers/delightful_tts/acoustic_model.py index 3c0e3a3a76..981d6cdb1f 100644 --- a/TTS/tts/layers/delightful_tts/acoustic_model.py +++ b/TTS/tts/layers/delightful_tts/acoustic_model.py @@ -12,7 +12,6 @@ from TTS.tts.layers.delightful_tts.encoders import ( PhonemeLevelProsodyEncoder, UtteranceLevelProsodyEncoder, - get_mask_from_lengths, ) from TTS.tts.layers.delightful_tts.energy_adaptor import EnergyAdaptor from TTS.tts.layers.delightful_tts.networks import EmbeddingPadded, positional_encoding @@ -20,7 +19,7 @@ from TTS.tts.layers.delightful_tts.pitch_adaptor import PitchAdaptor from TTS.tts.layers.delightful_tts.variance_predictor import VariancePredictor from TTS.tts.layers.generic.aligner import AlignmentNetwork -from TTS.tts.utils.helpers import generate_path, sequence_mask +from TTS.tts.utils.helpers import expand_encoder_outputs, generate_attention, sequence_mask logger = logging.getLogger(__name__) @@ -231,42 +230,6 @@ def _init_d_vector(self): raise ValueError("[!] Speaker embedding layer already initialized before d_vector settings.") self.embedded_speaker_dim = self.args.d_vector_dim - @staticmethod - def generate_attn(dr, x_mask, y_mask=None): - """Generate an attention mask from the linear scale durations. - - Args: - dr (Tensor): Linear scale durations. - x_mask (Tensor): Mask for the input (character) sequence. - y_mask (Tensor): Mask for the output (spectrogram) sequence. Compute it from the predicted durations - if None. Defaults to None. 
- - Shapes - - dr: :math:`(B, T_{en})` - - x_mask: :math:`(B, T_{en})` - - y_mask: :math:`(B, T_{de})` - """ - # compute decode mask from the durations - if y_mask is None: - y_lengths = dr.sum(1).long() - y_lengths[y_lengths < 1] = 1 - y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).to(dr.dtype) - attn_mask = torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2) - attn = generate_path(dr, attn_mask.squeeze(1)).to(dr.dtype) - return attn - - def _expand_encoder_with_durations( - self, - o_en: torch.FloatTensor, - dr: torch.IntTensor, - x_mask: torch.IntTensor, - y_lengths: torch.IntTensor, - ): - y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).to(o_en.dtype) - attn = self.generate_attn(dr, x_mask, y_mask) - o_en_ex = torch.einsum("kmn, kjm -> kjn", [attn.float(), o_en]) - return y_mask, o_en_ex, attn.transpose(1, 2) - def _forward_aligner( self, x: torch.FloatTensor, @@ -340,8 +303,8 @@ def forward( {"d_vectors": d_vectors, "speaker_ids": speaker_idx} ) # pylint: disable=unused-variable - src_mask = get_mask_from_lengths(src_lens) # [B, T_src] - mel_mask = get_mask_from_lengths(mel_lens) # [B, T_mel] + src_mask = ~sequence_mask(src_lens) # [B, T_src] + mel_mask = ~sequence_mask(mel_lens) # [B, T_mel] # Token embeddings token_embeddings = self.src_word_emb(tokens) # [B, T_src, C_hidden] @@ -420,8 +383,8 @@ def forward( encoder_outputs = encoder_outputs.transpose(1, 2) + pitch_emb + energy_emb log_duration_prediction = self.duration_predictor(x=encoder_outputs_res.detach(), mask=src_mask) - mel_pred_mask, encoder_outputs_ex, alignments = self._expand_encoder_with_durations( - o_en=encoder_outputs, y_lengths=mel_lens, dr=dr, x_mask=~src_mask[:, None] + encoder_outputs_ex, alignments, mel_pred_mask = expand_encoder_outputs( + encoder_outputs, y_lengths=mel_lens, duration=dr, x_mask=~src_mask[:, None] ) x = self.decoder( @@ -435,7 +398,7 @@ def forward( dr = torch.log(dr + 1) dr_pred = torch.exp(log_duration_prediction) - 1 - alignments_dp = self.generate_attn(dr_pred, src_mask.unsqueeze(1), mel_pred_mask) # [B, T_max, T_max2'] + alignments_dp = generate_attention(dr_pred, src_mask.unsqueeze(1), mel_pred_mask) # [B, T_max, T_max2'] return { "model_outputs": x, @@ -448,7 +411,7 @@ def forward( "p_prosody_pred": p_prosody_pred, "p_prosody_ref": p_prosody_ref, "alignments_dp": alignments_dp, - "alignments": alignments, # [B, T_de, T_en] + "alignments": alignments.transpose(1, 2), # [B, T_de, T_en] "aligner_soft": aligner_soft, "aligner_mas": aligner_mas, "aligner_durations": aligner_durations, @@ -469,7 +432,7 @@ def inference( pitch_transform: Callable = None, energy_transform: Callable = None, ) -> torch.Tensor: - src_mask = get_mask_from_lengths(torch.tensor([tokens.shape[1]], dtype=torch.int64, device=tokens.device)) + src_mask = ~sequence_mask(torch.tensor([tokens.shape[1]], dtype=torch.int64, device=tokens.device)) src_lens = torch.tensor(tokens.shape[1:2]).to(tokens.device) # pylint: disable=unused-variable sid, g, lid, _ = self._set_cond_input( # pylint: disable=unused-variable {"d_vectors": d_vectors, "speaker_ids": speaker_idx} @@ -536,11 +499,11 @@ def inference( duration_pred = torch.round(duration_pred) # -> [B, T_src] mel_lens = duration_pred.sum(1) # -> [B,] - _, encoder_outputs_ex, alignments = self._expand_encoder_with_durations( - o_en=encoder_outputs, y_lengths=mel_lens, dr=duration_pred.squeeze(1), x_mask=~src_mask[:, None] + encoder_outputs_ex, alignments, _ = expand_encoder_outputs( + encoder_outputs, y_lengths=mel_lens, 
duration=duration_pred.squeeze(1), x_mask=~src_mask[:, None] ) - mel_mask = get_mask_from_lengths( + mel_mask = ~sequence_mask( torch.tensor([encoder_outputs_ex.shape[2]], dtype=torch.int64, device=encoder_outputs_ex.device) ) @@ -557,7 +520,7 @@ def inference( x = self.to_mel(x) outputs = { "model_outputs": x, - "alignments": alignments, + "alignments": alignments.transpose(1, 2), # "pitch": pitch_emb_pred, "durations": duration_pred, "pitch": pitch_pred, diff --git a/TTS/tts/layers/delightful_tts/encoders.py b/TTS/tts/layers/delightful_tts/encoders.py index 0878f0677a..bd0c319dc1 100644 --- a/TTS/tts/layers/delightful_tts/encoders.py +++ b/TTS/tts/layers/delightful_tts/encoders.py @@ -7,14 +7,7 @@ from TTS.tts.layers.delightful_tts.conformer import ConformerMultiHeadedSelfAttention from TTS.tts.layers.delightful_tts.conv_layers import CoordConv1d from TTS.tts.layers.delightful_tts.networks import STL - - -def get_mask_from_lengths(lengths: torch.Tensor) -> torch.Tensor: - batch_size = lengths.shape[0] - max_len = torch.max(lengths).item() - ids = torch.arange(0, max_len, device=lengths.device).unsqueeze(0).expand(batch_size, -1) - mask = ids >= lengths.unsqueeze(1).expand(-1, max_len) - return mask +from TTS.tts.utils.helpers import sequence_mask def stride_lens(lens: torch.Tensor, stride: int = 2) -> torch.Tensor: @@ -93,7 +86,7 @@ def forward(self, x: torch.Tensor, mel_lens: torch.Tensor) -> Tuple[torch.Tensor outputs --- [N, E//2] """ - mel_masks = get_mask_from_lengths(mel_lens).unsqueeze(1) + mel_masks = ~sequence_mask(mel_lens).unsqueeze(1) x = x.masked_fill(mel_masks, 0) for conv, norm in zip(self.convs, self.norms): x = conv(x) @@ -103,7 +96,7 @@ def forward(self, x: torch.Tensor, mel_lens: torch.Tensor) -> Tuple[torch.Tensor for _ in range(2): mel_lens = stride_lens(mel_lens) - mel_masks = get_mask_from_lengths(mel_lens) + mel_masks = ~sequence_mask(mel_lens) x = x.masked_fill(mel_masks.unsqueeze(1), 0) x = x.permute((0, 2, 1)) diff --git a/TTS/tts/models/align_tts.py b/TTS/tts/models/align_tts.py index 1c3d57582e..28a52bc558 100644 --- a/TTS/tts/models/align_tts.py +++ b/TTS/tts/models/align_tts.py @@ -13,7 +13,7 @@ from TTS.tts.layers.feed_forward.encoder import Encoder from TTS.tts.layers.generic.pos_encoding import PositionalEncoding from TTS.tts.models.base_tts import BaseTTS -from TTS.tts.utils.helpers import generate_path, sequence_mask +from TTS.tts.utils.helpers import expand_encoder_outputs, generate_attention, sequence_mask from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_spectrogram @@ -169,35 +169,6 @@ def compute_align_path(self, mu, log_sigma, y, x_mask, y_mask): dr_mas = torch.sum(attn, -1) return dr_mas.squeeze(1), log_p - @staticmethod - def generate_attn(dr, x_mask, y_mask=None): - # compute decode mask from the durations - if y_mask is None: - y_lengths = dr.sum(1).long() - y_lengths[y_lengths < 1] = 1 - y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).to(dr.dtype) - attn_mask = torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2) - attn = generate_path(dr, attn_mask.squeeze(1)).to(dr.dtype) - return attn - - def expand_encoder_outputs(self, en, dr, x_mask, y_mask): - """Generate attention alignment map from durations and - expand encoder outputs - - Examples:: - - encoder output: [a,b,c,d] - - durations: [1, 3, 2, 1] - - - expanded: [a, b, b, b, c, c, d] - - attention map: [[0, 0, 0, 0, 0, 0, 1], - [0, 0, 0, 0, 1, 1, 0], - [0, 1, 1, 1, 
0, 0, 0], - [1, 0, 0, 0, 0, 0, 0]] - """ - attn = self.generate_attn(dr, x_mask, y_mask) - o_en_ex = torch.matmul(attn.squeeze(1).transpose(1, 2), en.transpose(1, 2)).transpose(1, 2) - return o_en_ex, attn - def format_durations(self, o_dr_log, x_mask): o_dr = (torch.exp(o_dr_log) - 1) * x_mask * self.length_scale o_dr[o_dr < 1] = 1.0 @@ -243,9 +214,8 @@ def _forward_encoder(self, x, x_lengths, g=None): return o_en, o_en_dp, x_mask, g def _forward_decoder(self, o_en, o_en_dp, dr, x_mask, y_lengths, g): - y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).to(o_en_dp.dtype) # expand o_en with durations - o_en_ex, attn = self.expand_encoder_outputs(o_en, dr, x_mask, y_mask) + o_en_ex, attn, y_mask = expand_encoder_outputs(o_en, dr, x_mask, y_lengths) # positional encoding if hasattr(self, "pos_encoder"): o_en_ex = self.pos_encoder(o_en_ex, y_mask) @@ -282,7 +252,7 @@ def forward( o_en, o_en_dp, x_mask, g = self._forward_encoder(x, x_lengths, g) dr_mas, mu, log_sigma, logp = self._forward_mdn(o_en, y, y_lengths, x_mask) y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).to(o_en_dp.dtype) - attn = self.generate_attn(dr_mas, x_mask, y_mask) + attn = generate_attention(dr_mas, x_mask, y_mask) elif phase == 1: # train decoder o_en, o_en_dp, x_mask, g = self._forward_encoder(x, x_lengths, g) diff --git a/TTS/tts/models/forward_tts.py b/TTS/tts/models/forward_tts.py index d449e580da..d09e3ea91b 100644 --- a/TTS/tts/models/forward_tts.py +++ b/TTS/tts/models/forward_tts.py @@ -14,7 +14,7 @@ from TTS.tts.layers.generic.pos_encoding import PositionalEncoding from TTS.tts.layers.glow_tts.duration_predictor import DurationPredictor from TTS.tts.models.base_tts import BaseTTS -from TTS.tts.utils.helpers import average_over_durations, generate_path, sequence_mask +from TTS.tts.utils.helpers import average_over_durations, expand_encoder_outputs, generate_attention, sequence_mask from TTS.tts.utils.speakers import SpeakerManager from TTS.tts.utils.text.tokenizer import TTSTokenizer from TTS.tts.utils.visual import plot_alignment, plot_avg_energy, plot_avg_pitch, plot_spectrogram @@ -310,49 +310,6 @@ def init_multispeaker(self, config: Coqpit): self.emb_g = nn.Embedding(self.num_speakers, self.args.hidden_channels) nn.init.uniform_(self.emb_g.weight, -0.1, 0.1) - @staticmethod - def generate_attn(dr, x_mask, y_mask=None): - """Generate an attention mask from the durations. 
- - Shapes - - dr: :math:`(B, T_{en})` - - x_mask: :math:`(B, T_{en})` - - y_mask: :math:`(B, T_{de})` - """ - # compute decode mask from the durations - if y_mask is None: - y_lengths = dr.sum(1).long() - y_lengths[y_lengths < 1] = 1 - y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).to(dr.dtype) - attn_mask = torch.unsqueeze(x_mask, -1) * torch.unsqueeze(y_mask, 2) - attn = generate_path(dr, attn_mask.squeeze(1)).to(dr.dtype) - return attn - - def expand_encoder_outputs(self, en, dr, x_mask, y_mask): - """Generate attention alignment map from durations and - expand encoder outputs - - Shapes: - - en: :math:`(B, D_{en}, T_{en})` - - dr: :math:`(B, T_{en})` - - x_mask: :math:`(B, T_{en})` - - y_mask: :math:`(B, T_{de})` - - Examples:: - - encoder output: [a,b,c,d] - durations: [1, 3, 2, 1] - - expanded: [a, b, b, b, c, c, d] - attention map: [[0, 0, 0, 0, 0, 0, 1], - [0, 0, 0, 0, 1, 1, 0], - [0, 1, 1, 1, 0, 0, 0], - [1, 0, 0, 0, 0, 0, 0]] - """ - attn = self.generate_attn(dr, x_mask, y_mask) - o_en_ex = torch.matmul(attn.squeeze(1).transpose(1, 2).to(en.dtype), en.transpose(1, 2)).transpose(1, 2) - return o_en_ex, attn - def format_durations(self, o_dr_log, x_mask): """Format predicted durations. 1. Convert to linear scale from log scale @@ -443,9 +400,8 @@ def _forward_decoder( Returns: Tuple[torch.FloatTensor, torch.FloatTensor]: Decoder output, attention map from durations. """ - y_mask = torch.unsqueeze(sequence_mask(y_lengths, None), 1).to(o_en.dtype) # expand o_en with durations - o_en_ex, attn = self.expand_encoder_outputs(o_en, dr, x_mask, y_mask) + o_en_ex, attn, y_mask = expand_encoder_outputs(o_en, dr, x_mask, y_lengths) # positional encoding if hasattr(self, "pos_encoder"): o_en_ex = self.pos_encoder(o_en_ex, y_mask) @@ -624,7 +580,7 @@ def forward( o_dr_log = self.duration_predictor(o_en, x_mask) o_dr = torch.clamp(torch.exp(o_dr_log) - 1, 0, self.max_duration) # generate attn mask from predicted durations - o_attn = self.generate_attn(o_dr.squeeze(1), x_mask) + o_attn = generate_attention(o_dr.squeeze(1), x_mask) # aligner o_alignment_dur = None alignment_soft = None diff --git a/TTS/tts/utils/helpers.py b/TTS/tts/utils/helpers.py index d1722501f7..ff10f751f2 100644 --- a/TTS/tts/utils/helpers.py +++ b/TTS/tts/utils/helpers.py @@ -1,3 +1,5 @@ +from typing import Optional + import numpy as np import torch from scipy.stats import betabinom @@ -33,7 +35,7 @@ def inverse_transform(self, X): # from https://gist.github.com/jihunchoi/f1434a77df9db1bb337417854b398df1 -def sequence_mask(sequence_length, max_len=None): +def sequence_mask(sequence_length: torch.Tensor, max_len: Optional[int] = None) -> torch.Tensor: """Create a sequence mask for filtering padding in a sequence tensor. Args: @@ -44,7 +46,7 @@ def sequence_mask(sequence_length, max_len=None): - mask: :math:`[B, T_max]` """ if max_len is None: - max_len = sequence_length.max() + max_len = int(sequence_length.max()) seq_range = torch.arange(max_len, dtype=sequence_length.dtype, device=sequence_length.device) # B x T_max return seq_range.unsqueeze(0) < sequence_length.unsqueeze(1) @@ -143,22 +145,75 @@ def convert_pad_shape(pad_shape: list[list]) -> list: return [item for sublist in l for item in sublist] -def generate_path(duration, mask): - """ +def generate_path(duration: torch.Tensor, mask: torch.Tensor) -> torch.Tensor: + """Generate alignment path based on the given segment durations. 
+ Shapes: - duration: :math:`[B, T_en]` - mask: :math:'[B, T_en, T_de]` - path: :math:`[B, T_en, T_de]` """ b, t_x, t_y = mask.shape - cum_duration = torch.cumsum(duration, 1) + cum_duration = torch.cumsum(duration, dim=1) cum_duration_flat = cum_duration.view(b * t_x) path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype) path = path.view(b, t_x, t_y) path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1] - path = path * mask - return path + return path * mask + + +def generate_attention( + duration: torch.Tensor, x_mask: torch.Tensor, y_mask: Optional[torch.Tensor] = None +) -> torch.Tensor: + """Generate an attention map from the linear scale durations. + + Args: + duration (Tensor): Linear scale durations. + x_mask (Tensor): Mask for the input (character) sequence. + y_mask (Tensor): Mask for the output (spectrogram) sequence. Compute it from the predicted durations + if None. Defaults to None. + + Shapes + - duration: :math:`(B, T_{en})` + - x_mask: :math:`(B, T_{en})` + - y_mask: :math:`(B, T_{de})` + """ + # compute decode mask from the durations + if y_mask is None: + y_lengths = duration.sum(dim=1).long() + y_lengths[y_lengths < 1] = 1 + y_mask = sequence_mask(y_lengths).unsqueeze(1).to(duration.dtype) + attn_mask = x_mask.unsqueeze(-1) * y_mask.unsqueeze(2) + return generate_path(duration, attn_mask.squeeze(1)).to(duration.dtype) + + +def expand_encoder_outputs( + x: torch.Tensor, duration: torch.Tensor, x_mask: torch.Tensor, y_lengths: torch.Tensor +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """Generate attention alignment map from durations and expand encoder outputs. + + Shapes: + - x: Encoder output :math:`(B, D_{en}, T_{en})` + - duration: :math:`(B, T_{en})` + - x_mask: :math:`(B, T_{en})` + - y_lengths: :math:`(B)` + + Examples:: + + encoder output: [a,b,c,d] + durations: [1, 3, 2, 1] + + expanded: [a, b, b, b, c, c, d] + attention map: [[0, 0, 0, 0, 0, 0, 1], + [0, 0, 0, 0, 1, 1, 0], + [0, 1, 1, 1, 0, 0, 0], + [1, 0, 0, 0, 0, 0, 0]] + """ + y_mask = sequence_mask(y_lengths).unsqueeze(1).to(x.dtype) + attn = generate_attention(duration, x_mask, y_mask) + x_expanded = torch.einsum("kmn, kjm -> kjn", [attn.float(), x]) + return x_expanded, attn, y_mask def beta_binomial_prior_distribution(phoneme_count, mel_count, scaling_factor=1.0): diff --git a/tests/aux_tests/test_helpers.py b/tests/aux_tests/test_helpers.py index d07efa3620..6781cbc5d4 100644 --- a/tests/aux_tests/test_helpers.py +++ b/tests/aux_tests/test_helpers.py @@ -1,6 +1,14 @@ import torch as T -from TTS.tts.utils.helpers import average_over_durations, generate_path, rand_segments, segment, sequence_mask +from TTS.tts.utils.helpers import ( + average_over_durations, + expand_encoder_outputs, + generate_attention, + generate_path, + rand_segments, + segment, + sequence_mask, +) def test_average_over_durations(): # pylint: disable=no-self-use @@ -86,3 +94,24 @@ def test_generate_path(): assert all(path[b, t, :current_idx] == 0.0) assert all(path[b, t, current_idx + durations[b, t].item() :] == 0.0) current_idx += durations[b, t].item() + + assert T.all(path == generate_attention(durations, x_mask, y_mask)) + assert T.all(path == generate_attention(durations, x_mask)) + + +def test_expand_encoder_outputs(): + inputs = T.rand(2, 5, 57) + durations = T.randint(1, 4, (2, 57)) + + x_mask = T.ones(2, 1, 57) + y_lengths = T.ones(2) * durations.sum(1).max() + + expanded, _, _ = expand_encoder_outputs(inputs, durations, x_mask, y_lengths) + + for b in range(durations.shape[0]): + 
index = 0 + for idx, dur in enumerate(durations[b]): + idx_expanded = expanded[b, :, index : index + dur.item()] + diff = (idx_expanded - inputs[b, :, idx].repeat(int(dur)).view(idx_expanded.shape)).sum() + assert abs(diff) < 1e-6, diff + index += dur diff --git a/tests/tts_tests2/test_forward_tts.py b/tests/tts_tests2/test_forward_tts.py index cec0f211c8..13a2c270af 100644 --- a/tests/tts_tests2/test_forward_tts.py +++ b/tests/tts_tests2/test_forward_tts.py @@ -6,29 +6,7 @@ # pylint: disable=unused-variable -def expand_encoder_outputs_test(): - model = ForwardTTS(ForwardTTSArgs(num_chars=10)) - - inputs = T.rand(2, 5, 57) - durations = T.randint(1, 4, (2, 57)) - - x_mask = T.ones(2, 1, 57) - y_mask = T.ones(2, 1, durations.sum(1).max()) - - expanded, _ = model.expand_encoder_outputs(inputs, durations, x_mask, y_mask) - - for b in range(durations.shape[0]): - index = 0 - for idx, dur in enumerate(durations[b]): - diff = ( - expanded[b, :, index : index + dur.item()] - - inputs[b, :, idx].repeat(dur.item()).view(expanded[b, :, index : index + dur.item()].shape) - ).sum() - assert abs(diff) < 1e-6, diff - index += dur - - -def model_input_output_test(): +def test_model_input_output(): """Assert the output shapes of the model in different modes""" # VANILLA MODEL From 170d3dae92641aacf99827358e3fadbc3b7436ea Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sun, 24 Nov 2024 19:36:45 +0100 Subject: [PATCH 210/255] refactor: remove duplicate to_camel --- TTS/vc/models/__init__.py | 5 ----- TTS/vocoder/models/__init__.py | 7 ++----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/TTS/vc/models/__init__.py b/TTS/vc/models/__init__.py index a498b292b7..a9807d7006 100644 --- a/TTS/vc/models/__init__.py +++ b/TTS/vc/models/__init__.py @@ -6,11 +6,6 @@ logger = logging.getLogger(__name__) -def to_camel(text): - text = text.capitalize() - return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), text) - - def setup_model(config: "Coqpit", samples: Union[List[List], List[Dict]] = None) -> "BaseVC": logger.info("Using model: %s", config.model) # fetch the right model implementation. 
diff --git a/TTS/vocoder/models/__init__.py b/TTS/vocoder/models/__init__.py index 7a1716f16d..b6a1850484 100644 --- a/TTS/vocoder/models/__init__.py +++ b/TTS/vocoder/models/__init__.py @@ -4,12 +4,9 @@ from coqpit import Coqpit -logger = logging.getLogger(__name__) - +from TTS.utils.generic_utils import to_camel -def to_camel(text): - text = text.capitalize() - return re.sub(r"(?!^)_([a-zA-Z])", lambda m: m.group(1).upper(), text) +logger = logging.getLogger(__name__) def setup_model(config: Coqpit): From 63625e79af1e13928474fdf964a3322273542939 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 27 Nov 2024 16:12:38 +0100 Subject: [PATCH 211/255] refactor: import get_last_checkpoint from trainer.io --- TTS/bin/compute_attention_masks.py | 2 +- TTS/encoder/utils/training.py | 4 ++-- tests/tts_tests/test_neuralhmm_tts_train.py | 2 +- tests/tts_tests/test_overflow_train.py | 2 +- tests/tts_tests/test_speedy_speech_train.py | 2 +- tests/tts_tests/test_tacotron2_d-vectors_train.py | 2 +- tests/tts_tests/test_tacotron2_speaker_emb_train.py | 2 +- tests/tts_tests/test_tacotron2_train.py | 2 +- tests/tts_tests/test_tacotron_train.py | 2 +- tests/tts_tests/test_vits_multilingual_speaker_emb_train.py | 2 +- tests/tts_tests/test_vits_multilingual_train-d_vectors.py | 2 +- tests/tts_tests/test_vits_speaker_emb_train.py | 2 +- tests/tts_tests/test_vits_train.py | 2 +- tests/tts_tests2/test_align_tts_train.py | 2 +- tests/tts_tests2/test_delightful_tts_d-vectors_train.py | 2 +- tests/tts_tests2/test_delightful_tts_emb_spk.py | 2 +- tests/tts_tests2/test_delightful_tts_train.py | 2 +- tests/tts_tests2/test_fast_pitch_speaker_emb_train.py | 2 +- tests/tts_tests2/test_fast_pitch_train.py | 2 +- tests/tts_tests2/test_fastspeech_2_speaker_emb_train.py | 2 +- tests/tts_tests2/test_fastspeech_2_train.py | 2 +- tests/tts_tests2/test_glow_tts_d-vectors_train.py | 2 +- tests/tts_tests2/test_glow_tts_speaker_emb_train.py | 2 +- tests/tts_tests2/test_glow_tts_train.py | 2 +- 24 files changed, 25 insertions(+), 25 deletions(-) diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py index 127199186b..535182d214 100644 --- a/TTS/bin/compute_attention_masks.py +++ b/TTS/bin/compute_attention_masks.py @@ -80,7 +80,7 @@ num_chars = len(phonemes) if C.use_phonemes else len(symbols) # TODO: handle multi-speaker model = setup_model(C) - model, _ = load_checkpoint(model, args.model_path, args.use_cuda, True) + model, _ = load_checkpoint(model, args.model_path, use_cuda=args.use_cuda, eval=True) # data loader preprocessor = importlib.import_module("TTS.tts.datasets.formatters") diff --git a/TTS/encoder/utils/training.py b/TTS/encoder/utils/training.py index cc3a78b084..48629c7a57 100644 --- a/TTS/encoder/utils/training.py +++ b/TTS/encoder/utils/training.py @@ -2,9 +2,9 @@ from dataclasses import dataclass, field from coqpit import Coqpit -from trainer import TrainerArgs, get_last_checkpoint +from trainer import TrainerArgs from trainer.generic_utils import get_experiment_folder_path, get_git_branch -from trainer.io import copy_model_files +from trainer.io import copy_model_files, get_last_checkpoint from trainer.logging import logger_factory from trainer.logging.console_logger import ConsoleLogger diff --git a/tests/tts_tests/test_neuralhmm_tts_train.py b/tests/tts_tests/test_neuralhmm_tts_train.py index 25d9aa8148..4789d53d9e 100644 --- a/tests/tts_tests/test_neuralhmm_tts_train.py +++ b/tests/tts_tests/test_neuralhmm_tts_train.py @@ -4,7 +4,7 @@ import shutil import torch -from trainer 
import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.neuralhmm_tts_config import NeuralhmmTTSConfig diff --git a/tests/tts_tests/test_overflow_train.py b/tests/tts_tests/test_overflow_train.py index 86fa60af72..d86bde6854 100644 --- a/tests/tts_tests/test_overflow_train.py +++ b/tests/tts_tests/test_overflow_train.py @@ -4,7 +4,7 @@ import shutil import torch -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.overflow_config import OverflowConfig diff --git a/tests/tts_tests/test_speedy_speech_train.py b/tests/tts_tests/test_speedy_speech_train.py index 530781ef88..2aac7f101d 100644 --- a/tests/tts_tests/test_speedy_speech_train.py +++ b/tests/tts_tests/test_speedy_speech_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.speedy_speech_config import SpeedySpeechConfig diff --git a/tests/tts_tests/test_tacotron2_d-vectors_train.py b/tests/tts_tests/test_tacotron2_d-vectors_train.py index 99ba4349c4..d2d1d5c35f 100644 --- a/tests/tts_tests/test_tacotron2_d-vectors_train.py +++ b/tests/tts_tests/test_tacotron2_d-vectors_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.tacotron2_config import Tacotron2Config diff --git a/tests/tts_tests/test_tacotron2_speaker_emb_train.py b/tests/tts_tests/test_tacotron2_speaker_emb_train.py index 5f1bc3fd50..83a07d1a6c 100644 --- a/tests/tts_tests/test_tacotron2_speaker_emb_train.py +++ b/tests/tts_tests/test_tacotron2_speaker_emb_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.tacotron2_config import Tacotron2Config diff --git a/tests/tts_tests/test_tacotron2_train.py b/tests/tts_tests/test_tacotron2_train.py index 40107070e1..df0e934d8e 100644 --- a/tests/tts_tests/test_tacotron2_train.py +++ b/tests/tts_tests/test_tacotron2_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.tacotron2_config import Tacotron2Config diff --git a/tests/tts_tests/test_tacotron_train.py b/tests/tts_tests/test_tacotron_train.py index f7751931ae..17f1fd46a6 100644 --- a/tests/tts_tests/test_tacotron_train.py +++ b/tests/tts_tests/test_tacotron_train.py @@ -2,7 +2,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.tacotron_config import TacotronConfig diff --git a/tests/tts_tests/test_vits_multilingual_speaker_emb_train.py b/tests/tts_tests/test_vits_multilingual_speaker_emb_train.py index 71597ef32f..09df7d29f2 100644 --- a/tests/tts_tests/test_vits_multilingual_speaker_emb_train.py +++ b/tests/tts_tests/test_vits_multilingual_speaker_emb_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import 
get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.config.shared_configs import BaseDatasetConfig diff --git a/tests/tts_tests/test_vits_multilingual_train-d_vectors.py b/tests/tts_tests/test_vits_multilingual_train-d_vectors.py index fd58db534a..7ae09c0e5c 100644 --- a/tests/tts_tests/test_vits_multilingual_train-d_vectors.py +++ b/tests/tts_tests/test_vits_multilingual_train-d_vectors.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.config.shared_configs import BaseDatasetConfig diff --git a/tests/tts_tests/test_vits_speaker_emb_train.py b/tests/tts_tests/test_vits_speaker_emb_train.py index b7fe197cfe..69fae21f8d 100644 --- a/tests/tts_tests/test_vits_speaker_emb_train.py +++ b/tests/tts_tests/test_vits_speaker_emb_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.vits_config import VitsConfig diff --git a/tests/tts_tests/test_vits_train.py b/tests/tts_tests/test_vits_train.py index ea5dc02405..78f42d154b 100644 --- a/tests/tts_tests/test_vits_train.py +++ b/tests/tts_tests/test_vits_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.vits_config import VitsConfig diff --git a/tests/tts_tests2/test_align_tts_train.py b/tests/tts_tests2/test_align_tts_train.py index 9b0b730df4..91c3c35bc6 100644 --- a/tests/tts_tests2/test_align_tts_train.py +++ b/tests/tts_tests2/test_align_tts_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.align_tts_config import AlignTTSConfig diff --git a/tests/tts_tests2/test_delightful_tts_d-vectors_train.py b/tests/tts_tests2/test_delightful_tts_d-vectors_train.py index 8fc4ea7e9b..1e5cd49f73 100644 --- a/tests/tts_tests2/test_delightful_tts_d-vectors_train.py +++ b/tests/tts_tests2/test_delightful_tts_d-vectors_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.delightful_tts_config import DelightfulTtsAudioConfig, DelightfulTTSConfig diff --git a/tests/tts_tests2/test_delightful_tts_emb_spk.py b/tests/tts_tests2/test_delightful_tts_emb_spk.py index 6fb70c5f61..9bbf7a55ea 100644 --- a/tests/tts_tests2/test_delightful_tts_emb_spk.py +++ b/tests/tts_tests2/test_delightful_tts_emb_spk.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.delightful_tts_config import DelightfulTtsAudioConfig, DelightfulTTSConfig diff --git a/tests/tts_tests2/test_delightful_tts_train.py b/tests/tts_tests2/test_delightful_tts_train.py index a917d77657..3e6fbd2e86 100644 --- a/tests/tts_tests2/test_delightful_tts_train.py +++ b/tests/tts_tests2/test_delightful_tts_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import 
get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.config.shared_configs import BaseAudioConfig diff --git a/tests/tts_tests2/test_fast_pitch_speaker_emb_train.py b/tests/tts_tests2/test_fast_pitch_speaker_emb_train.py index 7f79bfcab2..e6bc9f9feb 100644 --- a/tests/tts_tests2/test_fast_pitch_speaker_emb_train.py +++ b/tests/tts_tests2/test_fast_pitch_speaker_emb_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.config.shared_configs import BaseAudioConfig diff --git a/tests/tts_tests2/test_fast_pitch_train.py b/tests/tts_tests2/test_fast_pitch_train.py index a525715b53..fe87c8b600 100644 --- a/tests/tts_tests2/test_fast_pitch_train.py +++ b/tests/tts_tests2/test_fast_pitch_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.config.shared_configs import BaseAudioConfig diff --git a/tests/tts_tests2/test_fastspeech_2_speaker_emb_train.py b/tests/tts_tests2/test_fastspeech_2_speaker_emb_train.py index 35bda597d5..735d2fc4c6 100644 --- a/tests/tts_tests2/test_fastspeech_2_speaker_emb_train.py +++ b/tests/tts_tests2/test_fastspeech_2_speaker_emb_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.config.shared_configs import BaseAudioConfig diff --git a/tests/tts_tests2/test_fastspeech_2_train.py b/tests/tts_tests2/test_fastspeech_2_train.py index dd4b07d240..07fc5a1a2c 100644 --- a/tests/tts_tests2/test_fastspeech_2_train.py +++ b/tests/tts_tests2/test_fastspeech_2_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.config.shared_configs import BaseAudioConfig diff --git a/tests/tts_tests2/test_glow_tts_d-vectors_train.py b/tests/tts_tests2/test_glow_tts_d-vectors_train.py index f1cfd4368f..8236607c25 100644 --- a/tests/tts_tests2/test_glow_tts_d-vectors_train.py +++ b/tests/tts_tests2/test_glow_tts_d-vectors_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.glow_tts_config import GlowTTSConfig diff --git a/tests/tts_tests2/test_glow_tts_speaker_emb_train.py b/tests/tts_tests2/test_glow_tts_speaker_emb_train.py index b1eb6237a4..4a8bd0658d 100644 --- a/tests/tts_tests2/test_glow_tts_speaker_emb_train.py +++ b/tests/tts_tests2/test_glow_tts_speaker_emb_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, get_tests_output_path, run_cli from TTS.tts.configs.glow_tts_config import GlowTTSConfig diff --git a/tests/tts_tests2/test_glow_tts_train.py b/tests/tts_tests2/test_glow_tts_train.py index 0a8e226b65..1d7f913575 100644 --- a/tests/tts_tests2/test_glow_tts_train.py +++ b/tests/tts_tests2/test_glow_tts_train.py @@ -3,7 +3,7 @@ import os import shutil -from trainer import get_last_checkpoint +from trainer.io import get_last_checkpoint from tests import get_device_id, 
get_tests_output_path, run_cli from TTS.tts.configs.glow_tts_config import GlowTTSConfig From ce202532cfe74e2e297e4109a80a3b125f54bd49 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 2 Dec 2024 16:54:11 +0100 Subject: [PATCH 212/255] fix(xtts): clearer error message when file given to checkpoint_dir --- TTS/tts/models/xtts.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index 35de91e359..d780e2b323 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -2,6 +2,7 @@ import os from dataclasses import dataclass from pathlib import Path +from typing import Optional import librosa import torch @@ -10,6 +11,7 @@ from coqpit import Coqpit from trainer.io import load_fsspec +from TTS.tts.configs.xtts_config import XttsConfig from TTS.tts.layers.xtts.gpt import GPT from TTS.tts.layers.xtts.hifigan_decoder import HifiDecoder from TTS.tts.layers.xtts.stream_generator import init_stream_support @@ -719,14 +721,14 @@ def get_compatible_checkpoint_state_dict(self, model_path): def load_checkpoint( self, - config, - checkpoint_dir=None, - checkpoint_path=None, - vocab_path=None, - eval=True, - strict=True, - use_deepspeed=False, - speaker_file_path=None, + config: XttsConfig, + checkpoint_dir: Optional[str] = None, + checkpoint_path: Optional[str] = None, + vocab_path: Optional[str] = None, + eval: bool = True, + strict: bool = True, + use_deepspeed: bool = False, + speaker_file_path: Optional[str] = None, ): """ Loads a checkpoint from disk and initializes the model's state and tokenizer. @@ -742,7 +744,9 @@ def load_checkpoint( Returns: None """ - + if checkpoint_dir is not None and Path(checkpoint_dir).is_file(): + msg = f"You passed a file to `checkpoint_dir=`. Use `checkpoint_path={checkpoint_dir}` instead." 
+ raise ValueError(msg) model_path = checkpoint_path or os.path.join(checkpoint_dir, "model.pth") if vocab_path is None: if checkpoint_dir is not None and (Path(checkpoint_dir) / "vocab.json").is_file(): From 6de98ff480f8f921dc8cb346477d4647702cc381 Mon Sep 17 00:00:00 2001 From: akulkarni Date: Thu, 13 Jun 2024 16:27:50 +0200 Subject: [PATCH 213/255] feat(openvoice): initial integration --- TTS/vc/modules/openvoice/__init__.py | 0 TTS/vc/modules/openvoice/attentions.py | 423 +++++++++++++++ TTS/vc/modules/openvoice/commons.py | 151 ++++++ TTS/vc/modules/openvoice/config.json | 57 ++ TTS/vc/modules/openvoice/models.py | 480 +++++++++++++++++ TTS/vc/modules/openvoice/modules.py | 588 +++++++++++++++++++++ TTS/vc/modules/openvoice/standalone_api.py | 342 ++++++++++++ TTS/vc/modules/openvoice/transforms.py | 203 +++++++ 8 files changed, 2244 insertions(+) create mode 100644 TTS/vc/modules/openvoice/__init__.py create mode 100644 TTS/vc/modules/openvoice/attentions.py create mode 100644 TTS/vc/modules/openvoice/commons.py create mode 100644 TTS/vc/modules/openvoice/config.json create mode 100644 TTS/vc/modules/openvoice/models.py create mode 100644 TTS/vc/modules/openvoice/modules.py create mode 100644 TTS/vc/modules/openvoice/standalone_api.py create mode 100644 TTS/vc/modules/openvoice/transforms.py diff --git a/TTS/vc/modules/openvoice/__init__.py b/TTS/vc/modules/openvoice/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/TTS/vc/modules/openvoice/attentions.py b/TTS/vc/modules/openvoice/attentions.py new file mode 100644 index 0000000000..73c5554c98 --- /dev/null +++ b/TTS/vc/modules/openvoice/attentions.py @@ -0,0 +1,423 @@ +import math + +import torch +from torch import nn +from torch.nn import functional as F + +from TTS.vc.modules.openvoice import commons + + +class LayerNorm(nn.Module): + def __init__(self, channels, eps=1e-5): + super().__init__() + self.channels = channels + self.eps = eps + + self.gamma = nn.Parameter(torch.ones(channels)) + self.beta = nn.Parameter(torch.zeros(channels)) + + def forward(self, x): + x = x.transpose(1, -1) + x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps) + return x.transpose(1, -1) + + +@torch.jit.script +def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): + n_channels_int = n_channels[0] + in_act = input_a + input_b + t_act = torch.tanh(in_act[:, :n_channels_int, :]) + s_act = torch.sigmoid(in_act[:, n_channels_int:, :]) + acts = t_act * s_act + return acts + + +class Encoder(nn.Module): + def __init__( + self, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size=1, + p_dropout=0.0, + window_size=4, + isflow=True, + **kwargs, + ): + super().__init__() + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.window_size = window_size + # if isflow: + # cond_layer = torch.nn.Conv1d(256, 2*hidden_channels*n_layers, 1) + # self.cond_pre = torch.nn.Conv1d(hidden_channels, 2*hidden_channels, 1) + # self.cond_layer = weight_norm(cond_layer, name='weight') + # self.gin_channels = 256 + self.cond_layer_idx = self.n_layers + if "gin_channels" in kwargs: + self.gin_channels = kwargs["gin_channels"] + if self.gin_channels != 0: + self.spk_emb_linear = nn.Linear(self.gin_channels, self.hidden_channels) + # vits2 says 3rd block, so idx is 2 by default + self.cond_layer_idx = kwargs["cond_layer_idx"] if "cond_layer_idx" in kwargs else 
2 + # logging.debug(self.gin_channels, self.cond_layer_idx) + assert self.cond_layer_idx < self.n_layers, "cond_layer_idx should be less than n_layers" + self.drop = nn.Dropout(p_dropout) + self.attn_layers = nn.ModuleList() + self.norm_layers_1 = nn.ModuleList() + self.ffn_layers = nn.ModuleList() + self.norm_layers_2 = nn.ModuleList() + + for i in range(self.n_layers): + self.attn_layers.append( + MultiHeadAttention( + hidden_channels, + hidden_channels, + n_heads, + p_dropout=p_dropout, + window_size=window_size, + ) + ) + self.norm_layers_1.append(LayerNorm(hidden_channels)) + self.ffn_layers.append( + FFN( + hidden_channels, + hidden_channels, + filter_channels, + kernel_size, + p_dropout=p_dropout, + ) + ) + self.norm_layers_2.append(LayerNorm(hidden_channels)) + + def forward(self, x, x_mask, g=None): + attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1) + x = x * x_mask + for i in range(self.n_layers): + if i == self.cond_layer_idx and g is not None: + g = self.spk_emb_linear(g.transpose(1, 2)) + g = g.transpose(1, 2) + x = x + g + x = x * x_mask + y = self.attn_layers[i](x, x, attn_mask) + y = self.drop(y) + x = self.norm_layers_1[i](x + y) + + y = self.ffn_layers[i](x, x_mask) + y = self.drop(y) + x = self.norm_layers_2[i](x + y) + x = x * x_mask + return x + + +class Decoder(nn.Module): + def __init__( + self, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size=1, + p_dropout=0.0, + proximal_bias=False, + proximal_init=True, + **kwargs, + ): + super().__init__() + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.proximal_bias = proximal_bias + self.proximal_init = proximal_init + + self.drop = nn.Dropout(p_dropout) + self.self_attn_layers = nn.ModuleList() + self.norm_layers_0 = nn.ModuleList() + self.encdec_attn_layers = nn.ModuleList() + self.norm_layers_1 = nn.ModuleList() + self.ffn_layers = nn.ModuleList() + self.norm_layers_2 = nn.ModuleList() + for i in range(self.n_layers): + self.self_attn_layers.append( + MultiHeadAttention( + hidden_channels, + hidden_channels, + n_heads, + p_dropout=p_dropout, + proximal_bias=proximal_bias, + proximal_init=proximal_init, + ) + ) + self.norm_layers_0.append(LayerNorm(hidden_channels)) + self.encdec_attn_layers.append( + MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout) + ) + self.norm_layers_1.append(LayerNorm(hidden_channels)) + self.ffn_layers.append( + FFN( + hidden_channels, + hidden_channels, + filter_channels, + kernel_size, + p_dropout=p_dropout, + causal=True, + ) + ) + self.norm_layers_2.append(LayerNorm(hidden_channels)) + + def forward(self, x, x_mask, h, h_mask): + """ + x: decoder input + h: encoder output + """ + self_attn_mask = commons.subsequent_mask(x_mask.size(2)).to(device=x.device, dtype=x.dtype) + encdec_attn_mask = h_mask.unsqueeze(2) * x_mask.unsqueeze(-1) + x = x * x_mask + for i in range(self.n_layers): + y = self.self_attn_layers[i](x, x, self_attn_mask) + y = self.drop(y) + x = self.norm_layers_0[i](x + y) + + y = self.encdec_attn_layers[i](x, h, encdec_attn_mask) + y = self.drop(y) + x = self.norm_layers_1[i](x + y) + + y = self.ffn_layers[i](x, x_mask) + y = self.drop(y) + x = self.norm_layers_2[i](x + y) + x = x * x_mask + return x + + +class MultiHeadAttention(nn.Module): + def __init__( + self, + channels, + out_channels, + n_heads, + p_dropout=0.0, + window_size=None, + heads_share=True, + 
block_length=None, + proximal_bias=False, + proximal_init=False, + ): + super().__init__() + assert channels % n_heads == 0 + + self.channels = channels + self.out_channels = out_channels + self.n_heads = n_heads + self.p_dropout = p_dropout + self.window_size = window_size + self.heads_share = heads_share + self.block_length = block_length + self.proximal_bias = proximal_bias + self.proximal_init = proximal_init + self.attn = None + + self.k_channels = channels // n_heads + self.conv_q = nn.Conv1d(channels, channels, 1) + self.conv_k = nn.Conv1d(channels, channels, 1) + self.conv_v = nn.Conv1d(channels, channels, 1) + self.conv_o = nn.Conv1d(channels, out_channels, 1) + self.drop = nn.Dropout(p_dropout) + + if window_size is not None: + n_heads_rel = 1 if heads_share else n_heads + rel_stddev = self.k_channels**-0.5 + self.emb_rel_k = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev) + self.emb_rel_v = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev) + + nn.init.xavier_uniform_(self.conv_q.weight) + nn.init.xavier_uniform_(self.conv_k.weight) + nn.init.xavier_uniform_(self.conv_v.weight) + if proximal_init: + with torch.no_grad(): + self.conv_k.weight.copy_(self.conv_q.weight) + self.conv_k.bias.copy_(self.conv_q.bias) + + def forward(self, x, c, attn_mask=None): + q = self.conv_q(x) + k = self.conv_k(c) + v = self.conv_v(c) + + x, self.attn = self.attention(q, k, v, mask=attn_mask) + + x = self.conv_o(x) + return x + + def attention(self, query, key, value, mask=None): + # reshape [b, d, t] -> [b, n_h, t, d_k] + b, d, t_s, t_t = (*key.size(), query.size(2)) + query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3) + key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3) + value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3) + + scores = torch.matmul(query / math.sqrt(self.k_channels), key.transpose(-2, -1)) + if self.window_size is not None: + assert t_s == t_t, "Relative attention is only available for self-attention." + key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s) + rel_logits = self._matmul_with_relative_keys(query / math.sqrt(self.k_channels), key_relative_embeddings) + scores_local = self._relative_position_to_absolute_position(rel_logits) + scores = scores + scores_local + if self.proximal_bias: + assert t_s == t_t, "Proximal bias is only available for self-attention." + scores = scores + self._attention_bias_proximal(t_s).to(device=scores.device, dtype=scores.dtype) + if mask is not None: + scores = scores.masked_fill(mask == 0, -1e4) + if self.block_length is not None: + assert t_s == t_t, "Local attention is only available for self-attention." 
+ block_mask = torch.ones_like(scores).triu(-self.block_length).tril(self.block_length) + scores = scores.masked_fill(block_mask == 0, -1e4) + p_attn = F.softmax(scores, dim=-1) # [b, n_h, t_t, t_s] + p_attn = self.drop(p_attn) + output = torch.matmul(p_attn, value) + if self.window_size is not None: + relative_weights = self._absolute_position_to_relative_position(p_attn) + value_relative_embeddings = self._get_relative_embeddings(self.emb_rel_v, t_s) + output = output + self._matmul_with_relative_values(relative_weights, value_relative_embeddings) + output = output.transpose(2, 3).contiguous().view(b, d, t_t) # [b, n_h, t_t, d_k] -> [b, d, t_t] + return output, p_attn + + def _matmul_with_relative_values(self, x, y): + """ + x: [b, h, l, m] + y: [h or 1, m, d] + ret: [b, h, l, d] + """ + ret = torch.matmul(x, y.unsqueeze(0)) + return ret + + def _matmul_with_relative_keys(self, x, y): + """ + x: [b, h, l, d] + y: [h or 1, m, d] + ret: [b, h, l, m] + """ + ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1)) + return ret + + def _get_relative_embeddings(self, relative_embeddings, length): + 2 * self.window_size + 1 + # Pad first before slice to avoid using cond ops. + pad_length = max(length - (self.window_size + 1), 0) + slice_start_position = max((self.window_size + 1) - length, 0) + slice_end_position = slice_start_position + 2 * length - 1 + if pad_length > 0: + padded_relative_embeddings = F.pad( + relative_embeddings, + commons.convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]), + ) + else: + padded_relative_embeddings = relative_embeddings + used_relative_embeddings = padded_relative_embeddings[:, slice_start_position:slice_end_position] + return used_relative_embeddings + + def _relative_position_to_absolute_position(self, x): + """ + x: [b, h, l, 2*l-1] + ret: [b, h, l, l] + """ + batch, heads, length, _ = x.size() + # Concat columns of pad to shift from relative to absolute indexing. + x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]])) + + # Concat extra elements so to add up to shape (len+1, 2*len-1). + x_flat = x.view([batch, heads, length * 2 * length]) + x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [0, length - 1]])) + + # Reshape and slice out the padded elements. + x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[:, :, :length, length - 1 :] + return x_final + + def _absolute_position_to_relative_position(self, x): + """ + x: [b, h, l, l] + ret: [b, h, l, 2*l-1] + """ + batch, heads, length, _ = x.size() + # pad along column + x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]])) + x_flat = x.view([batch, heads, length**2 + length * (length - 1)]) + # add 0's in the beginning that will skew the elements after reshape + x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [length, 0]])) + x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:] + return x_final + + def _attention_bias_proximal(self, length): + """Bias for self-attention to encourage attention to close positions. + Args: + length: an integer scalar. 
+ Returns: + a Tensor with shape [1, 1, length, length] + """ + r = torch.arange(length, dtype=torch.float32) + diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1) + return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0) + + +class FFN(nn.Module): + def __init__( + self, + in_channels, + out_channels, + filter_channels, + kernel_size, + p_dropout=0.0, + activation=None, + causal=False, + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.filter_channels = filter_channels + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.activation = activation + self.causal = causal + + if causal: + self.padding = self._causal_padding + else: + self.padding = self._same_padding + + self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size) + self.conv_2 = nn.Conv1d(filter_channels, out_channels, kernel_size) + self.drop = nn.Dropout(p_dropout) + + def forward(self, x, x_mask): + x = self.conv_1(self.padding(x * x_mask)) + if self.activation == "gelu": + x = x * torch.sigmoid(1.702 * x) + else: + x = torch.relu(x) + x = self.drop(x) + x = self.conv_2(self.padding(x * x_mask)) + return x * x_mask + + def _causal_padding(self, x): + if self.kernel_size == 1: + return x + pad_l = self.kernel_size - 1 + pad_r = 0 + padding = [[0, 0], [0, 0], [pad_l, pad_r]] + x = F.pad(x, commons.convert_pad_shape(padding)) + return x + + def _same_padding(self, x): + if self.kernel_size == 1: + return x + pad_l = (self.kernel_size - 1) // 2 + pad_r = self.kernel_size // 2 + padding = [[0, 0], [0, 0], [pad_l, pad_r]] + x = F.pad(x, commons.convert_pad_shape(padding)) + return x diff --git a/TTS/vc/modules/openvoice/commons.py b/TTS/vc/modules/openvoice/commons.py new file mode 100644 index 0000000000..123ee7e156 --- /dev/null +++ b/TTS/vc/modules/openvoice/commons.py @@ -0,0 +1,151 @@ +import math + +import torch +from torch.nn import functional as F + + +def init_weights(m, mean=0.0, std=0.01): + classname = m.__class__.__name__ + if classname.find("Conv") != -1: + m.weight.data.normal_(mean, std) + + +def get_padding(kernel_size, dilation=1): + return int((kernel_size * dilation - dilation) / 2) + + +def intersperse(lst, item): + result = [item] * (len(lst) * 2 + 1) + result[1::2] = lst + return result + + +def kl_divergence(m_p, logs_p, m_q, logs_q): + """KL(P||Q)""" + kl = (logs_q - logs_p) - 0.5 + kl += 0.5 * (torch.exp(2.0 * logs_p) + ((m_p - m_q) ** 2)) * torch.exp(-2.0 * logs_q) + return kl + + +def rand_gumbel(shape): + """Sample from the Gumbel distribution, protect from overflows.""" + uniform_samples = torch.rand(shape) * 0.99998 + 0.00001 + return -torch.log(-torch.log(uniform_samples)) + + +def rand_gumbel_like(x): + g = rand_gumbel(x.size()).to(dtype=x.dtype, device=x.device) + return g + + +def slice_segments(x, ids_str, segment_size=4): + ret = torch.zeros_like(x[:, :, :segment_size]) + for i in range(x.size(0)): + idx_str = ids_str[i] + idx_end = idx_str + segment_size + ret[i] = x[i, :, idx_str:idx_end] + return ret + + +def rand_slice_segments(x, x_lengths=None, segment_size=4): + b, d, t = x.size() + if x_lengths is None: + x_lengths = t + ids_str_max = x_lengths - segment_size + 1 + ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long) + ret = slice_segments(x, ids_str, segment_size) + return ret, ids_str + + +def get_timing_signal_1d(length, channels, min_timescale=1.0, max_timescale=1.0e4): + position = torch.arange(length, dtype=torch.float) + num_timescales = channels // 2 + 
log_timescale_increment = math.log(float(max_timescale) / float(min_timescale)) / (num_timescales - 1) + inv_timescales = min_timescale * torch.exp( + torch.arange(num_timescales, dtype=torch.float) * -log_timescale_increment + ) + scaled_time = position.unsqueeze(0) * inv_timescales.unsqueeze(1) + signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], 0) + signal = F.pad(signal, [0, 0, 0, channels % 2]) + signal = signal.view(1, channels, length) + return signal + + +def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): + b, channels, length = x.size() + signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale) + return x + signal.to(dtype=x.dtype, device=x.device) + + +def cat_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4, axis=1): + b, channels, length = x.size() + signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale) + return torch.cat([x, signal.to(dtype=x.dtype, device=x.device)], axis) + + +def subsequent_mask(length): + mask = torch.tril(torch.ones(length, length)).unsqueeze(0).unsqueeze(0) + return mask + + +@torch.jit.script +def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): + n_channels_int = n_channels[0] + in_act = input_a + input_b + t_act = torch.tanh(in_act[:, :n_channels_int, :]) + s_act = torch.sigmoid(in_act[:, n_channels_int:, :]) + acts = t_act * s_act + return acts + + +def convert_pad_shape(pad_shape): + layer = pad_shape[::-1] + pad_shape = [item for sublist in layer for item in sublist] + return pad_shape + + +def shift_1d(x): + x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [1, 0]]))[:, :, :-1] + return x + + +def sequence_mask(length, max_length=None): + if max_length is None: + max_length = length.max() + x = torch.arange(max_length, dtype=length.dtype, device=length.device) + return x.unsqueeze(0) < length.unsqueeze(1) + + +def generate_path(duration, mask): + """ + duration: [b, 1, t_x] + mask: [b, 1, t_y, t_x] + """ + + b, _, t_y, t_x = mask.shape + cum_duration = torch.cumsum(duration, -1) + + cum_duration_flat = cum_duration.view(b * t_x) + path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype) + path = path.view(b, t_x, t_y) + path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1] + path = path.unsqueeze(1).transpose(2, 3) * mask + return path + + +def clip_grad_value_(parameters, clip_value, norm_type=2): + if isinstance(parameters, torch.Tensor): + parameters = [parameters] + parameters = list(filter(lambda p: p.grad is not None, parameters)) + norm_type = float(norm_type) + if clip_value is not None: + clip_value = float(clip_value) + + total_norm = 0 + for p in parameters: + param_norm = p.grad.data.norm(norm_type) + total_norm += param_norm.item() ** norm_type + if clip_value is not None: + p.grad.data.clamp_(min=-clip_value, max=clip_value) + total_norm = total_norm ** (1.0 / norm_type) + return total_norm diff --git a/TTS/vc/modules/openvoice/config.json b/TTS/vc/modules/openvoice/config.json new file mode 100644 index 0000000000..3e33566b0d --- /dev/null +++ b/TTS/vc/modules/openvoice/config.json @@ -0,0 +1,57 @@ +{ + "_version_": "v2", + "data": { + "sampling_rate": 22050, + "filter_length": 1024, + "hop_length": 256, + "win_length": 1024, + "n_speakers": 0 + }, + "model": { + "zero_g": true, + "inter_channels": 192, + "hidden_channels": 192, + "filter_channels": 768, + "n_heads": 2, + "n_layers": 6, + "kernel_size": 3, + "p_dropout": 0.1, + "resblock": "1", + "resblock_kernel_sizes": [ + 3, + 7, + 11 + ], + 
"resblock_dilation_sizes": [ + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ], + [ + 1, + 3, + 5 + ] + ], + "upsample_rates": [ + 8, + 8, + 2, + 2 + ], + "upsample_initial_channel": 512, + "upsample_kernel_sizes": [ + 16, + 16, + 4, + 4 + ], + "gin_channels": 256 + } +} \ No newline at end of file diff --git a/TTS/vc/modules/openvoice/models.py b/TTS/vc/modules/openvoice/models.py new file mode 100644 index 0000000000..c1ae7574ce --- /dev/null +++ b/TTS/vc/modules/openvoice/models.py @@ -0,0 +1,480 @@ +import math + +import torch +from torch import nn +from torch.nn import Conv1d, ConvTranspose1d +from torch.nn import functional as F +from torch.nn.utils import remove_weight_norm, weight_norm + +from TTS.vc.modules.openvoice import attentions, commons, modules +from TTS.vc.modules.openvoice.commons import init_weights + + +class TextEncoder(nn.Module): + def __init__( + self, n_vocab, out_channels, hidden_channels, filter_channels, n_heads, n_layers, kernel_size, p_dropout + ): + super().__init__() + self.n_vocab = n_vocab + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.filter_channels = filter_channels + self.n_heads = n_heads + self.n_layers = n_layers + self.kernel_size = kernel_size + self.p_dropout = p_dropout + + self.emb = nn.Embedding(n_vocab, hidden_channels) + nn.init.normal_(self.emb.weight, 0.0, hidden_channels**-0.5) + + self.encoder = attentions.Encoder(hidden_channels, filter_channels, n_heads, n_layers, kernel_size, p_dropout) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + + def forward(self, x, x_lengths): + x = self.emb(x) * math.sqrt(self.hidden_channels) # [b, t, h] + x = torch.transpose(x, 1, -1) # [b, h, t] + x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) + + x = self.encoder(x * x_mask, x_mask) + stats = self.proj(x) * x_mask + + m, logs = torch.split(stats, self.out_channels, dim=1) + return x, m, logs, x_mask + + +class DurationPredictor(nn.Module): + def __init__(self, in_channels, filter_channels, kernel_size, p_dropout, gin_channels=0): + super().__init__() + + self.in_channels = in_channels + self.filter_channels = filter_channels + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.gin_channels = gin_channels + + self.drop = nn.Dropout(p_dropout) + self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size, padding=kernel_size // 2) + self.norm_1 = modules.LayerNorm(filter_channels) + self.conv_2 = nn.Conv1d(filter_channels, filter_channels, kernel_size, padding=kernel_size // 2) + self.norm_2 = modules.LayerNorm(filter_channels) + self.proj = nn.Conv1d(filter_channels, 1, 1) + + if gin_channels != 0: + self.cond = nn.Conv1d(gin_channels, in_channels, 1) + + def forward(self, x, x_mask, g=None): + x = torch.detach(x) + if g is not None: + g = torch.detach(g) + x = x + self.cond(g) + x = self.conv_1(x * x_mask) + x = torch.relu(x) + x = self.norm_1(x) + x = self.drop(x) + x = self.conv_2(x * x_mask) + x = torch.relu(x) + x = self.norm_2(x) + x = self.drop(x) + x = self.proj(x * x_mask) + return x * x_mask + + +class StochasticDurationPredictor(nn.Module): + def __init__(self, in_channels, filter_channels, kernel_size, p_dropout, n_flows=4, gin_channels=0): + super().__init__() + filter_channels = in_channels # it needs to be removed from future version. 
+ self.in_channels = in_channels + self.filter_channels = filter_channels + self.kernel_size = kernel_size + self.p_dropout = p_dropout + self.n_flows = n_flows + self.gin_channels = gin_channels + + self.log_flow = modules.Log() + self.flows = nn.ModuleList() + self.flows.append(modules.ElementwiseAffine(2)) + for i in range(n_flows): + self.flows.append(modules.ConvFlow(2, filter_channels, kernel_size, n_layers=3)) + self.flows.append(modules.Flip()) + + self.post_pre = nn.Conv1d(1, filter_channels, 1) + self.post_proj = nn.Conv1d(filter_channels, filter_channels, 1) + self.post_convs = modules.DDSConv(filter_channels, kernel_size, n_layers=3, p_dropout=p_dropout) + self.post_flows = nn.ModuleList() + self.post_flows.append(modules.ElementwiseAffine(2)) + for i in range(4): + self.post_flows.append(modules.ConvFlow(2, filter_channels, kernel_size, n_layers=3)) + self.post_flows.append(modules.Flip()) + + self.pre = nn.Conv1d(in_channels, filter_channels, 1) + self.proj = nn.Conv1d(filter_channels, filter_channels, 1) + self.convs = modules.DDSConv(filter_channels, kernel_size, n_layers=3, p_dropout=p_dropout) + if gin_channels != 0: + self.cond = nn.Conv1d(gin_channels, filter_channels, 1) + + def forward(self, x, x_mask, w=None, g=None, reverse=False, noise_scale=1.0): + x = torch.detach(x) + x = self.pre(x) + if g is not None: + g = torch.detach(g) + x = x + self.cond(g) + x = self.convs(x, x_mask) + x = self.proj(x) * x_mask + + if not reverse: + flows = self.flows + assert w is not None + + logdet_tot_q = 0 + h_w = self.post_pre(w) + h_w = self.post_convs(h_w, x_mask) + h_w = self.post_proj(h_w) * x_mask + e_q = torch.randn(w.size(0), 2, w.size(2)).to(device=x.device, dtype=x.dtype) * x_mask + z_q = e_q + for flow in self.post_flows: + z_q, logdet_q = flow(z_q, x_mask, g=(x + h_w)) + logdet_tot_q += logdet_q + z_u, z1 = torch.split(z_q, [1, 1], 1) + u = torch.sigmoid(z_u) * x_mask + z0 = (w - u) * x_mask + logdet_tot_q += torch.sum((F.logsigmoid(z_u) + F.logsigmoid(-z_u)) * x_mask, [1, 2]) + logq = torch.sum(-0.5 * (math.log(2 * math.pi) + (e_q**2)) * x_mask, [1, 2]) - logdet_tot_q + + logdet_tot = 0 + z0, logdet = self.log_flow(z0, x_mask) + logdet_tot += logdet + z = torch.cat([z0, z1], 1) + for flow in flows: + z, logdet = flow(z, x_mask, g=x, reverse=reverse) + logdet_tot = logdet_tot + logdet + nll = torch.sum(0.5 * (math.log(2 * math.pi) + (z**2)) * x_mask, [1, 2]) - logdet_tot + return nll + logq # [b] + else: + flows = list(reversed(self.flows)) + flows = flows[:-2] + [flows[-1]] # remove a useless vflow + z = torch.randn(x.size(0), 2, x.size(2)).to(device=x.device, dtype=x.dtype) * noise_scale + for flow in flows: + z = flow(z, x_mask, g=x, reverse=reverse) + z0, z1 = torch.split(z, [1, 1], 1) + logw = z0 + return logw + + +class PosteriorEncoder(nn.Module): + def __init__( + self, + in_channels, + out_channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=0, + ): + super().__init__() + self.in_channels = in_channels + self.out_channels = out_channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.gin_channels = gin_channels + + self.pre = nn.Conv1d(in_channels, hidden_channels, 1) + self.enc = modules.WN( + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=gin_channels, + ) + self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) + + def forward(self, x, x_lengths, g=None, tau=1.0): + x_mask = 
torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) + x = self.pre(x) * x_mask + x = self.enc(x, x_mask, g=g) + stats = self.proj(x) * x_mask + m, logs = torch.split(stats, self.out_channels, dim=1) + z = (m + torch.randn_like(m) * tau * torch.exp(logs)) * x_mask + return z, m, logs, x_mask + + +class Generator(torch.nn.Module): + def __init__( + self, + initial_channel, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + gin_channels=0, + ): + super(Generator, self).__init__() + self.num_kernels = len(resblock_kernel_sizes) + self.num_upsamples = len(upsample_rates) + self.conv_pre = Conv1d(initial_channel, upsample_initial_channel, 7, 1, padding=3) + resblock = modules.ResBlock1 if resblock == "1" else modules.ResBlock2 + + self.ups = nn.ModuleList() + for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): + self.ups.append( + weight_norm( + ConvTranspose1d( + upsample_initial_channel // (2**i), + upsample_initial_channel // (2 ** (i + 1)), + k, + u, + padding=(k - u) // 2, + ) + ) + ) + + self.resblocks = nn.ModuleList() + for i in range(len(self.ups)): + ch = upsample_initial_channel // (2 ** (i + 1)) + for j, (k, d) in enumerate(zip(resblock_kernel_sizes, resblock_dilation_sizes)): + self.resblocks.append(resblock(ch, k, d)) + + self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False) + self.ups.apply(init_weights) + + if gin_channels != 0: + self.cond = nn.Conv1d(gin_channels, upsample_initial_channel, 1) + + def forward(self, x, g=None): + x = self.conv_pre(x) + if g is not None: + x = x + self.cond(g) + + for i in range(self.num_upsamples): + x = F.leaky_relu(x, modules.LRELU_SLOPE) + x = self.ups[i](x) + xs = None + for j in range(self.num_kernels): + if xs is None: + xs = self.resblocks[i * self.num_kernels + j](x) + else: + xs += self.resblocks[i * self.num_kernels + j](x) + x = xs / self.num_kernels + x = F.leaky_relu(x) + x = self.conv_post(x) + x = torch.tanh(x) + + return x + + def remove_weight_norm(self): + print("Removing weight norm...") + for layer in self.ups: + remove_weight_norm(layer) + for layer in self.resblocks: + layer.remove_weight_norm() + + +class ReferenceEncoder(nn.Module): + """ + inputs --- [N, Ty/r, n_mels*r] mels + outputs --- [N, ref_enc_gru_size] + """ + + def __init__(self, spec_channels, gin_channels=0, layernorm=True): + super().__init__() + self.spec_channels = spec_channels + ref_enc_filters = [32, 32, 64, 64, 128, 128] + K = len(ref_enc_filters) + filters = [1] + ref_enc_filters + convs = [ + weight_norm( + nn.Conv2d( + in_channels=filters[i], + out_channels=filters[i + 1], + kernel_size=(3, 3), + stride=(2, 2), + padding=(1, 1), + ) + ) + for i in range(K) + ] + self.convs = nn.ModuleList(convs) + + out_channels = self.calculate_channels(spec_channels, 3, 2, 1, K) + self.gru = nn.GRU( + input_size=ref_enc_filters[-1] * out_channels, + hidden_size=256 // 2, + batch_first=True, + ) + self.proj = nn.Linear(128, gin_channels) + if layernorm: + self.layernorm = nn.LayerNorm(self.spec_channels) + else: + self.layernorm = None + + def forward(self, inputs, mask=None): + N = inputs.size(0) + + out = inputs.view(N, 1, -1, self.spec_channels) # [N, 1, Ty, n_freqs] + if self.layernorm is not None: + out = self.layernorm(out) + + for conv in self.convs: + out = conv(out) + # out = wn(out) + out = F.relu(out) # [N, 128, Ty//2^K, n_mels//2^K] + + out = out.transpose(1, 2) # [N, Ty//2^K, 128, n_mels//2^K] + T = out.size(1) + N = 
out.size(0) + out = out.contiguous().view(N, T, -1) # [N, Ty//2^K, 128*n_mels//2^K] + + self.gru.flatten_parameters() + memory, out = self.gru(out) # out --- [1, N, 128] + + return self.proj(out.squeeze(0)) + + def calculate_channels(self, L, kernel_size, stride, pad, n_convs): + for i in range(n_convs): + L = (L - kernel_size + 2 * pad) // stride + 1 + return L + + +class ResidualCouplingBlock(nn.Module): + def __init__(self, channels, hidden_channels, kernel_size, dilation_rate, n_layers, n_flows=4, gin_channels=0): + super().__init__() + self.channels = channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.n_flows = n_flows + self.gin_channels = gin_channels + + self.flows = nn.ModuleList() + for i in range(n_flows): + self.flows.append( + modules.ResidualCouplingLayer( + channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=gin_channels, + mean_only=True, + ) + ) + self.flows.append(modules.Flip()) + + def forward(self, x, x_mask, g=None, reverse=False): + if not reverse: + for flow in self.flows: + x, _ = flow(x, x_mask, g=g, reverse=reverse) + else: + for flow in reversed(self.flows): + x = flow(x, x_mask, g=g, reverse=reverse) + return x + + +class SynthesizerTrn(nn.Module): + """ + Synthesizer for Training + """ + + def __init__( + self, + n_vocab, + spec_channels, + inter_channels, + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + n_speakers=256, + gin_channels=256, + zero_g=False, + **kwargs, + ): + super().__init__() + + self.dec = Generator( + inter_channels, + resblock, + resblock_kernel_sizes, + resblock_dilation_sizes, + upsample_rates, + upsample_initial_channel, + upsample_kernel_sizes, + gin_channels=gin_channels, + ) + self.enc_q = PosteriorEncoder( + spec_channels, + inter_channels, + hidden_channels, + 5, + 1, + 16, + gin_channels=gin_channels, + ) + + self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels) + + self.n_speakers = n_speakers + if n_speakers == 0: + self.ref_enc = ReferenceEncoder(spec_channels, gin_channels) + else: + self.enc_p = TextEncoder( + n_vocab, inter_channels, hidden_channels, filter_channels, n_heads, n_layers, kernel_size, p_dropout + ) + self.sdp = StochasticDurationPredictor(hidden_channels, 192, 3, 0.5, 4, gin_channels=gin_channels) + self.dp = DurationPredictor(hidden_channels, 256, 3, 0.5, gin_channels=gin_channels) + self.emb_g = nn.Embedding(n_speakers, gin_channels) + self.zero_g = zero_g + + def infer( + self, x, x_lengths, sid=None, noise_scale=1, length_scale=1, noise_scale_w=1.0, sdp_ratio=0.2, max_len=None + ): + x, m_p, logs_p, x_mask = self.enc_p(x, x_lengths) + if self.n_speakers > 0: + g = self.emb_g(sid).unsqueeze(-1) # [b, h, 1] + else: + g = None + + logw = self.sdp(x, x_mask, g=g, reverse=True, noise_scale=noise_scale_w) * sdp_ratio + self.dp( + x, x_mask, g=g + ) * (1 - sdp_ratio) + + w = torch.exp(logw) * x_mask * length_scale + w_ceil = torch.ceil(w) + y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long() + y_mask = torch.unsqueeze(commons.sequence_mask(y_lengths, None), 1).to(x_mask.dtype) + attn_mask = torch.unsqueeze(x_mask, 2) * torch.unsqueeze(y_mask, -1) + attn = commons.generate_path(w_ceil, attn_mask) + + m_p = torch.matmul(attn.squeeze(1), 
m_p.transpose(1, 2)).transpose(1, 2) # [b, t', t], [b, t, d] -> [b, d, t'] + logs_p = torch.matmul(attn.squeeze(1), logs_p.transpose(1, 2)).transpose( + 1, 2 + ) # [b, t', t], [b, t, d] -> [b, d, t'] + + z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale + z = self.flow(z_p, y_mask, g=g, reverse=True) + o = self.dec((z * y_mask)[:, :, :max_len], g=g) + return o, attn, y_mask, (z, z_p, m_p, logs_p) + + def voice_conversion(self, y, y_lengths, sid_src, sid_tgt, tau=1.0): + g_src = sid_src + g_tgt = sid_tgt + z, m_q, logs_q, y_mask = self.enc_q( + y, y_lengths, g=g_src if not self.zero_g else torch.zeros_like(g_src), tau=tau + ) + z_p = self.flow(z, y_mask, g=g_src) + z_hat = self.flow(z_p, y_mask, g=g_tgt, reverse=True) + o_hat = self.dec(z_hat * y_mask, g=g_tgt if not self.zero_g else torch.zeros_like(g_tgt)) + return o_hat, y_mask, (z, z_p, z_hat) diff --git a/TTS/vc/modules/openvoice/modules.py b/TTS/vc/modules/openvoice/modules.py new file mode 100644 index 0000000000..b3a60d5b12 --- /dev/null +++ b/TTS/vc/modules/openvoice/modules.py @@ -0,0 +1,588 @@ +import math + +import torch +from torch import nn +from torch.nn import Conv1d +from torch.nn import functional as F +from torch.nn.utils import remove_weight_norm, weight_norm + +from TTS.vc.modules.openvoice import commons +from TTS.vc.modules.openvoice.attentions import Encoder +from TTS.vc.modules.openvoice.commons import get_padding, init_weights +from TTS.vc.modules.openvoice.transforms import piecewise_rational_quadratic_transform + +LRELU_SLOPE = 0.1 + + +class LayerNorm(nn.Module): + def __init__(self, channels, eps=1e-5): + super().__init__() + self.channels = channels + self.eps = eps + + self.gamma = nn.Parameter(torch.ones(channels)) + self.beta = nn.Parameter(torch.zeros(channels)) + + def forward(self, x): + x = x.transpose(1, -1) + x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps) + return x.transpose(1, -1) + + +class ConvReluNorm(nn.Module): + def __init__( + self, + in_channels, + hidden_channels, + out_channels, + kernel_size, + n_layers, + p_dropout, + ): + super().__init__() + self.in_channels = in_channels + self.hidden_channels = hidden_channels + self.out_channels = out_channels + self.kernel_size = kernel_size + self.n_layers = n_layers + self.p_dropout = p_dropout + assert n_layers > 1, "Number of layers should be larger than 0." 
+ + self.conv_layers = nn.ModuleList() + self.norm_layers = nn.ModuleList() + self.conv_layers.append(nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size // 2)) + self.norm_layers.append(LayerNorm(hidden_channels)) + self.relu_drop = nn.Sequential(nn.ReLU(), nn.Dropout(p_dropout)) + for _ in range(n_layers - 1): + self.conv_layers.append( + nn.Conv1d( + hidden_channels, + hidden_channels, + kernel_size, + padding=kernel_size // 2, + ) + ) + self.norm_layers.append(LayerNorm(hidden_channels)) + self.proj = nn.Conv1d(hidden_channels, out_channels, 1) + self.proj.weight.data.zero_() + self.proj.bias.data.zero_() + + def forward(self, x, x_mask): + x_org = x + for i in range(self.n_layers): + x = self.conv_layers[i](x * x_mask) + x = self.norm_layers[i](x) + x = self.relu_drop(x) + x = x_org + self.proj(x) + return x * x_mask + + +class DDSConv(nn.Module): + """ + Dilated and Depth-Separable Convolution + """ + + def __init__(self, channels, kernel_size, n_layers, p_dropout=0.0): + super().__init__() + self.channels = channels + self.kernel_size = kernel_size + self.n_layers = n_layers + self.p_dropout = p_dropout + + self.drop = nn.Dropout(p_dropout) + self.convs_sep = nn.ModuleList() + self.convs_1x1 = nn.ModuleList() + self.norms_1 = nn.ModuleList() + self.norms_2 = nn.ModuleList() + for i in range(n_layers): + dilation = kernel_size**i + padding = (kernel_size * dilation - dilation) // 2 + self.convs_sep.append( + nn.Conv1d( + channels, + channels, + kernel_size, + groups=channels, + dilation=dilation, + padding=padding, + ) + ) + self.convs_1x1.append(nn.Conv1d(channels, channels, 1)) + self.norms_1.append(LayerNorm(channels)) + self.norms_2.append(LayerNorm(channels)) + + def forward(self, x, x_mask, g=None): + if g is not None: + x = x + g + for i in range(self.n_layers): + y = self.convs_sep[i](x * x_mask) + y = self.norms_1[i](y) + y = F.gelu(y) + y = self.convs_1x1[i](y) + y = self.norms_2[i](y) + y = F.gelu(y) + y = self.drop(y) + x = x + y + return x * x_mask + + +class WN(torch.nn.Module): + def __init__( + self, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + gin_channels=0, + p_dropout=0, + ): + super(WN, self).__init__() + assert kernel_size % 2 == 1 + self.hidden_channels = hidden_channels + self.kernel_size = (kernel_size,) + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.gin_channels = gin_channels + self.p_dropout = p_dropout + + self.in_layers = torch.nn.ModuleList() + self.res_skip_layers = torch.nn.ModuleList() + self.drop = nn.Dropout(p_dropout) + + if gin_channels != 0: + cond_layer = torch.nn.Conv1d(gin_channels, 2 * hidden_channels * n_layers, 1) + self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name="weight") + + for i in range(n_layers): + dilation = dilation_rate**i + padding = int((kernel_size * dilation - dilation) / 2) + in_layer = torch.nn.Conv1d( + hidden_channels, + 2 * hidden_channels, + kernel_size, + dilation=dilation, + padding=padding, + ) + in_layer = torch.nn.utils.weight_norm(in_layer, name="weight") + self.in_layers.append(in_layer) + + # last one is not necessary + if i < n_layers - 1: + res_skip_channels = 2 * hidden_channels + else: + res_skip_channels = hidden_channels + + res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1) + res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name="weight") + self.res_skip_layers.append(res_skip_layer) + + def forward(self, x, x_mask, g=None, **kwargs): + output = torch.zeros_like(x) + n_channels_tensor = 
torch.IntTensor([self.hidden_channels]) + + if g is not None: + g = self.cond_layer(g) + + for i in range(self.n_layers): + x_in = self.in_layers[i](x) + if g is not None: + cond_offset = i * 2 * self.hidden_channels + g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :] + else: + g_l = torch.zeros_like(x_in) + + acts = commons.fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor) + acts = self.drop(acts) + + res_skip_acts = self.res_skip_layers[i](acts) + if i < self.n_layers - 1: + res_acts = res_skip_acts[:, : self.hidden_channels, :] + x = (x + res_acts) * x_mask + output = output + res_skip_acts[:, self.hidden_channels :, :] + else: + output = output + res_skip_acts + return output * x_mask + + def remove_weight_norm(self): + if self.gin_channels != 0: + torch.nn.utils.remove_weight_norm(self.cond_layer) + for l in self.in_layers: + torch.nn.utils.remove_weight_norm(l) + for l in self.res_skip_layers: + torch.nn.utils.remove_weight_norm(l) + + +class ResBlock1(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)): + super(ResBlock1, self).__init__() + self.convs1 = nn.ModuleList( + [ + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[2], + padding=get_padding(kernel_size, dilation[2]), + ) + ), + ] + ) + self.convs1.apply(init_weights) + + self.convs2 = nn.ModuleList( + [ + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=1, + padding=get_padding(kernel_size, 1), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=1, + padding=get_padding(kernel_size, 1), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=1, + padding=get_padding(kernel_size, 1), + ) + ), + ] + ) + self.convs2.apply(init_weights) + + def forward(self, x, x_mask=None): + for c1, c2 in zip(self.convs1, self.convs2): + xt = F.leaky_relu(x, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c1(xt) + xt = F.leaky_relu(xt, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c2(xt) + x = xt + x + if x_mask is not None: + x = x * x_mask + return x + + def remove_weight_norm(self): + for l in self.convs1: + remove_weight_norm(l) + for l in self.convs2: + remove_weight_norm(l) + + +class ResBlock2(torch.nn.Module): + def __init__(self, channels, kernel_size=3, dilation=(1, 3)): + super(ResBlock2, self).__init__() + self.convs = nn.ModuleList( + [ + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[0], + padding=get_padding(kernel_size, dilation[0]), + ) + ), + weight_norm( + Conv1d( + channels, + channels, + kernel_size, + 1, + dilation=dilation[1], + padding=get_padding(kernel_size, dilation[1]), + ) + ), + ] + ) + self.convs.apply(init_weights) + + def forward(self, x, x_mask=None): + for c in self.convs: + xt = F.leaky_relu(x, LRELU_SLOPE) + if x_mask is not None: + xt = xt * x_mask + xt = c(xt) + x = xt + x + if x_mask is not None: + x = x * x_mask + return x + + def remove_weight_norm(self): + for l in self.convs: + remove_weight_norm(l) + + +class Log(nn.Module): + def forward(self, x, x_mask, reverse=False, **kwargs): + if not reverse: + y = torch.log(torch.clamp_min(x, 
1e-5)) * x_mask + logdet = torch.sum(-y, [1, 2]) + return y, logdet + else: + x = torch.exp(x) * x_mask + return x + + +class Flip(nn.Module): + def forward(self, x, *args, reverse=False, **kwargs): + x = torch.flip(x, [1]) + if not reverse: + logdet = torch.zeros(x.size(0)).to(dtype=x.dtype, device=x.device) + return x, logdet + else: + return x + + +class ElementwiseAffine(nn.Module): + def __init__(self, channels): + super().__init__() + self.channels = channels + self.m = nn.Parameter(torch.zeros(channels, 1)) + self.logs = nn.Parameter(torch.zeros(channels, 1)) + + def forward(self, x, x_mask, reverse=False, **kwargs): + if not reverse: + y = self.m + torch.exp(self.logs) * x + y = y * x_mask + logdet = torch.sum(self.logs * x_mask, [1, 2]) + return y, logdet + else: + x = (x - self.m) * torch.exp(-self.logs) * x_mask + return x + + +class ResidualCouplingLayer(nn.Module): + def __init__( + self, + channels, + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + p_dropout=0, + gin_channels=0, + mean_only=False, + ): + assert channels % 2 == 0, "channels should be divisible by 2" + super().__init__() + self.channels = channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.dilation_rate = dilation_rate + self.n_layers = n_layers + self.half_channels = channels // 2 + self.mean_only = mean_only + + self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1) + self.enc = WN( + hidden_channels, + kernel_size, + dilation_rate, + n_layers, + p_dropout=p_dropout, + gin_channels=gin_channels, + ) + self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1) + self.post.weight.data.zero_() + self.post.bias.data.zero_() + + def forward(self, x, x_mask, g=None, reverse=False): + x0, x1 = torch.split(x, [self.half_channels] * 2, 1) + h = self.pre(x0) * x_mask + h = self.enc(h, x_mask, g=g) + stats = self.post(h) * x_mask + if not self.mean_only: + m, logs = torch.split(stats, [self.half_channels] * 2, 1) + else: + m = stats + logs = torch.zeros_like(m) + + if not reverse: + x1 = m + x1 * torch.exp(logs) * x_mask + x = torch.cat([x0, x1], 1) + logdet = torch.sum(logs, [1, 2]) + return x, logdet + else: + x1 = (x1 - m) * torch.exp(-logs) * x_mask + x = torch.cat([x0, x1], 1) + return x + + +class ConvFlow(nn.Module): + def __init__( + self, + in_channels, + filter_channels, + kernel_size, + n_layers, + num_bins=10, + tail_bound=5.0, + ): + super().__init__() + self.in_channels = in_channels + self.filter_channels = filter_channels + self.kernel_size = kernel_size + self.n_layers = n_layers + self.num_bins = num_bins + self.tail_bound = tail_bound + self.half_channels = in_channels // 2 + + self.pre = nn.Conv1d(self.half_channels, filter_channels, 1) + self.convs = DDSConv(filter_channels, kernel_size, n_layers, p_dropout=0.0) + self.proj = nn.Conv1d(filter_channels, self.half_channels * (num_bins * 3 - 1), 1) + self.proj.weight.data.zero_() + self.proj.bias.data.zero_() + + def forward(self, x, x_mask, g=None, reverse=False): + x0, x1 = torch.split(x, [self.half_channels] * 2, 1) + h = self.pre(x0) + h = self.convs(h, x_mask, g=g) + h = self.proj(h) * x_mask + + b, c, t = x0.shape + h = h.reshape(b, c, -1, t).permute(0, 1, 3, 2) # [b, cx?, t] -> [b, c, t, ?] 
+ + unnormalized_widths = h[..., : self.num_bins] / math.sqrt(self.filter_channels) + unnormalized_heights = h[..., self.num_bins : 2 * self.num_bins] / math.sqrt(self.filter_channels) + unnormalized_derivatives = h[..., 2 * self.num_bins :] + + x1, logabsdet = piecewise_rational_quadratic_transform( + x1, + unnormalized_widths, + unnormalized_heights, + unnormalized_derivatives, + inverse=reverse, + tails="linear", + tail_bound=self.tail_bound, + ) + + x = torch.cat([x0, x1], 1) * x_mask + logdet = torch.sum(logabsdet * x_mask, [1, 2]) + if not reverse: + return x, logdet + else: + return x + + +class TransformerCouplingLayer(nn.Module): + def __init__( + self, + channels, + hidden_channels, + kernel_size, + n_layers, + n_heads, + p_dropout=0, + filter_channels=0, + mean_only=False, + wn_sharing_parameter=None, + gin_channels=0, + ): + assert n_layers == 3, n_layers + assert channels % 2 == 0, "channels should be divisible by 2" + super().__init__() + self.channels = channels + self.hidden_channels = hidden_channels + self.kernel_size = kernel_size + self.n_layers = n_layers + self.half_channels = channels // 2 + self.mean_only = mean_only + + self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1) + self.enc = ( + Encoder( + hidden_channels, + filter_channels, + n_heads, + n_layers, + kernel_size, + p_dropout, + isflow=True, + gin_channels=gin_channels, + ) + if wn_sharing_parameter is None + else wn_sharing_parameter + ) + self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1) + self.post.weight.data.zero_() + self.post.bias.data.zero_() + + def forward(self, x, x_mask, g=None, reverse=False): + x0, x1 = torch.split(x, [self.half_channels] * 2, 1) + h = self.pre(x0) * x_mask + h = self.enc(h, x_mask, g=g) + stats = self.post(h) * x_mask + if not self.mean_only: + m, logs = torch.split(stats, [self.half_channels] * 2, 1) + else: + m = stats + logs = torch.zeros_like(m) + + if not reverse: + x1 = m + x1 * torch.exp(logs) * x_mask + x = torch.cat([x0, x1], 1) + logdet = torch.sum(logs, [1, 2]) + return x, logdet + else: + x1 = (x1 - m) * torch.exp(-logs) * x_mask + x = torch.cat([x0, x1], 1) + return x + + x1, logabsdet = piecewise_rational_quadratic_transform( + x1, + unnormalized_widths, + unnormalized_heights, + unnormalized_derivatives, + inverse=reverse, + tails="linear", + tail_bound=self.tail_bound, + ) + + x = torch.cat([x0, x1], 1) * x_mask + logdet = torch.sum(logabsdet * x_mask, [1, 2]) + if not reverse: + return x, logdet + else: + return x diff --git a/TTS/vc/modules/openvoice/standalone_api.py b/TTS/vc/modules/openvoice/standalone_api.py new file mode 100644 index 0000000000..831fd4dc43 --- /dev/null +++ b/TTS/vc/modules/openvoice/standalone_api.py @@ -0,0 +1,342 @@ +import json +import os + +import librosa +import torch +import torch.utils.data +from librosa.filters import mel as librosa_mel_fn + +from TTS.vc.modules.openvoice.models import SynthesizerTrn + +# vc_checkpoint=model_path, vc_config=config_path, use_cuda=gpu) + +# vc_config.audio.output_sample_rate + + +class custom_sr_config: + """Class defined to make combatible sampling rate defination with TTS api.py. + + Args: + sampling rate. 
+ """ + + def __init__(self, value): + self.audio = self.Audio(value) + + class Audio: + def __init__(self, value): + self.output_sample_rate = value + + +class OpenVoiceSynthesizer(object): + def __init__(self, vc_checkpoint, vc_config, use_cuda="cpu"): + + if use_cuda: + self.device = "cuda" + else: + self.device = "cpu" + + hps = get_hparams_from_file(vc_config) + self.vc_config = custom_sr_config(hps.data.sampling_rate) + + # vc_config.audio.output_sample_rate + self.model = SynthesizerTrn( + len(getattr(hps, "symbols", [])), + hps.data.filter_length // 2 + 1, + n_speakers=hps.data.n_speakers, + **hps.model, + ).to(torch.device(self.device)) + + self.hps = hps + self.load_ckpt(vc_checkpoint) + self.model.eval() + + def load_ckpt(self, ckpt_path): + checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) + a, b = self.model.load_state_dict(checkpoint_dict["model"], strict=False) + # print("Loaded checkpoint '{}'".format(ckpt_path)) + # print('missing/unexpected keys:', a, b) + + def extract_se(self, fpath): + audio_ref, sr = librosa.load(fpath, sr=self.hps.data.sampling_rate) + y = torch.FloatTensor(audio_ref) + y = y.to(self.device) + y = y.unsqueeze(0) + y = spectrogram_torch( + y, + self.hps.data.filter_length, + self.hps.data.sampling_rate, + self.hps.data.hop_length, + self.hps.data.win_length, + center=False, + ).to(self.device) + with torch.no_grad(): + g = self.model.ref_enc(y.transpose(1, 2)).unsqueeze(-1) + + return g + + # source_wav="my/source.wav", target_wav="my/target.wav", file_path="output.wav" + def voice_conversion(self, source_wav, target_wav, tau=0.3, message="default"): + + if not os.path.exists(source_wav): + print("source wavpath dont exists") + exit(0) + + if not os.path.exists(target_wav): + print("target wavpath dont exists") + exit(0) + + src_se = self.extract_se(source_wav) + tgt_se = self.extract_se(target_wav) + + # load audio + audio, sample_rate = librosa.load(source_wav, sr=self.hps.data.sampling_rate) + audio = torch.tensor(audio).float() + + with torch.no_grad(): + y = torch.FloatTensor(audio).to(self.device) + y = y.unsqueeze(0) + spec = spectrogram_torch( + y, + self.hps.data.filter_length, + self.hps.data.sampling_rate, + self.hps.data.hop_length, + self.hps.data.win_length, + center=False, + ).to(self.device) + spec_lengths = torch.LongTensor([spec.size(-1)]).to(self.device) + audio = ( + self.model.voice_conversion(spec, spec_lengths, sid_src=src_se, sid_tgt=tgt_se, tau=tau)[0][0, 0] + .data.cpu() + .float() + .numpy() + ) + + return audio + + +def get_hparams_from_file(config_path): + with open(config_path, "r", encoding="utf-8") as f: + data = f.read() + config = json.loads(data) + + hparams = HParams(**config) + return hparams + + +class HParams: + def __init__(self, **kwargs): + for k, v in kwargs.items(): + if isinstance(v, dict): + v = HParams(**v) + self[k] = v + + def keys(self): + return self.__dict__.keys() + + def items(self): + return self.__dict__.items() + + def values(self): + return self.__dict__.values() + + def __len__(self): + return len(self.__dict__) + + def __getitem__(self, key): + return getattr(self, key) + + def __setitem__(self, key, value): + return setattr(self, key, value) + + def __contains__(self, key): + return key in self.__dict__ + + def __repr__(self): + return self.__dict__.__repr__() + + +MAX_WAV_VALUE = 32768.0 + + +def dynamic_range_compression_torch(x, C=1, clip_val=1e-5): + """ + PARAMS + ------ + C: compression factor + """ + return torch.log(torch.clamp(x, min=clip_val) * C) + + 
+def dynamic_range_decompression_torch(x, C=1): + """ + PARAMS + ------ + C: compression factor used to compress + """ + return torch.exp(x) / C + + +def spectral_normalize_torch(magnitudes): + output = dynamic_range_compression_torch(magnitudes) + return output + + +def spectral_de_normalize_torch(magnitudes): + output = dynamic_range_decompression_torch(magnitudes) + return output + + +mel_basis = {} +hann_window = {} + + +def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False): + if torch.min(y) < -1.1: + print("min value is ", torch.min(y)) + if torch.max(y) > 1.1: + print("max value is ", torch.max(y)) + + global hann_window + dtype_device = str(y.dtype) + "_" + str(y.device) + wnsize_dtype_device = str(win_size) + "_" + dtype_device + if wnsize_dtype_device not in hann_window: + hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) + + y = torch.nn.functional.pad( + y.unsqueeze(1), + (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), + mode="reflect", + ) + y = y.squeeze(1) + + spec = torch.stft( + y, + n_fft, + hop_length=hop_size, + win_length=win_size, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=False, + ) + + spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) + return spec + + +def spectrogram_torch_conv(y, n_fft, sampling_rate, hop_size, win_size, center=False): + # if torch.min(y) < -1.: + # print('min value is ', torch.min(y)) + # if torch.max(y) > 1.: + # print('max value is ', torch.max(y)) + + global hann_window + dtype_device = str(y.dtype) + "_" + str(y.device) + wnsize_dtype_device = str(win_size) + "_" + dtype_device + if wnsize_dtype_device not in hann_window: + hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) + + y = torch.nn.functional.pad( + y.unsqueeze(1), (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), mode="reflect" + ) + + # ******************** original ************************# + # y = y.squeeze(1) + # spec1 = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device], + # center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=False) + + # ******************** ConvSTFT ************************# + freq_cutoff = n_fft // 2 + 1 + fourier_basis = torch.view_as_real(torch.fft.fft(torch.eye(n_fft))) + forward_basis = fourier_basis[:freq_cutoff].permute(2, 0, 1).reshape(-1, 1, fourier_basis.shape[1]) + forward_basis = ( + forward_basis * torch.as_tensor(librosa.util.pad_center(torch.hann_window(win_size), size=n_fft)).float() + ) + + import torch.nn.functional as F + + # if center: + # signal = F.pad(y[:, None, None, :], (n_fft // 2, n_fft // 2, 0, 0), mode = 'reflect').squeeze(1) + assert center is False + + forward_transform_squared = F.conv1d(y, forward_basis.to(y.device), stride=hop_size) + spec2 = torch.stack( + [forward_transform_squared[:, :freq_cutoff, :], forward_transform_squared[:, freq_cutoff:, :]], dim=-1 + ) + + # ******************** Verification ************************# + spec1 = torch.stft( + y.squeeze(1), + n_fft, + hop_length=hop_size, + win_length=win_size, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=False, + ) + assert torch.allclose(spec1, spec2, atol=1e-4) + + spec = torch.sqrt(spec2.pow(2).sum(-1) + 1e-6) + return spec + + +def spec_to_mel_torch(spec, n_fft, 
num_mels, sampling_rate, fmin, fmax): + global mel_basis + dtype_device = str(spec.dtype) + "_" + str(spec.device) + fmax_dtype_device = str(fmax) + "_" + dtype_device + if fmax_dtype_device not in mel_basis: + mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax) + mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device) + spec = torch.matmul(mel_basis[fmax_dtype_device], spec) + spec = spectral_normalize_torch(spec) + return spec + + +def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False): + if torch.min(y) < -1.0: + print("min value is ", torch.min(y)) + if torch.max(y) > 1.0: + print("max value is ", torch.max(y)) + + global mel_basis, hann_window + dtype_device = str(y.dtype) + "_" + str(y.device) + fmax_dtype_device = str(fmax) + "_" + dtype_device + wnsize_dtype_device = str(win_size) + "_" + dtype_device + if fmax_dtype_device not in mel_basis: + mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax) + mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device) + if wnsize_dtype_device not in hann_window: + hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) + + y = torch.nn.functional.pad( + y.unsqueeze(1), + (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), + mode="reflect", + ) + y = y.squeeze(1) + + spec = torch.stft( + y, + n_fft, + hop_length=hop_size, + win_length=win_size, + window=hann_window[wnsize_dtype_device], + center=center, + pad_mode="reflect", + normalized=False, + onesided=True, + return_complex=False, + ) + + spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) + + spec = torch.matmul(mel_basis[fmax_dtype_device], spec) + spec = spectral_normalize_torch(spec) + + return spec diff --git a/TTS/vc/modules/openvoice/transforms.py b/TTS/vc/modules/openvoice/transforms.py new file mode 100644 index 0000000000..4270ebae3f --- /dev/null +++ b/TTS/vc/modules/openvoice/transforms.py @@ -0,0 +1,203 @@ +import numpy as np +import torch +from torch.nn import functional as F + +DEFAULT_MIN_BIN_WIDTH = 1e-3 +DEFAULT_MIN_BIN_HEIGHT = 1e-3 +DEFAULT_MIN_DERIVATIVE = 1e-3 + + +def piecewise_rational_quadratic_transform( + inputs, + unnormalized_widths, + unnormalized_heights, + unnormalized_derivatives, + inverse=False, + tails=None, + tail_bound=1.0, + min_bin_width=DEFAULT_MIN_BIN_WIDTH, + min_bin_height=DEFAULT_MIN_BIN_HEIGHT, + min_derivative=DEFAULT_MIN_DERIVATIVE, +): + if tails is None: + spline_fn = rational_quadratic_spline + spline_kwargs = {} + else: + spline_fn = unconstrained_rational_quadratic_spline + spline_kwargs = {"tails": tails, "tail_bound": tail_bound} + + outputs, logabsdet = spline_fn( + inputs=inputs, + unnormalized_widths=unnormalized_widths, + unnormalized_heights=unnormalized_heights, + unnormalized_derivatives=unnormalized_derivatives, + inverse=inverse, + min_bin_width=min_bin_width, + min_bin_height=min_bin_height, + min_derivative=min_derivative, + **spline_kwargs, + ) + return outputs, logabsdet + + +def searchsorted(bin_locations, inputs, eps=1e-6): + bin_locations[..., -1] += eps + return torch.sum(inputs[..., None] >= bin_locations, dim=-1) - 1 + + +def unconstrained_rational_quadratic_spline( + inputs, + unnormalized_widths, + unnormalized_heights, + unnormalized_derivatives, + inverse=False, + tails="linear", + tail_bound=1.0, + min_bin_width=DEFAULT_MIN_BIN_WIDTH, + min_bin_height=DEFAULT_MIN_BIN_HEIGHT, + min_derivative=DEFAULT_MIN_DERIVATIVE, +): + 
inside_interval_mask = (inputs >= -tail_bound) & (inputs <= tail_bound) + outside_interval_mask = ~inside_interval_mask + + outputs = torch.zeros_like(inputs) + logabsdet = torch.zeros_like(inputs) + + if tails == "linear": + unnormalized_derivatives = F.pad(unnormalized_derivatives, pad=(1, 1)) + constant = np.log(np.exp(1 - min_derivative) - 1) + unnormalized_derivatives[..., 0] = constant + unnormalized_derivatives[..., -1] = constant + + outputs[outside_interval_mask] = inputs[outside_interval_mask] + logabsdet[outside_interval_mask] = 0 + else: + raise RuntimeError("{} tails are not implemented.".format(tails)) + + ( + outputs[inside_interval_mask], + logabsdet[inside_interval_mask], + ) = rational_quadratic_spline( + inputs=inputs[inside_interval_mask], + unnormalized_widths=unnormalized_widths[inside_interval_mask, :], + unnormalized_heights=unnormalized_heights[inside_interval_mask, :], + unnormalized_derivatives=unnormalized_derivatives[inside_interval_mask, :], + inverse=inverse, + left=-tail_bound, + right=tail_bound, + bottom=-tail_bound, + top=tail_bound, + min_bin_width=min_bin_width, + min_bin_height=min_bin_height, + min_derivative=min_derivative, + ) + + return outputs, logabsdet + + +def rational_quadratic_spline( + inputs, + unnormalized_widths, + unnormalized_heights, + unnormalized_derivatives, + inverse=False, + left=0.0, + right=1.0, + bottom=0.0, + top=1.0, + min_bin_width=DEFAULT_MIN_BIN_WIDTH, + min_bin_height=DEFAULT_MIN_BIN_HEIGHT, + min_derivative=DEFAULT_MIN_DERIVATIVE, +): + if torch.min(inputs) < left or torch.max(inputs) > right: + raise ValueError("Input to a transform is not within its domain") + + num_bins = unnormalized_widths.shape[-1] + + if min_bin_width * num_bins > 1.0: + raise ValueError("Minimal bin width too large for the number of bins") + if min_bin_height * num_bins > 1.0: + raise ValueError("Minimal bin height too large for the number of bins") + + widths = F.softmax(unnormalized_widths, dim=-1) + widths = min_bin_width + (1 - min_bin_width * num_bins) * widths + cumwidths = torch.cumsum(widths, dim=-1) + cumwidths = F.pad(cumwidths, pad=(1, 0), mode="constant", value=0.0) + cumwidths = (right - left) * cumwidths + left + cumwidths[..., 0] = left + cumwidths[..., -1] = right + widths = cumwidths[..., 1:] - cumwidths[..., :-1] + + derivatives = min_derivative + F.softplus(unnormalized_derivatives) + + heights = F.softmax(unnormalized_heights, dim=-1) + heights = min_bin_height + (1 - min_bin_height * num_bins) * heights + cumheights = torch.cumsum(heights, dim=-1) + cumheights = F.pad(cumheights, pad=(1, 0), mode="constant", value=0.0) + cumheights = (top - bottom) * cumheights + bottom + cumheights[..., 0] = bottom + cumheights[..., -1] = top + heights = cumheights[..., 1:] - cumheights[..., :-1] + + if inverse: + bin_idx = searchsorted(cumheights, inputs)[..., None] + else: + bin_idx = searchsorted(cumwidths, inputs)[..., None] + + input_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0] + input_bin_widths = widths.gather(-1, bin_idx)[..., 0] + + input_cumheights = cumheights.gather(-1, bin_idx)[..., 0] + delta = heights / widths + input_delta = delta.gather(-1, bin_idx)[..., 0] + + input_derivatives = derivatives.gather(-1, bin_idx)[..., 0] + input_derivatives_plus_one = derivatives[..., 1:].gather(-1, bin_idx)[..., 0] + + input_heights = heights.gather(-1, bin_idx)[..., 0] + + if inverse: + a = (inputs - input_cumheights) * ( + input_derivatives + input_derivatives_plus_one - 2 * input_delta + ) + input_heights * (input_delta - 
input_derivatives) + b = input_heights * input_derivatives - (inputs - input_cumheights) * ( + input_derivatives + input_derivatives_plus_one - 2 * input_delta + ) + c = -input_delta * (inputs - input_cumheights) + + discriminant = b.pow(2) - 4 * a * c + assert (discriminant >= 0).all() + + root = (2 * c) / (-b - torch.sqrt(discriminant)) + outputs = root * input_bin_widths + input_cumwidths + + theta_one_minus_theta = root * (1 - root) + denominator = input_delta + ( + (input_derivatives + input_derivatives_plus_one - 2 * input_delta) * theta_one_minus_theta + ) + derivative_numerator = input_delta.pow(2) * ( + input_derivatives_plus_one * root.pow(2) + + 2 * input_delta * theta_one_minus_theta + + input_derivatives * (1 - root).pow(2) + ) + logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator) + + return outputs, -logabsdet + else: + theta = (inputs - input_cumwidths) / input_bin_widths + theta_one_minus_theta = theta * (1 - theta) + + numerator = input_heights * (input_delta * theta.pow(2) + input_derivatives * theta_one_minus_theta) + denominator = input_delta + ( + (input_derivatives + input_derivatives_plus_one - 2 * input_delta) * theta_one_minus_theta + ) + outputs = input_cumheights + numerator / denominator + + derivative_numerator = input_delta.pow(2) * ( + input_derivatives_plus_one * theta.pow(2) + + 2 * input_delta * theta_one_minus_theta + + input_derivatives * (1 - theta).pow(2) + ) + logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator) + + return outputs, logabsdet From 4124b9d663b4eea5e7034e96351fe5d4180cfb89 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 25 Jun 2024 22:28:41 +0200 Subject: [PATCH 214/255] feat(vits): add tau parameter to posterior encoder --- TTS/tts/layers/vits/networks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/TTS/tts/layers/vits/networks.py b/TTS/tts/layers/vits/networks.py index 50ed1024de..ab2ca5667a 100644 --- a/TTS/tts/layers/vits/networks.py +++ b/TTS/tts/layers/vits/networks.py @@ -256,7 +256,7 @@ def __init__( ) self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) - def forward(self, x, x_lengths, g=None): + def forward(self, x, x_lengths, g=None, tau=1.0): """ Shapes: - x: :math:`[B, C, T]` @@ -268,5 +268,5 @@ def forward(self, x, x_lengths, g=None): x = self.enc(x, x_mask, g=g) stats = self.proj(x) * x_mask mean, log_scale = torch.split(stats, self.out_channels, dim=1) - z = (mean + torch.randn_like(mean) * torch.exp(log_scale)) * x_mask + z = (mean + torch.randn_like(mean) * tau * torch.exp(log_scale)) * x_mask return z, mean, log_scale, x_mask From b97d5378a534acd7fa57f49cde955bec2e1a8085 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 20 Jun 2024 12:42:25 +0200 Subject: [PATCH 215/255] refactor(openvoice): remove duplicate and unused code --- TTS/vc/modules/openvoice/attentions.py | 423 --------------- TTS/vc/modules/openvoice/commons.py | 151 ------ TTS/vc/modules/openvoice/config.json | 57 -- TTS/vc/modules/openvoice/models.py | 368 +------------ TTS/vc/modules/openvoice/modules.py | 588 --------------------- TTS/vc/modules/openvoice/standalone_api.py | 342 ------------ TTS/vc/modules/openvoice/transforms.py | 203 ------- 7 files changed, 11 insertions(+), 2121 deletions(-) delete mode 100644 TTS/vc/modules/openvoice/attentions.py delete mode 100644 TTS/vc/modules/openvoice/commons.py delete mode 100644 TTS/vc/modules/openvoice/config.json delete mode 100644 TTS/vc/modules/openvoice/modules.py delete mode 100644 
TTS/vc/modules/openvoice/standalone_api.py delete mode 100644 TTS/vc/modules/openvoice/transforms.py diff --git a/TTS/vc/modules/openvoice/attentions.py b/TTS/vc/modules/openvoice/attentions.py deleted file mode 100644 index 73c5554c98..0000000000 --- a/TTS/vc/modules/openvoice/attentions.py +++ /dev/null @@ -1,423 +0,0 @@ -import math - -import torch -from torch import nn -from torch.nn import functional as F - -from TTS.vc.modules.openvoice import commons - - -class LayerNorm(nn.Module): - def __init__(self, channels, eps=1e-5): - super().__init__() - self.channels = channels - self.eps = eps - - self.gamma = nn.Parameter(torch.ones(channels)) - self.beta = nn.Parameter(torch.zeros(channels)) - - def forward(self, x): - x = x.transpose(1, -1) - x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps) - return x.transpose(1, -1) - - -@torch.jit.script -def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): - n_channels_int = n_channels[0] - in_act = input_a + input_b - t_act = torch.tanh(in_act[:, :n_channels_int, :]) - s_act = torch.sigmoid(in_act[:, n_channels_int:, :]) - acts = t_act * s_act - return acts - - -class Encoder(nn.Module): - def __init__( - self, - hidden_channels, - filter_channels, - n_heads, - n_layers, - kernel_size=1, - p_dropout=0.0, - window_size=4, - isflow=True, - **kwargs, - ): - super().__init__() - self.hidden_channels = hidden_channels - self.filter_channels = filter_channels - self.n_heads = n_heads - self.n_layers = n_layers - self.kernel_size = kernel_size - self.p_dropout = p_dropout - self.window_size = window_size - # if isflow: - # cond_layer = torch.nn.Conv1d(256, 2*hidden_channels*n_layers, 1) - # self.cond_pre = torch.nn.Conv1d(hidden_channels, 2*hidden_channels, 1) - # self.cond_layer = weight_norm(cond_layer, name='weight') - # self.gin_channels = 256 - self.cond_layer_idx = self.n_layers - if "gin_channels" in kwargs: - self.gin_channels = kwargs["gin_channels"] - if self.gin_channels != 0: - self.spk_emb_linear = nn.Linear(self.gin_channels, self.hidden_channels) - # vits2 says 3rd block, so idx is 2 by default - self.cond_layer_idx = kwargs["cond_layer_idx"] if "cond_layer_idx" in kwargs else 2 - # logging.debug(self.gin_channels, self.cond_layer_idx) - assert self.cond_layer_idx < self.n_layers, "cond_layer_idx should be less than n_layers" - self.drop = nn.Dropout(p_dropout) - self.attn_layers = nn.ModuleList() - self.norm_layers_1 = nn.ModuleList() - self.ffn_layers = nn.ModuleList() - self.norm_layers_2 = nn.ModuleList() - - for i in range(self.n_layers): - self.attn_layers.append( - MultiHeadAttention( - hidden_channels, - hidden_channels, - n_heads, - p_dropout=p_dropout, - window_size=window_size, - ) - ) - self.norm_layers_1.append(LayerNorm(hidden_channels)) - self.ffn_layers.append( - FFN( - hidden_channels, - hidden_channels, - filter_channels, - kernel_size, - p_dropout=p_dropout, - ) - ) - self.norm_layers_2.append(LayerNorm(hidden_channels)) - - def forward(self, x, x_mask, g=None): - attn_mask = x_mask.unsqueeze(2) * x_mask.unsqueeze(-1) - x = x * x_mask - for i in range(self.n_layers): - if i == self.cond_layer_idx and g is not None: - g = self.spk_emb_linear(g.transpose(1, 2)) - g = g.transpose(1, 2) - x = x + g - x = x * x_mask - y = self.attn_layers[i](x, x, attn_mask) - y = self.drop(y) - x = self.norm_layers_1[i](x + y) - - y = self.ffn_layers[i](x, x_mask) - y = self.drop(y) - x = self.norm_layers_2[i](x + y) - x = x * x_mask - return x - - -class Decoder(nn.Module): - def __init__( - self, - 
hidden_channels, - filter_channels, - n_heads, - n_layers, - kernel_size=1, - p_dropout=0.0, - proximal_bias=False, - proximal_init=True, - **kwargs, - ): - super().__init__() - self.hidden_channels = hidden_channels - self.filter_channels = filter_channels - self.n_heads = n_heads - self.n_layers = n_layers - self.kernel_size = kernel_size - self.p_dropout = p_dropout - self.proximal_bias = proximal_bias - self.proximal_init = proximal_init - - self.drop = nn.Dropout(p_dropout) - self.self_attn_layers = nn.ModuleList() - self.norm_layers_0 = nn.ModuleList() - self.encdec_attn_layers = nn.ModuleList() - self.norm_layers_1 = nn.ModuleList() - self.ffn_layers = nn.ModuleList() - self.norm_layers_2 = nn.ModuleList() - for i in range(self.n_layers): - self.self_attn_layers.append( - MultiHeadAttention( - hidden_channels, - hidden_channels, - n_heads, - p_dropout=p_dropout, - proximal_bias=proximal_bias, - proximal_init=proximal_init, - ) - ) - self.norm_layers_0.append(LayerNorm(hidden_channels)) - self.encdec_attn_layers.append( - MultiHeadAttention(hidden_channels, hidden_channels, n_heads, p_dropout=p_dropout) - ) - self.norm_layers_1.append(LayerNorm(hidden_channels)) - self.ffn_layers.append( - FFN( - hidden_channels, - hidden_channels, - filter_channels, - kernel_size, - p_dropout=p_dropout, - causal=True, - ) - ) - self.norm_layers_2.append(LayerNorm(hidden_channels)) - - def forward(self, x, x_mask, h, h_mask): - """ - x: decoder input - h: encoder output - """ - self_attn_mask = commons.subsequent_mask(x_mask.size(2)).to(device=x.device, dtype=x.dtype) - encdec_attn_mask = h_mask.unsqueeze(2) * x_mask.unsqueeze(-1) - x = x * x_mask - for i in range(self.n_layers): - y = self.self_attn_layers[i](x, x, self_attn_mask) - y = self.drop(y) - x = self.norm_layers_0[i](x + y) - - y = self.encdec_attn_layers[i](x, h, encdec_attn_mask) - y = self.drop(y) - x = self.norm_layers_1[i](x + y) - - y = self.ffn_layers[i](x, x_mask) - y = self.drop(y) - x = self.norm_layers_2[i](x + y) - x = x * x_mask - return x - - -class MultiHeadAttention(nn.Module): - def __init__( - self, - channels, - out_channels, - n_heads, - p_dropout=0.0, - window_size=None, - heads_share=True, - block_length=None, - proximal_bias=False, - proximal_init=False, - ): - super().__init__() - assert channels % n_heads == 0 - - self.channels = channels - self.out_channels = out_channels - self.n_heads = n_heads - self.p_dropout = p_dropout - self.window_size = window_size - self.heads_share = heads_share - self.block_length = block_length - self.proximal_bias = proximal_bias - self.proximal_init = proximal_init - self.attn = None - - self.k_channels = channels // n_heads - self.conv_q = nn.Conv1d(channels, channels, 1) - self.conv_k = nn.Conv1d(channels, channels, 1) - self.conv_v = nn.Conv1d(channels, channels, 1) - self.conv_o = nn.Conv1d(channels, out_channels, 1) - self.drop = nn.Dropout(p_dropout) - - if window_size is not None: - n_heads_rel = 1 if heads_share else n_heads - rel_stddev = self.k_channels**-0.5 - self.emb_rel_k = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev) - self.emb_rel_v = nn.Parameter(torch.randn(n_heads_rel, window_size * 2 + 1, self.k_channels) * rel_stddev) - - nn.init.xavier_uniform_(self.conv_q.weight) - nn.init.xavier_uniform_(self.conv_k.weight) - nn.init.xavier_uniform_(self.conv_v.weight) - if proximal_init: - with torch.no_grad(): - self.conv_k.weight.copy_(self.conv_q.weight) - self.conv_k.bias.copy_(self.conv_q.bias) - - def forward(self, x, c, 
attn_mask=None): - q = self.conv_q(x) - k = self.conv_k(c) - v = self.conv_v(c) - - x, self.attn = self.attention(q, k, v, mask=attn_mask) - - x = self.conv_o(x) - return x - - def attention(self, query, key, value, mask=None): - # reshape [b, d, t] -> [b, n_h, t, d_k] - b, d, t_s, t_t = (*key.size(), query.size(2)) - query = query.view(b, self.n_heads, self.k_channels, t_t).transpose(2, 3) - key = key.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3) - value = value.view(b, self.n_heads, self.k_channels, t_s).transpose(2, 3) - - scores = torch.matmul(query / math.sqrt(self.k_channels), key.transpose(-2, -1)) - if self.window_size is not None: - assert t_s == t_t, "Relative attention is only available for self-attention." - key_relative_embeddings = self._get_relative_embeddings(self.emb_rel_k, t_s) - rel_logits = self._matmul_with_relative_keys(query / math.sqrt(self.k_channels), key_relative_embeddings) - scores_local = self._relative_position_to_absolute_position(rel_logits) - scores = scores + scores_local - if self.proximal_bias: - assert t_s == t_t, "Proximal bias is only available for self-attention." - scores = scores + self._attention_bias_proximal(t_s).to(device=scores.device, dtype=scores.dtype) - if mask is not None: - scores = scores.masked_fill(mask == 0, -1e4) - if self.block_length is not None: - assert t_s == t_t, "Local attention is only available for self-attention." - block_mask = torch.ones_like(scores).triu(-self.block_length).tril(self.block_length) - scores = scores.masked_fill(block_mask == 0, -1e4) - p_attn = F.softmax(scores, dim=-1) # [b, n_h, t_t, t_s] - p_attn = self.drop(p_attn) - output = torch.matmul(p_attn, value) - if self.window_size is not None: - relative_weights = self._absolute_position_to_relative_position(p_attn) - value_relative_embeddings = self._get_relative_embeddings(self.emb_rel_v, t_s) - output = output + self._matmul_with_relative_values(relative_weights, value_relative_embeddings) - output = output.transpose(2, 3).contiguous().view(b, d, t_t) # [b, n_h, t_t, d_k] -> [b, d, t_t] - return output, p_attn - - def _matmul_with_relative_values(self, x, y): - """ - x: [b, h, l, m] - y: [h or 1, m, d] - ret: [b, h, l, d] - """ - ret = torch.matmul(x, y.unsqueeze(0)) - return ret - - def _matmul_with_relative_keys(self, x, y): - """ - x: [b, h, l, d] - y: [h or 1, m, d] - ret: [b, h, l, m] - """ - ret = torch.matmul(x, y.unsqueeze(0).transpose(-2, -1)) - return ret - - def _get_relative_embeddings(self, relative_embeddings, length): - 2 * self.window_size + 1 - # Pad first before slice to avoid using cond ops. - pad_length = max(length - (self.window_size + 1), 0) - slice_start_position = max((self.window_size + 1) - length, 0) - slice_end_position = slice_start_position + 2 * length - 1 - if pad_length > 0: - padded_relative_embeddings = F.pad( - relative_embeddings, - commons.convert_pad_shape([[0, 0], [pad_length, pad_length], [0, 0]]), - ) - else: - padded_relative_embeddings = relative_embeddings - used_relative_embeddings = padded_relative_embeddings[:, slice_start_position:slice_end_position] - return used_relative_embeddings - - def _relative_position_to_absolute_position(self, x): - """ - x: [b, h, l, 2*l-1] - ret: [b, h, l, l] - """ - batch, heads, length, _ = x.size() - # Concat columns of pad to shift from relative to absolute indexing. - x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, 1]])) - - # Concat extra elements so to add up to shape (len+1, 2*len-1). 
- x_flat = x.view([batch, heads, length * 2 * length]) - x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [0, length - 1]])) - - # Reshape and slice out the padded elements. - x_final = x_flat.view([batch, heads, length + 1, 2 * length - 1])[:, :, :length, length - 1 :] - return x_final - - def _absolute_position_to_relative_position(self, x): - """ - x: [b, h, l, l] - ret: [b, h, l, 2*l-1] - """ - batch, heads, length, _ = x.size() - # pad along column - x = F.pad(x, commons.convert_pad_shape([[0, 0], [0, 0], [0, 0], [0, length - 1]])) - x_flat = x.view([batch, heads, length**2 + length * (length - 1)]) - # add 0's in the beginning that will skew the elements after reshape - x_flat = F.pad(x_flat, commons.convert_pad_shape([[0, 0], [0, 0], [length, 0]])) - x_final = x_flat.view([batch, heads, length, 2 * length])[:, :, :, 1:] - return x_final - - def _attention_bias_proximal(self, length): - """Bias for self-attention to encourage attention to close positions. - Args: - length: an integer scalar. - Returns: - a Tensor with shape [1, 1, length, length] - """ - r = torch.arange(length, dtype=torch.float32) - diff = torch.unsqueeze(r, 0) - torch.unsqueeze(r, 1) - return torch.unsqueeze(torch.unsqueeze(-torch.log1p(torch.abs(diff)), 0), 0) - - -class FFN(nn.Module): - def __init__( - self, - in_channels, - out_channels, - filter_channels, - kernel_size, - p_dropout=0.0, - activation=None, - causal=False, - ): - super().__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.filter_channels = filter_channels - self.kernel_size = kernel_size - self.p_dropout = p_dropout - self.activation = activation - self.causal = causal - - if causal: - self.padding = self._causal_padding - else: - self.padding = self._same_padding - - self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size) - self.conv_2 = nn.Conv1d(filter_channels, out_channels, kernel_size) - self.drop = nn.Dropout(p_dropout) - - def forward(self, x, x_mask): - x = self.conv_1(self.padding(x * x_mask)) - if self.activation == "gelu": - x = x * torch.sigmoid(1.702 * x) - else: - x = torch.relu(x) - x = self.drop(x) - x = self.conv_2(self.padding(x * x_mask)) - return x * x_mask - - def _causal_padding(self, x): - if self.kernel_size == 1: - return x - pad_l = self.kernel_size - 1 - pad_r = 0 - padding = [[0, 0], [0, 0], [pad_l, pad_r]] - x = F.pad(x, commons.convert_pad_shape(padding)) - return x - - def _same_padding(self, x): - if self.kernel_size == 1: - return x - pad_l = (self.kernel_size - 1) // 2 - pad_r = self.kernel_size // 2 - padding = [[0, 0], [0, 0], [pad_l, pad_r]] - x = F.pad(x, commons.convert_pad_shape(padding)) - return x diff --git a/TTS/vc/modules/openvoice/commons.py b/TTS/vc/modules/openvoice/commons.py deleted file mode 100644 index 123ee7e156..0000000000 --- a/TTS/vc/modules/openvoice/commons.py +++ /dev/null @@ -1,151 +0,0 @@ -import math - -import torch -from torch.nn import functional as F - - -def init_weights(m, mean=0.0, std=0.01): - classname = m.__class__.__name__ - if classname.find("Conv") != -1: - m.weight.data.normal_(mean, std) - - -def get_padding(kernel_size, dilation=1): - return int((kernel_size * dilation - dilation) / 2) - - -def intersperse(lst, item): - result = [item] * (len(lst) * 2 + 1) - result[1::2] = lst - return result - - -def kl_divergence(m_p, logs_p, m_q, logs_q): - """KL(P||Q)""" - kl = (logs_q - logs_p) - 0.5 - kl += 0.5 * (torch.exp(2.0 * logs_p) + ((m_p - m_q) ** 2)) * torch.exp(-2.0 * logs_q) - return kl - - -def 
rand_gumbel(shape): - """Sample from the Gumbel distribution, protect from overflows.""" - uniform_samples = torch.rand(shape) * 0.99998 + 0.00001 - return -torch.log(-torch.log(uniform_samples)) - - -def rand_gumbel_like(x): - g = rand_gumbel(x.size()).to(dtype=x.dtype, device=x.device) - return g - - -def slice_segments(x, ids_str, segment_size=4): - ret = torch.zeros_like(x[:, :, :segment_size]) - for i in range(x.size(0)): - idx_str = ids_str[i] - idx_end = idx_str + segment_size - ret[i] = x[i, :, idx_str:idx_end] - return ret - - -def rand_slice_segments(x, x_lengths=None, segment_size=4): - b, d, t = x.size() - if x_lengths is None: - x_lengths = t - ids_str_max = x_lengths - segment_size + 1 - ids_str = (torch.rand([b]).to(device=x.device) * ids_str_max).to(dtype=torch.long) - ret = slice_segments(x, ids_str, segment_size) - return ret, ids_str - - -def get_timing_signal_1d(length, channels, min_timescale=1.0, max_timescale=1.0e4): - position = torch.arange(length, dtype=torch.float) - num_timescales = channels // 2 - log_timescale_increment = math.log(float(max_timescale) / float(min_timescale)) / (num_timescales - 1) - inv_timescales = min_timescale * torch.exp( - torch.arange(num_timescales, dtype=torch.float) * -log_timescale_increment - ) - scaled_time = position.unsqueeze(0) * inv_timescales.unsqueeze(1) - signal = torch.cat([torch.sin(scaled_time), torch.cos(scaled_time)], 0) - signal = F.pad(signal, [0, 0, 0, channels % 2]) - signal = signal.view(1, channels, length) - return signal - - -def add_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4): - b, channels, length = x.size() - signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale) - return x + signal.to(dtype=x.dtype, device=x.device) - - -def cat_timing_signal_1d(x, min_timescale=1.0, max_timescale=1.0e4, axis=1): - b, channels, length = x.size() - signal = get_timing_signal_1d(length, channels, min_timescale, max_timescale) - return torch.cat([x, signal.to(dtype=x.dtype, device=x.device)], axis) - - -def subsequent_mask(length): - mask = torch.tril(torch.ones(length, length)).unsqueeze(0).unsqueeze(0) - return mask - - -@torch.jit.script -def fused_add_tanh_sigmoid_multiply(input_a, input_b, n_channels): - n_channels_int = n_channels[0] - in_act = input_a + input_b - t_act = torch.tanh(in_act[:, :n_channels_int, :]) - s_act = torch.sigmoid(in_act[:, n_channels_int:, :]) - acts = t_act * s_act - return acts - - -def convert_pad_shape(pad_shape): - layer = pad_shape[::-1] - pad_shape = [item for sublist in layer for item in sublist] - return pad_shape - - -def shift_1d(x): - x = F.pad(x, convert_pad_shape([[0, 0], [0, 0], [1, 0]]))[:, :, :-1] - return x - - -def sequence_mask(length, max_length=None): - if max_length is None: - max_length = length.max() - x = torch.arange(max_length, dtype=length.dtype, device=length.device) - return x.unsqueeze(0) < length.unsqueeze(1) - - -def generate_path(duration, mask): - """ - duration: [b, 1, t_x] - mask: [b, 1, t_y, t_x] - """ - - b, _, t_y, t_x = mask.shape - cum_duration = torch.cumsum(duration, -1) - - cum_duration_flat = cum_duration.view(b * t_x) - path = sequence_mask(cum_duration_flat, t_y).to(mask.dtype) - path = path.view(b, t_x, t_y) - path = path - F.pad(path, convert_pad_shape([[0, 0], [1, 0], [0, 0]]))[:, :-1] - path = path.unsqueeze(1).transpose(2, 3) * mask - return path - - -def clip_grad_value_(parameters, clip_value, norm_type=2): - if isinstance(parameters, torch.Tensor): - parameters = [parameters] - parameters = 
list(filter(lambda p: p.grad is not None, parameters)) - norm_type = float(norm_type) - if clip_value is not None: - clip_value = float(clip_value) - - total_norm = 0 - for p in parameters: - param_norm = p.grad.data.norm(norm_type) - total_norm += param_norm.item() ** norm_type - if clip_value is not None: - p.grad.data.clamp_(min=-clip_value, max=clip_value) - total_norm = total_norm ** (1.0 / norm_type) - return total_norm diff --git a/TTS/vc/modules/openvoice/config.json b/TTS/vc/modules/openvoice/config.json deleted file mode 100644 index 3e33566b0d..0000000000 --- a/TTS/vc/modules/openvoice/config.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "_version_": "v2", - "data": { - "sampling_rate": 22050, - "filter_length": 1024, - "hop_length": 256, - "win_length": 1024, - "n_speakers": 0 - }, - "model": { - "zero_g": true, - "inter_channels": 192, - "hidden_channels": 192, - "filter_channels": 768, - "n_heads": 2, - "n_layers": 6, - "kernel_size": 3, - "p_dropout": 0.1, - "resblock": "1", - "resblock_kernel_sizes": [ - 3, - 7, - 11 - ], - "resblock_dilation_sizes": [ - [ - 1, - 3, - 5 - ], - [ - 1, - 3, - 5 - ], - [ - 1, - 3, - 5 - ] - ], - "upsample_rates": [ - 8, - 8, - 2, - 2 - ], - "upsample_initial_channel": 512, - "upsample_kernel_sizes": [ - 16, - 16, - 4, - 4 - ], - "gin_channels": 256 - } -} \ No newline at end of file diff --git a/TTS/vc/modules/openvoice/models.py b/TTS/vc/modules/openvoice/models.py index c1ae7574ce..89a1c3a40c 100644 --- a/TTS/vc/modules/openvoice/models.py +++ b/TTS/vc/modules/openvoice/models.py @@ -1,276 +1,9 @@ -import math - import torch from torch import nn -from torch.nn import Conv1d, ConvTranspose1d from torch.nn import functional as F -from torch.nn.utils import remove_weight_norm, weight_norm - -from TTS.vc.modules.openvoice import attentions, commons, modules -from TTS.vc.modules.openvoice.commons import init_weights - - -class TextEncoder(nn.Module): - def __init__( - self, n_vocab, out_channels, hidden_channels, filter_channels, n_heads, n_layers, kernel_size, p_dropout - ): - super().__init__() - self.n_vocab = n_vocab - self.out_channels = out_channels - self.hidden_channels = hidden_channels - self.filter_channels = filter_channels - self.n_heads = n_heads - self.n_layers = n_layers - self.kernel_size = kernel_size - self.p_dropout = p_dropout - - self.emb = nn.Embedding(n_vocab, hidden_channels) - nn.init.normal_(self.emb.weight, 0.0, hidden_channels**-0.5) - - self.encoder = attentions.Encoder(hidden_channels, filter_channels, n_heads, n_layers, kernel_size, p_dropout) - self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) - - def forward(self, x, x_lengths): - x = self.emb(x) * math.sqrt(self.hidden_channels) # [b, t, h] - x = torch.transpose(x, 1, -1) # [b, h, t] - x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) - - x = self.encoder(x * x_mask, x_mask) - stats = self.proj(x) * x_mask - - m, logs = torch.split(stats, self.out_channels, dim=1) - return x, m, logs, x_mask - - -class DurationPredictor(nn.Module): - def __init__(self, in_channels, filter_channels, kernel_size, p_dropout, gin_channels=0): - super().__init__() - - self.in_channels = in_channels - self.filter_channels = filter_channels - self.kernel_size = kernel_size - self.p_dropout = p_dropout - self.gin_channels = gin_channels - - self.drop = nn.Dropout(p_dropout) - self.conv_1 = nn.Conv1d(in_channels, filter_channels, kernel_size, padding=kernel_size // 2) - self.norm_1 = modules.LayerNorm(filter_channels) - self.conv_2 = 
nn.Conv1d(filter_channels, filter_channels, kernel_size, padding=kernel_size // 2) - self.norm_2 = modules.LayerNorm(filter_channels) - self.proj = nn.Conv1d(filter_channels, 1, 1) - - if gin_channels != 0: - self.cond = nn.Conv1d(gin_channels, in_channels, 1) - - def forward(self, x, x_mask, g=None): - x = torch.detach(x) - if g is not None: - g = torch.detach(g) - x = x + self.cond(g) - x = self.conv_1(x * x_mask) - x = torch.relu(x) - x = self.norm_1(x) - x = self.drop(x) - x = self.conv_2(x * x_mask) - x = torch.relu(x) - x = self.norm_2(x) - x = self.drop(x) - x = self.proj(x * x_mask) - return x * x_mask - - -class StochasticDurationPredictor(nn.Module): - def __init__(self, in_channels, filter_channels, kernel_size, p_dropout, n_flows=4, gin_channels=0): - super().__init__() - filter_channels = in_channels # it needs to be removed from future version. - self.in_channels = in_channels - self.filter_channels = filter_channels - self.kernel_size = kernel_size - self.p_dropout = p_dropout - self.n_flows = n_flows - self.gin_channels = gin_channels - - self.log_flow = modules.Log() - self.flows = nn.ModuleList() - self.flows.append(modules.ElementwiseAffine(2)) - for i in range(n_flows): - self.flows.append(modules.ConvFlow(2, filter_channels, kernel_size, n_layers=3)) - self.flows.append(modules.Flip()) - - self.post_pre = nn.Conv1d(1, filter_channels, 1) - self.post_proj = nn.Conv1d(filter_channels, filter_channels, 1) - self.post_convs = modules.DDSConv(filter_channels, kernel_size, n_layers=3, p_dropout=p_dropout) - self.post_flows = nn.ModuleList() - self.post_flows.append(modules.ElementwiseAffine(2)) - for i in range(4): - self.post_flows.append(modules.ConvFlow(2, filter_channels, kernel_size, n_layers=3)) - self.post_flows.append(modules.Flip()) - - self.pre = nn.Conv1d(in_channels, filter_channels, 1) - self.proj = nn.Conv1d(filter_channels, filter_channels, 1) - self.convs = modules.DDSConv(filter_channels, kernel_size, n_layers=3, p_dropout=p_dropout) - if gin_channels != 0: - self.cond = nn.Conv1d(gin_channels, filter_channels, 1) - - def forward(self, x, x_mask, w=None, g=None, reverse=False, noise_scale=1.0): - x = torch.detach(x) - x = self.pre(x) - if g is not None: - g = torch.detach(g) - x = x + self.cond(g) - x = self.convs(x, x_mask) - x = self.proj(x) * x_mask - - if not reverse: - flows = self.flows - assert w is not None - - logdet_tot_q = 0 - h_w = self.post_pre(w) - h_w = self.post_convs(h_w, x_mask) - h_w = self.post_proj(h_w) * x_mask - e_q = torch.randn(w.size(0), 2, w.size(2)).to(device=x.device, dtype=x.dtype) * x_mask - z_q = e_q - for flow in self.post_flows: - z_q, logdet_q = flow(z_q, x_mask, g=(x + h_w)) - logdet_tot_q += logdet_q - z_u, z1 = torch.split(z_q, [1, 1], 1) - u = torch.sigmoid(z_u) * x_mask - z0 = (w - u) * x_mask - logdet_tot_q += torch.sum((F.logsigmoid(z_u) + F.logsigmoid(-z_u)) * x_mask, [1, 2]) - logq = torch.sum(-0.5 * (math.log(2 * math.pi) + (e_q**2)) * x_mask, [1, 2]) - logdet_tot_q - - logdet_tot = 0 - z0, logdet = self.log_flow(z0, x_mask) - logdet_tot += logdet - z = torch.cat([z0, z1], 1) - for flow in flows: - z, logdet = flow(z, x_mask, g=x, reverse=reverse) - logdet_tot = logdet_tot + logdet - nll = torch.sum(0.5 * (math.log(2 * math.pi) + (z**2)) * x_mask, [1, 2]) - logdet_tot - return nll + logq # [b] - else: - flows = list(reversed(self.flows)) - flows = flows[:-2] + [flows[-1]] # remove a useless vflow - z = torch.randn(x.size(0), 2, x.size(2)).to(device=x.device, dtype=x.dtype) * noise_scale - for flow in flows: - z = 
flow(z, x_mask, g=x, reverse=reverse) - z0, z1 = torch.split(z, [1, 1], 1) - logw = z0 - return logw - - -class PosteriorEncoder(nn.Module): - def __init__( - self, - in_channels, - out_channels, - hidden_channels, - kernel_size, - dilation_rate, - n_layers, - gin_channels=0, - ): - super().__init__() - self.in_channels = in_channels - self.out_channels = out_channels - self.hidden_channels = hidden_channels - self.kernel_size = kernel_size - self.dilation_rate = dilation_rate - self.n_layers = n_layers - self.gin_channels = gin_channels - - self.pre = nn.Conv1d(in_channels, hidden_channels, 1) - self.enc = modules.WN( - hidden_channels, - kernel_size, - dilation_rate, - n_layers, - gin_channels=gin_channels, - ) - self.proj = nn.Conv1d(hidden_channels, out_channels * 2, 1) - - def forward(self, x, x_lengths, g=None, tau=1.0): - x_mask = torch.unsqueeze(commons.sequence_mask(x_lengths, x.size(2)), 1).to(x.dtype) - x = self.pre(x) * x_mask - x = self.enc(x, x_mask, g=g) - stats = self.proj(x) * x_mask - m, logs = torch.split(stats, self.out_channels, dim=1) - z = (m + torch.randn_like(m) * tau * torch.exp(logs)) * x_mask - return z, m, logs, x_mask - - -class Generator(torch.nn.Module): - def __init__( - self, - initial_channel, - resblock, - resblock_kernel_sizes, - resblock_dilation_sizes, - upsample_rates, - upsample_initial_channel, - upsample_kernel_sizes, - gin_channels=0, - ): - super(Generator, self).__init__() - self.num_kernels = len(resblock_kernel_sizes) - self.num_upsamples = len(upsample_rates) - self.conv_pre = Conv1d(initial_channel, upsample_initial_channel, 7, 1, padding=3) - resblock = modules.ResBlock1 if resblock == "1" else modules.ResBlock2 - - self.ups = nn.ModuleList() - for i, (u, k) in enumerate(zip(upsample_rates, upsample_kernel_sizes)): - self.ups.append( - weight_norm( - ConvTranspose1d( - upsample_initial_channel // (2**i), - upsample_initial_channel // (2 ** (i + 1)), - k, - u, - padding=(k - u) // 2, - ) - ) - ) - - self.resblocks = nn.ModuleList() - for i in range(len(self.ups)): - ch = upsample_initial_channel // (2 ** (i + 1)) - for j, (k, d) in enumerate(zip(resblock_kernel_sizes, resblock_dilation_sizes)): - self.resblocks.append(resblock(ch, k, d)) - - self.conv_post = Conv1d(ch, 1, 7, 1, padding=3, bias=False) - self.ups.apply(init_weights) - - if gin_channels != 0: - self.cond = nn.Conv1d(gin_channels, upsample_initial_channel, 1) - - def forward(self, x, g=None): - x = self.conv_pre(x) - if g is not None: - x = x + self.cond(g) - for i in range(self.num_upsamples): - x = F.leaky_relu(x, modules.LRELU_SLOPE) - x = self.ups[i](x) - xs = None - for j in range(self.num_kernels): - if xs is None: - xs = self.resblocks[i * self.num_kernels + j](x) - else: - xs += self.resblocks[i * self.num_kernels + j](x) - x = xs / self.num_kernels - x = F.leaky_relu(x) - x = self.conv_post(x) - x = torch.tanh(x) - - return x - - def remove_weight_norm(self): - print("Removing weight norm...") - for layer in self.ups: - remove_weight_norm(layer) - for layer in self.resblocks: - layer.remove_weight_norm() +from TTS.tts.layers.vits.networks import PosteriorEncoder +from TTS.vc.models.freevc import Generator, ResidualCouplingBlock class ReferenceEncoder(nn.Module): @@ -286,7 +19,7 @@ def __init__(self, spec_channels, gin_channels=0, layernorm=True): K = len(ref_enc_filters) filters = [1] + ref_enc_filters convs = [ - weight_norm( + torch.nn.utils.parametrizations.weight_norm( nn.Conv2d( in_channels=filters[i], out_channels=filters[i + 1], @@ -311,7 +44,7 @@ def 
__init__(self, spec_channels, gin_channels=0, layernorm=True): else: self.layernorm = None - def forward(self, inputs, mask=None): + def forward(self, inputs): N = inputs.size(0) out = inputs.view(N, 1, -1, self.spec_channels) # [N, 1, Ty, n_freqs] @@ -320,7 +53,6 @@ def forward(self, inputs, mask=None): for conv in self.convs: out = conv(out) - # out = wn(out) out = F.relu(out) # [N, 128, Ty//2^K, n_mels//2^K] out = out.transpose(1, 2) # [N, Ty//2^K, 128, n_mels//2^K] @@ -329,52 +61,16 @@ def forward(self, inputs, mask=None): out = out.contiguous().view(N, T, -1) # [N, Ty//2^K, 128*n_mels//2^K] self.gru.flatten_parameters() - memory, out = self.gru(out) # out --- [1, N, 128] + _memory, out = self.gru(out) # out --- [1, N, 128] return self.proj(out.squeeze(0)) def calculate_channels(self, L, kernel_size, stride, pad, n_convs): - for i in range(n_convs): + for _ in range(n_convs): L = (L - kernel_size + 2 * pad) // stride + 1 return L -class ResidualCouplingBlock(nn.Module): - def __init__(self, channels, hidden_channels, kernel_size, dilation_rate, n_layers, n_flows=4, gin_channels=0): - super().__init__() - self.channels = channels - self.hidden_channels = hidden_channels - self.kernel_size = kernel_size - self.dilation_rate = dilation_rate - self.n_layers = n_layers - self.n_flows = n_flows - self.gin_channels = gin_channels - - self.flows = nn.ModuleList() - for i in range(n_flows): - self.flows.append( - modules.ResidualCouplingLayer( - channels, - hidden_channels, - kernel_size, - dilation_rate, - n_layers, - gin_channels=gin_channels, - mean_only=True, - ) - ) - self.flows.append(modules.Flip()) - - def forward(self, x, x_mask, g=None, reverse=False): - if not reverse: - for flow in self.flows: - x, _ = flow(x, x_mask, g=g, reverse=reverse) - else: - for flow in reversed(self.flows): - x = flow(x, x_mask, g=g, reverse=reverse) - return x - - class SynthesizerTrn(nn.Module): """ Synthesizer for Training @@ -382,22 +78,16 @@ class SynthesizerTrn(nn.Module): def __init__( self, - n_vocab, spec_channels, inter_channels, hidden_channels, - filter_channels, - n_heads, - n_layers, - kernel_size, - p_dropout, resblock, resblock_kernel_sizes, resblock_dilation_sizes, upsample_rates, upsample_initial_channel, upsample_kernel_sizes, - n_speakers=256, + n_speakers=0, gin_channels=256, zero_g=False, **kwargs, @@ -421,53 +111,17 @@ def __init__( 5, 1, 16, - gin_channels=gin_channels, + cond_channels=gin_channels, ) self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels) self.n_speakers = n_speakers - if n_speakers == 0: - self.ref_enc = ReferenceEncoder(spec_channels, gin_channels) - else: - self.enc_p = TextEncoder( - n_vocab, inter_channels, hidden_channels, filter_channels, n_heads, n_layers, kernel_size, p_dropout - ) - self.sdp = StochasticDurationPredictor(hidden_channels, 192, 3, 0.5, 4, gin_channels=gin_channels) - self.dp = DurationPredictor(hidden_channels, 256, 3, 0.5, gin_channels=gin_channels) - self.emb_g = nn.Embedding(n_speakers, gin_channels) + if n_speakers != 0: + raise ValueError("OpenVoice inference only supports n_speaker==0") + self.ref_enc = ReferenceEncoder(spec_channels, gin_channels) self.zero_g = zero_g - def infer( - self, x, x_lengths, sid=None, noise_scale=1, length_scale=1, noise_scale_w=1.0, sdp_ratio=0.2, max_len=None - ): - x, m_p, logs_p, x_mask = self.enc_p(x, x_lengths) - if self.n_speakers > 0: - g = self.emb_g(sid).unsqueeze(-1) # [b, h, 1] - else: - g = None - - logw = self.sdp(x, x_mask, g=g, reverse=True, 
noise_scale=noise_scale_w) * sdp_ratio + self.dp( - x, x_mask, g=g - ) * (1 - sdp_ratio) - - w = torch.exp(logw) * x_mask * length_scale - w_ceil = torch.ceil(w) - y_lengths = torch.clamp_min(torch.sum(w_ceil, [1, 2]), 1).long() - y_mask = torch.unsqueeze(commons.sequence_mask(y_lengths, None), 1).to(x_mask.dtype) - attn_mask = torch.unsqueeze(x_mask, 2) * torch.unsqueeze(y_mask, -1) - attn = commons.generate_path(w_ceil, attn_mask) - - m_p = torch.matmul(attn.squeeze(1), m_p.transpose(1, 2)).transpose(1, 2) # [b, t', t], [b, t, d] -> [b, d, t'] - logs_p = torch.matmul(attn.squeeze(1), logs_p.transpose(1, 2)).transpose( - 1, 2 - ) # [b, t', t], [b, t, d] -> [b, d, t'] - - z_p = m_p + torch.randn_like(m_p) * torch.exp(logs_p) * noise_scale - z = self.flow(z_p, y_mask, g=g, reverse=True) - o = self.dec((z * y_mask)[:, :, :max_len], g=g) - return o, attn, y_mask, (z, z_p, m_p, logs_p) - def voice_conversion(self, y, y_lengths, sid_src, sid_tgt, tau=1.0): g_src = sid_src g_tgt = sid_tgt diff --git a/TTS/vc/modules/openvoice/modules.py b/TTS/vc/modules/openvoice/modules.py deleted file mode 100644 index b3a60d5b12..0000000000 --- a/TTS/vc/modules/openvoice/modules.py +++ /dev/null @@ -1,588 +0,0 @@ -import math - -import torch -from torch import nn -from torch.nn import Conv1d -from torch.nn import functional as F -from torch.nn.utils import remove_weight_norm, weight_norm - -from TTS.vc.modules.openvoice import commons -from TTS.vc.modules.openvoice.attentions import Encoder -from TTS.vc.modules.openvoice.commons import get_padding, init_weights -from TTS.vc.modules.openvoice.transforms import piecewise_rational_quadratic_transform - -LRELU_SLOPE = 0.1 - - -class LayerNorm(nn.Module): - def __init__(self, channels, eps=1e-5): - super().__init__() - self.channels = channels - self.eps = eps - - self.gamma = nn.Parameter(torch.ones(channels)) - self.beta = nn.Parameter(torch.zeros(channels)) - - def forward(self, x): - x = x.transpose(1, -1) - x = F.layer_norm(x, (self.channels,), self.gamma, self.beta, self.eps) - return x.transpose(1, -1) - - -class ConvReluNorm(nn.Module): - def __init__( - self, - in_channels, - hidden_channels, - out_channels, - kernel_size, - n_layers, - p_dropout, - ): - super().__init__() - self.in_channels = in_channels - self.hidden_channels = hidden_channels - self.out_channels = out_channels - self.kernel_size = kernel_size - self.n_layers = n_layers - self.p_dropout = p_dropout - assert n_layers > 1, "Number of layers should be larger than 0." 
- - self.conv_layers = nn.ModuleList() - self.norm_layers = nn.ModuleList() - self.conv_layers.append(nn.Conv1d(in_channels, hidden_channels, kernel_size, padding=kernel_size // 2)) - self.norm_layers.append(LayerNorm(hidden_channels)) - self.relu_drop = nn.Sequential(nn.ReLU(), nn.Dropout(p_dropout)) - for _ in range(n_layers - 1): - self.conv_layers.append( - nn.Conv1d( - hidden_channels, - hidden_channels, - kernel_size, - padding=kernel_size // 2, - ) - ) - self.norm_layers.append(LayerNorm(hidden_channels)) - self.proj = nn.Conv1d(hidden_channels, out_channels, 1) - self.proj.weight.data.zero_() - self.proj.bias.data.zero_() - - def forward(self, x, x_mask): - x_org = x - for i in range(self.n_layers): - x = self.conv_layers[i](x * x_mask) - x = self.norm_layers[i](x) - x = self.relu_drop(x) - x = x_org + self.proj(x) - return x * x_mask - - -class DDSConv(nn.Module): - """ - Dilated and Depth-Separable Convolution - """ - - def __init__(self, channels, kernel_size, n_layers, p_dropout=0.0): - super().__init__() - self.channels = channels - self.kernel_size = kernel_size - self.n_layers = n_layers - self.p_dropout = p_dropout - - self.drop = nn.Dropout(p_dropout) - self.convs_sep = nn.ModuleList() - self.convs_1x1 = nn.ModuleList() - self.norms_1 = nn.ModuleList() - self.norms_2 = nn.ModuleList() - for i in range(n_layers): - dilation = kernel_size**i - padding = (kernel_size * dilation - dilation) // 2 - self.convs_sep.append( - nn.Conv1d( - channels, - channels, - kernel_size, - groups=channels, - dilation=dilation, - padding=padding, - ) - ) - self.convs_1x1.append(nn.Conv1d(channels, channels, 1)) - self.norms_1.append(LayerNorm(channels)) - self.norms_2.append(LayerNorm(channels)) - - def forward(self, x, x_mask, g=None): - if g is not None: - x = x + g - for i in range(self.n_layers): - y = self.convs_sep[i](x * x_mask) - y = self.norms_1[i](y) - y = F.gelu(y) - y = self.convs_1x1[i](y) - y = self.norms_2[i](y) - y = F.gelu(y) - y = self.drop(y) - x = x + y - return x * x_mask - - -class WN(torch.nn.Module): - def __init__( - self, - hidden_channels, - kernel_size, - dilation_rate, - n_layers, - gin_channels=0, - p_dropout=0, - ): - super(WN, self).__init__() - assert kernel_size % 2 == 1 - self.hidden_channels = hidden_channels - self.kernel_size = (kernel_size,) - self.dilation_rate = dilation_rate - self.n_layers = n_layers - self.gin_channels = gin_channels - self.p_dropout = p_dropout - - self.in_layers = torch.nn.ModuleList() - self.res_skip_layers = torch.nn.ModuleList() - self.drop = nn.Dropout(p_dropout) - - if gin_channels != 0: - cond_layer = torch.nn.Conv1d(gin_channels, 2 * hidden_channels * n_layers, 1) - self.cond_layer = torch.nn.utils.weight_norm(cond_layer, name="weight") - - for i in range(n_layers): - dilation = dilation_rate**i - padding = int((kernel_size * dilation - dilation) / 2) - in_layer = torch.nn.Conv1d( - hidden_channels, - 2 * hidden_channels, - kernel_size, - dilation=dilation, - padding=padding, - ) - in_layer = torch.nn.utils.weight_norm(in_layer, name="weight") - self.in_layers.append(in_layer) - - # last one is not necessary - if i < n_layers - 1: - res_skip_channels = 2 * hidden_channels - else: - res_skip_channels = hidden_channels - - res_skip_layer = torch.nn.Conv1d(hidden_channels, res_skip_channels, 1) - res_skip_layer = torch.nn.utils.weight_norm(res_skip_layer, name="weight") - self.res_skip_layers.append(res_skip_layer) - - def forward(self, x, x_mask, g=None, **kwargs): - output = torch.zeros_like(x) - n_channels_tensor = 
torch.IntTensor([self.hidden_channels]) - - if g is not None: - g = self.cond_layer(g) - - for i in range(self.n_layers): - x_in = self.in_layers[i](x) - if g is not None: - cond_offset = i * 2 * self.hidden_channels - g_l = g[:, cond_offset : cond_offset + 2 * self.hidden_channels, :] - else: - g_l = torch.zeros_like(x_in) - - acts = commons.fused_add_tanh_sigmoid_multiply(x_in, g_l, n_channels_tensor) - acts = self.drop(acts) - - res_skip_acts = self.res_skip_layers[i](acts) - if i < self.n_layers - 1: - res_acts = res_skip_acts[:, : self.hidden_channels, :] - x = (x + res_acts) * x_mask - output = output + res_skip_acts[:, self.hidden_channels :, :] - else: - output = output + res_skip_acts - return output * x_mask - - def remove_weight_norm(self): - if self.gin_channels != 0: - torch.nn.utils.remove_weight_norm(self.cond_layer) - for l in self.in_layers: - torch.nn.utils.remove_weight_norm(l) - for l in self.res_skip_layers: - torch.nn.utils.remove_weight_norm(l) - - -class ResBlock1(torch.nn.Module): - def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)): - super(ResBlock1, self).__init__() - self.convs1 = nn.ModuleList( - [ - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=dilation[0], - padding=get_padding(kernel_size, dilation[0]), - ) - ), - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=dilation[1], - padding=get_padding(kernel_size, dilation[1]), - ) - ), - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=dilation[2], - padding=get_padding(kernel_size, dilation[2]), - ) - ), - ] - ) - self.convs1.apply(init_weights) - - self.convs2 = nn.ModuleList( - [ - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=1, - padding=get_padding(kernel_size, 1), - ) - ), - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=1, - padding=get_padding(kernel_size, 1), - ) - ), - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=1, - padding=get_padding(kernel_size, 1), - ) - ), - ] - ) - self.convs2.apply(init_weights) - - def forward(self, x, x_mask=None): - for c1, c2 in zip(self.convs1, self.convs2): - xt = F.leaky_relu(x, LRELU_SLOPE) - if x_mask is not None: - xt = xt * x_mask - xt = c1(xt) - xt = F.leaky_relu(xt, LRELU_SLOPE) - if x_mask is not None: - xt = xt * x_mask - xt = c2(xt) - x = xt + x - if x_mask is not None: - x = x * x_mask - return x - - def remove_weight_norm(self): - for l in self.convs1: - remove_weight_norm(l) - for l in self.convs2: - remove_weight_norm(l) - - -class ResBlock2(torch.nn.Module): - def __init__(self, channels, kernel_size=3, dilation=(1, 3)): - super(ResBlock2, self).__init__() - self.convs = nn.ModuleList( - [ - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=dilation[0], - padding=get_padding(kernel_size, dilation[0]), - ) - ), - weight_norm( - Conv1d( - channels, - channels, - kernel_size, - 1, - dilation=dilation[1], - padding=get_padding(kernel_size, dilation[1]), - ) - ), - ] - ) - self.convs.apply(init_weights) - - def forward(self, x, x_mask=None): - for c in self.convs: - xt = F.leaky_relu(x, LRELU_SLOPE) - if x_mask is not None: - xt = xt * x_mask - xt = c(xt) - x = xt + x - if x_mask is not None: - x = x * x_mask - return x - - def remove_weight_norm(self): - for l in self.convs: - remove_weight_norm(l) - - -class Log(nn.Module): - def forward(self, x, x_mask, reverse=False, **kwargs): - if not reverse: - y = torch.log(torch.clamp_min(x, 
1e-5)) * x_mask - logdet = torch.sum(-y, [1, 2]) - return y, logdet - else: - x = torch.exp(x) * x_mask - return x - - -class Flip(nn.Module): - def forward(self, x, *args, reverse=False, **kwargs): - x = torch.flip(x, [1]) - if not reverse: - logdet = torch.zeros(x.size(0)).to(dtype=x.dtype, device=x.device) - return x, logdet - else: - return x - - -class ElementwiseAffine(nn.Module): - def __init__(self, channels): - super().__init__() - self.channels = channels - self.m = nn.Parameter(torch.zeros(channels, 1)) - self.logs = nn.Parameter(torch.zeros(channels, 1)) - - def forward(self, x, x_mask, reverse=False, **kwargs): - if not reverse: - y = self.m + torch.exp(self.logs) * x - y = y * x_mask - logdet = torch.sum(self.logs * x_mask, [1, 2]) - return y, logdet - else: - x = (x - self.m) * torch.exp(-self.logs) * x_mask - return x - - -class ResidualCouplingLayer(nn.Module): - def __init__( - self, - channels, - hidden_channels, - kernel_size, - dilation_rate, - n_layers, - p_dropout=0, - gin_channels=0, - mean_only=False, - ): - assert channels % 2 == 0, "channels should be divisible by 2" - super().__init__() - self.channels = channels - self.hidden_channels = hidden_channels - self.kernel_size = kernel_size - self.dilation_rate = dilation_rate - self.n_layers = n_layers - self.half_channels = channels // 2 - self.mean_only = mean_only - - self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1) - self.enc = WN( - hidden_channels, - kernel_size, - dilation_rate, - n_layers, - p_dropout=p_dropout, - gin_channels=gin_channels, - ) - self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1) - self.post.weight.data.zero_() - self.post.bias.data.zero_() - - def forward(self, x, x_mask, g=None, reverse=False): - x0, x1 = torch.split(x, [self.half_channels] * 2, 1) - h = self.pre(x0) * x_mask - h = self.enc(h, x_mask, g=g) - stats = self.post(h) * x_mask - if not self.mean_only: - m, logs = torch.split(stats, [self.half_channels] * 2, 1) - else: - m = stats - logs = torch.zeros_like(m) - - if not reverse: - x1 = m + x1 * torch.exp(logs) * x_mask - x = torch.cat([x0, x1], 1) - logdet = torch.sum(logs, [1, 2]) - return x, logdet - else: - x1 = (x1 - m) * torch.exp(-logs) * x_mask - x = torch.cat([x0, x1], 1) - return x - - -class ConvFlow(nn.Module): - def __init__( - self, - in_channels, - filter_channels, - kernel_size, - n_layers, - num_bins=10, - tail_bound=5.0, - ): - super().__init__() - self.in_channels = in_channels - self.filter_channels = filter_channels - self.kernel_size = kernel_size - self.n_layers = n_layers - self.num_bins = num_bins - self.tail_bound = tail_bound - self.half_channels = in_channels // 2 - - self.pre = nn.Conv1d(self.half_channels, filter_channels, 1) - self.convs = DDSConv(filter_channels, kernel_size, n_layers, p_dropout=0.0) - self.proj = nn.Conv1d(filter_channels, self.half_channels * (num_bins * 3 - 1), 1) - self.proj.weight.data.zero_() - self.proj.bias.data.zero_() - - def forward(self, x, x_mask, g=None, reverse=False): - x0, x1 = torch.split(x, [self.half_channels] * 2, 1) - h = self.pre(x0) - h = self.convs(h, x_mask, g=g) - h = self.proj(h) * x_mask - - b, c, t = x0.shape - h = h.reshape(b, c, -1, t).permute(0, 1, 3, 2) # [b, cx?, t] -> [b, c, t, ?] 
- - unnormalized_widths = h[..., : self.num_bins] / math.sqrt(self.filter_channels) - unnormalized_heights = h[..., self.num_bins : 2 * self.num_bins] / math.sqrt(self.filter_channels) - unnormalized_derivatives = h[..., 2 * self.num_bins :] - - x1, logabsdet = piecewise_rational_quadratic_transform( - x1, - unnormalized_widths, - unnormalized_heights, - unnormalized_derivatives, - inverse=reverse, - tails="linear", - tail_bound=self.tail_bound, - ) - - x = torch.cat([x0, x1], 1) * x_mask - logdet = torch.sum(logabsdet * x_mask, [1, 2]) - if not reverse: - return x, logdet - else: - return x - - -class TransformerCouplingLayer(nn.Module): - def __init__( - self, - channels, - hidden_channels, - kernel_size, - n_layers, - n_heads, - p_dropout=0, - filter_channels=0, - mean_only=False, - wn_sharing_parameter=None, - gin_channels=0, - ): - assert n_layers == 3, n_layers - assert channels % 2 == 0, "channels should be divisible by 2" - super().__init__() - self.channels = channels - self.hidden_channels = hidden_channels - self.kernel_size = kernel_size - self.n_layers = n_layers - self.half_channels = channels // 2 - self.mean_only = mean_only - - self.pre = nn.Conv1d(self.half_channels, hidden_channels, 1) - self.enc = ( - Encoder( - hidden_channels, - filter_channels, - n_heads, - n_layers, - kernel_size, - p_dropout, - isflow=True, - gin_channels=gin_channels, - ) - if wn_sharing_parameter is None - else wn_sharing_parameter - ) - self.post = nn.Conv1d(hidden_channels, self.half_channels * (2 - mean_only), 1) - self.post.weight.data.zero_() - self.post.bias.data.zero_() - - def forward(self, x, x_mask, g=None, reverse=False): - x0, x1 = torch.split(x, [self.half_channels] * 2, 1) - h = self.pre(x0) * x_mask - h = self.enc(h, x_mask, g=g) - stats = self.post(h) * x_mask - if not self.mean_only: - m, logs = torch.split(stats, [self.half_channels] * 2, 1) - else: - m = stats - logs = torch.zeros_like(m) - - if not reverse: - x1 = m + x1 * torch.exp(logs) * x_mask - x = torch.cat([x0, x1], 1) - logdet = torch.sum(logs, [1, 2]) - return x, logdet - else: - x1 = (x1 - m) * torch.exp(-logs) * x_mask - x = torch.cat([x0, x1], 1) - return x - - x1, logabsdet = piecewise_rational_quadratic_transform( - x1, - unnormalized_widths, - unnormalized_heights, - unnormalized_derivatives, - inverse=reverse, - tails="linear", - tail_bound=self.tail_bound, - ) - - x = torch.cat([x0, x1], 1) * x_mask - logdet = torch.sum(logabsdet * x_mask, [1, 2]) - if not reverse: - return x, logdet - else: - return x diff --git a/TTS/vc/modules/openvoice/standalone_api.py b/TTS/vc/modules/openvoice/standalone_api.py deleted file mode 100644 index 831fd4dc43..0000000000 --- a/TTS/vc/modules/openvoice/standalone_api.py +++ /dev/null @@ -1,342 +0,0 @@ -import json -import os - -import librosa -import torch -import torch.utils.data -from librosa.filters import mel as librosa_mel_fn - -from TTS.vc.modules.openvoice.models import SynthesizerTrn - -# vc_checkpoint=model_path, vc_config=config_path, use_cuda=gpu) - -# vc_config.audio.output_sample_rate - - -class custom_sr_config: - """Class defined to make combatible sampling rate defination with TTS api.py. - - Args: - sampling rate. 
- """ - - def __init__(self, value): - self.audio = self.Audio(value) - - class Audio: - def __init__(self, value): - self.output_sample_rate = value - - -class OpenVoiceSynthesizer(object): - def __init__(self, vc_checkpoint, vc_config, use_cuda="cpu"): - - if use_cuda: - self.device = "cuda" - else: - self.device = "cpu" - - hps = get_hparams_from_file(vc_config) - self.vc_config = custom_sr_config(hps.data.sampling_rate) - - # vc_config.audio.output_sample_rate - self.model = SynthesizerTrn( - len(getattr(hps, "symbols", [])), - hps.data.filter_length // 2 + 1, - n_speakers=hps.data.n_speakers, - **hps.model, - ).to(torch.device(self.device)) - - self.hps = hps - self.load_ckpt(vc_checkpoint) - self.model.eval() - - def load_ckpt(self, ckpt_path): - checkpoint_dict = torch.load(ckpt_path, map_location=torch.device(self.device)) - a, b = self.model.load_state_dict(checkpoint_dict["model"], strict=False) - # print("Loaded checkpoint '{}'".format(ckpt_path)) - # print('missing/unexpected keys:', a, b) - - def extract_se(self, fpath): - audio_ref, sr = librosa.load(fpath, sr=self.hps.data.sampling_rate) - y = torch.FloatTensor(audio_ref) - y = y.to(self.device) - y = y.unsqueeze(0) - y = spectrogram_torch( - y, - self.hps.data.filter_length, - self.hps.data.sampling_rate, - self.hps.data.hop_length, - self.hps.data.win_length, - center=False, - ).to(self.device) - with torch.no_grad(): - g = self.model.ref_enc(y.transpose(1, 2)).unsqueeze(-1) - - return g - - # source_wav="my/source.wav", target_wav="my/target.wav", file_path="output.wav" - def voice_conversion(self, source_wav, target_wav, tau=0.3, message="default"): - - if not os.path.exists(source_wav): - print("source wavpath dont exists") - exit(0) - - if not os.path.exists(target_wav): - print("target wavpath dont exists") - exit(0) - - src_se = self.extract_se(source_wav) - tgt_se = self.extract_se(target_wav) - - # load audio - audio, sample_rate = librosa.load(source_wav, sr=self.hps.data.sampling_rate) - audio = torch.tensor(audio).float() - - with torch.no_grad(): - y = torch.FloatTensor(audio).to(self.device) - y = y.unsqueeze(0) - spec = spectrogram_torch( - y, - self.hps.data.filter_length, - self.hps.data.sampling_rate, - self.hps.data.hop_length, - self.hps.data.win_length, - center=False, - ).to(self.device) - spec_lengths = torch.LongTensor([spec.size(-1)]).to(self.device) - audio = ( - self.model.voice_conversion(spec, spec_lengths, sid_src=src_se, sid_tgt=tgt_se, tau=tau)[0][0, 0] - .data.cpu() - .float() - .numpy() - ) - - return audio - - -def get_hparams_from_file(config_path): - with open(config_path, "r", encoding="utf-8") as f: - data = f.read() - config = json.loads(data) - - hparams = HParams(**config) - return hparams - - -class HParams: - def __init__(self, **kwargs): - for k, v in kwargs.items(): - if isinstance(v, dict): - v = HParams(**v) - self[k] = v - - def keys(self): - return self.__dict__.keys() - - def items(self): - return self.__dict__.items() - - def values(self): - return self.__dict__.values() - - def __len__(self): - return len(self.__dict__) - - def __getitem__(self, key): - return getattr(self, key) - - def __setitem__(self, key, value): - return setattr(self, key, value) - - def __contains__(self, key): - return key in self.__dict__ - - def __repr__(self): - return self.__dict__.__repr__() - - -MAX_WAV_VALUE = 32768.0 - - -def dynamic_range_compression_torch(x, C=1, clip_val=1e-5): - """ - PARAMS - ------ - C: compression factor - """ - return torch.log(torch.clamp(x, min=clip_val) * C) - - 
-def dynamic_range_decompression_torch(x, C=1): - """ - PARAMS - ------ - C: compression factor used to compress - """ - return torch.exp(x) / C - - -def spectral_normalize_torch(magnitudes): - output = dynamic_range_compression_torch(magnitudes) - return output - - -def spectral_de_normalize_torch(magnitudes): - output = dynamic_range_decompression_torch(magnitudes) - return output - - -mel_basis = {} -hann_window = {} - - -def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False): - if torch.min(y) < -1.1: - print("min value is ", torch.min(y)) - if torch.max(y) > 1.1: - print("max value is ", torch.max(y)) - - global hann_window - dtype_device = str(y.dtype) + "_" + str(y.device) - wnsize_dtype_device = str(win_size) + "_" + dtype_device - if wnsize_dtype_device not in hann_window: - hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) - - y = torch.nn.functional.pad( - y.unsqueeze(1), - (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), - mode="reflect", - ) - y = y.squeeze(1) - - spec = torch.stft( - y, - n_fft, - hop_length=hop_size, - win_length=win_size, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=False, - ) - - spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) - return spec - - -def spectrogram_torch_conv(y, n_fft, sampling_rate, hop_size, win_size, center=False): - # if torch.min(y) < -1.: - # print('min value is ', torch.min(y)) - # if torch.max(y) > 1.: - # print('max value is ', torch.max(y)) - - global hann_window - dtype_device = str(y.dtype) + "_" + str(y.device) - wnsize_dtype_device = str(win_size) + "_" + dtype_device - if wnsize_dtype_device not in hann_window: - hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) - - y = torch.nn.functional.pad( - y.unsqueeze(1), (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), mode="reflect" - ) - - # ******************** original ************************# - # y = y.squeeze(1) - # spec1 = torch.stft(y, n_fft, hop_length=hop_size, win_length=win_size, window=hann_window[wnsize_dtype_device], - # center=center, pad_mode='reflect', normalized=False, onesided=True, return_complex=False) - - # ******************** ConvSTFT ************************# - freq_cutoff = n_fft // 2 + 1 - fourier_basis = torch.view_as_real(torch.fft.fft(torch.eye(n_fft))) - forward_basis = fourier_basis[:freq_cutoff].permute(2, 0, 1).reshape(-1, 1, fourier_basis.shape[1]) - forward_basis = ( - forward_basis * torch.as_tensor(librosa.util.pad_center(torch.hann_window(win_size), size=n_fft)).float() - ) - - import torch.nn.functional as F - - # if center: - # signal = F.pad(y[:, None, None, :], (n_fft // 2, n_fft // 2, 0, 0), mode = 'reflect').squeeze(1) - assert center is False - - forward_transform_squared = F.conv1d(y, forward_basis.to(y.device), stride=hop_size) - spec2 = torch.stack( - [forward_transform_squared[:, :freq_cutoff, :], forward_transform_squared[:, freq_cutoff:, :]], dim=-1 - ) - - # ******************** Verification ************************# - spec1 = torch.stft( - y.squeeze(1), - n_fft, - hop_length=hop_size, - win_length=win_size, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=False, - ) - assert torch.allclose(spec1, spec2, atol=1e-4) - - spec = torch.sqrt(spec2.pow(2).sum(-1) + 1e-6) - return spec - - -def spec_to_mel_torch(spec, n_fft, 
num_mels, sampling_rate, fmin, fmax): - global mel_basis - dtype_device = str(spec.dtype) + "_" + str(spec.device) - fmax_dtype_device = str(fmax) + "_" + dtype_device - if fmax_dtype_device not in mel_basis: - mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax) - mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=spec.dtype, device=spec.device) - spec = torch.matmul(mel_basis[fmax_dtype_device], spec) - spec = spectral_normalize_torch(spec) - return spec - - -def mel_spectrogram_torch(y, n_fft, num_mels, sampling_rate, hop_size, win_size, fmin, fmax, center=False): - if torch.min(y) < -1.0: - print("min value is ", torch.min(y)) - if torch.max(y) > 1.0: - print("max value is ", torch.max(y)) - - global mel_basis, hann_window - dtype_device = str(y.dtype) + "_" + str(y.device) - fmax_dtype_device = str(fmax) + "_" + dtype_device - wnsize_dtype_device = str(win_size) + "_" + dtype_device - if fmax_dtype_device not in mel_basis: - mel = librosa_mel_fn(sampling_rate, n_fft, num_mels, fmin, fmax) - mel_basis[fmax_dtype_device] = torch.from_numpy(mel).to(dtype=y.dtype, device=y.device) - if wnsize_dtype_device not in hann_window: - hann_window[wnsize_dtype_device] = torch.hann_window(win_size).to(dtype=y.dtype, device=y.device) - - y = torch.nn.functional.pad( - y.unsqueeze(1), - (int((n_fft - hop_size) / 2), int((n_fft - hop_size) / 2)), - mode="reflect", - ) - y = y.squeeze(1) - - spec = torch.stft( - y, - n_fft, - hop_length=hop_size, - win_length=win_size, - window=hann_window[wnsize_dtype_device], - center=center, - pad_mode="reflect", - normalized=False, - onesided=True, - return_complex=False, - ) - - spec = torch.sqrt(spec.pow(2).sum(-1) + 1e-6) - - spec = torch.matmul(mel_basis[fmax_dtype_device], spec) - spec = spectral_normalize_torch(spec) - - return spec diff --git a/TTS/vc/modules/openvoice/transforms.py b/TTS/vc/modules/openvoice/transforms.py deleted file mode 100644 index 4270ebae3f..0000000000 --- a/TTS/vc/modules/openvoice/transforms.py +++ /dev/null @@ -1,203 +0,0 @@ -import numpy as np -import torch -from torch.nn import functional as F - -DEFAULT_MIN_BIN_WIDTH = 1e-3 -DEFAULT_MIN_BIN_HEIGHT = 1e-3 -DEFAULT_MIN_DERIVATIVE = 1e-3 - - -def piecewise_rational_quadratic_transform( - inputs, - unnormalized_widths, - unnormalized_heights, - unnormalized_derivatives, - inverse=False, - tails=None, - tail_bound=1.0, - min_bin_width=DEFAULT_MIN_BIN_WIDTH, - min_bin_height=DEFAULT_MIN_BIN_HEIGHT, - min_derivative=DEFAULT_MIN_DERIVATIVE, -): - if tails is None: - spline_fn = rational_quadratic_spline - spline_kwargs = {} - else: - spline_fn = unconstrained_rational_quadratic_spline - spline_kwargs = {"tails": tails, "tail_bound": tail_bound} - - outputs, logabsdet = spline_fn( - inputs=inputs, - unnormalized_widths=unnormalized_widths, - unnormalized_heights=unnormalized_heights, - unnormalized_derivatives=unnormalized_derivatives, - inverse=inverse, - min_bin_width=min_bin_width, - min_bin_height=min_bin_height, - min_derivative=min_derivative, - **spline_kwargs, - ) - return outputs, logabsdet - - -def searchsorted(bin_locations, inputs, eps=1e-6): - bin_locations[..., -1] += eps - return torch.sum(inputs[..., None] >= bin_locations, dim=-1) - 1 - - -def unconstrained_rational_quadratic_spline( - inputs, - unnormalized_widths, - unnormalized_heights, - unnormalized_derivatives, - inverse=False, - tails="linear", - tail_bound=1.0, - min_bin_width=DEFAULT_MIN_BIN_WIDTH, - min_bin_height=DEFAULT_MIN_BIN_HEIGHT, - min_derivative=DEFAULT_MIN_DERIVATIVE, -): - 
inside_interval_mask = (inputs >= -tail_bound) & (inputs <= tail_bound) - outside_interval_mask = ~inside_interval_mask - - outputs = torch.zeros_like(inputs) - logabsdet = torch.zeros_like(inputs) - - if tails == "linear": - unnormalized_derivatives = F.pad(unnormalized_derivatives, pad=(1, 1)) - constant = np.log(np.exp(1 - min_derivative) - 1) - unnormalized_derivatives[..., 0] = constant - unnormalized_derivatives[..., -1] = constant - - outputs[outside_interval_mask] = inputs[outside_interval_mask] - logabsdet[outside_interval_mask] = 0 - else: - raise RuntimeError("{} tails are not implemented.".format(tails)) - - ( - outputs[inside_interval_mask], - logabsdet[inside_interval_mask], - ) = rational_quadratic_spline( - inputs=inputs[inside_interval_mask], - unnormalized_widths=unnormalized_widths[inside_interval_mask, :], - unnormalized_heights=unnormalized_heights[inside_interval_mask, :], - unnormalized_derivatives=unnormalized_derivatives[inside_interval_mask, :], - inverse=inverse, - left=-tail_bound, - right=tail_bound, - bottom=-tail_bound, - top=tail_bound, - min_bin_width=min_bin_width, - min_bin_height=min_bin_height, - min_derivative=min_derivative, - ) - - return outputs, logabsdet - - -def rational_quadratic_spline( - inputs, - unnormalized_widths, - unnormalized_heights, - unnormalized_derivatives, - inverse=False, - left=0.0, - right=1.0, - bottom=0.0, - top=1.0, - min_bin_width=DEFAULT_MIN_BIN_WIDTH, - min_bin_height=DEFAULT_MIN_BIN_HEIGHT, - min_derivative=DEFAULT_MIN_DERIVATIVE, -): - if torch.min(inputs) < left or torch.max(inputs) > right: - raise ValueError("Input to a transform is not within its domain") - - num_bins = unnormalized_widths.shape[-1] - - if min_bin_width * num_bins > 1.0: - raise ValueError("Minimal bin width too large for the number of bins") - if min_bin_height * num_bins > 1.0: - raise ValueError("Minimal bin height too large for the number of bins") - - widths = F.softmax(unnormalized_widths, dim=-1) - widths = min_bin_width + (1 - min_bin_width * num_bins) * widths - cumwidths = torch.cumsum(widths, dim=-1) - cumwidths = F.pad(cumwidths, pad=(1, 0), mode="constant", value=0.0) - cumwidths = (right - left) * cumwidths + left - cumwidths[..., 0] = left - cumwidths[..., -1] = right - widths = cumwidths[..., 1:] - cumwidths[..., :-1] - - derivatives = min_derivative + F.softplus(unnormalized_derivatives) - - heights = F.softmax(unnormalized_heights, dim=-1) - heights = min_bin_height + (1 - min_bin_height * num_bins) * heights - cumheights = torch.cumsum(heights, dim=-1) - cumheights = F.pad(cumheights, pad=(1, 0), mode="constant", value=0.0) - cumheights = (top - bottom) * cumheights + bottom - cumheights[..., 0] = bottom - cumheights[..., -1] = top - heights = cumheights[..., 1:] - cumheights[..., :-1] - - if inverse: - bin_idx = searchsorted(cumheights, inputs)[..., None] - else: - bin_idx = searchsorted(cumwidths, inputs)[..., None] - - input_cumwidths = cumwidths.gather(-1, bin_idx)[..., 0] - input_bin_widths = widths.gather(-1, bin_idx)[..., 0] - - input_cumheights = cumheights.gather(-1, bin_idx)[..., 0] - delta = heights / widths - input_delta = delta.gather(-1, bin_idx)[..., 0] - - input_derivatives = derivatives.gather(-1, bin_idx)[..., 0] - input_derivatives_plus_one = derivatives[..., 1:].gather(-1, bin_idx)[..., 0] - - input_heights = heights.gather(-1, bin_idx)[..., 0] - - if inverse: - a = (inputs - input_cumheights) * ( - input_derivatives + input_derivatives_plus_one - 2 * input_delta - ) + input_heights * (input_delta - 
input_derivatives) - b = input_heights * input_derivatives - (inputs - input_cumheights) * ( - input_derivatives + input_derivatives_plus_one - 2 * input_delta - ) - c = -input_delta * (inputs - input_cumheights) - - discriminant = b.pow(2) - 4 * a * c - assert (discriminant >= 0).all() - - root = (2 * c) / (-b - torch.sqrt(discriminant)) - outputs = root * input_bin_widths + input_cumwidths - - theta_one_minus_theta = root * (1 - root) - denominator = input_delta + ( - (input_derivatives + input_derivatives_plus_one - 2 * input_delta) * theta_one_minus_theta - ) - derivative_numerator = input_delta.pow(2) * ( - input_derivatives_plus_one * root.pow(2) - + 2 * input_delta * theta_one_minus_theta - + input_derivatives * (1 - root).pow(2) - ) - logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator) - - return outputs, -logabsdet - else: - theta = (inputs - input_cumwidths) / input_bin_widths - theta_one_minus_theta = theta * (1 - theta) - - numerator = input_heights * (input_delta * theta.pow(2) + input_derivatives * theta_one_minus_theta) - denominator = input_delta + ( - (input_derivatives + input_derivatives_plus_one - 2 * input_delta) * theta_one_minus_theta - ) - outputs = input_cumheights + numerator / denominator - - derivative_numerator = input_delta.pow(2) * ( - input_derivatives_plus_one * theta.pow(2) - + 2 * input_delta * theta_one_minus_theta - + input_derivatives * (1 - theta).pow(2) - ) - logabsdet = torch.log(derivative_numerator) - 2 * torch.log(denominator) - - return outputs, logabsdet From 95998374bf7d7aeb2ff8556e9b03c4d29475c189 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 20 Jun 2024 10:16:35 +0200 Subject: [PATCH 216/255] feat(openvoice): add config classes --- TTS/vc/configs/freevc_config.py | 2 +- TTS/vc/configs/openvoice_config.py | 201 +++++++++++++++++++++++++++++ 2 files changed, 202 insertions(+), 1 deletion(-) create mode 100644 TTS/vc/configs/openvoice_config.py diff --git a/TTS/vc/configs/freevc_config.py b/TTS/vc/configs/freevc_config.py index 207181b303..d600bfb1f4 100644 --- a/TTS/vc/configs/freevc_config.py +++ b/TTS/vc/configs/freevc_config.py @@ -229,7 +229,7 @@ class FreeVCConfig(BaseVCConfig): If true, language embedding is used. Defaults to `False`. Note: - Check :class:`TTS.tts.configs.shared_configs.BaseTTSConfig` for the inherited parameters. + Check :class:`TTS.tts.configs.shared_configs.BaseVCConfig` for the inherited parameters. Example: diff --git a/TTS/vc/configs/openvoice_config.py b/TTS/vc/configs/openvoice_config.py new file mode 100644 index 0000000000..261cdd6f47 --- /dev/null +++ b/TTS/vc/configs/openvoice_config.py @@ -0,0 +1,201 @@ +from dataclasses import dataclass, field +from typing import Optional + +from coqpit import Coqpit + +from TTS.vc.configs.shared_configs import BaseVCConfig + + +@dataclass +class OpenVoiceAudioConfig(Coqpit): + """Audio configuration + + Args: + input_sample_rate (int): + The sampling rate of the input waveform. + + output_sample_rate (int): + The sampling rate of the output waveform. + + fft_size (int): + The length of the filter. + + hop_length (int): + The hop length. + + win_length (int): + The window length. + """ + + input_sample_rate: int = field(default=22050) + output_sample_rate: int = field(default=22050) + fft_size: int = field(default=1024) + hop_length: int = field(default=256) + win_length: int = field(default=1024) + + +@dataclass +class OpenVoiceArgs(Coqpit): + """OpenVoice model arguments. + + zero_g (bool): + Whether to zero the gradients. 
+ + inter_channels (int): + The number of channels in the intermediate layers. + + hidden_channels (int): + The number of channels in the hidden layers. + + filter_channels (int): + The number of channels in the filter layers. + + n_heads (int): + The number of attention heads. + + n_layers (int): + The number of layers. + + kernel_size (int): + The size of the kernel. + + p_dropout (float): + The dropout probability. + + resblock (str): + The type of residual block. + + resblock_kernel_sizes (List[int]): + The kernel sizes for the residual blocks. + + resblock_dilation_sizes (List[List[int]]): + The dilation sizes for the residual blocks. + + upsample_rates (List[int]): + The upsample rates. + + upsample_initial_channel (int): + The number of channels in the initial upsample layer. + + upsample_kernel_sizes (List[int]): + The kernel sizes for the upsample layers. + + n_layers_q (int): + The number of layers in the quantization network. + + use_spectral_norm (bool): + Whether to use spectral normalization. + + gin_channels (int): + The number of channels in the global conditioning vector. + + tau (float): + Tau parameter for the posterior encoder + """ + + zero_g: bool = field(default=True) + inter_channels: int = field(default=192) + hidden_channels: int = field(default=192) + filter_channels: int = field(default=768) + n_heads: int = field(default=2) + n_layers: int = field(default=6) + kernel_size: int = field(default=3) + p_dropout: float = field(default=0.1) + resblock: str = field(default="1") + resblock_kernel_sizes: list[int] = field(default_factory=lambda: [3, 7, 11]) + resblock_dilation_sizes: list[list[int]] = field(default_factory=lambda: [[1, 3, 5], [1, 3, 5], [1, 3, 5]]) + upsample_rates: list[int] = field(default_factory=lambda: [8, 8, 2, 2]) + upsample_initial_channel: int = field(default=512) + upsample_kernel_sizes: list[int] = field(default_factory=lambda: [16, 16, 4, 4]) + n_layers_q: int = field(default=3) + use_spectral_norm: bool = field(default=False) + gin_channels: int = field(default=256) + tau: float = field(default=0.3) + + +@dataclass +class OpenVoiceConfig(BaseVCConfig): + """Defines parameters for OpenVoice VC model. + + Args: + model (str): + Model name. Do not change unless you know what you are doing. + + model_args (OpenVoiceArgs): + Model architecture arguments. Defaults to `OpenVoiceArgs()`. + + audio (OpenVoiceAudioConfig): + Audio processing configuration. Defaults to `OpenVoiceAudioConfig()`. + + return_wav (bool): + If true, data loader returns the waveform as well as the other outputs. Do not change. Defaults to `True`. + + compute_linear_spec (bool): + If true, the linear spectrogram is computed and returned alongside the mel output. Do not change. Defaults to `True`. + + use_weighted_sampler (bool): + If true, use weighted sampler with bucketing for balancing samples between datasets used in training. Defaults to `False`. + + weighted_sampler_attrs (dict): + Key retuned by the formatter to be used for weighted sampler. For example `{"root_path": 2.0, "speaker_name": 1.0}` sets sample probabilities + by overweighting `root_path` by 2.0. Defaults to `{}`. + + weighted_sampler_multipliers (dict): + Weight each unique value of a key returned by the formatter for weighted sampling. + For example `{"root_path":{"/raid/datasets/libritts-clean-16khz-bwe-coqui_44khz/LibriTTS/train-clean-100/":1.0, "/raid/datasets/libritts-clean-16khz-bwe-coqui_44khz/LibriTTS/train-clean-360/": 0.5}`. 
+ It will sample instances from `train-clean-100` 2 times more than `train-clean-360`. Defaults to `{}`. + + r (int): + Number of spectrogram frames to be generated at a time. Do not change. Defaults to `1`. + + add_blank (bool): + If true, a blank token is added in between every character. Defaults to `True`. + + Note: + Check :class:`TTS.tts.configs.shared_configs.BaseVCConfig` for the inherited parameters. + + Example: + + >>> from TTS.vc.configs.openvoice_config import OpenVoiceConfig + >>> config = OpenVoiceConfig() + """ + + model: str = "openvoice" + # model specific params + model_args: OpenVoiceArgs = field(default_factory=OpenVoiceArgs) + audio: OpenVoiceAudioConfig = field(default_factory=OpenVoiceAudioConfig) + + # optimizer + # TODO with training support + + # loss params + # TODO with training support + + # data loader params + return_wav: bool = True + compute_linear_spec: bool = True + + # sampler params + use_weighted_sampler: bool = False # TODO: move it to the base config + weighted_sampler_attrs: dict = field(default_factory=lambda: {}) + weighted_sampler_multipliers: dict = field(default_factory=lambda: {}) + + # overrides + r: int = 1 # DO NOT CHANGE + add_blank: bool = True + + # multi-speaker settings + # use speaker embedding layer + num_speakers: int = 0 + speakers_file: Optional[str] = None + speaker_embedding_channels: int = 256 + + # use d-vectors + use_d_vector_file: bool = False + d_vector_file: Optional[list[str]] = None + d_vector_dim: Optional[int] = None + + def __post_init__(self) -> None: + for key, val in self.model_args.items(): + if hasattr(self, key): + self[key] = val From ca02d0352bd5c9118d56fdbb06aef1c15782cf11 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 13 Nov 2024 19:47:32 +0100 Subject: [PATCH 217/255] feat(openvoice): add to .models.json --- TTS/.models.json | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/TTS/.models.json b/TTS/.models.json index 7c3a498bff..36654d0555 100644 --- a/TTS/.models.json +++ b/TTS/.models.json @@ -931,6 +931,28 @@ "license": "MIT", "commit": null } + }, + "multi-dataset": { + "openvoice_v1": { + "hf_url": [ + "https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/config.json", + "https://huggingface.co/myshell-ai/OpenVoice/resolve/main/checkpoints/converter/checkpoint.pth" + ], + "description": "OpenVoice VC model from https://huggingface.co/myshell-ai/OpenVoiceV2", + "author": "MyShell.ai", + "license": "MIT", + "commit": null + }, + "openvoice_v2": { + "hf_url": [ + "https://huggingface.co/myshell-ai/OpenVoiceV2/resolve/main/converter/config.json", + "https://huggingface.co/myshell-ai/OpenVoiceV2/resolve/main/converter/checkpoint.pth" + ], + "description": "OpenVoice VC model from https://huggingface.co/myshell-ai/OpenVoiceV2", + "author": "MyShell.ai", + "license": "MIT", + "commit": null + } } } } From 1a21853b9022596ba1e609b687e278ec0beed0d8 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 13 Nov 2024 19:58:30 +0100 Subject: [PATCH 218/255] ci: validate .models.json file --- .pre-commit-config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 92f6f3ab3c..62420e9958 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,6 +2,8 @@ repos: - repo: "https://github.com/pre-commit/pre-commit-hooks" rev: v5.0.0 hooks: + - id: check-json + files: "TTS/.models.json" - id: check-yaml - id: end-of-file-fixer - id: trailing-whitespace From 
fce3137e0d0d0cad101bd0264673dc60447c3a8a Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 25 Jun 2024 23:01:47 +0200 Subject: [PATCH 219/255] feat: add openvoice vc model --- README.md | 1 + TTS/api.py | 6 +- TTS/utils/manage.py | 2 +- TTS/utils/synthesizer.py | 21 +- TTS/vc/models/openvoice.py | 320 +++++++++++++++++++++++++++ TTS/vc/modules/openvoice/__init__.py | 0 TTS/vc/modules/openvoice/models.py | 134 ----------- 7 files changed, 346 insertions(+), 138 deletions(-) create mode 100644 TTS/vc/models/openvoice.py delete mode 100644 TTS/vc/modules/openvoice/__init__.py delete mode 100644 TTS/vc/modules/openvoice/models.py diff --git a/README.md b/README.md index 5ca825b6ba..381a8e95f2 100644 --- a/README.md +++ b/README.md @@ -121,6 +121,7 @@ repository are also still a useful source of information. ### Voice Conversion - FreeVC: [paper](https://arxiv.org/abs/2210.15418) +- OpenVoice: [technical report](https://arxiv.org/abs/2312.01479) You can also help us implement more models. diff --git a/TTS/api.py b/TTS/api.py index 250ed1a0d9..12e82af52c 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -155,8 +155,10 @@ def load_vc_model_by_name(self, model_name: str, gpu: bool = False): gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False. """ self.model_name = model_name - model_path, config_path, _, _, _ = self.download_model_by_name(model_name) - self.voice_converter = Synthesizer(vc_checkpoint=model_path, vc_config=config_path, use_cuda=gpu) + model_path, config_path, _, _, model_dir = self.download_model_by_name(model_name) + self.voice_converter = Synthesizer( + vc_checkpoint=model_path, vc_config=config_path, model_dir=model_dir, use_cuda=gpu + ) def load_tts_model_by_name(self, model_name: str, gpu: bool = False): """Load one of 🐸TTS models by name. 
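Note: with `model_dir` now forwarded to `Synthesizer`, directory-based voice conversion models such as OpenVoice load through the same high-level API path as FreeVC. A minimal usage sketch (the model name is taken from the `.models.json` entries added earlier in this series; device placement and progress options are omitted, and the file paths are placeholders):

```python
from TTS.api import TTS

# Sketch of the new code path; paths are placeholders.
tts = TTS(model_name="voice_conversion_models/multilingual/multi-dataset/openvoice_v2")
tts.voice_conversion_to_file(
    source_wav="my/source.wav",
    target_wav="my/target.wav",
    file_path="output.wav",
)
```
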
diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index bd445b3a2f..38fcfd60e9 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -424,7 +424,7 @@ def _find_files(output_path: str) -> Tuple[str, str]: model_file = None config_file = None for file_name in os.listdir(output_path): - if file_name in ["model_file.pth", "model_file.pth.tar", "model.pth"]: + if file_name in ["model_file.pth", "model_file.pth.tar", "model.pth", "checkpoint.pth"]: model_file = os.path.join(output_path, file_name) elif file_name == "config.json": config_file = os.path.join(output_path, file_name) diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index 90af4f48f9..a158df60e1 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -1,6 +1,7 @@ import logging import os import time +from pathlib import Path from typing import List import numpy as np @@ -15,7 +16,9 @@ from TTS.tts.utils.synthesis import synthesis, transfer_voice, trim_silence from TTS.utils.audio import AudioProcessor from TTS.utils.audio.numpy_transforms import save_wav +from TTS.vc.configs.openvoice_config import OpenVoiceConfig from TTS.vc.models import setup_model as setup_vc_model +from TTS.vc.models.openvoice import OpenVoice from TTS.vocoder.models import setup_model as setup_vocoder_model from TTS.vocoder.utils.generic_utils import interpolate_vocoder_input @@ -97,7 +100,7 @@ def __init__( self._load_vocoder(vocoder_checkpoint, vocoder_config, use_cuda) self.output_sample_rate = self.vocoder_config.audio["sample_rate"] - if vc_checkpoint: + if vc_checkpoint and model_dir is None: self._load_vc(vc_checkpoint, vc_config, use_cuda) self.output_sample_rate = self.vc_config.audio["output_sample_rate"] @@ -105,6 +108,9 @@ def __init__( if "fairseq" in model_dir: self._load_fairseq_from_dir(model_dir, use_cuda) self.output_sample_rate = self.tts_config.audio["sample_rate"] + elif "openvoice" in model_dir: + self._load_openvoice_from_dir(Path(model_dir), use_cuda) + self.output_sample_rate = self.vc_config.audio["output_sample_rate"] else: self._load_tts_from_dir(model_dir, use_cuda) self.output_sample_rate = self.tts_config.audio["output_sample_rate"] @@ -153,6 +159,19 @@ def _load_fairseq_from_dir(self, model_dir: str, use_cuda: bool) -> None: if use_cuda: self.tts_model.cuda() + def _load_openvoice_from_dir(self, checkpoint: Path, use_cuda: bool) -> None: + """Load the OpenVoice model from a directory. + + We assume the model knows how to load itself from the directory and + there is a config.json file in the directory. + """ + self.vc_config = OpenVoiceConfig() + self.vc_model = OpenVoice.init_from_config(self.vc_config) + self.vc_model.load_checkpoint(self.vc_config, checkpoint, eval=True) + self.vc_config = self.vc_model.config + if use_cuda: + self.vc_model.cuda() + def _load_tts_from_dir(self, model_dir: str, use_cuda: bool) -> None: """Load the TTS model from a directory. 
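Note: the `Synthesizer` changes above dispatch on the directory name. When `model_dir` contains "openvoice", `_load_openvoice_from_dir` builds an `OpenVoiceConfig` and loads the converter checkpoint plus its `config.json` from that directory. A sketch of driving this path directly, assuming the OpenVoice files have already been downloaded into a local directory (the path below is a placeholder and must contain "openvoice" for this branch to be taken):

```python
from TTS.utils.synthesizer import Synthesizer

# Assumption: /path/to/openvoice_v2 already holds config.json and checkpoint.pth.
synthesizer = Synthesizer(model_dir="/path/to/openvoice_v2", use_cuda=False)
wav = synthesizer.voice_conversion(source_wav="my/source.wav", target_wav="my/target.wav")
```
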
diff --git a/TTS/vc/models/openvoice.py b/TTS/vc/models/openvoice.py new file mode 100644 index 0000000000..135b0861b9 --- /dev/null +++ b/TTS/vc/models/openvoice.py @@ -0,0 +1,320 @@ +import json +import logging +import os +from pathlib import Path +from typing import Any, Mapping, Optional, Union + +import librosa +import numpy as np +import numpy.typing as npt +import torch +from coqpit import Coqpit +from torch import nn +from torch.nn import functional as F +from trainer.io import load_fsspec + +from TTS.tts.layers.vits.networks import PosteriorEncoder +from TTS.tts.utils.speakers import SpeakerManager +from TTS.utils.audio.torch_transforms import wav_to_spec +from TTS.vc.configs.openvoice_config import OpenVoiceConfig +from TTS.vc.models.base_vc import BaseVC +from TTS.vc.models.freevc import Generator, ResidualCouplingBlock + +logger = logging.getLogger(__name__) + + +class ReferenceEncoder(nn.Module): + """NN module creating a fixed size prosody embedding from a spectrogram. + + inputs: mel spectrograms [batch_size, num_spec_frames, num_mel] + outputs: [batch_size, embedding_dim] + """ + + def __init__(self, spec_channels: int, embedding_dim: int = 0, layernorm: bool = True) -> None: + super().__init__() + self.spec_channels = spec_channels + ref_enc_filters = [32, 32, 64, 64, 128, 128] + K = len(ref_enc_filters) + filters = [1] + ref_enc_filters + convs = [ + torch.nn.utils.parametrizations.weight_norm( + nn.Conv2d( + in_channels=filters[i], + out_channels=filters[i + 1], + kernel_size=(3, 3), + stride=(2, 2), + padding=(1, 1), + ) + ) + for i in range(K) + ] + self.convs = nn.ModuleList(convs) + + out_channels = self.calculate_channels(spec_channels, 3, 2, 1, K) + self.gru = nn.GRU( + input_size=ref_enc_filters[-1] * out_channels, + hidden_size=256 // 2, + batch_first=True, + ) + self.proj = nn.Linear(128, embedding_dim) + self.layernorm = nn.LayerNorm(self.spec_channels) if layernorm else None + + def forward(self, inputs: torch.Tensor) -> torch.Tensor: + N = inputs.size(0) + + out = inputs.view(N, 1, -1, self.spec_channels) # [N, 1, Ty, n_freqs] + if self.layernorm is not None: + out = self.layernorm(out) + + for conv in self.convs: + out = conv(out) + out = F.relu(out) # [N, 128, Ty//2^K, n_mels//2^K] + + out = out.transpose(1, 2) # [N, Ty//2^K, 128, n_mels//2^K] + T = out.size(1) + N = out.size(0) + out = out.contiguous().view(N, T, -1) # [N, Ty//2^K, 128*n_mels//2^K] + + self.gru.flatten_parameters() + _memory, out = self.gru(out) # out --- [1, N, 128] + + return self.proj(out.squeeze(0)) + + def calculate_channels(self, L: int, kernel_size: int, stride: int, pad: int, n_convs: int) -> int: + for _ in range(n_convs): + L = (L - kernel_size + 2 * pad) // stride + 1 + return L + + +class OpenVoice(BaseVC): + """ + OpenVoice voice conversion model (inference only). + + Source: https://github.com/myshell-ai/OpenVoice + Paper: https://arxiv.org/abs/2312.01479 + + Paper abstract: + We introduce OpenVoice, a versatile voice cloning approach that requires + only a short audio clip from the reference speaker to replicate their voice and + generate speech in multiple languages. OpenVoice represents a significant + advancement in addressing the following open challenges in the field: 1) + Flexible Voice Style Control. OpenVoice enables granular control over voice + styles, including emotion, accent, rhythm, pauses, and intonation, in addition + to replicating the tone color of the reference speaker. 
The voice styles are not + directly copied from and constrained by the style of the reference speaker. + Previous approaches lacked the ability to flexibly manipulate voice styles after + cloning. 2) Zero-Shot Cross-Lingual Voice Cloning. OpenVoice achieves zero-shot + cross-lingual voice cloning for languages not included in the massive-speaker + training set. Unlike previous approaches, which typically require extensive + massive-speaker multi-lingual (MSML) dataset for all languages, OpenVoice can + clone voices into a new language without any massive-speaker training data for + that language. OpenVoice is also computationally efficient, costing tens of + times less than commercially available APIs that offer even inferior + performance. To foster further research in the field, we have made the source + code and trained model publicly accessible. We also provide qualitative results + in our demo website. Prior to its public release, our internal version of + OpenVoice was used tens of millions of times by users worldwide between May and + October 2023, serving as the backend of MyShell. + """ + + def __init__(self, config: Coqpit, speaker_manager: Optional[SpeakerManager] = None) -> None: + super().__init__(config, None, speaker_manager, None) + + self.init_multispeaker(config) + + self.zero_g = self.args.zero_g + self.inter_channels = self.args.inter_channels + self.hidden_channels = self.args.hidden_channels + self.filter_channels = self.args.filter_channels + self.n_heads = self.args.n_heads + self.n_layers = self.args.n_layers + self.kernel_size = self.args.kernel_size + self.p_dropout = self.args.p_dropout + self.resblock = self.args.resblock + self.resblock_kernel_sizes = self.args.resblock_kernel_sizes + self.resblock_dilation_sizes = self.args.resblock_dilation_sizes + self.upsample_rates = self.args.upsample_rates + self.upsample_initial_channel = self.args.upsample_initial_channel + self.upsample_kernel_sizes = self.args.upsample_kernel_sizes + self.n_layers_q = self.args.n_layers_q + self.use_spectral_norm = self.args.use_spectral_norm + self.gin_channels = self.args.gin_channels + self.tau = self.args.tau + + self.spec_channels = config.audio.fft_size // 2 + 1 + + self.dec = Generator( + self.inter_channels, + self.resblock, + self.resblock_kernel_sizes, + self.resblock_dilation_sizes, + self.upsample_rates, + self.upsample_initial_channel, + self.upsample_kernel_sizes, + gin_channels=self.gin_channels, + ) + self.enc_q = PosteriorEncoder( + self.spec_channels, + self.inter_channels, + self.hidden_channels, + kernel_size=5, + dilation_rate=1, + num_layers=16, + cond_channels=self.gin_channels, + ) + + self.flow = ResidualCouplingBlock( + self.inter_channels, + self.hidden_channels, + kernel_size=5, + dilation_rate=1, + n_layers=4, + gin_channels=self.gin_channels, + ) + + self.ref_enc = ReferenceEncoder(self.spec_channels, self.gin_channels) + + @property + def device(self) -> torch.device: + return next(self.parameters()).device + + @staticmethod + def init_from_config(config: OpenVoiceConfig) -> "OpenVoice": + return OpenVoice(config) + + def init_multispeaker(self, config: Coqpit, data: Optional[list[Any]] = None) -> None: + """Initialize multi-speaker modules of a model. A model can be trained either with a speaker embedding layer + or with external `d_vectors` computed from a speaker encoder model. + + You must provide a `speaker_manager` at initialization to set up the multi-speaker modules. + + Args: + config (Coqpit): Model configuration. 
+ data (list, optional): Dataset items to infer number of speakers. Defaults to None. + """ + self.num_spks = config.num_speakers + if self.speaker_manager: + self.num_spks = self.speaker_manager.num_speakers + + def load_checkpoint( + self, + config: OpenVoiceConfig, + checkpoint_path: Union[str, os.PathLike[Any]], + eval: bool = False, + strict: bool = True, + cache: bool = False, + ) -> None: + """Map from OpenVoice's config structure.""" + config_path = Path(checkpoint_path).parent / "config.json" + with open(config_path, encoding="utf-8") as f: + config_org = json.load(f) + self.config.audio.input_sample_rate = config_org["data"]["sampling_rate"] + self.config.audio.output_sample_rate = config_org["data"]["sampling_rate"] + self.config.audio.fft_size = config_org["data"]["filter_length"] + self.config.audio.hop_length = config_org["data"]["hop_length"] + self.config.audio.win_length = config_org["data"]["win_length"] + state = load_fsspec(str(checkpoint_path), map_location=torch.device("cpu"), cache=cache) + self.load_state_dict(state["model"], strict=strict) + if eval: + self.eval() + + def forward(self) -> None: ... + def train_step(self) -> None: ... + def eval_step(self) -> None: ... + + @staticmethod + def _set_x_lengths(x: torch.Tensor, aux_input: Mapping[str, Optional[torch.Tensor]]) -> torch.Tensor: + if "x_lengths" in aux_input and aux_input["x_lengths"] is not None: + return aux_input["x_lengths"] + return torch.tensor(x.shape[1:2]).to(x.device) + + @torch.no_grad() + def inference( + self, + x: torch.Tensor, + aux_input: Mapping[str, Optional[torch.Tensor]] = {"x_lengths": None, "g_src": None, "g_tgt": None}, + ) -> dict[str, torch.Tensor]: + """ + Inference pass of the model + + Args: + x (torch.Tensor): Input tensor. Shape: (batch_size, c_seq_len). + x_lengths (torch.Tensor): Lengths of the input tensor. Shape: (batch_size,). + g_src (torch.Tensor): Source speaker embedding tensor. Shape: (batch_size, spk_emb_dim). + g_tgt (torch.Tensor): Target speaker embedding tensor. Shape: (batch_size, spk_emb_dim). + + Returns: + o_hat: Output spectrogram tensor. Shape: (batch_size, spec_seq_len, spec_dim). + x_mask: Spectrogram mask. Shape: (batch_size, spec_seq_len). + (z, z_p, z_hat): A tuple of latent variables. 
+ """ + x_lengths = self._set_x_lengths(x, aux_input) + if "g_src" in aux_input and aux_input["g_src"] is not None: + g_src = aux_input["g_src"] + else: + raise ValueError("aux_input must define g_src") + if "g_tgt" in aux_input and aux_input["g_tgt"] is not None: + g_tgt = aux_input["g_tgt"] + else: + raise ValueError("aux_input must define g_tgt") + z, _m_q, _logs_q, y_mask = self.enc_q( + x, x_lengths, g=g_src if not self.zero_g else torch.zeros_like(g_src), tau=self.tau + ) + z_p = self.flow(z, y_mask, g=g_src) + z_hat = self.flow(z_p, y_mask, g=g_tgt, reverse=True) + o_hat = self.dec(z_hat * y_mask, g=g_tgt if not self.zero_g else torch.zeros_like(g_tgt)) + return { + "model_outputs": o_hat, + "y_mask": y_mask, + "z": z, + "z_p": z_p, + "z_hat": z_hat, + } + + def load_audio(self, wav: Union[str, npt.NDArray[np.float32], torch.Tensor, list[float]]) -> torch.Tensor: + """Read and format the input audio.""" + if isinstance(wav, str): + out = torch.from_numpy(librosa.load(wav, sr=self.config.audio.input_sample_rate)[0]) + elif isinstance(wav, np.ndarray): + out = torch.from_numpy(wav) + elif isinstance(wav, list): + out = torch.from_numpy(np.array(wav)) + else: + out = wav + return out.to(self.device).float() + + def extract_se(self, audio: Union[str, torch.Tensor]) -> tuple[torch.Tensor, torch.Tensor]: + audio_ref = self.load_audio(audio) + y = torch.FloatTensor(audio_ref) + y = y.to(self.device) + y = y.unsqueeze(0) + spec = wav_to_spec( + y, + n_fft=self.config.audio.fft_size, + hop_length=self.config.audio.hop_length, + win_length=self.config.audio.win_length, + center=False, + ).to(self.device) + with torch.no_grad(): + g = self.ref_enc(spec.transpose(1, 2)).unsqueeze(-1) + + return g, spec + + @torch.inference_mode() + def voice_conversion(self, src: Union[str, torch.Tensor], tgt: Union[str, torch.Tensor]) -> npt.NDArray[np.float32]: + """ + Voice conversion pass of the model. + + Args: + src (str or torch.Tensor): Source utterance. + tgt (str or torch.Tensor): Target utterance. + + Returns: + Output numpy array. 
+ """ + src_se, src_spec = self.extract_se(src) + tgt_se, _ = self.extract_se(tgt) + + aux_input = {"g_src": src_se, "g_tgt": tgt_se} + audio = self.inference(src_spec, aux_input) + return audio["model_outputs"][0, 0].data.cpu().float().numpy() diff --git a/TTS/vc/modules/openvoice/__init__.py b/TTS/vc/modules/openvoice/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/TTS/vc/modules/openvoice/models.py b/TTS/vc/modules/openvoice/models.py deleted file mode 100644 index 89a1c3a40c..0000000000 --- a/TTS/vc/modules/openvoice/models.py +++ /dev/null @@ -1,134 +0,0 @@ -import torch -from torch import nn -from torch.nn import functional as F - -from TTS.tts.layers.vits.networks import PosteriorEncoder -from TTS.vc.models.freevc import Generator, ResidualCouplingBlock - - -class ReferenceEncoder(nn.Module): - """ - inputs --- [N, Ty/r, n_mels*r] mels - outputs --- [N, ref_enc_gru_size] - """ - - def __init__(self, spec_channels, gin_channels=0, layernorm=True): - super().__init__() - self.spec_channels = spec_channels - ref_enc_filters = [32, 32, 64, 64, 128, 128] - K = len(ref_enc_filters) - filters = [1] + ref_enc_filters - convs = [ - torch.nn.utils.parametrizations.weight_norm( - nn.Conv2d( - in_channels=filters[i], - out_channels=filters[i + 1], - kernel_size=(3, 3), - stride=(2, 2), - padding=(1, 1), - ) - ) - for i in range(K) - ] - self.convs = nn.ModuleList(convs) - - out_channels = self.calculate_channels(spec_channels, 3, 2, 1, K) - self.gru = nn.GRU( - input_size=ref_enc_filters[-1] * out_channels, - hidden_size=256 // 2, - batch_first=True, - ) - self.proj = nn.Linear(128, gin_channels) - if layernorm: - self.layernorm = nn.LayerNorm(self.spec_channels) - else: - self.layernorm = None - - def forward(self, inputs): - N = inputs.size(0) - - out = inputs.view(N, 1, -1, self.spec_channels) # [N, 1, Ty, n_freqs] - if self.layernorm is not None: - out = self.layernorm(out) - - for conv in self.convs: - out = conv(out) - out = F.relu(out) # [N, 128, Ty//2^K, n_mels//2^K] - - out = out.transpose(1, 2) # [N, Ty//2^K, 128, n_mels//2^K] - T = out.size(1) - N = out.size(0) - out = out.contiguous().view(N, T, -1) # [N, Ty//2^K, 128*n_mels//2^K] - - self.gru.flatten_parameters() - _memory, out = self.gru(out) # out --- [1, N, 128] - - return self.proj(out.squeeze(0)) - - def calculate_channels(self, L, kernel_size, stride, pad, n_convs): - for _ in range(n_convs): - L = (L - kernel_size + 2 * pad) // stride + 1 - return L - - -class SynthesizerTrn(nn.Module): - """ - Synthesizer for Training - """ - - def __init__( - self, - spec_channels, - inter_channels, - hidden_channels, - resblock, - resblock_kernel_sizes, - resblock_dilation_sizes, - upsample_rates, - upsample_initial_channel, - upsample_kernel_sizes, - n_speakers=0, - gin_channels=256, - zero_g=False, - **kwargs, - ): - super().__init__() - - self.dec = Generator( - inter_channels, - resblock, - resblock_kernel_sizes, - resblock_dilation_sizes, - upsample_rates, - upsample_initial_channel, - upsample_kernel_sizes, - gin_channels=gin_channels, - ) - self.enc_q = PosteriorEncoder( - spec_channels, - inter_channels, - hidden_channels, - 5, - 1, - 16, - cond_channels=gin_channels, - ) - - self.flow = ResidualCouplingBlock(inter_channels, hidden_channels, 5, 1, 4, gin_channels=gin_channels) - - self.n_speakers = n_speakers - if n_speakers != 0: - raise ValueError("OpenVoice inference only supports n_speaker==0") - self.ref_enc = ReferenceEncoder(spec_channels, gin_channels) - self.zero_g = zero_g - - def 
voice_conversion(self, y, y_lengths, sid_src, sid_tgt, tau=1.0): - g_src = sid_src - g_tgt = sid_tgt - z, m_q, logs_q, y_mask = self.enc_q( - y, y_lengths, g=g_src if not self.zero_g else torch.zeros_like(g_src), tau=tau - ) - z_p = self.flow(z, y_mask, g=g_src) - z_hat = self.flow(z_p, y_mask, g=g_tgt, reverse=True) - o_hat = self.dec(z_hat * y_mask, g=g_tgt if not self.zero_g else torch.zeros_like(g_tgt)) - return o_hat, y_mask, (z, z_p, z_hat) From d488441b756570ff4b82c1fe5e27d4406bf553a7 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 13 Nov 2024 22:55:46 +0100 Subject: [PATCH 220/255] test(freevc): remove unused code --- tests/vc_tests/test_freevc.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/tests/vc_tests/test_freevc.py b/tests/vc_tests/test_freevc.py index c90551b494..914237b520 100644 --- a/tests/vc_tests/test_freevc.py +++ b/tests/vc_tests/test_freevc.py @@ -22,15 +22,12 @@ class TestFreeVC(unittest.TestCase): def _create_inputs(self, config, batch_size=2): - input_dummy = torch.rand(batch_size, 30 * config.audio["hop_length"]).to(device) - input_lengths = torch.randint(100, 30 * config.audio["hop_length"], (batch_size,)).long().to(device) - input_lengths[-1] = 30 * config.audio["hop_length"] spec = torch.rand(batch_size, 30, config.audio["filter_length"] // 2 + 1).to(device) mel = torch.rand(batch_size, 30, config.audio["n_mel_channels"]).to(device) spec_lengths = torch.randint(20, 30, (batch_size,)).long().to(device) spec_lengths[-1] = spec.size(2) waveform = torch.rand(batch_size, spec.size(2) * config.audio["hop_length"]).to(device) - return input_dummy, input_lengths, mel, spec, spec_lengths, waveform + return mel, spec, spec_lengths, waveform @staticmethod def _create_inputs_inference(): @@ -38,15 +35,6 @@ def _create_inputs_inference(): target_wav = torch.rand(16000) return source_wav, target_wav - @staticmethod - def _check_parameter_changes(model, model_ref): - count = 0 - for param, param_ref in zip(model.parameters(), model_ref.parameters()): - assert (param != param_ref).any(), "param {} with shape {} not updated!! \n{}\n{}".format( - count, param.shape, param, param_ref - ) - count += 1 - def test_methods(self): config = FreeVCConfig() model = FreeVC(config).to(device) @@ -69,7 +57,7 @@ def _test_forward(self, batch_size): model.train() print(" > Num parameters for FreeVC model:%s" % (count_parameters(model))) - _, _, mel, spec, spec_lengths, waveform = self._create_inputs(config, batch_size) + mel, spec, spec_lengths, waveform = self._create_inputs(config, batch_size) wavlm_vec = model.extract_wavlm_features(waveform) wavlm_vec_lengths = torch.ones(batch_size, dtype=torch.long) @@ -86,7 +74,7 @@ def _test_inference(self, batch_size): model = FreeVC(config).to(device) model.eval() - _, _, mel, _, _, waveform = self._create_inputs(config, batch_size) + mel, _, _, waveform = self._create_inputs(config, batch_size) wavlm_vec = model.extract_wavlm_features(waveform) wavlm_vec_lengths = torch.ones(batch_size, dtype=torch.long) From 6927e0bb89f0c76dbbf5d14716cebd72ee13b2a5 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 29 Nov 2024 16:17:02 +0100 Subject: [PATCH 221/255] fix(api): clearer error message when model doesn't support VC --- TTS/api.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 12e82af52c..ed82825007 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -357,15 +357,17 @@ def voice_conversion( target_wav (str):` Path to the target wav file. 
""" - wav = self.voice_converter.voice_conversion(source_wav=source_wav, target_wav=target_wav) - return wav + if self.voice_converter is None: + msg = "The selected model does not support voice conversion." + raise RuntimeError(msg) + return self.voice_converter.voice_conversion(source_wav=source_wav, target_wav=target_wav) def voice_conversion_to_file( self, source_wav: str, target_wav: str, file_path: str = "output.wav", - ): + ) -> str: """Voice conversion with FreeVC. Convert source wav to target speaker. Args: From 546f43cb254793366f996deab33eb1cc88e915bd Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 29 Nov 2024 16:27:14 +0100 Subject: [PATCH 222/255] refactor: only use keyword args in Synthesizer --- TTS/bin/synthesize.py | 24 +++++++++++------------ TTS/utils/synthesizer.py | 1 + tests/inference_tests/test_synthesizer.py | 2 +- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index 20e429df04..454f528ab4 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -407,18 +407,18 @@ def main(): # load models synthesizer = Synthesizer( - tts_path, - tts_config_path, - speakers_file_path, - language_ids_file_path, - vocoder_path, - vocoder_config_path, - encoder_path, - encoder_config_path, - vc_path, - vc_config_path, - model_dir, - args.voice_dir, + tts_checkpoint=tts_path, + tts_config_path=tts_config_path, + tts_speakers_file=speakers_file_path, + tts_languages_file=language_ids_file_path, + vocoder_checkpoint=vocoder_path, + vocoder_config=vocoder_config_path, + encoder_checkpoint=encoder_path, + encoder_config=encoder_config_path, + vc_checkpoint=vc_path, + vc_config=vc_config_path, + model_dir=model_dir, + voice_dir=args.voice_dir, ).to(device) # query speaker ids of a multi-speaker model. 
diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index a158df60e1..73f596d167 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -28,6 +28,7 @@ class Synthesizer(nn.Module): def __init__( self, + *, tts_checkpoint: str = "", tts_config_path: str = "", tts_speakers_file: str = "", diff --git a/tests/inference_tests/test_synthesizer.py b/tests/inference_tests/test_synthesizer.py index ce4fc751c2..21cc194131 100644 --- a/tests/inference_tests/test_synthesizer.py +++ b/tests/inference_tests/test_synthesizer.py @@ -23,7 +23,7 @@ def test_in_out(self): tts_root_path = get_tests_input_path() tts_checkpoint = os.path.join(tts_root_path, "checkpoint_10.pth") tts_config = os.path.join(tts_root_path, "dummy_model_config.json") - synthesizer = Synthesizer(tts_checkpoint, tts_config, None, None) + synthesizer = Synthesizer(tts_checkpoint=tts_checkpoint, tts_config_path=tts_config) synthesizer.tts("Better this test works!!") def test_split_into_sentences(self): From 9ef2c7ed624fda8ac8052ea3824132b5ab6b4481 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 2 Dec 2024 00:09:39 +0100 Subject: [PATCH 223/255] test(freevc): fix output length check --- tests/vc_tests/test_freevc.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/vc_tests/test_freevc.py b/tests/vc_tests/test_freevc.py index 914237b520..fe07b2723c 100644 --- a/tests/vc_tests/test_freevc.py +++ b/tests/vc_tests/test_freevc.py @@ -31,7 +31,7 @@ def _create_inputs(self, config, batch_size=2): @staticmethod def _create_inputs_inference(): - source_wav = torch.rand(16000) + source_wav = torch.rand(15999) target_wav = torch.rand(16000) return source_wav, target_wav @@ -96,8 +96,8 @@ def test_voice_conversion(self): source_wav, target_wav = self._create_inputs_inference() output_wav = model.voice_conversion(source_wav, target_wav) assert ( - output_wav.shape[0] + config.audio.hop_length == source_wav.shape[0] - ), f"{output_wav.shape} != {source_wav.shape}" + output_wav.shape[0] == source_wav.shape[0] - source_wav.shape[0] % config.audio.hop_length + ), f"{output_wav.shape} != {source_wav.shape}, {config.audio.hop_length}" def test_train_step(self): ... 
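Note: the corrected assertion states the intended contract, namely that FreeVC returns as many samples as fit into whole hop-length frames of the source, which the new non-multiple input length (15999) actually exercises. A small illustration of the arithmetic; `hop_length = 320` is an assumption based on the default FreeVC audio config, not something this patch changes:

```python
hop_length = 320                                      # assumed FreeVC default
source_len = 15999                                    # new test input length
expected_len = source_len - source_len % hop_length   # 15680
assert expected_len % hop_length == 0
```
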
From 5f8ad4c64b26960dad6b1399deae5f9a0a4aade2 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 29 Nov 2024 17:23:30 +0100 Subject: [PATCH 224/255] test(openvoice): add sanity check --- tests/vc_tests/test_openvoice.py | 42 ++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/vc_tests/test_openvoice.py diff --git a/tests/vc_tests/test_openvoice.py b/tests/vc_tests/test_openvoice.py new file mode 100644 index 0000000000..c9f7ae3931 --- /dev/null +++ b/tests/vc_tests/test_openvoice.py @@ -0,0 +1,42 @@ +import os +import unittest + +import torch + +from tests import get_tests_input_path +from TTS.vc.models.openvoice import OpenVoice, OpenVoiceConfig + +torch.manual_seed(1) +use_cuda = torch.cuda.is_available() +device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + +c = OpenVoiceConfig() + +WAV_FILE = os.path.join(get_tests_input_path(), "example_1.wav") + + +class TestOpenVoice(unittest.TestCase): + + @staticmethod + def _create_inputs_inference(): + source_wav = torch.rand(16100) + target_wav = torch.rand(16000) + return source_wav, target_wav + + def test_load_audio(self): + config = OpenVoiceConfig() + model = OpenVoice(config).to(device) + wav = model.load_audio(WAV_FILE) + wav2 = model.load_audio(wav) + assert all(torch.isclose(wav, wav2)) + + def test_voice_conversion(self): + config = OpenVoiceConfig() + model = OpenVoice(config).to(device) + model.eval() + + source_wav, target_wav = self._create_inputs_inference() + output_wav = model.voice_conversion(source_wav, target_wav) + assert ( + output_wav.shape[0] == source_wav.shape[0] - source_wav.shape[0] % config.audio.hop_length + ), f"{output_wav.shape} != {source_wav.shape}" From 32c99e8e66d06055ddb44a321481447bffcf8bb1 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 13 Jun 2024 16:35:59 +0200 Subject: [PATCH 225/255] docs(readme): mention openvoice vc --- README.md | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 381a8e95f2..7dddf3a37b 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,12 @@ ## 🐸Coqui TTS News - 📣 Fork of the [original, unmaintained repository](https://github.com/coqui-ai/TTS). New PyPI package: [coqui-tts](https://pypi.org/project/coqui-tts) +- 📣 [OpenVoice](https://github.com/myshell-ai/OpenVoice) models now available for voice conversion. - 📣 Prebuilt wheels are now also published for Mac and Windows (in addition to Linux as before) for easier installation across platforms. -- 📣 ⓍTTSv2 is here with 16 languages and better performance across the board. +- 📣 ⓍTTSv2 is here with 17 languages and better performance across the board. ⓍTTS can stream with <200ms latency. - 📣 ⓍTTS fine-tuning code is out. Check the [example recipes](https://github.com/idiap/coqui-ai-TTS/tree/dev/recipes/ljspeech). -- 📣 ⓍTTS can now stream with <200ms latency. -- 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://coqui-tts.readthedocs.io/en/latest/models/xtts.html) - 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://coqui-tts.readthedocs.io/en/latest/models/bark.html) -- 📣 You can use [~1100 Fairseq models](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS. 
+- 📣 You can use [Fairseq models in ~1100 languages](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS. ## @@ -245,8 +244,14 @@ tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progr tts.voice_conversion_to_file(source_wav="my/source.wav", target_wav="my/target.wav", file_path="output.wav") ``` -#### Example voice cloning together with the voice conversion model. -This way, you can clone voices by using any model in 🐸TTS. +Other available voice conversion models: +- `voice_conversion_models/multilingual/multi-dataset/openvoice_v1` +- `voice_conversion_models/multilingual/multi-dataset/openvoice_v2` + +#### Example voice cloning together with the default voice conversion model. + +This way, you can clone voices by using any model in 🐸TTS. The FreeVC model is +used for voice conversion after synthesizing speech. ```python @@ -413,4 +418,6 @@ $ tts --out_path output/path/speech.wav --model_name "// Date: Mon, 2 Dec 2024 00:16:39 +0100 Subject: [PATCH 226/255] refactor(vc): rename TTS.vc.modules to TTS.vc.layers for consistency Same as in TTS.tts and TTS.vocoder --- TTS/vc/{modules => layers}/__init__.py | 0 TTS/vc/{modules => layers}/freevc/__init__.py | 0 TTS/vc/{modules => layers}/freevc/commons.py | 0 TTS/vc/{modules => layers}/freevc/mel_processing.py | 0 TTS/vc/{modules => layers}/freevc/modules.py | 2 +- .../freevc/speaker_encoder/__init__.py | 0 .../freevc/speaker_encoder/audio.py | 2 +- .../freevc/speaker_encoder/hparams.py | 0 .../freevc/speaker_encoder/speaker_encoder.py | 4 ++-- TTS/vc/{modules => layers}/freevc/wavlm/__init__.py | 2 +- TTS/vc/{modules => layers}/freevc/wavlm/config.json | 0 TTS/vc/{modules => layers}/freevc/wavlm/modules.py | 0 TTS/vc/{modules => layers}/freevc/wavlm/wavlm.py | 2 +- TTS/vc/models/freevc.py | 13 ++++++------- 14 files changed, 12 insertions(+), 13 deletions(-) rename TTS/vc/{modules => layers}/__init__.py (100%) rename TTS/vc/{modules => layers}/freevc/__init__.py (100%) rename TTS/vc/{modules => layers}/freevc/commons.py (100%) rename TTS/vc/{modules => layers}/freevc/mel_processing.py (100%) rename TTS/vc/{modules => layers}/freevc/modules.py (99%) rename TTS/vc/{modules => layers}/freevc/speaker_encoder/__init__.py (100%) rename TTS/vc/{modules => layers}/freevc/speaker_encoder/audio.py (97%) rename TTS/vc/{modules => layers}/freevc/speaker_encoder/hparams.py (100%) rename TTS/vc/{modules => layers}/freevc/speaker_encoder/speaker_encoder.py (98%) rename TTS/vc/{modules => layers}/freevc/wavlm/__init__.py (94%) rename TTS/vc/{modules => layers}/freevc/wavlm/config.json (100%) rename TTS/vc/{modules => layers}/freevc/wavlm/modules.py (100%) rename TTS/vc/{modules => layers}/freevc/wavlm/wavlm.py (99%) diff --git a/TTS/vc/modules/__init__.py b/TTS/vc/layers/__init__.py similarity index 100% rename from TTS/vc/modules/__init__.py rename to TTS/vc/layers/__init__.py diff --git a/TTS/vc/modules/freevc/__init__.py b/TTS/vc/layers/freevc/__init__.py similarity index 100% rename from TTS/vc/modules/freevc/__init__.py rename to TTS/vc/layers/freevc/__init__.py diff --git a/TTS/vc/modules/freevc/commons.py b/TTS/vc/layers/freevc/commons.py similarity index 100% rename from TTS/vc/modules/freevc/commons.py rename to TTS/vc/layers/freevc/commons.py diff --git a/TTS/vc/modules/freevc/mel_processing.py b/TTS/vc/layers/freevc/mel_processing.py similarity index 100% rename from TTS/vc/modules/freevc/mel_processing.py rename to TTS/vc/layers/freevc/mel_processing.py diff --git 
a/TTS/vc/modules/freevc/modules.py b/TTS/vc/layers/freevc/modules.py similarity index 99% rename from TTS/vc/modules/freevc/modules.py rename to TTS/vc/layers/freevc/modules.py index ea17be24d6..c34f22d701 100644 --- a/TTS/vc/modules/freevc/modules.py +++ b/TTS/vc/layers/freevc/modules.py @@ -7,7 +7,7 @@ from TTS.tts.layers.generic.normalization import LayerNorm2 from TTS.tts.layers.generic.wavenet import fused_add_tanh_sigmoid_multiply -from TTS.vc.modules.freevc.commons import init_weights +from TTS.vc.layers.freevc.commons import init_weights from TTS.vocoder.models.hifigan_generator import get_padding LRELU_SLOPE = 0.1 diff --git a/TTS/vc/modules/freevc/speaker_encoder/__init__.py b/TTS/vc/layers/freevc/speaker_encoder/__init__.py similarity index 100% rename from TTS/vc/modules/freevc/speaker_encoder/__init__.py rename to TTS/vc/layers/freevc/speaker_encoder/__init__.py diff --git a/TTS/vc/modules/freevc/speaker_encoder/audio.py b/TTS/vc/layers/freevc/speaker_encoder/audio.py similarity index 97% rename from TTS/vc/modules/freevc/speaker_encoder/audio.py rename to TTS/vc/layers/freevc/speaker_encoder/audio.py index 5b23a4dbb6..5fa317ce45 100644 --- a/TTS/vc/modules/freevc/speaker_encoder/audio.py +++ b/TTS/vc/layers/freevc/speaker_encoder/audio.py @@ -5,7 +5,7 @@ import librosa import numpy as np -from TTS.vc.modules.freevc.speaker_encoder.hparams import ( +from TTS.vc.layers.freevc.speaker_encoder.hparams import ( audio_norm_target_dBFS, mel_n_channels, mel_window_length, diff --git a/TTS/vc/modules/freevc/speaker_encoder/hparams.py b/TTS/vc/layers/freevc/speaker_encoder/hparams.py similarity index 100% rename from TTS/vc/modules/freevc/speaker_encoder/hparams.py rename to TTS/vc/layers/freevc/speaker_encoder/hparams.py diff --git a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py b/TTS/vc/layers/freevc/speaker_encoder/speaker_encoder.py similarity index 98% rename from TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py rename to TTS/vc/layers/freevc/speaker_encoder/speaker_encoder.py index 294bf322cb..a6d5bcf942 100644 --- a/TTS/vc/modules/freevc/speaker_encoder/speaker_encoder.py +++ b/TTS/vc/layers/freevc/speaker_encoder/speaker_encoder.py @@ -7,8 +7,8 @@ from torch import nn from trainer.io import load_fsspec -from TTS.vc.modules.freevc.speaker_encoder import audio -from TTS.vc.modules.freevc.speaker_encoder.hparams import ( +from TTS.vc.layers.freevc.speaker_encoder import audio +from TTS.vc.layers.freevc.speaker_encoder.hparams import ( mel_n_channels, mel_window_step, model_embedding_size, diff --git a/TTS/vc/modules/freevc/wavlm/__init__.py b/TTS/vc/layers/freevc/wavlm/__init__.py similarity index 94% rename from TTS/vc/modules/freevc/wavlm/__init__.py rename to TTS/vc/layers/freevc/wavlm/__init__.py index 4046e137f5..62f7e74aaf 100644 --- a/TTS/vc/modules/freevc/wavlm/__init__.py +++ b/TTS/vc/layers/freevc/wavlm/__init__.py @@ -6,7 +6,7 @@ from trainer.io import get_user_data_dir from TTS.utils.generic_utils import is_pytorch_at_least_2_4 -from TTS.vc.modules.freevc.wavlm.wavlm import WavLM, WavLMConfig +from TTS.vc.layers.freevc.wavlm.wavlm import WavLM, WavLMConfig logger = logging.getLogger(__name__) diff --git a/TTS/vc/modules/freevc/wavlm/config.json b/TTS/vc/layers/freevc/wavlm/config.json similarity index 100% rename from TTS/vc/modules/freevc/wavlm/config.json rename to TTS/vc/layers/freevc/wavlm/config.json diff --git a/TTS/vc/modules/freevc/wavlm/modules.py b/TTS/vc/layers/freevc/wavlm/modules.py similarity index 100% rename from 
TTS/vc/modules/freevc/wavlm/modules.py rename to TTS/vc/layers/freevc/wavlm/modules.py diff --git a/TTS/vc/modules/freevc/wavlm/wavlm.py b/TTS/vc/layers/freevc/wavlm/wavlm.py similarity index 99% rename from TTS/vc/modules/freevc/wavlm/wavlm.py rename to TTS/vc/layers/freevc/wavlm/wavlm.py index 10dd09ed0c..775f3e5979 100644 --- a/TTS/vc/modules/freevc/wavlm/wavlm.py +++ b/TTS/vc/layers/freevc/wavlm/wavlm.py @@ -17,7 +17,7 @@ import torch.nn.functional as F from torch.nn import LayerNorm -from TTS.vc.modules.freevc.wavlm.modules import ( +from TTS.vc.layers.freevc.wavlm.modules import ( Fp32GroupNorm, Fp32LayerNorm, GLU_Linear, diff --git a/TTS/vc/models/freevc.py b/TTS/vc/models/freevc.py index 62559de534..c654219c39 100644 --- a/TTS/vc/models/freevc.py +++ b/TTS/vc/models/freevc.py @@ -12,17 +12,16 @@ from torch.nn.utils.parametrize import remove_parametrizations from trainer.io import load_fsspec -import TTS.vc.modules.freevc.commons as commons -import TTS.vc.modules.freevc.modules as modules +import TTS.vc.layers.freevc.modules as modules from TTS.tts.layers.vits.discriminator import DiscriminatorS from TTS.tts.utils.helpers import sequence_mask from TTS.tts.utils.speakers import SpeakerManager from TTS.vc.configs.freevc_config import FreeVCConfig +from TTS.vc.layers.freevc.commons import init_weights, rand_slice_segments +from TTS.vc.layers.freevc.mel_processing import mel_spectrogram_torch +from TTS.vc.layers.freevc.speaker_encoder.speaker_encoder import SpeakerEncoder as SpeakerEncoderEx +from TTS.vc.layers.freevc.wavlm import get_wavlm from TTS.vc.models.base_vc import BaseVC -from TTS.vc.modules.freevc.commons import init_weights -from TTS.vc.modules.freevc.mel_processing import mel_spectrogram_torch -from TTS.vc.modules.freevc.speaker_encoder.speaker_encoder import SpeakerEncoder as SpeakerEncoderEx -from TTS.vc.modules.freevc.wavlm import get_wavlm from TTS.vocoder.models.hifigan_discriminator import DiscriminatorP logger = logging.getLogger(__name__) @@ -385,7 +384,7 @@ def forward( z_p = self.flow(z, spec_mask, g=g) # Randomly slice z and compute o using dec - z_slice, ids_slice = commons.rand_slice_segments(z, spec_lengths, self.segment_size) + z_slice, ids_slice = rand_slice_segments(z, spec_lengths, self.segment_size) o = self.dec(z_slice, g=g) return o, ids_slice, spec_mask, (z, z_p, m_p, logs_p, m_q, logs_q) From 3539e65d8e9d31d44c57b2c4a84ae1f372ade611 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 2 Dec 2024 22:50:33 +0100 Subject: [PATCH 227/255] refactor(synthesizer): set sample rate in loading methods --- TTS/utils/synthesizer.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index 73f596d167..a9b9feffc1 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -95,26 +95,20 @@ def __init__( if tts_checkpoint: self._load_tts(tts_checkpoint, tts_config_path, use_cuda) - self.output_sample_rate = self.tts_config.audio["sample_rate"] if vocoder_checkpoint: self._load_vocoder(vocoder_checkpoint, vocoder_config, use_cuda) - self.output_sample_rate = self.vocoder_config.audio["sample_rate"] if vc_checkpoint and model_dir is None: self._load_vc(vc_checkpoint, vc_config, use_cuda) - self.output_sample_rate = self.vc_config.audio["output_sample_rate"] if model_dir: if "fairseq" in model_dir: self._load_fairseq_from_dir(model_dir, use_cuda) - self.output_sample_rate = self.tts_config.audio["sample_rate"] elif "openvoice" in model_dir: 
self._load_openvoice_from_dir(Path(model_dir), use_cuda) - self.output_sample_rate = self.vc_config.audio["output_sample_rate"] else: self._load_tts_from_dir(model_dir, use_cuda) - self.output_sample_rate = self.tts_config.audio["output_sample_rate"] @staticmethod def _get_segmenter(lang: str): @@ -143,6 +137,7 @@ def _load_vc(self, vc_checkpoint: str, vc_config_path: str, use_cuda: bool) -> N """ # pylint: disable=global-statement self.vc_config = load_config(vc_config_path) + self.output_sample_rate = self.vc_config.audio["output_sample_rate"] self.vc_model = setup_vc_model(config=self.vc_config) self.vc_model.load_checkpoint(self.vc_config, vc_checkpoint) if use_cuda: @@ -157,6 +152,7 @@ def _load_fairseq_from_dir(self, model_dir: str, use_cuda: bool) -> None: self.tts_model = Vits.init_from_config(self.tts_config) self.tts_model.load_fairseq_checkpoint(self.tts_config, checkpoint_dir=model_dir, eval=True) self.tts_config = self.tts_model.config + self.output_sample_rate = self.tts_config.audio["sample_rate"] if use_cuda: self.tts_model.cuda() @@ -170,6 +166,7 @@ def _load_openvoice_from_dir(self, checkpoint: Path, use_cuda: bool) -> None: self.vc_model = OpenVoice.init_from_config(self.vc_config) self.vc_model.load_checkpoint(self.vc_config, checkpoint, eval=True) self.vc_config = self.vc_model.config + self.output_sample_rate = self.vc_config.audio["output_sample_rate"] if use_cuda: self.vc_model.cuda() @@ -180,6 +177,7 @@ def _load_tts_from_dir(self, model_dir: str, use_cuda: bool) -> None: """ config = load_config(os.path.join(model_dir, "config.json")) self.tts_config = config + self.output_sample_rate = self.tts_config.audio["output_sample_rate"] self.tts_model = setup_tts_model(config) self.tts_model.load_checkpoint(config, checkpoint_dir=model_dir, eval=True) if use_cuda: @@ -201,6 +199,7 @@ def _load_tts(self, tts_checkpoint: str, tts_config_path: str, use_cuda: bool) - """ # pylint: disable=global-statement self.tts_config = load_config(tts_config_path) + self.output_sample_rate = self.tts_config.audio["sample_rate"] if self.tts_config["use_phonemes"] and self.tts_config["phonemizer"] is None: raise ValueError("Phonemizer is not defined in the TTS config.") @@ -238,6 +237,7 @@ def _load_vocoder(self, model_file: str, model_config: str, use_cuda: bool) -> N use_cuda (bool): enable/disable CUDA use. 
""" self.vocoder_config = load_config(model_config) + self.output_sample_rate = self.vocoder_config.audio["sample_rate"] self.vocoder_ap = AudioProcessor(**self.vocoder_config.audio) self.vocoder_model = setup_vocoder_model(self.vocoder_config) self.vocoder_model.load_checkpoint(self.vocoder_config, model_file, eval=True) From 834d41bf77cf6d66a494d0ac1abbb060f4784641 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 21 Oct 2024 00:04:44 +0200 Subject: [PATCH 228/255] build: switch to forked coqpit --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d66f33d602..5f1a43d5f8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,8 +75,8 @@ dependencies = [ # Training "matplotlib>=3.7.0", # Coqui stack - "coqui-tts-trainer>=0.1.4,<0.2.0", - "coqpit>=0.0.16", + "coqui-tts-trainer>=0.2.0,<0.3.0", + "coqpit-config>=0.1.1,<0.2.0", "monotonic-alignment-search>=0.1.0", # Gruut + supported languages "gruut[de,es,fr]>=2.4.0", From d4ffff4f6d89848619417cc77bcae36f2dd780e6 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 3 Dec 2024 07:57:27 +0100 Subject: [PATCH 229/255] chore: bump version to 0.25.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5f1a43d5f8..5386d274ac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ build-backend = "hatchling.build" [project] name = "coqui-tts" -version = "0.24.3" +version = "0.25.0" description = "Deep learning for Text to Speech." readme = "README.md" requires-python = ">=3.9, <3.13" From 8241d55e70f29b510a104dea46da59bc0fee7c97 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 4 Dec 2024 10:53:29 +0100 Subject: [PATCH 230/255] fix(pypi-release): fix publishing workflow (#191) --- .github/workflows/pypi-release.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pypi-release.yml b/.github/workflows/pypi-release.yml index 1b7f44654c..ef74c60da6 100644 --- a/.github/workflows/pypi-release.yml +++ b/.github/workflows/pypi-release.yml @@ -46,8 +46,8 @@ jobs: steps: - uses: actions/download-artifact@v4 with: - path: dist - pattern: build + path: "dist/" + name: build - run: | ls -lh dist/ - name: Publish package distributions to PyPI From fe14ca6b68f8757f581ec04d2d0becddd7031d05 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 5 Dec 2024 15:38:50 +0100 Subject: [PATCH 231/255] refactor(xtts): remove duplicate xtts audio config --- TTS/demos/xtts_ft_demo/utils/gpt_train.py | 3 ++- TTS/tts/layers/xtts/trainer/gpt_trainer.py | 7 +------ TTS/tts/models/xtts.py | 5 +++-- recipes/ljspeech/xtts_v1/train_gpt_xtts.py | 3 ++- recipes/ljspeech/xtts_v2/train_gpt_xtts.py | 3 ++- tests/xtts_tests/test_xtts_gpt_train.py | 3 ++- tests/xtts_tests/test_xtts_v2-0_gpt_train.py | 3 ++- 7 files changed, 14 insertions(+), 13 deletions(-) diff --git a/TTS/demos/xtts_ft_demo/utils/gpt_train.py b/TTS/demos/xtts_ft_demo/utils/gpt_train.py index f838297af3..411a9b0dbe 100644 --- a/TTS/demos/xtts_ft_demo/utils/gpt_train.py +++ b/TTS/demos/xtts_ft_demo/utils/gpt_train.py @@ -5,7 +5,8 @@ from TTS.config.shared_configs import BaseDatasetConfig from TTS.tts.datasets import load_tts_samples -from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig, XttsAudioConfig +from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig +from TTS.tts.models.xtts import XttsAudioConfig from TTS.utils.manage import ModelManager 
diff --git a/TTS/tts/layers/xtts/trainer/gpt_trainer.py b/TTS/tts/layers/xtts/trainer/gpt_trainer.py index 0253d65ddd..107054189c 100644 --- a/TTS/tts/layers/xtts/trainer/gpt_trainer.py +++ b/TTS/tts/layers/xtts/trainer/gpt_trainer.py @@ -18,7 +18,7 @@ from TTS.tts.layers.xtts.tokenizer import VoiceBpeTokenizer from TTS.tts.layers.xtts.trainer.dataset import XTTSDataset from TTS.tts.models.base_tts import BaseTTS -from TTS.tts.models.xtts import Xtts, XttsArgs, XttsAudioConfig +from TTS.tts.models.xtts import Xtts, XttsArgs from TTS.utils.generic_utils import is_pytorch_at_least_2_4 logger = logging.getLogger(__name__) @@ -34,11 +34,6 @@ class GPTTrainerConfig(XttsConfig): test_sentences: List[dict] = field(default_factory=lambda: []) -@dataclass -class XttsAudioConfig(XttsAudioConfig): - dvae_sample_rate: int = 22050 - - @dataclass class GPTArgs(XttsArgs): min_conditioning_length: int = 66150 diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index d780e2b323..f05863ae1d 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -11,7 +11,6 @@ from coqpit import Coqpit from trainer.io import load_fsspec -from TTS.tts.configs.xtts_config import XttsConfig from TTS.tts.layers.xtts.gpt import GPT from TTS.tts.layers.xtts.hifigan_decoder import HifiDecoder from TTS.tts.layers.xtts.stream_generator import init_stream_support @@ -103,10 +102,12 @@ class XttsAudioConfig(Coqpit): Args: sample_rate (int): The sample rate in which the GPT operates. output_sample_rate (int): The sample rate of the output audio waveform. + dvae_sample_rate (int): The sample rate of the DVAE """ sample_rate: int = 22050 output_sample_rate: int = 24000 + dvae_sample_rate: int = 22050 @dataclass @@ -721,7 +722,7 @@ def get_compatible_checkpoint_state_dict(self, model_path): def load_checkpoint( self, - config: XttsConfig, + config: "XttsConfig", checkpoint_dir: Optional[str] = None, checkpoint_path: Optional[str] = None, vocab_path: Optional[str] = None, diff --git a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py index d31ec8f1ed..a077a18064 100644 --- a/recipes/ljspeech/xtts_v1/train_gpt_xtts.py +++ b/recipes/ljspeech/xtts_v1/train_gpt_xtts.py @@ -4,7 +4,8 @@ from TTS.config.shared_configs import BaseDatasetConfig from TTS.tts.datasets import load_tts_samples -from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig, XttsAudioConfig +from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig +from TTS.tts.models.xtts import XttsAudioConfig from TTS.utils.manage import ModelManager # Logging parameters diff --git a/recipes/ljspeech/xtts_v2/train_gpt_xtts.py b/recipes/ljspeech/xtts_v2/train_gpt_xtts.py index ccaa97f1e4..362f45008e 100644 --- a/recipes/ljspeech/xtts_v2/train_gpt_xtts.py +++ b/recipes/ljspeech/xtts_v2/train_gpt_xtts.py @@ -4,7 +4,8 @@ from TTS.config.shared_configs import BaseDatasetConfig from TTS.tts.datasets import load_tts_samples -from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig, XttsAudioConfig +from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig +from TTS.tts.models.xtts import XttsAudioConfig from TTS.utils.manage import ModelManager # Logging parameters diff --git a/tests/xtts_tests/test_xtts_gpt_train.py b/tests/xtts_tests/test_xtts_gpt_train.py index b8b9a4e388..bb592f1f2d 100644 --- a/tests/xtts_tests/test_xtts_gpt_train.py +++ b/tests/xtts_tests/test_xtts_gpt_train.py @@ -8,7 +8,8 @@ from 
TTS.config.shared_configs import BaseDatasetConfig from TTS.tts.datasets import load_tts_samples from TTS.tts.layers.xtts.dvae import DiscreteVAE -from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig, XttsAudioConfig +from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig +from TTS.tts.models.xtts import XttsAudioConfig config_dataset = BaseDatasetConfig( formatter="ljspeech", diff --git a/tests/xtts_tests/test_xtts_v2-0_gpt_train.py b/tests/xtts_tests/test_xtts_v2-0_gpt_train.py index 6663433c12..454e867385 100644 --- a/tests/xtts_tests/test_xtts_v2-0_gpt_train.py +++ b/tests/xtts_tests/test_xtts_v2-0_gpt_train.py @@ -8,7 +8,8 @@ from TTS.config.shared_configs import BaseDatasetConfig from TTS.tts.datasets import load_tts_samples from TTS.tts.layers.xtts.dvae import DiscreteVAE -from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig, XttsAudioConfig +from TTS.tts.layers.xtts.trainer.gpt_trainer import GPTArgs, GPTTrainer, GPTTrainerConfig +from TTS.tts.models.xtts import XttsAudioConfig config_dataset = BaseDatasetConfig( formatter="ljspeech", From 8c381e3e48662097e661c0d45b61ad19de56cd30 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 3 Dec 2024 22:06:39 +0100 Subject: [PATCH 232/255] docs: use .to("cuda") instead of deprecated gpu=True --- TTS/api.py | 8 ++++---- docs/source/models/bark.md | 4 ++-- docs/source/models/xtts.md | 10 +++++----- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index ed82825007..86787e0364 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -35,21 +35,21 @@ def __init__( >>> tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav") Example with a single-speaker model: - >>> tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False, gpu=False) + >>> tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False) >>> tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path="output.wav") Example loading a model from a path: - >>> tts = TTS(model_path="/path/to/checkpoint_100000.pth", config_path="/path/to/config.json", progress_bar=False, gpu=False) + >>> tts = TTS(model_path="/path/to/checkpoint_100000.pth", config_path="/path/to/config.json", progress_bar=False) >>> tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path="output.wav") Example voice cloning with YourTTS in English, French and Portuguese: - >>> tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True) + >>> tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to("cuda") >>> tts.tts_to_file("This is voice cloning.", speaker_wav="my/cloning/audio.wav", language="en", file_path="thisisit.wav") >>> tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr", file_path="thisisit.wav") >>> tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt", file_path="thisisit.wav") Example Fairseq TTS models (uses ISO language codes in https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html): - >>> tts = TTS(model_name="tts_models/eng/fairseq/vits", progress_bar=False, gpu=True) + >>> tts = TTS(model_name="tts_models/eng/fairseq/vits", progress_bar=False).to("cuda") >>> tts.tts_to_file("This is a test.", file_path="output.wav") Args: diff --git a/docs/source/models/bark.md b/docs/source/models/bark.md index 
a180afbb91..77f99c0d3a 100644 --- a/docs/source/models/bark.md +++ b/docs/source/models/bark.md @@ -37,7 +37,7 @@ from TTS.api import TTS # Load the model to GPU # Bark is really slow on CPU, so we recommend using GPU. -tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True) +tts = TTS("tts_models/multilingual/multi-dataset/bark").to("cuda") # Cloning a new speaker @@ -57,7 +57,7 @@ tts.tts_to_file(text="Hello, my name is Manmay , how are you?", # random speaker -tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True) +tts = TTS("tts_models/multilingual/multi-dataset/bark").to("cuda") tts.tts_to_file("hello world", file_path="out.wav") ``` diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index c07d879f7c..7c0f1c4a60 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -118,7 +118,7 @@ You can optionally disable sentence splitting for better coherence but more VRAM ```python from TTS.api import TTS -tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True) +tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda") # generate speech by cloning a voice using default settings tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", @@ -137,15 +137,15 @@ You can pass multiple audio files to the `speaker_wav` argument for better voice from TTS.api import TTS # using the default version set in 🐸TTS -tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True) +tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda") # using a specific version # 👀 see the branch names for versions on https://huggingface.co/coqui/XTTS-v2/tree/main # ❗some versions might be incompatible with the API -tts = TTS("xtts_v2.0.2", gpu=True) +tts = TTS("xtts_v2.0.2").to("cuda") # getting the latest XTTS_v2 -tts = TTS("xtts", gpu=True) +tts = TTS("xtts").to("cuda") # generate speech by cloning a voice using default settings tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", @@ -160,7 +160,7 @@ You can do inference using one of the available speakers using the following cod ```python from TTS.api import TTS -tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True) +tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda") # generate speech by cloning a voice using default settings tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", From 5cfb4ecccdec909fd4e92b8e8c833dd33870d38e Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 3 Dec 2024 22:09:03 +0100 Subject: [PATCH 233/255] refactor(api): require keyword arguments except for model_name --- TTS/api.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 86787e0364..62dab32922 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -2,6 +2,7 @@ import tempfile import warnings from pathlib import Path +from typing import Optional from torch import nn @@ -19,12 +20,13 @@ class TTS(nn.Module): def __init__( self, model_name: str = "", - model_path: str = None, - config_path: str = None, - vocoder_path: str = None, - vocoder_config_path: str = None, + *, + model_path: Optional[str] = None, + config_path: Optional[str] = None, + vocoder_path: Optional[str] = None, + vocoder_config_path: Optional[str] = None, progress_bar: bool = True, - gpu=False, + gpu: bool = False, ): """🐸TTS python interface 
that allows to load and use the released models. From 42ad9b00c684666080c406840a8ccab5316734ce Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 4 Dec 2024 10:44:49 +0100 Subject: [PATCH 234/255] feat(api): support specifying vocoders by name --- TTS/api.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 62dab32922..49b9a6b78f 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -23,6 +23,7 @@ def __init__( *, model_path: Optional[str] = None, config_path: Optional[str] = None, + vocoder_name: Optional[str] = None, vocoder_path: Optional[str] = None, vocoder_config_path: Optional[str] = None, progress_bar: bool = True, @@ -58,6 +59,7 @@ def __init__( model_name (str, optional): Model name to load. You can list models by ```tts.models```. Defaults to None. model_path (str, optional): Path to the model checkpoint. Defaults to None. config_path (str, optional): Path to the model config. Defaults to None. + vocoder_name (str, optional): Pre-trained vocoder to use. Defaults to None, i.e. using the default vocoder. vocoder_path (str, optional): Path to the vocoder checkpoint. Defaults to None. vocoder_config_path (str, optional): Path to the vocoder config. Defaults to None. progress_bar (bool, optional): Whether to pring a progress bar while downloading a model. Defaults to True. @@ -74,11 +76,12 @@ def __init__( if model_name is not None and len(model_name) > 0: if "tts_models" in model_name: - self.load_tts_model_by_name(model_name, gpu) + self.load_tts_model_by_name(model_name, vocoder_name, gpu=gpu) elif "voice_conversion_models" in model_name: - self.load_vc_model_by_name(model_name, gpu) + self.load_vc_model_by_name(model_name, gpu=gpu) + # To allow just TTS("xtts") else: - self.load_model_by_name(model_name, gpu) + self.load_model_by_name(model_name, vocoder_name, gpu=gpu) if model_path: self.load_tts_model_by_path( @@ -129,7 +132,9 @@ def get_models_file_path(): def list_models(): return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False).list_models() - def download_model_by_name(self, model_name: str): + def download_model_by_name( + self, model_name: str, vocoder_name: Optional[str] = None + ) -> tuple[Optional[str], Optional[str], Optional[str], Optional[str], Optional[str]]: model_path, config_path, model_item = self.manager.download_model(model_name) if "fairseq" in model_name or (model_item is not None and isinstance(model_item["model_url"], list)): # return model directory if there are multiple files @@ -137,19 +142,21 @@ def download_model_by_name(self, model_name: str): return None, None, None, None, model_path if model_item.get("default_vocoder") is None: return model_path, config_path, None, None, None - vocoder_path, vocoder_config_path, _ = self.manager.download_model(model_item["default_vocoder"]) + if vocoder_name is None: + vocoder_name = model_item["default_vocoder"] + vocoder_path, vocoder_config_path, _ = self.manager.download_model(vocoder_name) return model_path, config_path, vocoder_path, vocoder_config_path, None - def load_model_by_name(self, model_name: str, gpu: bool = False): + def load_model_by_name(self, model_name: str, vocoder_name: Optional[str] = None, *, gpu: bool = False): """Load one of the 🐸TTS models by name. Args: model_name (str): Model name to load. You can list models by ```tts.models```. gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False. 
""" - self.load_tts_model_by_name(model_name, gpu) + self.load_tts_model_by_name(model_name, vocoder_name, gpu=gpu) - def load_vc_model_by_name(self, model_name: str, gpu: bool = False): + def load_vc_model_by_name(self, model_name: str, *, gpu: bool = False): """Load one of the voice conversion models by name. Args: @@ -162,7 +169,7 @@ def load_vc_model_by_name(self, model_name: str, gpu: bool = False): vc_checkpoint=model_path, vc_config=config_path, model_dir=model_dir, use_cuda=gpu ) - def load_tts_model_by_name(self, model_name: str, gpu: bool = False): + def load_tts_model_by_name(self, model_name: str, vocoder_name: Optional[str] = None, *, gpu: bool = False): """Load one of 🐸TTS models by name. Args: @@ -174,7 +181,9 @@ def load_tts_model_by_name(self, model_name: str, gpu: bool = False): self.synthesizer = None self.model_name = model_name - model_path, config_path, vocoder_path, vocoder_config_path, model_dir = self.download_model_by_name(model_name) + model_path, config_path, vocoder_path, vocoder_config_path, model_dir = self.download_model_by_name( + model_name, vocoder_name + ) # init synthesizer # None values are fetch from the model From 5daed879e05178836f6836e7923a6034a1c061e4 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 4 Dec 2024 14:47:46 +0100 Subject: [PATCH 235/255] chore(bin.synthesize): remove unused argument --- TTS/bin/synthesize.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index 454f528ab4..59ceb1db4f 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -253,11 +253,6 @@ def parse_args() -> argparse.Namespace: action="store_true", ) # aux args - parser.add_argument( - "--save_spectogram", - action="store_true", - help="Save raw spectogram for further (vocoder) processing in out_path.", - ) parser.add_argument( "--reference_wav", type=str, From 1a4e58d0ce1caa15b1d476be4da6e379ef46f084 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 4 Dec 2024 15:39:15 +0100 Subject: [PATCH 236/255] feat(api): support passing a custom speaker encoder by path --- TTS/api.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 49b9a6b78f..7ca79405cd 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -26,6 +26,8 @@ def __init__( vocoder_name: Optional[str] = None, vocoder_path: Optional[str] = None, vocoder_config_path: Optional[str] = None, + encoder_path: Optional[str] = None, + encoder_config_path: Optional[str] = None, progress_bar: bool = True, gpu: bool = False, ): @@ -62,6 +64,8 @@ def __init__( vocoder_name (str, optional): Pre-trained vocoder to use. Defaults to None, i.e. using the default vocoder. vocoder_path (str, optional): Path to the vocoder checkpoint. Defaults to None. vocoder_config_path (str, optional): Path to the vocoder config. Defaults to None. + encoder_path: Path to speaker encoder checkpoint. Default to None. + encoder_config_path: Path to speaker encoder config file. Defaults to None. progress_bar (bool, optional): Whether to pring a progress bar while downloading a model. Defaults to True. gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False. """ @@ -71,6 +75,8 @@ def __init__( self.synthesizer = None self.voice_converter = None self.model_name = "" + self.encoder_path = encoder_path + self.encoder_config_path = encoder_config_path if gpu: warnings.warn("`gpu` will be deprecated. 
Please use `tts.to(device)` instead.") @@ -194,8 +200,8 @@ def load_tts_model_by_name(self, model_name: str, vocoder_name: Optional[str] = tts_languages_file=None, vocoder_checkpoint=vocoder_path, vocoder_config=vocoder_config_path, - encoder_checkpoint=None, - encoder_config=None, + encoder_checkpoint=self.encoder_path, + encoder_config=self.encoder_config_path, model_dir=model_dir, use_cuda=gpu, ) @@ -220,8 +226,8 @@ def load_tts_model_by_path( tts_languages_file=None, vocoder_checkpoint=vocoder_path, vocoder_config=vocoder_config, - encoder_checkpoint=None, - encoder_config=None, + encoder_checkpoint=self.encoder_path, + encoder_config=self.encoder_config_path, use_cuda=gpu, ) From 85dbb3b8b3c9bfa9fb863996e2acf55a2d45c568 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 4 Dec 2024 16:02:07 +0100 Subject: [PATCH 237/255] feat(api): allow mixing TTS and vocoder model name and path --- TTS/api.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 7ca79405cd..86593f3fb6 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -75,8 +75,12 @@ def __init__( self.synthesizer = None self.voice_converter = None self.model_name = "" + + self.vocoder_path = vocoder_path + self.vocoder_config_path = vocoder_config_path self.encoder_path = encoder_path self.encoder_config_path = encoder_config_path + if gpu: warnings.warn("`gpu` will be deprecated. Please use `tts.to(device)` instead.") @@ -90,9 +94,7 @@ def __init__( self.load_model_by_name(model_name, vocoder_name, gpu=gpu) if model_path: - self.load_tts_model_by_path( - model_path, config_path, vocoder_path=vocoder_path, vocoder_config=vocoder_config_path, gpu=gpu - ) + self.load_tts_model_by_path(model_path, config_path, gpu=gpu) @property def models(self): @@ -140,18 +142,22 @@ def list_models(): def download_model_by_name( self, model_name: str, vocoder_name: Optional[str] = None - ) -> tuple[Optional[str], Optional[str], Optional[str], Optional[str], Optional[str]]: + ) -> tuple[Optional[str], Optional[str], Optional[str]]: model_path, config_path, model_item = self.manager.download_model(model_name) if "fairseq" in model_name or (model_item is not None and isinstance(model_item["model_url"], list)): # return model directory if there are multiple files # we assume that the model knows how to load itself - return None, None, None, None, model_path + return None, None, model_path if model_item.get("default_vocoder") is None: - return model_path, config_path, None, None, None + return model_path, config_path, None if vocoder_name is None: vocoder_name = model_item["default_vocoder"] vocoder_path, vocoder_config_path, _ = self.manager.download_model(vocoder_name) - return model_path, config_path, vocoder_path, vocoder_config_path, None + # A local vocoder model will take precedence if specified via vocoder_path + if self.vocoder_path is None or self.vocoder_config_path is None: + self.vocoder_path = vocoder_path + self.vocoder_config_path = vocoder_config_path + return model_path, config_path, None def load_model_by_name(self, model_name: str, vocoder_name: Optional[str] = None, *, gpu: bool = False): """Load one of the 🐸TTS models by name. @@ -170,7 +176,7 @@ def load_vc_model_by_name(self, model_name: str, *, gpu: bool = False): gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False. 
""" self.model_name = model_name - model_path, config_path, _, _, model_dir = self.download_model_by_name(model_name) + model_path, config_path, model_dir = self.download_model_by_name(model_name) self.voice_converter = Synthesizer( vc_checkpoint=model_path, vc_config=config_path, model_dir=model_dir, use_cuda=gpu ) @@ -187,9 +193,7 @@ def load_tts_model_by_name(self, model_name: str, vocoder_name: Optional[str] = self.synthesizer = None self.model_name = model_name - model_path, config_path, vocoder_path, vocoder_config_path, model_dir = self.download_model_by_name( - model_name, vocoder_name - ) + model_path, config_path, model_dir = self.download_model_by_name(model_name, vocoder_name) # init synthesizer # None values are fetch from the model @@ -198,17 +202,15 @@ def load_tts_model_by_name(self, model_name: str, vocoder_name: Optional[str] = tts_config_path=config_path, tts_speakers_file=None, tts_languages_file=None, - vocoder_checkpoint=vocoder_path, - vocoder_config=vocoder_config_path, + vocoder_checkpoint=self.vocoder_path, + vocoder_config=self.vocoder_config_path, encoder_checkpoint=self.encoder_path, encoder_config=self.encoder_config_path, model_dir=model_dir, use_cuda=gpu, ) - def load_tts_model_by_path( - self, model_path: str, config_path: str, vocoder_path: str = None, vocoder_config: str = None, gpu: bool = False - ): + def load_tts_model_by_path(self, model_path: str, config_path: str, *, gpu: bool = False) -> None: """Load a model from a path. Args: @@ -224,8 +226,8 @@ def load_tts_model_by_path( tts_config_path=config_path, tts_speakers_file=None, tts_languages_file=None, - vocoder_checkpoint=vocoder_path, - vocoder_config=vocoder_config, + vocoder_checkpoint=self.vocoder_path, + vocoder_config=self.vocoder_config_path, encoder_checkpoint=self.encoder_path, encoder_config=self.encoder_config_path, use_cuda=gpu, From a05177ce713a73112b40cb283ad9d3328a08f7cc Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 4 Dec 2024 16:11:43 +0100 Subject: [PATCH 238/255] chore(api): add type hints --- TTS/api.py | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 86593f3fb6..d16012f849 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -1,3 +1,5 @@ +"""Coqui TTS Python API.""" + import logging import tempfile import warnings @@ -30,7 +32,7 @@ def __init__( encoder_config_path: Optional[str] = None, progress_bar: bool = True, gpu: bool = False, - ): + ) -> None: """🐸TTS python interface that allows to load and use the released models. Example with a multi-speaker model: @@ -97,17 +99,17 @@ def __init__( self.load_tts_model_by_path(model_path, config_path, gpu=gpu) @property - def models(self): + def models(self) -> list[str]: return self.manager.list_tts_models() @property - def is_multi_speaker(self): + def is_multi_speaker(self) -> bool: if hasattr(self.synthesizer.tts_model, "speaker_manager") and self.synthesizer.tts_model.speaker_manager: return self.synthesizer.tts_model.speaker_manager.num_speakers > 1 return False @property - def is_multi_lingual(self): + def is_multi_lingual(self) -> bool: # Not sure what sets this to None, but applied a fix to prevent crashing. 
if ( isinstance(self.model_name, str) @@ -121,23 +123,23 @@ def is_multi_lingual(self): return False @property - def speakers(self): + def speakers(self) -> list[str]: if not self.is_multi_speaker: return None return self.synthesizer.tts_model.speaker_manager.speaker_names @property - def languages(self): + def languages(self) -> list[str]: if not self.is_multi_lingual: return None return self.synthesizer.tts_model.language_manager.language_names @staticmethod - def get_models_file_path(): + def get_models_file_path() -> Path: return Path(__file__).parent / ".models.json" @staticmethod - def list_models(): + def list_models() -> list[str]: return ModelManager(models_file=TTS.get_models_file_path(), progress_bar=False).list_models() def download_model_by_name( @@ -159,7 +161,7 @@ def download_model_by_name( self.vocoder_config_path = vocoder_config_path return model_path, config_path, None - def load_model_by_name(self, model_name: str, vocoder_name: Optional[str] = None, *, gpu: bool = False): + def load_model_by_name(self, model_name: str, vocoder_name: Optional[str] = None, *, gpu: bool = False) -> None: """Load one of the 🐸TTS models by name. Args: @@ -168,7 +170,7 @@ def load_model_by_name(self, model_name: str, vocoder_name: Optional[str] = None """ self.load_tts_model_by_name(model_name, vocoder_name, gpu=gpu) - def load_vc_model_by_name(self, model_name: str, *, gpu: bool = False): + def load_vc_model_by_name(self, model_name: str, *, gpu: bool = False) -> None: """Load one of the voice conversion models by name. Args: @@ -181,7 +183,7 @@ def load_vc_model_by_name(self, model_name: str, *, gpu: bool = False): vc_checkpoint=model_path, vc_config=config_path, model_dir=model_dir, use_cuda=gpu ) - def load_tts_model_by_name(self, model_name: str, vocoder_name: Optional[str] = None, *, gpu: bool = False): + def load_tts_model_by_name(self, model_name: str, vocoder_name: Optional[str] = None, *, gpu: bool = False) -> None: """Load one of 🐸TTS models by name. Args: @@ -235,11 +237,11 @@ def load_tts_model_by_path(self, model_path: str, config_path: str, *, gpu: bool def _check_arguments( self, - speaker: str = None, - language: str = None, - speaker_wav: str = None, - emotion: str = None, - speed: float = None, + speaker: Optional[str] = None, + language: Optional[str] = None, + speaker_wav: Optional[str] = None, + emotion: Optional[str] = None, + speed: Optional[float] = None, **kwargs, ) -> None: """Check if the arguments are valid for the model.""" @@ -320,7 +322,7 @@ def tts_to_file( file_path: str = "output.wav", split_sentences: bool = True, **kwargs, - ): + ) -> str: """Convert text to speech. Args: @@ -451,7 +453,7 @@ def tts_with_vc_to_file( file_path: str = "output.wav", speaker: str = None, split_sentences: bool = True, - ): + ) -> str: """Convert text to speech with voice conversion and save to file. Check `tts_with_vc` for more details. 
@@ -479,3 +481,4 @@ def tts_with_vc_to_file( text=text, language=language, speaker_wav=speaker_wav, speaker=speaker, split_sentences=split_sentences ) save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate) + return file_path From 89abd9862009f642385af96e7c0e2fdd0b50a5f6 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 5 Dec 2024 21:34:04 +0100 Subject: [PATCH 239/255] feat(api): support passing speaker/language id file paths --- TTS/api.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index d16012f849..90f167dc52 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -30,6 +30,8 @@ def __init__( vocoder_config_path: Optional[str] = None, encoder_path: Optional[str] = None, encoder_config_path: Optional[str] = None, + speakers_file_path: Optional[str] = None, + language_ids_file_path: Optional[str] = None, progress_bar: bool = True, gpu: bool = False, ) -> None: @@ -68,8 +70,10 @@ def __init__( vocoder_config_path (str, optional): Path to the vocoder config. Defaults to None. encoder_path: Path to speaker encoder checkpoint. Default to None. encoder_config_path: Path to speaker encoder config file. Defaults to None. - progress_bar (bool, optional): Whether to pring a progress bar while downloading a model. Defaults to True. - gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. Defaults to False. + speakers_file_path: JSON file for multi-speaker model. Defaults to None. + language_ids_file_path: JSON file for multilingual model. Defaults to None + progress_bar (bool, optional): Whether to print a progress bar while downloading a model. Defaults to True. + gpu (bool, optional): Enable/disable GPU. Defaults to False. DEPRECATED, use TTS(...).to("cuda") """ super().__init__() self.manager = ModelManager(models_file=self.get_models_file_path(), progress_bar=progress_bar) @@ -82,6 +86,8 @@ def __init__( self.vocoder_config_path = vocoder_config_path self.encoder_path = encoder_path self.encoder_config_path = encoder_config_path + self.speakers_file_path = speakers_file_path + self.language_ids_file_path = language_ids_file_path if gpu: warnings.warn("`gpu` will be deprecated. Please use `tts.to(device)` instead.") @@ -226,8 +232,8 @@ def load_tts_model_by_path(self, model_path: str, config_path: str, *, gpu: bool self.synthesizer = Synthesizer( tts_checkpoint=model_path, tts_config_path=config_path, - tts_speakers_file=None, - tts_languages_file=None, + tts_speakers_file=self.speakers_file_path, + tts_languages_file=self.language_ids_file_path, vocoder_checkpoint=self.vocoder_path, vocoder_config=self.vocoder_config_path, encoder_checkpoint=self.encoder_path, From 806af96e4c993eae45d2129773784a5624fb4bb5 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Fri, 6 Dec 2024 11:56:54 +0100 Subject: [PATCH 240/255] refactor(api): use save_wav() from Synthesizer instance --- TTS/api.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 90f167dc52..be6141d312 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -9,7 +9,6 @@ from torch import nn from TTS.config import load_config -from TTS.utils.audio.numpy_transforms import save_wav from TTS.utils.manage import ModelManager from TTS.utils.synthesizer import Synthesizer @@ -394,6 +393,7 @@ def voice_conversion_to_file( source_wav: str, target_wav: str, file_path: str = "output.wav", + pipe_out=None, ) -> str: """Voice conversion with FreeVC. Convert source wav to target speaker. 
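A short usage sketch of the helper being changed in this hunk. The model name is the FreeVC voice conversion model referenced earlier in this series, the wav paths are placeholders, and `pipe_out` is only needed when piping audio to stdout:

```python
# Sketch: saving now goes through the Synthesizer's save_wav(), the optional
# pipe_out stream is forwarded to it, and the output file path is returned.
from TTS.api import TTS

api = TTS("voice_conversion_models/multilingual/vctk/freevc24").to("cuda")
out_path = api.voice_conversion_to_file(
    source_wav="my/source.wav",  # placeholder input
    target_wav="my/target.wav",  # placeholder reference speaker
    file_path="output.wav",
    pipe_out=None,               # pass a binary stream to also write the wav to it
)
print(out_path)  # "output.wav"
```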
@@ -404,9 +404,11 @@ def voice_conversion_to_file( Path to the target wav file. file_path (str, optional): Output file path. Defaults to "output.wav". + pipe_out (BytesIO, optional): + Flag to stdout the generated TTS wav file for shell pipe. """ wav = self.voice_conversion(source_wav=source_wav, target_wav=target_wav) - save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate) + self.voice_converter.save_wav(wav=wav, path=file_path, pipe_out=pipe_out) return file_path def tts_with_vc( @@ -459,6 +461,7 @@ def tts_with_vc_to_file( file_path: str = "output.wav", speaker: str = None, split_sentences: bool = True, + pipe_out=None, ) -> str: """Convert text to speech with voice conversion and save to file. @@ -482,9 +485,11 @@ def tts_with_vc_to_file( Split text into sentences, synthesize them separately and concatenate the file audio. Setting it False uses more VRAM and possibly hit model specific text length or VRAM limits. Only applicable to the 🐸TTS models. Defaults to True. + pipe_out (BytesIO, optional): + Flag to stdout the generated TTS wav file for shell pipe. """ wav = self.tts_with_vc( text=text, language=language, speaker_wav=speaker_wav, speaker=speaker, split_sentences=split_sentences ) - save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate) + self.voice_converter.save_wav(wav=wav, path=file_path, pipe_out=pipe_out) return file_path From e0f621180f328eac461e9fee978073db3c7b8421 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 2 Dec 2024 22:34:19 +0100 Subject: [PATCH 241/255] refactor(bin.synthesize): use Python API for CLI --- TTS/api.py | 16 +++-- TTS/bin/synthesize.py | 125 +++++++++++---------------------- tests/zoo_tests/test_models.py | 33 ++++----- 3 files changed, 65 insertions(+), 109 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index be6141d312..83189482cb 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -109,7 +109,11 @@ def models(self) -> list[str]: @property def is_multi_speaker(self) -> bool: - if hasattr(self.synthesizer.tts_model, "speaker_manager") and self.synthesizer.tts_model.speaker_manager: + if ( + self.synthesizer is not None + and hasattr(self.synthesizer.tts_model, "speaker_manager") + and self.synthesizer.tts_model.speaker_manager + ): return self.synthesizer.tts_model.speaker_manager.num_speakers > 1 return False @@ -123,7 +127,11 @@ def is_multi_lingual(self) -> bool: and ("xtts" in self.config.model or "languages" in self.config and len(self.config.languages) > 1) ): return True - if hasattr(self.synthesizer.tts_model, "language_manager") and self.synthesizer.tts_model.language_manager: + if ( + self.synthesizer is not None + and hasattr(self.synthesizer.tts_model, "language_manager") + and self.synthesizer.tts_model.language_manager + ): return self.synthesizer.tts_model.language_manager.num_languages > 1 return False @@ -306,10 +314,6 @@ def tts( speaker_name=speaker, language_name=language, speaker_wav=speaker_wav, - reference_wav=None, - style_wav=None, - style_text=None, - reference_speaker_name=None, split_sentences=split_sentences, **kwargs, ) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index 59ceb1db4f..885f6d6f0c 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -9,8 +9,6 @@ from argparse import RawTextHelpFormatter # pylint: disable=redefined-outer-name, unused-argument -from pathlib import Path - from TTS.utils.generic_utils import ConsoleFormatter, setup_logger logger = logging.getLogger(__name__) @@ -312,7 
+310,8 @@ def parse_args() -> argparse.Namespace: return args -def main(): +def main() -> None: + """Entry point for `tts` command line interface.""" setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) args = parse_args() @@ -320,12 +319,11 @@ def main(): with contextlib.redirect_stdout(None if args.pipe_out else sys.stdout): # Late-import to make things load faster + from TTS.api import TTS from TTS.utils.manage import ModelManager - from TTS.utils.synthesizer import Synthesizer # load model manager - path = Path(__file__).parent / "../.models.json" - manager = ModelManager(path, progress_bar=args.progress_bar) + manager = ModelManager(models_file=TTS.get_models_file_path(), progress_bar=args.progress_bar) tts_path = None tts_config_path = None @@ -339,12 +337,12 @@ def main(): vc_config_path = None model_dir = None - # CASE1 #list : list pre-trained TTS models + # 1) List pre-trained TTS models if args.list_models: manager.list_models() sys.exit() - # CASE2 #info : model info for pre-trained TTS models + # 2) Info about pre-trained TTS models (without loading a model) if args.model_info_by_idx: model_query = args.model_info_by_idx manager.model_info_by_idx(model_query) @@ -355,91 +353,50 @@ def main(): manager.model_info_by_full_name(model_query_full_name) sys.exit() - # CASE3: load pre-trained model paths - if args.model_name is not None and not args.model_path: - model_path, config_path, model_item = manager.download_model(args.model_name) - # tts model - if model_item["model_type"] == "tts_models": - tts_path = model_path - tts_config_path = config_path - if args.vocoder_name is None and "default_vocoder" in model_item: - args.vocoder_name = model_item["default_vocoder"] - - # voice conversion model - if model_item["model_type"] == "voice_conversion_models": - vc_path = model_path - vc_config_path = config_path - - # tts model with multiple files to be loaded from the directory path - if model_item.get("author", None) == "fairseq" or isinstance(model_item["model_url"], list): - model_dir = model_path - tts_path = None - tts_config_path = None - args.vocoder_name = None - - # load vocoder - if args.vocoder_name is not None and not args.vocoder_path: - vocoder_path, vocoder_config_path, _ = manager.download_model(args.vocoder_name) - - # CASE4: set custom model paths - if args.model_path is not None: - tts_path = args.model_path - tts_config_path = args.config_path - speakers_file_path = args.speakers_file_path - language_ids_file_path = args.language_ids_file_path - - if args.vocoder_path is not None: - vocoder_path = args.vocoder_path - vocoder_config_path = args.vocoder_config_path - - if args.encoder_path is not None: - encoder_path = args.encoder_path - encoder_config_path = args.encoder_config_path - + # 3) Load a model for further info or TTS/VC device = args.device if args.use_cuda: device = "cuda" - - # load models - synthesizer = Synthesizer( - tts_checkpoint=tts_path, - tts_config_path=tts_config_path, - tts_speakers_file=speakers_file_path, - tts_languages_file=language_ids_file_path, - vocoder_checkpoint=vocoder_path, - vocoder_config=vocoder_config_path, - encoder_checkpoint=encoder_path, - encoder_config=encoder_config_path, - vc_checkpoint=vc_path, - vc_config=vc_config_path, - model_dir=model_dir, - voice_dir=args.voice_dir, + # A local model will take precedence if specified via modeL_path + model_name = args.model_name if args.model_path is None else None + api = TTS( + model_name=model_name, + model_path=args.model_path, + 
config_path=args.config_path, + vocoder_name=args.vocoder_name, + vocoder_path=args.vocoder_path, + vocoder_config_path=args.vocoder_config_path, + encoder_path=args.encoder_path, + encoder_config_path=args.encoder_config_path, + speakers_file_path=args.speakers_file_path, + language_ids_file_path=args.language_ids_file_path, + progress_bar=args.progress_bar, ).to(device) # query speaker ids of a multi-speaker model. if args.list_speaker_idxs: - if synthesizer.tts_model.speaker_manager is None: + if not api.is_multi_speaker: logger.info("Model only has a single speaker.") return logger.info( "Available speaker ids: (Set --speaker_idx flag to one of these values to use the multi-speaker model." ) - logger.info(list(synthesizer.tts_model.speaker_manager.name_to_id.keys())) + logger.info(api.speakers) return # query langauge ids of a multi-lingual model. if args.list_language_idxs: - if synthesizer.tts_model.language_manager is None: + if not api.is_multi_lingual: logger.info("Monolingual model.") return logger.info( "Available language ids: (Set --language_idx flag to one of these values to use the multi-lingual model." ) - logger.info(synthesizer.tts_model.language_manager.name_to_id) + logger.info(api.languages) return # check the arguments against a multi-speaker model. - if synthesizer.tts_speakers_file and (not args.speaker_idx and not args.speaker_wav): + if api.is_multi_speaker and (not args.speaker_idx and not args.speaker_wav): logger.error( "Looks like you use a multi-speaker model. Define `--speaker_idx` to " "select the target speaker. You can list the available speakers for this model by `--list_speaker_idxs`." @@ -450,31 +407,29 @@ def main(): if args.text: logger.info("Text: %s", args.text) - # kick it - if tts_path is not None: - wav = synthesizer.tts( - args.text, - speaker_name=args.speaker_idx, - language_name=args.language_idx, + if args.text is not None: + api.tts_to_file( + text=args.text, + speaker=args.speaker_idx, + language=args.language_idx, speaker_wav=args.speaker_wav, + pipe_out=pipe_out, + file_path=args.out_path, reference_wav=args.reference_wav, style_wav=args.capacitron_style_wav, style_text=args.capacitron_style_text, reference_speaker_name=args.reference_speaker_idx, + voice_dir=args.voice_dir, ) - elif vc_path is not None: - wav = synthesizer.voice_conversion( + logger.info("Saved TTS output to %s", args.out_path) + elif args.source_wav is not None and args.target_wav is not None: + api.voice_conversion_to_file( source_wav=args.source_wav, target_wav=args.target_wav, + file_path=args.out_path, + pipe_out=pipe_out, ) - elif model_dir is not None: - wav = synthesizer.tts( - args.text, speaker_name=args.speaker_idx, language_name=args.language_idx, speaker_wav=args.speaker_wav - ) - - # save the results - synthesizer.save_wav(wav, args.out_path, pipe_out=pipe_out) - logger.info("Saved output to %s", args.out_path) + logger.info("Saved VC output to %s", args.out_path) if __name__ == "__main__": diff --git a/tests/zoo_tests/test_models.py b/tests/zoo_tests/test_models.py index b944423988..f38880b51f 100644 --- a/tests/zoo_tests/test_models.py +++ b/tests/zoo_tests/test_models.py @@ -34,30 +34,27 @@ def run_models(offset=0, step=1): # download and run the model speaker_files = glob.glob(local_download_dir + "/speaker*") language_files = glob.glob(local_download_dir + "/language*") - language_id = "" + speaker_arg = "" + language_arg = "" if len(speaker_files) > 0: # multi-speaker model if "speaker_ids" in speaker_files[0]: speaker_manager = 
SpeakerManager(speaker_id_file_path=speaker_files[0]) elif "speakers" in speaker_files[0]: speaker_manager = SpeakerManager(d_vectors_file_path=speaker_files[0]) - - # multi-lingual model - Assuming multi-lingual models are also multi-speaker - if len(language_files) > 0 and "language_ids" in language_files[0]: - language_manager = LanguageManager(language_ids_file_path=language_files[0]) - language_id = language_manager.language_names[0] - - speaker_id = list(speaker_manager.name_to_id.keys())[0] - run_cli( - f"tts --model_name {model_name} " - f'--text "This is an example." --out_path "{output_path}" --speaker_idx "{speaker_id}" --language_idx "{language_id}" --no-progress_bar' - ) - else: - # single-speaker model - run_cli( - f"tts --model_name {model_name} " - f'--text "This is an example." --out_path "{output_path}" --no-progress_bar' - ) + speakers = list(speaker_manager.name_to_id.keys()) + if len(speakers) > 1: + speaker_arg = f'--speaker_idx "{speakers[0]}"' + if len(language_files) > 0 and "language_ids" in language_files[0]: + # multi-lingual model + language_manager = LanguageManager(language_ids_file_path=language_files[0]) + languages = language_manager.language_names + if len(languages) > 1: + language_arg = f'--language_idx "{languages[0]}"' + run_cli( + f'tts --model_name {model_name} --text "This is an example." ' + f'--out_path "{output_path}" {speaker_arg} {language_arg} --no-progress_bar' + ) # remove downloaded models shutil.rmtree(local_download_dir) shutil.rmtree(get_user_data_dir("tts")) From c0d9ed3d18b708956d9d9d7c43b0c591d66db996 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 9 Dec 2024 16:13:13 +0100 Subject: [PATCH 242/255] fix: handle difference in xtts/tortoise attention (#199) --- TTS/tts/layers/tortoise/arch_utils.py | 17 ++-- TTS/tts/layers/tortoise/autoregressive.py | 4 +- TTS/tts/layers/tortoise/classifier.py | 2 +- TTS/tts/layers/tortoise/diffusion_decoder.py | 21 +++-- TTS/tts/layers/xtts/latent_encoder.py | 95 -------------------- 5 files changed, 28 insertions(+), 111 deletions(-) delete mode 100644 TTS/tts/layers/xtts/latent_encoder.py diff --git a/TTS/tts/layers/tortoise/arch_utils.py b/TTS/tts/layers/tortoise/arch_utils.py index 4c3733e691..1bbf676393 100644 --- a/TTS/tts/layers/tortoise/arch_utils.py +++ b/TTS/tts/layers/tortoise/arch_utils.py @@ -70,11 +70,10 @@ def forward(self, qkv, mask=None, rel_pos=None): weight = rel_pos(weight.reshape(bs, self.n_heads, weight.shape[-2], weight.shape[-1])).reshape( bs * self.n_heads, weight.shape[-2], weight.shape[-1] ) - weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) if mask is not None: - # The proper way to do this is to mask before the softmax using -inf, but that doesn't work properly on CPUs. 
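The lines that follow replace this post-softmax mask multiplication with pre-softmax `-inf` masking, which is exactly what the old comment recommended. A self-contained sketch, not taken from the patch, of why the two approaches differ numerically:

```python
# Sketch: multiplying the softmax output by the mask leaves the surviving
# weights unnormalised, so they no longer sum to one; setting the masked
# logits to -inf before the softmax keeps a proper distribution over the
# unmasked positions.
import torch

scores = torch.tensor([[2.0, 1.0, 0.5]])
mask = torch.tensor([[True, True, False]])  # last position should be ignored

post = torch.softmax(scores, dim=-1) * mask                          # old behaviour
pre = torch.softmax(scores.masked_fill(~mask, -torch.inf), dim=-1)   # new behaviour

print(post.sum(-1))  # ~0.86, the mass assigned to the masked position is lost
print(pre.sum(-1))   # 1.0
```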
- mask = mask.repeat(self.n_heads, 1).unsqueeze(1) - weight = weight * mask + mask = mask.repeat(self.n_heads, 1, 1) + weight[mask.logical_not()] = -torch.inf + weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) a = torch.einsum("bts,bcs->bct", weight, v) return a.reshape(bs, -1, length) @@ -93,7 +92,9 @@ def __init__( channels, num_heads=1, num_head_channels=-1, + *, relative_pos_embeddings=False, + tortoise_norm=False, ): super().__init__() self.channels = channels @@ -108,6 +109,7 @@ def __init__( self.qkv = nn.Conv1d(channels, channels * 3, 1) # split heads before split qkv self.attention = QKVAttentionLegacy(self.num_heads) + self.tortoise_norm = tortoise_norm self.proj_out = zero_module(nn.Conv1d(channels, channels, 1)) if relative_pos_embeddings: @@ -124,10 +126,13 @@ def __init__( def forward(self, x, mask=None): b, c, *spatial = x.shape x = x.reshape(b, c, -1) - qkv = self.qkv(self.norm(x)) + x_norm = self.norm(x) + qkv = self.qkv(x_norm) h = self.attention(qkv, mask, self.relative_pos_embeddings) h = self.proj_out(h) - return (x + h).reshape(b, c, *spatial) + if self.tortoise_norm: + return (x + h).reshape(b, c, *spatial) + return (x_norm + h).reshape(b, c, *spatial) class Upsample(nn.Module): diff --git a/TTS/tts/layers/tortoise/autoregressive.py b/TTS/tts/layers/tortoise/autoregressive.py index 07cf3d542b..00c884e973 100644 --- a/TTS/tts/layers/tortoise/autoregressive.py +++ b/TTS/tts/layers/tortoise/autoregressive.py @@ -176,12 +176,14 @@ def __init__( embedding_dim, attn_blocks=6, num_attn_heads=4, + *, + tortoise_norm=False, ): super().__init__() attn = [] self.init = nn.Conv1d(spec_dim, embedding_dim, kernel_size=1) for a in range(attn_blocks): - attn.append(AttentionBlock(embedding_dim, num_attn_heads)) + attn.append(AttentionBlock(embedding_dim, num_attn_heads, tortoise_norm=tortoise_norm)) self.attn = nn.Sequential(*attn) self.dim = embedding_dim diff --git a/TTS/tts/layers/tortoise/classifier.py b/TTS/tts/layers/tortoise/classifier.py index c72834e9a8..337323db67 100644 --- a/TTS/tts/layers/tortoise/classifier.py +++ b/TTS/tts/layers/tortoise/classifier.py @@ -97,7 +97,7 @@ def __init__( self.final = nn.Sequential(normalization(ch), nn.SiLU(), nn.Conv1d(ch, embedding_dim, 1)) attn = [] for a in range(attn_blocks): - attn.append(AttentionBlock(embedding_dim, num_attn_heads)) + attn.append(AttentionBlock(embedding_dim, num_attn_heads, tortoise_norm=True)) self.attn = nn.Sequential(*attn) self.dim = embedding_dim diff --git a/TTS/tts/layers/tortoise/diffusion_decoder.py b/TTS/tts/layers/tortoise/diffusion_decoder.py index 15bbfb7121..cfdeaff8bb 100644 --- a/TTS/tts/layers/tortoise/diffusion_decoder.py +++ b/TTS/tts/layers/tortoise/diffusion_decoder.py @@ -130,7 +130,7 @@ def __init__(self, model_channels, dropout, num_heads): dims=1, use_scale_shift_norm=True, ) - self.attn = AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True) + self.attn = AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True, tortoise_norm=True) def forward(self, x, time_emb): y = self.resblk(x, time_emb) @@ -177,17 +177,17 @@ def __init__( # transformer network. 
self.code_embedding = nn.Embedding(in_tokens, model_channels) self.code_converter = nn.Sequential( - AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True), - AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True), - AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True), + AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True, tortoise_norm=True), + AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True, tortoise_norm=True), + AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True, tortoise_norm=True), ) self.code_norm = normalization(model_channels) self.latent_conditioner = nn.Sequential( nn.Conv1d(in_latent_channels, model_channels, 3, padding=1), - AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True), - AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True), - AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True), - AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True), + AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True, tortoise_norm=True), + AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True, tortoise_norm=True), + AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True, tortoise_norm=True), + AttentionBlock(model_channels, num_heads, relative_pos_embeddings=True, tortoise_norm=True), ) self.contextual_embedder = nn.Sequential( nn.Conv1d(in_channels, model_channels, 3, padding=1, stride=2), @@ -196,26 +196,31 @@ def __init__( model_channels * 2, num_heads, relative_pos_embeddings=True, + tortoise_norm=True, ), AttentionBlock( model_channels * 2, num_heads, relative_pos_embeddings=True, + tortoise_norm=True, ), AttentionBlock( model_channels * 2, num_heads, relative_pos_embeddings=True, + tortoise_norm=True, ), AttentionBlock( model_channels * 2, num_heads, relative_pos_embeddings=True, + tortoise_norm=True, ), AttentionBlock( model_channels * 2, num_heads, relative_pos_embeddings=True, + tortoise_norm=True, ), ) self.unconditioned_embedding = nn.Parameter(torch.randn(1, model_channels, 1)) diff --git a/TTS/tts/layers/xtts/latent_encoder.py b/TTS/tts/layers/xtts/latent_encoder.py deleted file mode 100644 index 6becffb8b7..0000000000 --- a/TTS/tts/layers/xtts/latent_encoder.py +++ /dev/null @@ -1,95 +0,0 @@ -# ported from: Originally ported from: https://github.com/neonbjb/tortoise-tts - -import math - -import torch -from torch import nn -from torch.nn import functional as F - -from TTS.tts.layers.tortoise.arch_utils import normalization, zero_module - - -def conv_nd(dims, *args, **kwargs): - if dims == 1: - return nn.Conv1d(*args, **kwargs) - elif dims == 2: - return nn.Conv2d(*args, **kwargs) - elif dims == 3: - return nn.Conv3d(*args, **kwargs) - raise ValueError(f"unsupported dimensions: {dims}") - - -class QKVAttention(nn.Module): - def __init__(self, n_heads): - super().__init__() - self.n_heads = n_heads - - def forward(self, qkv, mask=None, qk_bias=0): - """ - Apply QKV attention. - - :param qkv: an [N x (H * 3 * C) x T] tensor of Qs, Ks, and Vs. - :return: an [N x (H * C) x T] tensor after attention. 
- """ - bs, width, length = qkv.shape - assert width % (3 * self.n_heads) == 0 - ch = width // (3 * self.n_heads) - q, k, v = qkv.reshape(bs * self.n_heads, ch * 3, length).split(ch, dim=1) - scale = 1 / math.sqrt(math.sqrt(ch)) - weight = torch.einsum("bct,bcs->bts", q * scale, k * scale) # More stable with f16 than dividing afterwards - weight = weight + qk_bias - if mask is not None: - mask = mask.repeat(self.n_heads, 1, 1) - weight[mask.logical_not()] = -torch.inf - weight = torch.softmax(weight.float(), dim=-1).type(weight.dtype) - a = torch.einsum("bts,bcs->bct", weight, v) - - return a.reshape(bs, -1, length) - - -class AttentionBlock(nn.Module): - """An attention block that allows spatial positions to attend to each other.""" - - def __init__( - self, - channels, - num_heads=1, - num_head_channels=-1, - out_channels=None, - do_activation=False, - ): - super().__init__() - self.channels = channels - out_channels = channels if out_channels is None else out_channels - self.do_activation = do_activation - if num_head_channels == -1: - self.num_heads = num_heads - else: - assert ( - channels % num_head_channels == 0 - ), f"q,k,v channels {channels} is not divisible by num_head_channels {num_head_channels}" - self.num_heads = channels // num_head_channels - self.norm = normalization(channels) - self.qkv = conv_nd(1, channels, out_channels * 3, 1) - self.attention = QKVAttention(self.num_heads) - - self.x_proj = nn.Identity() if out_channels == channels else conv_nd(1, channels, out_channels, 1) - self.proj_out = zero_module(conv_nd(1, out_channels, out_channels, 1)) - - def forward(self, x, mask=None, qk_bias=0): - b, c, *spatial = x.shape - if mask is not None: - if len(mask.shape) == 2: - mask = mask.unsqueeze(0).repeat(x.shape[0], 1, 1) - if mask.shape[1] != x.shape[-1]: - mask = mask[:, : x.shape[-1], : x.shape[-1]] - - x = x.reshape(b, c, -1) - x = self.norm(x) - if self.do_activation: - x = F.silu(x, inplace=True) - qkv = self.qkv(x) - h = self.attention(qkv, mask=mask, qk_bias=qk_bias) - h = self.proj_out(h) - xp = self.x_proj(x) - return (xp + h).reshape(b, xp.shape[1], *spatial) From f329072df200bf1473f881dd5afe854660d1d9fe Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 9 Dec 2024 16:35:50 +0100 Subject: [PATCH 243/255] chore: bump version to 0.25.1 (#202) --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5386d274ac..bf0a1d88c2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ build-backend = "hatchling.build" [project] name = "coqui-tts" -version = "0.25.0" +version = "0.25.1" description = "Deep learning for Text to Speech." readme = "README.md" requires-python = ">=3.9, <3.13" From 236e4901d8924b06a9387c7eb12d1bd83d4a6956 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Wed, 11 Dec 2024 23:26:21 +0100 Subject: [PATCH 244/255] build(docs): update dependencies, fix makefile --- Makefile | 5 +---- docs/source/conf.py | 3 +++ pyproject.toml | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Makefile b/Makefile index 1d6867f5e8..6964773fb5 100644 --- a/Makefile +++ b/Makefile @@ -59,9 +59,6 @@ lint: ## run linters. system-deps: ## install linux system deps sudo apt-get install -y libsndfile1-dev -build-docs: ## build the docs - cd docs && make clean && make build - install: ## install 🐸 TTS uv sync --all-extras @@ -70,4 +67,4 @@ install_dev: ## install 🐸 TTS for development. 
uv run pre-commit install docs: ## build the docs - $(MAKE) -C docs clean && $(MAKE) -C docs html + uv run --group docs $(MAKE) -C docs clean && uv run --group docs $(MAKE) -C docs html diff --git a/docs/source/conf.py b/docs/source/conf.py index e7d36c1f43..e878d0e8f9 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -52,6 +52,7 @@ "sphinx_inline_tabs", ] +suppress_warnings = ["autosectionlabel.*"] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -67,6 +68,8 @@ "linkify", ] +myst_heading_anchors = 4 + # 'sphinxcontrib.katex', # 'sphinx.ext.autosectionlabel', diff --git a/pyproject.toml b/pyproject.toml index bf0a1d88c2..16d990c169 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -143,12 +143,12 @@ dev = [ ] # Dependencies for building the documentation docs = [ - "furo>=2023.5.20", - "myst-parser==2.0.0", - "sphinx==7.2.5", + "furo>=2024.8.6", + "myst-parser==3.0.1", + "sphinx==7.4.7", "sphinx_inline_tabs>=2023.4.21", - "sphinx_copybutton>=0.1", - "linkify-it-py>=2.0.0", + "sphinx_copybutton>=0.5.2", + "linkify-it-py>=2.0.3", ] [project.urls] From 849e75e96707a1e33a8f9d79a64ed27829bcd23c Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 12 Dec 2024 00:37:48 +0100 Subject: [PATCH 245/255] docs: improve documentation --- CONTRIBUTING.md | 24 ++-- README.md | 116 ++++++++---------- TTS/model.py | 2 +- TTS/tts/models/bark.py | 10 +- TTS/tts/models/base_tts.py | 10 +- TTS/tts/models/overflow.py | 45 +++---- TTS/tts/models/tortoise.py | 6 +- TTS/tts/models/xtts.py | 4 +- docs/source/configuration.md | 4 +- docs/source/docker_images.md | 10 +- docs/source/faq.md | 9 +- docs/source/finetuning.md | 12 +- docs/source/formatting_your_dataset.md | 4 +- .../implementing_a_new_language_frontend.md | 4 +- docs/source/implementing_a_new_model.md | 6 +- docs/source/index.md | 105 ++++++++-------- docs/source/inference.md | 12 +- docs/source/installation.md | 8 +- docs/source/main_classes/model_api.md | 8 +- docs/source/main_classes/trainer_api.md | 2 +- docs/source/marytts.md | 2 +- docs/source/models/xtts.md | 64 +++++----- docs/source/training_a_model.md | 4 +- docs/source/tts_datasets.md | 2 +- docs/source/tutorial_for_nervous_beginners.md | 47 +++---- 25 files changed, 249 insertions(+), 271 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index d4a8cf0090..2b3a973763 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,30 +11,25 @@ You can contribute not only with code but with bug reports, comments, questions, If you like to contribute code, squash a bug but if you don't know where to start, here are some pointers. -- [Development Road Map](https://github.com/coqui-ai/TTS/issues/378) - - You can pick something out of our road map. We keep the progess of the project in this simple issue thread. It has new model proposals or developmental updates etc. - - [Github Issues Tracker](https://github.com/idiap/coqui-ai-TTS/issues) This is a place to find feature requests, bugs. - Issues with the ```good first issue``` tag are good place for beginners to take on. - -- ✨**PR**✨ [pages](https://github.com/idiap/coqui-ai-TTS/pulls) with the ```🚀new version``` tag. - - We list all the target improvements for the next version. You can pick one of them and start contributing. + Issues with the ```good first issue``` tag are good place for beginners to + take on. Issues tagged with `help wanted` are suited for more experienced + outside contributors. - Also feel free to suggest new features, ideas and models. 
We're always open for new things. -## Call for sharing language models +## Call for sharing pretrained models If possible, please consider sharing your pre-trained models in any language (if the licences allow for you to do so). We will include them in our model catalogue for public use and give the proper attribution, whether it be your name, company, website or any other source specified. This model can be shared in two ways: 1. Share the model files with us and we serve them with the next 🐸 TTS release. 2. Upload your models on GDrive and share the link. -Models are served under `.models.json` file and any model is available under TTS CLI or Server end points. +Models are served under `.models.json` file and any model is available under TTS +CLI and Python API end points. Either way you choose, please make sure you send the models [here](https://github.com/coqui-ai/TTS/discussions/930). @@ -135,7 +130,8 @@ curl -LsSf https://astral.sh/uv/install.sh | sh 13. Let's discuss until it is perfect. 💪 - We might ask you for certain changes that would appear in the ✨**PR**✨'s page under 🐸TTS[https://github.com/idiap/coqui-ai-TTS/pulls]. + We might ask you for certain changes that would appear in the + [Github ✨**PR**✨'s page](https://github.com/idiap/coqui-ai-TTS/pulls). 14. Once things look perfect, We merge it to the ```dev``` branch and make it ready for the next version. @@ -143,9 +139,9 @@ curl -LsSf https://astral.sh/uv/install.sh | sh If you prefer working within a Docker container as your development environment, you can do the following: -1. Fork 🐸TTS[https://github.com/idiap/coqui-ai-TTS] by clicking the fork button at the top right corner of the project page. +1. Fork the 🐸TTS [Github repository](https://github.com/idiap/coqui-ai-TTS) by clicking the fork button at the top right corner of the page. -2. Clone 🐸TTS and add the main repo as a new remote named ```upsteam```. +2. Clone 🐸TTS and add the main repo as a new remote named ```upstream```. ```bash git clone git@github.com:/coqui-ai-TTS.git diff --git a/README.md b/README.md index 7dddf3a37b..5ab60dd348 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,10 @@ - -## 🐸Coqui TTS News +# 🐸Coqui TTS +## News - 📣 Fork of the [original, unmaintained repository](https://github.com/coqui-ai/TTS). New PyPI package: [coqui-tts](https://pypi.org/project/coqui-tts) - 📣 [OpenVoice](https://github.com/myshell-ai/OpenVoice) models now available for voice conversion. - 📣 Prebuilt wheels are now also published for Mac and Windows (in addition to Linux as before) for easier installation across platforms. -- 📣 ⓍTTSv2 is here with 17 languages and better performance across the board. ⓍTTS can stream with <200ms latency. -- 📣 ⓍTTS fine-tuning code is out. Check the [example recipes](https://github.com/idiap/coqui-ai-TTS/tree/dev/recipes/ljspeech). -- 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://coqui-tts.readthedocs.io/en/latest/models/bark.html) +- 📣 XTTSv2 is here with 17 languages and better performance across the board. XTTS can stream with <200ms latency. +- 📣 XTTS fine-tuning code is out. Check the [example recipes](https://github.com/idiap/coqui-ai-TTS/tree/dev/recipes/ljspeech). - 📣 You can use [Fairseq models in ~1100 languages](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS. 
## @@ -21,6 +20,7 @@ ______________________________________________________________________ [![Discord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) +![PyPI - Python Version](https://img.shields.io/pypi/pyversions/coqui-tts) [![License]()](https://opensource.org/licenses/MPL-2.0) [![PyPI version](https://badge.fury.io/py/coqui-tts.svg)](https://badge.fury.io/py/coqui-tts) [![Downloads](https://pepy.tech/badge/coqui-tts)](https://pepy.tech/project/coqui-tts) @@ -63,71 +63,65 @@ repository are also still a useful source of information. | 🚀 **Released Models** | [Standard models](https://github.com/idiap/coqui-ai-TTS/blob/dev/TTS/.models.json) and [Fairseq models in ~1100 languages](https://github.com/idiap/coqui-ai-TTS#example-text-to-speech-using-fairseq-models-in-1100-languages-)| ## Features -- High-performance Deep Learning models for Text2Speech tasks. See lists of models below. -- Fast and efficient model training. -- Detailed training logs on the terminal and Tensorboard. -- Support for Multi-speaker TTS. -- Efficient, flexible, lightweight but feature complete `Trainer API`. +- High-performance text-to-speech and voice conversion models, see list below. +- Fast and efficient model training with detailed training logs on the terminal and Tensorboard. +- Support for multi-speaker and multilingual TTS. - Released and ready-to-use models. -- Tools to curate Text2Speech datasets under```dataset_analysis```. -- Utilities to use and test your models. +- Tools to curate TTS datasets under ```dataset_analysis/```. +- Command line and Python APIs to use and test your models. - Modular (but not too much) code base enabling easy implementation of new ideas. ## Model Implementations ### Spectrogram models -- Tacotron: [paper](https://arxiv.org/abs/1703.10135) -- Tacotron2: [paper](https://arxiv.org/abs/1712.05884) -- Glow-TTS: [paper](https://arxiv.org/abs/2005.11129) -- Speedy-Speech: [paper](https://arxiv.org/abs/2008.03802) -- Align-TTS: [paper](https://arxiv.org/abs/2003.01950) -- FastPitch: [paper](https://arxiv.org/pdf/2006.06873.pdf) -- FastSpeech: [paper](https://arxiv.org/abs/1905.09263) -- FastSpeech2: [paper](https://arxiv.org/abs/2006.04558) -- SC-GlowTTS: [paper](https://arxiv.org/abs/2104.05557) -- Capacitron: [paper](https://arxiv.org/abs/1906.03402) -- OverFlow: [paper](https://arxiv.org/abs/2211.06892) -- Neural HMM TTS: [paper](https://arxiv.org/abs/2108.13320) -- Delightful TTS: [paper](https://arxiv.org/abs/2110.12612) +- [Tacotron](https://arxiv.org/abs/1703.10135), [Tacotron2](https://arxiv.org/abs/1712.05884) +- [Glow-TTS](https://arxiv.org/abs/2005.11129), [SC-GlowTTS](https://arxiv.org/abs/2104.05557) +- [Speedy-Speech](https://arxiv.org/abs/2008.03802) +- [Align-TTS](https://arxiv.org/abs/2003.01950) +- [FastPitch](https://arxiv.org/pdf/2006.06873.pdf) +- [FastSpeech](https://arxiv.org/abs/1905.09263), [FastSpeech2](https://arxiv.org/abs/2006.04558) +- [Capacitron](https://arxiv.org/abs/1906.03402) +- [OverFlow](https://arxiv.org/abs/2211.06892) +- [Neural HMM TTS](https://arxiv.org/abs/2108.13320) +- [Delightful TTS](https://arxiv.org/abs/2110.12612) ### End-to-End Models -- ⓍTTS: [blog](https://coqui.ai/blog/tts/open_xtts) -- VITS: [paper](https://arxiv.org/pdf/2106.06103) -- 🐸 YourTTS: [paper](https://arxiv.org/abs/2112.02418) -- 🐢 Tortoise: [orig. repo](https://github.com/neonbjb/tortoise-tts) -- 🐶 Bark: [orig. 
repo](https://github.com/suno-ai/bark) - -### Attention Methods -- Guided Attention: [paper](https://arxiv.org/abs/1710.08969) -- Forward Backward Decoding: [paper](https://arxiv.org/abs/1907.09006) -- Graves Attention: [paper](https://arxiv.org/abs/1910.10288) -- Double Decoder Consistency: [blog](https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency/) -- Dynamic Convolutional Attention: [paper](https://arxiv.org/pdf/1910.10288.pdf) -- Alignment Network: [paper](https://arxiv.org/abs/2108.10447) - -### Speaker Encoder -- GE2E: [paper](https://arxiv.org/abs/1710.10467) -- Angular Loss: [paper](https://arxiv.org/pdf/2003.11982.pdf) +- [XTTS](https://arxiv.org/abs/2406.04904) +- [VITS](https://arxiv.org/pdf/2106.06103) +- 🐸[YourTTS](https://arxiv.org/abs/2112.02418) +- 🐢[Tortoise](https://github.com/neonbjb/tortoise-tts) +- 🐶[Bark](https://github.com/suno-ai/bark) ### Vocoders -- MelGAN: [paper](https://arxiv.org/abs/1910.06711) -- MultiBandMelGAN: [paper](https://arxiv.org/abs/2005.05106) -- ParallelWaveGAN: [paper](https://arxiv.org/abs/1910.11480) -- GAN-TTS discriminators: [paper](https://arxiv.org/abs/1909.11646) -- WaveRNN: [origin](https://github.com/fatchord/WaveRNN/) -- WaveGrad: [paper](https://arxiv.org/abs/2009.00713) -- HiFiGAN: [paper](https://arxiv.org/abs/2010.05646) -- UnivNet: [paper](https://arxiv.org/abs/2106.07889) +- [MelGAN](https://arxiv.org/abs/1910.06711) +- [MultiBandMelGAN](https://arxiv.org/abs/2005.05106) +- [ParallelWaveGAN](https://arxiv.org/abs/1910.11480) +- [GAN-TTS discriminators](https://arxiv.org/abs/1909.11646) +- [WaveRNN](https://github.com/fatchord/WaveRNN/) +- [WaveGrad](https://arxiv.org/abs/2009.00713) +- [HiFiGAN](https://arxiv.org/abs/2010.05646) +- [UnivNet](https://arxiv.org/abs/2106.07889) ### Voice Conversion -- FreeVC: [paper](https://arxiv.org/abs/2210.15418) -- OpenVoice: [technical report](https://arxiv.org/abs/2312.01479) +- [FreeVC](https://arxiv.org/abs/2210.15418) +- [OpenVoice](https://arxiv.org/abs/2312.01479) + +### Others +- Attention methods: [Guided Attention](https://arxiv.org/abs/1710.08969), + [Forward Backward Decoding](https://arxiv.org/abs/1907.09006), + [Graves Attention](https://arxiv.org/abs/1910.10288), + [Double Decoder Consistency](https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency/), + [Dynamic Convolutional Attention](https://arxiv.org/pdf/1910.10288.pdf), + [Alignment Network](https://arxiv.org/abs/2108.10447) +- Speaker encoders: [GE2E](https://arxiv.org/abs/1710.10467), + [Angular Loss](https://arxiv.org/pdf/2003.11982.pdf) You can also help us implement more models. ## Installation -🐸TTS is tested on Ubuntu 22.04 with **python >= 3.9, < 3.13.**. +🐸TTS is tested on Ubuntu 24.04 with **python >= 3.9, < 3.13.**, but should also +work on Mac and Windows. -If you are only interested in [synthesizing speech](https://coqui-tts.readthedocs.io/en/latest/inference.html) with the released 🐸TTS models, installing from PyPI is the easiest option. +If you are only interested in [synthesizing speech](https://coqui-tts.readthedocs.io/en/latest/inference.html) with the pretrained 🐸TTS models, installing from PyPI is the easiest option. ```bash pip install coqui-tts @@ -172,14 +166,9 @@ make system-deps # intended to be used on Ubuntu (Debian). 
Let us know if you h make install ``` -If you are on Windows, 👑@GuyPaddock wrote installation instructions -[here](https://stackoverflow.com/questions/66726331/how-can-i-run-mozilla-tts-coqui-tts-training-with-cuda-on-a-windows-system) -(note that these are out of date, e.g. you need to have at least Python 3.9). - - ## Docker Image -You can also try TTS without install with the docker image. -Simply run the following command and you will be able to run TTS without installing it. +You can also try out Coqui TTS without installation with the docker image. +Simply run the following command and you will be able to run TTS: ```bash docker run --rm -it -p 5002:5002 --entrypoint /bin/bash ghcr.io/coqui-ai/tts-cpu @@ -281,11 +270,12 @@ api.tts_to_file( -Synthesize speech on command line. +Synthesize speech on the command line. You can either use your trained model or choose a model from the provided list. -If you don't specify any models, then it uses LJSpeech based English model. +If you don't specify any models, then it uses a Tacotron2 English model trained +on LJSpeech. #### Single Speaker Models diff --git a/TTS/model.py b/TTS/model.py index c3707c85ae..779b1775a3 100644 --- a/TTS/model.py +++ b/TTS/model.py @@ -12,7 +12,7 @@ class BaseTrainerModel(TrainerModel): """BaseTrainerModel model expanding TrainerModel with required functions by 🐸TTS. - Every new 🐸TTS model must inherit it. + Every new Coqui model must inherit it. """ @staticmethod diff --git a/TTS/tts/models/bark.py b/TTS/tts/models/bark.py index ced8f60ed8..c52c541b25 100644 --- a/TTS/tts/models/bark.py +++ b/TTS/tts/models/bark.py @@ -206,12 +206,14 @@ def synthesize( speaker_wav (str): Path to the speaker audio file for cloning a new voice. It is cloned and saved in `voice_dirs` with the name `speaker_id`. Defaults to None. voice_dirs (List[str]): List of paths that host reference audio files for speakers. Defaults to None. - **kwargs: Model specific inference settings used by `generate_audio()` and `TTS.tts.layers.bark.inference_funcs.generate_text_semantic(). + **kwargs: Model specific inference settings used by `generate_audio()` and + `TTS.tts.layers.bark.inference_funcs.generate_text_semantic()`. Returns: - A dictionary of the output values with `wav` as output waveform, `deterministic_seed` as seed used at inference, - `text_input` as text token IDs after tokenizer, `voice_samples` as samples used for cloning, `conditioning_latents` - as latents used at inference. + A dictionary of the output values with `wav` as output waveform, + `deterministic_seed` as seed used at inference, `text_input` as text token IDs + after tokenizer, `voice_samples` as samples used for cloning, + `conditioning_latents` as latents used at inference. """ speaker_id = "random" if speaker_id is None else speaker_id diff --git a/TTS/tts/models/base_tts.py b/TTS/tts/models/base_tts.py index ccb023ce84..33a75598c9 100644 --- a/TTS/tts/models/base_tts.py +++ b/TTS/tts/models/base_tts.py @@ -80,15 +80,17 @@ def _set_model_args(self, config: Coqpit): raise ValueError("config must be either a *Config or *Args") def init_multispeaker(self, config: Coqpit, data: List = None): - """Initialize a speaker embedding layer if needen and define expected embedding channel size for defining - `in_channels` size of the connected layers. + """Set up for multi-speaker TTS. + + Initialize a speaker embedding layer if needed and define expected embedding + channel size for defining `in_channels` size of the connected layers. 
This implementation yields 3 possible outcomes: - 1. If `config.use_speaker_embedding` and `config.use_d_vector_file are False, do nothing. + 1. If `config.use_speaker_embedding` and `config.use_d_vector_file` are False, do nothing. 2. If `config.use_d_vector_file` is True, set expected embedding channel size to `config.d_vector_dim` or 512. 3. If `config.use_speaker_embedding`, initialize a speaker embedding layer with channel size of - `config.d_vector_dim` or 512. + `config.d_vector_dim` or 512. You can override this function for new models. diff --git a/TTS/tts/models/overflow.py b/TTS/tts/models/overflow.py index ac09e406ad..1c146b2eac 100644 --- a/TTS/tts/models/overflow.py +++ b/TTS/tts/models/overflow.py @@ -33,32 +33,33 @@ class Overflow(BaseTTS): Paper abstract:: Neural HMMs are a type of neural transducer recently proposed for - sequence-to-sequence modelling in text-to-speech. They combine the best features - of classic statistical speech synthesis and modern neural TTS, requiring less - data and fewer training updates, and are less prone to gibberish output caused - by neural attention failures. In this paper, we combine neural HMM TTS with - normalising flows for describing the highly non-Gaussian distribution of speech - acoustics. The result is a powerful, fully probabilistic model of durations and - acoustics that can be trained using exact maximum likelihood. Compared to - dominant flow-based acoustic models, our approach integrates autoregression for - improved modelling of long-range dependences such as utterance-level prosody. - Experiments show that a system based on our proposal gives more accurate - pronunciations and better subjective speech quality than comparable methods, - whilst retaining the original advantages of neural HMMs. Audio examples and code - are available at https://shivammehta25.github.io/OverFlow/. + sequence-to-sequence modelling in text-to-speech. They combine the best features + of classic statistical speech synthesis and modern neural TTS, requiring less + data and fewer training updates, and are less prone to gibberish output caused + by neural attention failures. In this paper, we combine neural HMM TTS with + normalising flows for describing the highly non-Gaussian distribution of speech + acoustics. The result is a powerful, fully probabilistic model of durations and + acoustics that can be trained using exact maximum likelihood. Compared to + dominant flow-based acoustic models, our approach integrates autoregression for + improved modelling of long-range dependences such as utterance-level prosody. + Experiments show that a system based on our proposal gives more accurate + pronunciations and better subjective speech quality than comparable methods, + whilst retaining the original advantages of neural HMMs. Audio examples and code + are available at https://shivammehta25.github.io/OverFlow/. Note: - - Neural HMMs uses flat start initialization i.e it computes the means and std and transition probabilities - of the dataset and uses them to initialize the model. This benefits the model and helps with faster learning - If you change the dataset or want to regenerate the parameters change the `force_generate_statistics` and - `mel_statistics_parameter_path` accordingly. + - Neural HMMs uses flat start initialization i.e it computes the means + and std and transition probabilities of the dataset and uses them to initialize + the model. 
This benefits the model and helps with faster learning If you change + the dataset or want to regenerate the parameters change the + `force_generate_statistics` and `mel_statistics_parameter_path` accordingly. - To enable multi-GPU training, set the `use_grad_checkpointing=False` in config. - This will significantly increase the memory usage. This is because to compute - the actual data likelihood (not an approximation using MAS/Viterbi) we must use - all the states at the previous time step during the forward pass to decide the - probability distribution at the current step i.e the difference between the forward - algorithm and viterbi approximation. + This will significantly increase the memory usage. This is because to compute + the actual data likelihood (not an approximation using MAS/Viterbi) we must use + all the states at the previous time step during the forward pass to decide the + probability distribution at the current step i.e the difference between the forward + algorithm and viterbi approximation. Check :class:`TTS.tts.configs.overflow.OverFlowConfig` for class arguments. """ diff --git a/TTS/tts/models/tortoise.py b/TTS/tts/models/tortoise.py index 01629b5d2a..738e9dd9b3 100644 --- a/TTS/tts/models/tortoise.py +++ b/TTS/tts/models/tortoise.py @@ -423,7 +423,9 @@ def get_conditioning_latents( Transforms one or more voice_samples into a tuple (autoregressive_conditioning_latent, diffusion_conditioning_latent). These are expressive learned latents that encode aspects of the provided clips like voice, intonation, and acoustic properties. - :param voice_samples: List of arbitrary reference clips, which should be *pairs* of torch tensors containing arbitrary kHz waveform data. + + :param voice_samples: List of arbitrary reference clips, which should be *pairs* + of torch tensors containing arbitrary kHz waveform data. :param latent_averaging_mode: 0/1/2 for following modes: 0 - latents will be generated as in original tortoise, using ~4.27s from each voice sample, averaging latent across all samples 1 - latents will be generated using (almost) entire voice samples, averaged across all the ~4.27s chunks @@ -671,7 +673,7 @@ def inference( As cond_free_k increases, the output becomes dominated by the conditioning-free signal. diffusion_temperature: (float) Controls the variance of the noise fed into the diffusion model. [0,1]. Values at 0 are the "mean" prediction of the diffusion network and will sound bland and smeared. - hf_generate_kwargs: (**kwargs) The huggingface Transformers generate API is used for the autoregressive transformer. + hf_generate_kwargs: (`**kwargs`) The huggingface Transformers generate API is used for the autoregressive transformer. Extra keyword args fed to this function get forwarded directly to that API. Documentation here: https://huggingface.co/docs/transformers/internal/generation_utils diff --git a/TTS/tts/models/xtts.py b/TTS/tts/models/xtts.py index f05863ae1d..395208cc6b 100644 --- a/TTS/tts/models/xtts.py +++ b/TTS/tts/models/xtts.py @@ -178,7 +178,7 @@ class XttsArgs(Coqpit): class Xtts(BaseTTS): - """ⓍTTS model implementation. + """XTTS model implementation. ❗ Currently it only supports inference. @@ -460,7 +460,7 @@ def full_inference( gpt_cond_chunk_len: (int) Chunk length used for cloning. It must be <= `gpt_cond_len`. If gpt_cond_len == gpt_cond_chunk_len, no chunking. Defaults to 6 seconds. 
- hf_generate_kwargs: (**kwargs) The huggingface Transformers generate API is used for the autoregressive + hf_generate_kwargs: (`**kwargs`) The huggingface Transformers generate API is used for the autoregressive transformer. Extra keyword args fed to this function get forwarded directly to that API. Documentation here: https://huggingface.co/docs/transformers/internal/generation_utils diff --git a/docs/source/configuration.md b/docs/source/configuration.md index ada61e16db..220c96c363 100644 --- a/docs/source/configuration.md +++ b/docs/source/configuration.md @@ -1,6 +1,6 @@ # Configuration -We use 👩‍✈️[Coqpit] for configuration management. It provides basic static type checking and serialization capabilities on top of native Python `dataclasses`. Here is how a simple configuration looks like with Coqpit. +We use 👩‍✈️[Coqpit](https://github.com/idiap/coqui-ai-coqpit) for configuration management. It provides basic static type checking and serialization capabilities on top of native Python `dataclasses`. Here is how a simple configuration looks like with Coqpit. ```python from dataclasses import asdict, dataclass, field @@ -36,7 +36,7 @@ class SimpleConfig(Coqpit): check_argument("val_c", c, restricted=True) ``` -In TTS, each model must have a configuration class that exposes all the values necessary for its lifetime. +In Coqui, each model must have a configuration class that exposes all the values necessary for its lifetime. It defines model architecture, hyper-parameters, training, and inference settings. For our models, we merge all the fields in a single configuration class for ease. It may not look like a wise practice but enables easier bookkeeping and reproducible experiments. diff --git a/docs/source/docker_images.md b/docs/source/docker_images.md index 58d961203e..042f9f8e7a 100644 --- a/docs/source/docker_images.md +++ b/docs/source/docker_images.md @@ -1,20 +1,20 @@ (docker_images)= -## Docker images +# Docker images We provide docker images to be able to test TTS without having to setup your own environment. -### Using premade images +## Using premade images You can use premade images built automatically from the latest TTS version. -#### CPU version +### CPU version ```bash docker pull ghcr.io/coqui-ai/tts-cpu ``` -#### GPU version +### GPU version ```bash docker pull ghcr.io/coqui-ai/tts ``` -### Building your own image +## Building your own image ```bash docker build -t tts . ``` diff --git a/docs/source/faq.md b/docs/source/faq.md index 1090aaa35c..e0197cf752 100644 --- a/docs/source/faq.md +++ b/docs/source/faq.md @@ -1,4 +1,4 @@ -# Humble FAQ +# FAQ We tried to collect common issues and questions we receive about 🐸TTS. It is worth checking before going deeper. ## Errors with a pre-trained model. How can I resolve this? @@ -7,7 +7,7 @@ We tried to collect common issues and questions we receive about 🐸TTS. It is - If you feel like it's a bug to be fixed, then prefer Github issues with the same level of scrutiny. ## What are the requirements of a good 🐸TTS dataset? -* {ref}`See this page ` +- [See this page](what_makes_a_good_dataset.md) ## How should I choose the right model? - First, train Tacotron. It is smaller and faster to experiment with. If it performs poorly, try Tacotron2. @@ -61,7 +61,8 @@ We tried to collect common issues and questions we receive about 🐸TTS. 
It is - SingleGPU training: ```CUDA_VISIBLE_DEVICES="0" python train_tts.py --config_path config.json``` - MultiGPU training: ```python3 -m trainer.distribute --gpus "0,1" --script TTS/bin/train_tts.py --config_path config.json``` -**Note:** You can also train your model using pure 🐍 python. Check ```{eval-rst} :ref: 'tutorial_for_nervous_beginners'```. +**Note:** You can also train your model using pure 🐍 python. Check the +[tutorial](tutorial_for_nervous_beginners.md). ## How can I train in a different language? - Check steps 2, 3, 4, 5 above. @@ -104,7 +105,7 @@ The best approach is to pick a set of promising models and run a Mean-Opinion-Sc - Check the 4th step under "How can I check model performance?" ## How can I test a trained model? -- The best way is to use `tts` or `tts-server` commands. For details check {ref}`here `. +- The best way is to use `tts` or `tts-server` commands. For details check [here](inference.md). - If you need to code your own ```TTS.utils.synthesizer.Synthesizer``` class. ## My Tacotron model does not stop - I see "Decoder stopped with 'max_decoder_steps" - Stopnet does not work. diff --git a/docs/source/finetuning.md b/docs/source/finetuning.md index 548e385ec7..9c9f2c8d06 100644 --- a/docs/source/finetuning.md +++ b/docs/source/finetuning.md @@ -1,4 +1,4 @@ -# Fine-tuning a 🐸 TTS model +# Fine-tuning a model ## Fine-tuning @@ -21,8 +21,9 @@ them and fine-tune it for your own dataset. This will help you in two main ways: Fine-tuning comes to the rescue in this case. You can take one of our pre-trained models and fine-tune it on your own speech dataset and achieve reasonable results with only a couple of hours of data. - However, note that, fine-tuning does not ensure great results. The model performance still depends on the - {ref}`dataset quality ` and the hyper-parameters you choose for fine-tuning. Therefore, + However, note that, fine-tuning does not ensure great results. The model + performance still depends on the [dataset quality](what_makes_a_good_dataset.md) + and the hyper-parameters you choose for fine-tuning. Therefore, it still takes a bit of tinkering. @@ -31,7 +32,7 @@ them and fine-tune it for your own dataset. This will help you in two main ways: 1. Setup your dataset. You need to format your target dataset in a certain way so that 🐸TTS data loader will be able to load it for the - training. Please see {ref}`this page ` for more information about formatting. + training. Please see [this page](formatting_your_dataset.md) for more information about formatting. 2. Choose the model you want to fine-tune. @@ -47,7 +48,8 @@ them and fine-tune it for your own dataset. This will help you in two main ways: You should choose the model based on your requirements. Some models are fast and some are better in speech quality. One lazy way to test a model is running the model on the hardware you want to use and see how it works. For - simple testing, you can use the `tts` command on the terminal. For more info see {ref}`here `. + simple testing, you can use the `tts` command on the terminal. For more info + see [here](inference.md). 3. Download the model. diff --git a/docs/source/formatting_your_dataset.md b/docs/source/formatting_your_dataset.md index 23c497d0bf..7376ff6644 100644 --- a/docs/source/formatting_your_dataset.md +++ b/docs/source/formatting_your_dataset.md @@ -1,5 +1,5 @@ (formatting_your_dataset)= -# Formatting Your Dataset +# Formatting your dataset For training a TTS model, you need a dataset with speech recordings and transcriptions. 
The speech must be divided into audio clips and each clip needs transcription. @@ -49,7 +49,7 @@ The format above is taken from widely-used the [LJSpeech](https://keithito.com/L Your dataset should have good coverage of the target language. It should cover the phonemic variety, exceptional sounds and syllables. This is extremely important for especially non-phonemic languages like English. -For more info about dataset qualities and properties check our [post](https://github.com/coqui-ai/TTS/wiki/What-makes-a-good-TTS-dataset). +For more info about dataset qualities and properties check [this page](what_makes_a_good_dataset.md). ## Using Your Dataset in 🐸TTS diff --git a/docs/source/implementing_a_new_language_frontend.md b/docs/source/implementing_a_new_language_frontend.md index 2041352d64..0b3ef59be0 100644 --- a/docs/source/implementing_a_new_language_frontend.md +++ b/docs/source/implementing_a_new_language_frontend.md @@ -1,6 +1,6 @@ -# Implementing a New Language Frontend +# Implementing new language front ends -- Language frontends are located under `TTS.tts.utils.text` +- Language front ends are located under `TTS.tts.utils.text` - Each special language has a separate folder. - Each folder contains all the utilities for processing the text input. - `TTS.tts.utils.text.phonemizers` contains the main phonemizer for a language. This is the class that uses the utilities diff --git a/docs/source/implementing_a_new_model.md b/docs/source/implementing_a_new_model.md index 1bf7a8822e..a2721a1c4d 100644 --- a/docs/source/implementing_a_new_model.md +++ b/docs/source/implementing_a_new_model.md @@ -1,4 +1,4 @@ -# Implementing a Model +# Implementing new models 1. Implement layers. @@ -36,7 +36,7 @@ There is also the `callback` interface by which you can manipulate both the model and the `Trainer` states. Callbacks give you an infinite flexibility to add custom behaviours for your model and training routines. - For more details, see {ref}`BaseTTS ` and :obj:`TTS.utils.callbacks`. + For more details, see [BaseTTS](main_classes/model_api.md#base-tts-model) and :obj:`TTS.utils.callbacks`. 6. Optionally, define `MyModelArgs`. @@ -62,7 +62,7 @@ We love you more when you document your code. ❤️ -# Template 🐸TTS Model implementation +## Template 🐸TTS Model implementation You can start implementing your model by copying the following base class. diff --git a/docs/source/index.md b/docs/source/index.md index 79993eec76..8924fdc83b 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -5,58 +5,57 @@ ---- # Documentation Content -```{eval-rst} -.. toctree:: - :maxdepth: 2 - :caption: Get started - - tutorial_for_nervous_beginners - installation - faq - contributing - -.. toctree:: - :maxdepth: 2 - :caption: Using 🐸TTS - - inference - docker_images - implementing_a_new_model - implementing_a_new_language_frontend - training_a_model - finetuning - configuration - formatting_your_dataset - what_makes_a_good_dataset - tts_datasets - marytts - -.. toctree:: - :maxdepth: 2 - :caption: Main Classes - - main_classes/trainer_api - main_classes/audio_processor - main_classes/model_api - main_classes/dataset - main_classes/gan - main_classes/speaker_manager - -.. toctree:: - :maxdepth: 2 - :caption: `tts` Models - - models/glow_tts.md - models/vits.md - models/forward_tts.md - models/tacotron1-2.md - models/overflow.md - models/tortoise.md - models/bark.md - models/xtts.md - -.. 
toctree:: - :maxdepth: 2 - :caption: `vocoder` Models +```{toctree} +:maxdepth: 1 +:caption: Get started + +tutorial_for_nervous_beginners +installation +docker_images +faq +contributing +``` + +```{toctree} +:maxdepth: 1 +:caption: Using Coqui + +inference +training_a_model +finetuning +implementing_a_new_model +implementing_a_new_language_frontend +formatting_your_dataset +what_makes_a_good_dataset +tts_datasets +marytts +``` + + +```{toctree} +:maxdepth: 1 +:caption: Main Classes + +configuration +main_classes/trainer_api +main_classes/audio_processor +main_classes/model_api +main_classes/dataset +main_classes/gan +main_classes/speaker_manager +``` + + +```{toctree} +:maxdepth: 1 +:caption: TTS Models +models/glow_tts.md +models/vits.md +models/forward_tts.md +models/tacotron1-2.md +models/overflow.md +models/tortoise.md +models/bark.md +models/xtts.md ``` diff --git a/docs/source/inference.md b/docs/source/inference.md index 4cb8f45a71..4556643cca 100644 --- a/docs/source/inference.md +++ b/docs/source/inference.md @@ -1,5 +1,5 @@ (synthesizing_speech)= -# Synthesizing Speech +# Synthesizing speech First, you need to install TTS. We recommend using PyPi. You need to call the command below: @@ -136,7 +136,7 @@ wav = tts.tts(text="Hello world!", speaker_wav="my/cloning/audio.wav", language= tts.tts_to_file(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav") ``` -#### Here is an example for a single speaker model. +### Single speaker model. ```python # Init TTS with the target model name @@ -145,7 +145,7 @@ tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False) tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH) ``` -#### Example voice cloning with YourTTS in English, French and Portuguese: +### Voice cloning with YourTTS in English, French and Portuguese: ```python tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to("cuda") @@ -154,14 +154,14 @@ tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wa tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt", file_path="output.wav") ``` -#### Example voice conversion converting speaker of the `source_wav` to the speaker of the `target_wav` +### Voice conversion from the speaker of `source_wav` to the speaker of `target_wav` ```python tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False).to("cuda") tts.voice_conversion_to_file(source_wav="my/source.wav", target_wav="my/target.wav", file_path="output.wav") ``` -#### Example voice cloning by a single speaker TTS model combining with the voice conversion model. +### Voice cloning by combining single speaker TTS model with the voice conversion model. This way, you can clone voices by using any model in 🐸TTS. @@ -174,7 +174,7 @@ tts.tts_with_vc_to_file( ) ``` -#### Example text to speech using **Fairseq models in ~1100 languages** 🤯. +### Text to speech using **Fairseq models in ~1100 languages** 🤯. For these models use the following name format: `tts_models//fairseq/vits`. You can find the list of language ISO codes [here](https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html) and learn about the Fairseq models [here](https://github.com/facebookresearch/fairseq/tree/main/examples/mms). 
diff --git a/docs/source/installation.md b/docs/source/installation.md index 405c436643..5becc28b70 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -1,6 +1,7 @@ # Installation -🐸TTS supports python >=3.9 <3.13.0 and was tested on Ubuntu 22.04. +🐸TTS supports python >=3.9 <3.13.0 and was tested on Ubuntu 24.04, but should +also run on Mac and Windows. ## Using `pip` @@ -33,8 +34,3 @@ make install # Same as above + dev dependencies and pre-commit make install_dev ``` - -## On Windows -If you are on Windows, 👑@GuyPaddock wrote installation instructions -[here](https://stackoverflow.com/questions/66726331/) (note that these are out -of date, e.g. you need to have at least Python 3.9) diff --git a/docs/source/main_classes/model_api.md b/docs/source/main_classes/model_api.md index 71b3d41640..bb7e9d1a1d 100644 --- a/docs/source/main_classes/model_api.md +++ b/docs/source/main_classes/model_api.md @@ -1,22 +1,22 @@ # Model API Model API provides you a set of functions that easily make your model compatible with the `Trainer`, -`Synthesizer` and `ModelZoo`. +`Synthesizer` and the Coqui Python API. -## Base TTS Model +## Base Trainer Model ```{eval-rst} .. autoclass:: TTS.model.BaseTrainerModel :members: ``` -## Base tts Model +## Base TTS Model ```{eval-rst} .. autoclass:: TTS.tts.models.base_tts.BaseTTS :members: ``` -## Base vocoder Model +## Base Vocoder Model ```{eval-rst} .. autoclass:: TTS.vocoder.models.base_vocoder.BaseVocoder diff --git a/docs/source/main_classes/trainer_api.md b/docs/source/main_classes/trainer_api.md index 335294aa4d..bdb6048e45 100644 --- a/docs/source/main_classes/trainer_api.md +++ b/docs/source/main_classes/trainer_api.md @@ -1,3 +1,3 @@ # Trainer API -We made the trainer a separate project on https://github.com/eginhard/coqui-trainer +We made the trainer a separate project: https://github.com/idiap/coqui-ai-Trainer diff --git a/docs/source/marytts.md b/docs/source/marytts.md index 9091ca330f..11cf4a2b9a 100644 --- a/docs/source/marytts.md +++ b/docs/source/marytts.md @@ -1,4 +1,4 @@ -# Mary-TTS API Support for Coqui-TTS +# Mary-TTS API support for Coqui TTS ## What is Mary-TTS? diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index 7c0f1c4a60..96f5bb7cd5 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -1,25 +1,25 @@ -# ⓍTTS -ⓍTTS is a super cool Text-to-Speech model that lets you clone voices in different languages by using just a quick 3-second audio clip. Built on the 🐢Tortoise, -ⓍTTS has important model changes that make cross-language voice cloning and multi-lingual speech generation super easy. +# XTTS +XTTS is a super cool Text-to-Speech model that lets you clone voices in different languages by using just a quick 3-second audio clip. Built on the 🐢Tortoise, +XTTS has important model changes that make cross-language voice cloning and multi-lingual speech generation super easy. There is no need for an excessive amount of training data that spans countless hours. -### Features +## Features - Voice cloning. - Cross-language voice cloning. - Multi-lingual speech generation. - 24khz sampling rate. -- Streaming inference with < 200ms latency. (See [Streaming inference](#streaming-inference)) +- Streaming inference with < 200ms latency. (See [Streaming inference](#streaming-manually)) - Fine-tuning support. (See [Training](#training)) -### Updates with v2 +## Updates with v2 - Improved voice cloning. 
- Voices can be cloned with a single audio file or multiple audio files, without any effect on the runtime. - Across the board quality improvements. -### Code +## Code Current implementation only supports inference and GPT encoder training. -### Languages +## Languages XTTS-v2 supports 17 languages: - Arabic (ar) @@ -40,15 +40,15 @@ XTTS-v2 supports 17 languages: - Spanish (es) - Turkish (tr) -### License +## License This model is licensed under [Coqui Public Model License](https://coqui.ai/cpml). -### Contact +## Contact Come and join in our 🐸Community. We're active on [Discord](https://discord.gg/fBC58unbKE) and [Github](https://github.com/idiap/coqui-ai-TTS/discussions). -### Inference +## Inference -#### 🐸TTS Command line +### 🐸TTS Command line You can check all supported languages with the following command: @@ -64,7 +64,7 @@ You can check all Coqui available speakers with the following command: --list_speaker_idx ``` -##### Coqui speakers +#### Coqui speakers You can do inference using one of the available speakers using the following command: ```console @@ -75,10 +75,10 @@ You can do inference using one of the available speakers using the following com --use_cuda ``` -##### Clone a voice +#### Clone a voice You can clone a speaker voice using a single or multiple references: -###### Single reference +##### Single reference ```console tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 \ @@ -88,7 +88,7 @@ You can clone a speaker voice using a single or multiple references: --use_cuda ``` -###### Multiple references +##### Multiple references ```console tts --model_name tts_models/multilingual/multi-dataset/xtts_v2 \ --text "Bugün okula gitmek istemiyorum." \ @@ -106,12 +106,12 @@ or for all wav files in a directory you can use: --use_cuda ``` -#### 🐸TTS API +### 🐸TTS API -##### Clone a voice +#### Clone a voice You can clone a speaker voice using a single or multiple references: -###### Single reference +##### Single reference Splits the text into sentences and generates audio for each sentence. The audio files are then concatenated to produce the final audio. You can optionally disable sentence splitting for better coherence but more VRAM and possibly hitting models context length limit. @@ -129,7 +129,7 @@ tts.tts_to_file(text="It took me quite a long time to develop a voice, and now t ) ``` -###### Multiple references +##### Multiple references You can pass multiple audio files to the `speaker_wav` argument for better voice cloning. @@ -154,7 +154,7 @@ tts.tts_to_file(text="It took me quite a long time to develop a voice, and now t language="en") ``` -##### Coqui speakers +#### Coqui speakers You can do inference using one of the available speakers using the following code: @@ -172,11 +172,11 @@ tts.tts_to_file(text="It took me quite a long time to develop a voice, and now t ``` -#### 🐸TTS Model API +### 🐸TTS Model API To use the model API, you need to download the model files and pass config and model file paths manually. -#### Manual Inference +### Manual Inference If you want to be able to `load_checkpoint` with `use_deepspeed=True` and **enjoy the speedup**, you need to install deepspeed first. @@ -184,7 +184,7 @@ If you want to be able to `load_checkpoint` with `use_deepspeed=True` and **enjo pip install deepspeed==0.10.3 ``` -##### inference parameters +#### Inference parameters - `text`: The text to be synthesized. - `language`: The language of the text to be synthesized. 
@@ -199,7 +199,7 @@ pip install deepspeed==0.10.3 - `enable_text_splitting`: Whether to split the text into sentences and generate audio for each sentence. It allows you to have infinite input length but might loose important context between sentences. Defaults to True. -##### Inference +#### Inference ```python @@ -231,7 +231,7 @@ torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000) ``` -##### Streaming manually +#### Streaming manually Here the goal is to stream the audio as it is being generated. This is useful for real-time applications. Streaming inference is typically slower than regular inference, but it allows to get a first chunk of audio faster. @@ -275,9 +275,9 @@ torchaudio.save("xtts_streaming.wav", wav.squeeze().unsqueeze(0).cpu(), 24000) ``` -### Training +## Training -#### Easy training +### Easy training To make `XTTS_v2` GPT encoder training easier for beginner users we did a gradio demo that implements the whole fine-tuning pipeline. The gradio demo enables the user to easily do the following steps: - Preprocessing of the uploaded audio or audio files in 🐸 TTS coqui formatter @@ -286,7 +286,7 @@ To make `XTTS_v2` GPT encoder training easier for beginner users we did a gradio The user can run this gradio demo locally or remotely using a Colab Notebook. -##### Run demo on Colab +#### Run demo on Colab To make the `XTTS_v2` fine-tuning more accessible for users that do not have good GPUs available we did a Google Colab Notebook. The Colab Notebook is available [here](https://colab.research.google.com/drive/1GiI4_X724M8q2W-zZ-jXo7cWTV7RfaH-?usp=sharing). @@ -302,7 +302,7 @@ If you are not able to acess the video you need to follow the steps: 5. Soon the training is done you can go to the third Tab (3 - Inference) and then click on the button "Step 3 - Load Fine-tuned XTTS model" and wait until the fine-tuned model is loaded. Then you can do the inference on the model by clicking on the button "Step 4 - Inference". -##### Run demo locally +#### Run demo locally To run the demo locally you need to do the following steps: 1. Install 🐸 TTS following the instructions available [here](https://coqui-tts.readthedocs.io/en/latest/installation.html). @@ -319,7 +319,7 @@ If you are not able to access the video, here is what you need to do: 4. Go to the third Tab (3 - Inference) and then click on the button "Step 3 - Load Fine-tuned XTTS model" and wait until the fine-tuned model is loaded. 5. Now you can run inference with the model by clicking on the button "Step 4 - Inference". -#### Advanced training +### Advanced training A recipe for `XTTS_v2` GPT encoder training using `LJSpeech` dataset is available at https://github.com/coqui-ai/TTS/tree/dev/recipes/ljspeech/xtts_v1/train_gpt_xtts.py @@ -393,6 +393,6 @@ torchaudio.save(OUTPUT_WAV_PATH, torch.tensor(out["wav"]).unsqueeze(0), 24000) ## XTTS Model ```{eval-rst} -.. autoclass:: TTS.tts.models.xtts.XTTS +.. autoclass:: TTS.tts.models.xtts.Xtts :members: ``` diff --git a/docs/source/training_a_model.md b/docs/source/training_a_model.md index 989a57042a..6f612dc02c 100644 --- a/docs/source/training_a_model.md +++ b/docs/source/training_a_model.md @@ -1,4 +1,4 @@ -# Training a Model +# Training a model 1. Decide the model you want to use. @@ -132,7 +132,7 @@ In the example above, we trained a `GlowTTS` model, but the same workflow applies to all the other 🐸TTS models. -# Multi-speaker Training +## Multi-speaker Training Training a multi-speaker model is mostly the same as training a single-speaker model. 
You need to specify a couple of configuration parameters, initiate a `SpeakerManager` instance and pass it to the model. diff --git a/docs/source/tts_datasets.md b/docs/source/tts_datasets.md index 11da1b7688..3a0bcf11d6 100644 --- a/docs/source/tts_datasets.md +++ b/docs/source/tts_datasets.md @@ -1,4 +1,4 @@ -# TTS Datasets +# TTS datasets Some of the known public datasets that we successfully applied 🐸TTS: diff --git a/docs/source/tutorial_for_nervous_beginners.md b/docs/source/tutorial_for_nervous_beginners.md index b417c4c45a..5df56fc6e4 100644 --- a/docs/source/tutorial_for_nervous_beginners.md +++ b/docs/source/tutorial_for_nervous_beginners.md @@ -1,20 +1,29 @@ -# Tutorial For Nervous Beginners +# Tutorial for nervous beginners -## Installation +First [install](installation.md) Coqui TTS. -User friendly installation. Recommended only for synthesizing voice. +## Synthesizing Speech + +You can run `tts` and synthesize speech directly on the terminal. ```bash -$ pip install coqui-tts +$ tts -h # see the help +$ tts --list_models # list the available models. ``` -Developer friendly installation. +![cli.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/tts_cli.gif) + + +You can call `tts-server` to start a local demo server that you can open on +your favorite web browser and 🗣️ (make sure to install the additional +dependencies with `pip install coqui-tts[server]`). ```bash -$ git clone https://github.com/idiap/coqui-ai-TTS -$ cd coqui-ai-TTS -$ pip install -e . +$ tts-server -h # see the help +$ tts-server --list_models # list the available models. ``` +![server.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/demo_server.gif) + ## Training a `tts` Model @@ -99,25 +108,3 @@ We still support running training from CLI like in the old days. The same traini ``` ❗️ Note that you can also use ```train_vocoder.py``` as the ```tts``` models above. - -## Synthesizing Speech - -You can run `tts` and synthesize speech directly on the terminal. - -```bash -$ tts -h # see the help -$ tts --list_models # list the available models. -``` - -![cli.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/tts_cli.gif) - - -You can call `tts-server` to start a local demo server that you can open on -your favorite web browser and 🗣️ (make sure to install the additional -dependencies with `pip install coqui-tts[server]`). - -```bash -$ tts-server -h # see the help -$ tts-server --list_models # list the available models. -``` -![server.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/demo_server.gif) From e23766d501609f422b8706b4e71fe90b80b4e069 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 12 Dec 2024 14:09:44 +0100 Subject: [PATCH 246/255] docs: move project structure from readme into documentation --- README.md | 20 -------------------- docs/source/index.md | 1 + docs/source/project_structure.md | 30 ++++++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 20 deletions(-) create mode 100644 docs/source/project_structure.md diff --git a/README.md b/README.md index 5ab60dd348..c766b51415 100644 --- a/README.md +++ b/README.md @@ -391,23 +391,3 @@ $ tts --out_path output/path/speech.wav --model_name "// - -## Directory Structure -``` -|- notebooks/ (Jupyter Notebooks for model evaluation, parameter selection and data analysis.) -|- utils/ (common utilities.) -|- TTS - |- bin/ (folder for all the executables.) - |- train*.py (train your target model.) - |- ... 
- |- tts/ (text to speech models) - |- layers/ (model layer definitions) - |- models/ (model definitions) - |- utils/ (model specific utilities.) - |- speaker_encoder/ (Speaker Encoder models.) - |- (same) - |- vocoder/ (Vocoder models.) - |- (same) - |- vc/ (Voice conversion models.) - |- (same) -``` diff --git a/docs/source/index.md b/docs/source/index.md index 8924fdc83b..cb835d47e8 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -13,6 +13,7 @@ tutorial_for_nervous_beginners installation docker_images faq +project_structure contributing ``` diff --git a/docs/source/project_structure.md b/docs/source/project_structure.md new file mode 100644 index 0000000000..af3e472adc --- /dev/null +++ b/docs/source/project_structure.md @@ -0,0 +1,30 @@ +# Project structure + +## Directory structure + +A non-comprehensive overview of the Coqui source code: + +| Directory | Contents | +| - | - | +| **Core** | | +| **[`TTS/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/TTS)** | Main source code | +| **[`- .models.json`](https://github.com/idiap/coqui-ai-TTS/tree/dev/TTS/.models.json)** | Pretrained model list | +| **[`- api.py`](https://github.com/idiap/coqui-ai-TTS/tree/dev/TTS/api.py)** | Python API | +| **[`- bin/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/TTS/bin)** | Executables and CLI | +| **[`- tts/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/TTS/tts)** | Text-to-speech models | +| **[`- configs/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/TTS/tts/configs)** | Model configurations | +| **[`- layers/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/TTS/tts/layers)** | Model layer definitions | +| **[`- models/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/TTS/tts/models)** | Model definitions | +| **[`- vc/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/TTS/vc)** | Voice conversion models | +| `- (same)` | | +| **[`- vocoder/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/TTS/vocoder)** | Vocoder models | +| `- (same)` | | +| **[`- encoder/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/TTS/encoder)** | Speaker encoder models | +| `- (same)` | | +| **Recipes/notebooks** | | +| **[`notebooks/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/notebooks)** | Jupyter Notebooks for model evaluation, parameter selection and data analysis | +| **[`recipes/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/recipes)** | Training recipes | +| **Others** | | +| **[`pyproject.toml`](https://github.com/idiap/coqui-ai-TTS/tree/dev/pyproject.toml)** | Project metadata, configuration and dependencies | +| **[`docs/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/docs)** | Documentation | +| **[`tests/`](https://github.com/idiap/coqui-ai-TTS/tree/dev/tests)** | Unit and integration tests | From ae2f8d235450eb7a1674754110451d7a59c3b635 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 12 Dec 2024 15:52:55 +0100 Subject: [PATCH 247/255] docs: use nested contents for easier overview --- .../{ => datasets}/formatting_your_dataset.md | 4 +++- docs/source/datasets/index.md | 12 ++++++++++++ docs/source/{ => datasets}/tts_datasets.md | 4 ++-- .../{ => datasets}/what_makes_a_good_dataset.md | 0 .../implementing_a_new_language_frontend.md | 0 .../{ => extension}/implementing_a_new_model.md | 3 ++- docs/source/extension/index.md | 14 ++++++++++++++ docs/source/faq.md | 4 ++-- docs/source/index.md | 16 +++++++--------- docs/source/inference.md | 9 +++++++-- docs/source/{ => training}/finetuning.md | 6 +++--- docs/source/training/index.md | 10 ++++++++++ docs/source/{ => 
training}/training_a_model.md | 9 ++++----- docs/source/tutorial_for_nervous_beginners.md | 6 +++++- 14 files changed, 71 insertions(+), 26 deletions(-) rename docs/source/{ => datasets}/formatting_your_dataset.md (98%) create mode 100644 docs/source/datasets/index.md rename docs/source/{ => datasets}/tts_datasets.md (90%) rename docs/source/{ => datasets}/what_makes_a_good_dataset.md (100%) rename docs/source/{ => extension}/implementing_a_new_language_frontend.md (100%) rename docs/source/{ => extension}/implementing_a_new_model.md (98%) create mode 100644 docs/source/extension/index.md rename docs/source/{ => training}/finetuning.md (95%) create mode 100644 docs/source/training/index.md rename docs/source/{ => training}/training_a_model.md (93%) diff --git a/docs/source/formatting_your_dataset.md b/docs/source/datasets/formatting_your_dataset.md similarity index 98% rename from docs/source/formatting_your_dataset.md rename to docs/source/datasets/formatting_your_dataset.md index 7376ff6644..e92263339e 100644 --- a/docs/source/formatting_your_dataset.md +++ b/docs/source/datasets/formatting_your_dataset.md @@ -1,7 +1,9 @@ (formatting_your_dataset)= # Formatting your dataset -For training a TTS model, you need a dataset with speech recordings and transcriptions. The speech must be divided into audio clips and each clip needs transcription. +For training a TTS model, you need a dataset with speech recordings and +transcriptions. The speech must be divided into audio clips and each clip needs +a transcription. If you have a single audio file and you need to split it into clips, there are different open-source tools for you. We recommend Audacity. It is an open-source and free audio editing software. diff --git a/docs/source/datasets/index.md b/docs/source/datasets/index.md new file mode 100644 index 0000000000..6b040fc416 --- /dev/null +++ b/docs/source/datasets/index.md @@ -0,0 +1,12 @@ +# Datasets + +For training a TTS model, you need a dataset with speech recordings and +transcriptions. 
See the following pages for more information on: + +```{toctree} +:maxdepth: 1 + +formatting_your_dataset +what_makes_a_good_dataset +tts_datasets +``` diff --git a/docs/source/tts_datasets.md b/docs/source/datasets/tts_datasets.md similarity index 90% rename from docs/source/tts_datasets.md rename to docs/source/datasets/tts_datasets.md index 3a0bcf11d6..df8d2f2ad9 100644 --- a/docs/source/tts_datasets.md +++ b/docs/source/datasets/tts_datasets.md @@ -1,6 +1,6 @@ -# TTS datasets +# Public TTS datasets -Some of the known public datasets that we successfully applied 🐸TTS: +Some of the known public datasets that were successfully used for 🐸TTS: - [English - LJ Speech](https://keithito.com/LJ-Speech-Dataset/) - [English - Nancy](http://www.cstr.ed.ac.uk/projects/blizzard/2011/lessac_blizzard2011/) diff --git a/docs/source/what_makes_a_good_dataset.md b/docs/source/datasets/what_makes_a_good_dataset.md similarity index 100% rename from docs/source/what_makes_a_good_dataset.md rename to docs/source/datasets/what_makes_a_good_dataset.md diff --git a/docs/source/implementing_a_new_language_frontend.md b/docs/source/extension/implementing_a_new_language_frontend.md similarity index 100% rename from docs/source/implementing_a_new_language_frontend.md rename to docs/source/extension/implementing_a_new_language_frontend.md diff --git a/docs/source/implementing_a_new_model.md b/docs/source/extension/implementing_a_new_model.md similarity index 98% rename from docs/source/implementing_a_new_model.md rename to docs/source/extension/implementing_a_new_model.md index a2721a1c4d..2521789771 100644 --- a/docs/source/implementing_a_new_model.md +++ b/docs/source/extension/implementing_a_new_model.md @@ -36,7 +36,8 @@ There is also the `callback` interface by which you can manipulate both the model and the `Trainer` states. Callbacks give you an infinite flexibility to add custom behaviours for your model and training routines. - For more details, see [BaseTTS](main_classes/model_api.md#base-tts-model) and :obj:`TTS.utils.callbacks`. + For more details, see [BaseTTS](../main_classes/model_api.md#base-tts-model) + and `TTS.utils.callbacks`. 6. Optionally, define `MyModelArgs`. diff --git a/docs/source/extension/index.md b/docs/source/extension/index.md new file mode 100644 index 0000000000..39c36b632c --- /dev/null +++ b/docs/source/extension/index.md @@ -0,0 +1,14 @@ +# Adding models or languages + +You can extend Coqui by implementing new model architectures or adding front +ends for new languages. See the pages below for more details. The [project +structure](../project_structure.md) and [contribution +guidelines](../contributing.md) may also be helpful. Please open a pull request +with your changes to share back the improvements with the community. + +```{toctree} +:maxdepth: 1 + +implementing_a_new_model +implementing_a_new_language_frontend +``` diff --git a/docs/source/faq.md b/docs/source/faq.md index e0197cf752..1dd5c1847b 100644 --- a/docs/source/faq.md +++ b/docs/source/faq.md @@ -7,7 +7,7 @@ We tried to collect common issues and questions we receive about 🐸TTS. It is - If you feel like it's a bug to be fixed, then prefer Github issues with the same level of scrutiny. ## What are the requirements of a good 🐸TTS dataset? -- [See this page](what_makes_a_good_dataset.md) +- [See this page](datasets/what_makes_a_good_dataset.md) ## How should I choose the right model? - First, train Tacotron. It is smaller and faster to experiment with. If it performs poorly, try Tacotron2. 
@@ -18,7 +18,7 @@ We tried to collect common issues and questions we receive about 🐸TTS. It is ## How can I train my own `tts` model? 0. Check your dataset with notebooks in [dataset_analysis](https://github.com/idiap/coqui-ai-TTS/tree/main/notebooks/dataset_analysis) folder. Use [this notebook](https://github.com/idiap/coqui-ai-TTS/blob/main/notebooks/dataset_analysis/CheckSpectrograms.ipynb) to find the right audio processing parameters. A better set of parameters results in a better audio synthesis. -1. Write your own dataset `formatter` in `datasets/formatters.py` or format your dataset as one of the supported datasets, like LJSpeech. +1. Write your own dataset `formatter` in `datasets/formatters.py` or [format](datasets/formatting_your_dataset) your dataset as one of the supported datasets, like LJSpeech. A `formatter` parses the metadata file and converts a list of training samples. 2. If you have a dataset with a different alphabet than English, you need to set your own character list in the ```config.json```. diff --git a/docs/source/index.md b/docs/source/index.md index cb835d47e8..ae34771c68 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -4,10 +4,10 @@ ``` ---- -# Documentation Content ```{toctree} :maxdepth: 1 :caption: Get started +:hidden: tutorial_for_nervous_beginners installation @@ -20,22 +20,19 @@ contributing ```{toctree} :maxdepth: 1 :caption: Using Coqui +:hidden: inference -training_a_model -finetuning -implementing_a_new_model -implementing_a_new_language_frontend -formatting_your_dataset -what_makes_a_good_dataset -tts_datasets -marytts +training/index +extension/index +datasets/index ``` ```{toctree} :maxdepth: 1 :caption: Main Classes +:hidden: configuration main_classes/trainer_api @@ -50,6 +47,7 @@ main_classes/speaker_manager ```{toctree} :maxdepth: 1 :caption: TTS Models +:hidden: models/glow_tts.md models/vits.md diff --git a/docs/source/inference.md b/docs/source/inference.md index 4556643cca..ccce84b08b 100644 --- a/docs/source/inference.md +++ b/docs/source/inference.md @@ -86,8 +86,8 @@ tts --model_name "voice_conversion///" You can boot up a demo 🐸TTS server to run an inference with your models (make sure to install the additional dependencies with `pip install coqui-tts[server]`). -Note that the server is not optimized for performance but gives you an easy way -to interact with the models. +Note that the server is not optimized for performance and does not support all +Coqui models yet. The demo server provides pretty much the same interface as the CLI command. @@ -192,3 +192,8 @@ api.tts_with_vc_to_file( file_path="ouptut.wav" ) ``` + +```{toctree} +:hidden: +marytts +``` diff --git a/docs/source/finetuning.md b/docs/source/training/finetuning.md similarity index 95% rename from docs/source/finetuning.md rename to docs/source/training/finetuning.md index 9c9f2c8d06..1fe54fbcde 100644 --- a/docs/source/finetuning.md +++ b/docs/source/training/finetuning.md @@ -22,7 +22,7 @@ them and fine-tune it for your own dataset. This will help you in two main ways: speech dataset and achieve reasonable results with only a couple of hours of data. However, note that, fine-tuning does not ensure great results. The model - performance still depends on the [dataset quality](what_makes_a_good_dataset.md) + performance still depends on the [dataset quality](../datasets/what_makes_a_good_dataset.md) and the hyper-parameters you choose for fine-tuning. Therefore, it still takes a bit of tinkering. 
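To make the fine-tuning workflow discussed in this hunk concrete, here is a minimal sketch (not part of the patch), assuming the training scripts' standard `--restore_path` argument: restore a downloaded checkpoint when launching one of the recipes. The checkpoint path is illustrative and depends on your platform and on which model you downloaded.

```bash
# Assumes the pretrained model was fetched beforehand (e.g. by running `tts` once with that model);
# the download location below is only an example for Linux.
CUDA_VISIBLE_DEVICES="0" python recipes/ljspeech/glow_tts/train_glowtts.py \
    --restore_path ~/.local/share/tts/tts_models--en--ljspeech--glow-tts/model_file.pth
```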
@@ -32,7 +32,7 @@ them and fine-tune it for your own dataset. This will help you in two main ways: 1. Setup your dataset. You need to format your target dataset in a certain way so that 🐸TTS data loader will be able to load it for the - training. Please see [this page](formatting_your_dataset.md) for more information about formatting. + training. Please see [this page](../datasets/formatting_your_dataset.md) for more information about formatting. 2. Choose the model you want to fine-tune. @@ -49,7 +49,7 @@ them and fine-tune it for your own dataset. This will help you in two main ways: You should choose the model based on your requirements. Some models are fast and some are better in speech quality. One lazy way to test a model is running the model on the hardware you want to use and see how it works. For simple testing, you can use the `tts` command on the terminal. For more info - see [here](inference.md). + see [here](../inference.md). 3. Download the model. diff --git a/docs/source/training/index.md b/docs/source/training/index.md new file mode 100644 index 0000000000..bb76a705df --- /dev/null +++ b/docs/source/training/index.md @@ -0,0 +1,10 @@ +# Training and fine-tuning + +The following pages show you how to train and fine-tune Coqui models: + +```{toctree} +:maxdepth: 1 + +training_a_model +finetuning +``` diff --git a/docs/source/training_a_model.md b/docs/source/training/training_a_model.md similarity index 93% rename from docs/source/training_a_model.md rename to docs/source/training/training_a_model.md index 6f612dc02c..22505ccb17 100644 --- a/docs/source/training_a_model.md +++ b/docs/source/training/training_a_model.md @@ -11,11 +11,10 @@ 3. Check the recipes. - Recipes are located under `TTS/recipes/`. They do not promise perfect models but they provide a good start point for - `Nervous Beginners`. + Recipes are located under `TTS/recipes/`. They do not promise perfect models but they provide a good start point. A recipe for `GlowTTS` using `LJSpeech` dataset looks like below. Let's be creative and call this `train_glowtts.py`. - ```{literalinclude} ../../recipes/ljspeech/glow_tts/train_glowtts.py + ```{literalinclude} ../../../recipes/ljspeech/glow_tts/train_glowtts.py ``` You need to change fields of the `BaseDatasetConfig` to match your dataset and then update `GlowTTSConfig` @@ -113,7 +112,7 @@ Note that different models have different metrics, visuals and outputs. - You should also check the [FAQ page](https://github.com/coqui-ai/TTS/wiki/FAQ) for common problems and solutions + You should also check the [FAQ page](../faq.md) for common problems and solutions that occur in a training. 7. Use your best model for inference. @@ -142,5 +141,5 @@ d-vectors. For using d-vectors, you first need to compute the d-vectors using th The same Glow-TTS model above can be trained on a multi-speaker VCTK dataset with the script below. -```{literalinclude} ../../recipes/vctk/glow_tts/train_glow_tts.py +```{literalinclude} ../../../recipes/vctk/glow_tts/train_glow_tts.py ``` diff --git a/docs/source/tutorial_for_nervous_beginners.md b/docs/source/tutorial_for_nervous_beginners.md index 5df56fc6e4..a8a64410c4 100644 --- a/docs/source/tutorial_for_nervous_beginners.md +++ b/docs/source/tutorial_for_nervous_beginners.md @@ -24,10 +24,14 @@ $ tts-server --list_models # list the available models. ``` ![server.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/demo_server.gif) +See [this page](inference.md) for more details on synthesizing speech with the +CLI, server or Python API. 
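For instance, a minimal, illustrative use of the Python API mentioned above (not part of the patch), with a model name that also appears in the README examples:

```python
from TTS.api import TTS

# Synthesize a short sentence with a single-speaker English model and write it to a file.
TTS("tts_models/en/ljspeech/glow-tts").tts_to_file(
    text="Hello from Coqui TTS!", file_path="hello.wav"
)
```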
## Training a `tts` Model -A breakdown of a simple script that trains a GlowTTS model on the LJspeech dataset. See the comments for more details. +A breakdown of a simple script that trains a GlowTTS model on the LJspeech +dataset. For a more in-depth guide to training and fine-tuning also see [this +page](training/index.md). ### Pure Python Way From e38dcbea7ad7cac4ca3e4eaeaa6d254d90e6ff35 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 12 Dec 2024 17:34:00 +0100 Subject: [PATCH 248/255] docs: streamline readme and reuse content in other docs pages [ci skip] --- README.md | 232 +++++++++++++++++++----------------- TTS/bin/synthesize.py | 129 ++++++++++---------- docs/source/index.md | 5 +- docs/source/inference.md | 194 ++---------------------------- docs/source/installation.md | 36 +----- docs/source/server.md | 30 +++++ scripts/sync_readme.py | 6 +- 7 files changed, 235 insertions(+), 397 deletions(-) create mode 100644 docs/source/server.md diff --git a/README.md b/README.md index c766b51415..9ccf8657ab 100644 --- a/README.md +++ b/README.md @@ -1,39 +1,34 @@ -# 🐸Coqui TTS -## News -- 📣 Fork of the [original, unmaintained repository](https://github.com/coqui-ai/TTS). New PyPI package: [coqui-tts](https://pypi.org/project/coqui-tts) -- 📣 [OpenVoice](https://github.com/myshell-ai/OpenVoice) models now available for voice conversion. -- 📣 Prebuilt wheels are now also published for Mac and Windows (in addition to Linux as before) for easier installation across platforms. -- 📣 XTTSv2 is here with 17 languages and better performance across the board. XTTS can stream with <200ms latency. -- 📣 XTTS fine-tuning code is out. Check the [example recipes](https://github.com/idiap/coqui-ai-TTS/tree/dev/recipes/ljspeech). -- 📣 You can use [Fairseq models in ~1100 languages](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS. +# -## - -**🐸TTS is a library for advanced Text-to-Speech generation.** +**🐸 Coqui TTS is a library for advanced Text-to-Speech generation.** 🚀 Pretrained models in +1100 languages. 🛠️ Tools for training new models and fine-tuning existing models in any language. 📚 Utilities for dataset analysis and curation. 
-______________________________________________________________________ [![Discord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv) -![PyPI - Python Version](https://img.shields.io/pypi/pyversions/coqui-tts) +[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/coqui-tts)](https://pypi.org/project/coqui-tts/) [![License]()](https://opensource.org/licenses/MPL-2.0) -[![PyPI version](https://badge.fury.io/py/coqui-tts.svg)](https://badge.fury.io/py/coqui-tts) +[![PyPI version](https://badge.fury.io/py/coqui-tts.svg)](https://pypi.org/project/coqui-tts/) [![Downloads](https://pepy.tech/badge/coqui-tts)](https://pepy.tech/project/coqui-tts) [![DOI](https://zenodo.org/badge/265612440.svg)](https://zenodo.org/badge/latestdoi/265612440) - -![GithubActions](https://github.com/idiap/coqui-ai-TTS/actions/workflows/tests.yml/badge.svg) -![GithubActions](https://github.com/idiap/coqui-ai-TTS/actions/workflows/docker.yaml/badge.svg) -![GithubActions](https://github.com/idiap/coqui-ai-TTS/actions/workflows/style_check.yml/badge.svg) +[![GithubActions](https://github.com/idiap/coqui-ai-TTS/actions/workflows/tests.yml/badge.svg)](https://github.com/idiap/coqui-ai-TTS/actions/workflows/tests.yml) +[![GithubActions](https://github.com/idiap/coqui-ai-TTS/actions/workflows/docker.yaml/badge.svg)](https://github.com/idiap/coqui-ai-TTS/actions/workflows/docker.yaml) +[![GithubActions](https://github.com/idiap/coqui-ai-TTS/actions/workflows/style_check.yml/badge.svg)](https://github.com/idiap/coqui-ai-TTS/actions/workflows/style_check.yml) [![Docs]()](https://coqui-tts.readthedocs.io/en/latest/)
-______________________________________________________________________ +## 📣 News +- **Fork of the [original, unmaintained repository](https://github.com/coqui-ai/TTS). New PyPI package: [coqui-tts](https://pypi.org/project/coqui-tts)** +- 0.25.0: [OpenVoice](https://github.com/myshell-ai/OpenVoice) models now available for voice conversion. +- 0.24.2: Prebuilt wheels are now also published for Mac and Windows (in addition to Linux as before) for easier installation across platforms. +- 0.20.0: XTTSv2 is here with 17 languages and better performance across the board. XTTS can stream with <200ms latency. +- 0.19.0: XTTS fine-tuning code is out. Check the [example recipes](https://github.com/idiap/coqui-ai-TTS/tree/dev/recipes/ljspeech). +- 0.14.1: You can use [Fairseq models in ~1100 languages](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS. ## 💬 Where to ask questions Please use our dedicated channels for questions and discussion. Help is much more valuable if it's shared publicly so that more people can benefit from it. @@ -117,8 +112,10 @@ repository are also still a useful source of information. You can also help us implement more models. + ## Installation -🐸TTS is tested on Ubuntu 24.04 with **python >= 3.9, < 3.13.**, but should also + +🐸TTS is tested on Ubuntu 24.04 with **python >= 3.9, < 3.13**, but should also work on Mac and Windows. If you are only interested in [synthesizing speech](https://coqui-tts.readthedocs.io/en/latest/inference.html) with the pretrained 🐸TTS models, installing from PyPI is the easiest option. @@ -159,13 +156,15 @@ pip install -e .[server,ja] ### Platforms -If you are on Ubuntu (Debian), you can also run following commands for installation. +If you are on Ubuntu (Debian), you can also run the following commands for installation. ```bash -make system-deps # intended to be used on Ubuntu (Debian). Let us know if you have a different OS. +make system-deps make install ``` + + ## Docker Image You can also try out Coqui TTS without installation with the docker image. 
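As a quick smoke test of the PyPI installation path described above (a sketch using only commands that appear elsewhere in the README):

```bash
pip install coqui-tts
# List the pretrained models to confirm the CLI works:
tts --list_models
# Synthesize with the default English model:
tts --text "Text for TTS" --out_path output/path/speech.wav
```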
Simply run the following command and you will be able to run TTS: @@ -182,10 +181,10 @@ More details about the docker images (like GPU support) can be found ## Synthesizing speech by 🐸TTS - + ### 🐍 Python API -#### Running a multi-speaker and multi-lingual model +#### Multi-speaker and multi-lingual model ```python import torch @@ -197,47 +196,60 @@ device = "cuda" if torch.cuda.is_available() else "cpu" # List available 🐸TTS models print(TTS().list_models()) -# Init TTS +# Initialize TTS tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device) +# List speakers +print(tts.speakers) + # Run TTS -# ❗ Since this model is multi-lingual voice cloning model, we must set the target speaker_wav and language -# Text to speech list of amplitude values as output -wav = tts.tts(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en") -# Text to speech to a file -tts.tts_to_file(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav") +# ❗ XTTS supports both, but many models allow only one of the `speaker` and +# `speaker_wav` arguments + +# TTS with list of amplitude values as output, clone the voice from `speaker_wav` +wav = tts.tts( + text="Hello world!", + speaker_wav="my/cloning/audio.wav", + language="en" +) + +# TTS to a file, use a preset speaker +tts.tts_to_file( + text="Hello world!", + speaker="Craig Gutsy", + language="en", + file_path="output.wav" +) ``` -#### Running a single speaker model +#### Single speaker model ```python -# Init TTS with the target model name -tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False).to(device) +# Initialize TTS with the target model name +tts = TTS("tts_models/de/thorsten/tacotron2-DDC").to(device) # Run TTS tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH) - -# Example voice cloning with YourTTS in English, French and Portuguese -tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device) -tts.tts_to_file("This is voice cloning.", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav") -tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr-fr", file_path="output.wav") -tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt-br", file_path="output.wav") ``` -#### Example voice conversion +#### Voice conversion (VC) Converting the voice in `source_wav` to the voice of `target_wav` ```python -tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False).to("cuda") -tts.voice_conversion_to_file(source_wav="my/source.wav", target_wav="my/target.wav", file_path="output.wav") +tts = TTS("voice_conversion_models/multilingual/vctk/freevc24").to("cuda") +tts.voice_conversion_to_file( + source_wav="my/source.wav", + target_wav="my/target.wav", + file_path="output.wav" +) ``` Other available voice conversion models: - `voice_conversion_models/multilingual/multi-dataset/openvoice_v1` - `voice_conversion_models/multilingual/multi-dataset/openvoice_v2` -#### Example voice cloning together with the default voice conversion model. +#### Voice cloning by combining single speaker TTS model with the default VC model This way, you can clone voices by using any model in 🐸TTS. The FreeVC model is used for voice conversion after synthesizing speech. @@ -252,7 +264,7 @@ tts.tts_with_vc_to_file( ) ``` -#### Example text to speech using **Fairseq models in ~1100 languages** 🤯. 
+#### TTS using Fairseq models in ~1100 languages 🤯 For Fairseq models, use the following name format: `tts_models//fairseq/vits`. You can find the language ISO codes [here](https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html) and learn about the Fairseq models [here](https://github.com/facebookresearch/fairseq/tree/main/examples/mms). @@ -266,7 +278,7 @@ api.tts_to_file( ) ``` -### Command-line `tts` +### Command-line interface `tts` @@ -274,120 +286,118 @@ Synthesize speech on the command line. You can either use your trained model or choose a model from the provided list. -If you don't specify any models, then it uses a Tacotron2 English model trained -on LJSpeech. - -#### Single Speaker Models - - List provided models: + ```sh + tts --list_models ``` - $ tts --list_models - ``` - -- Get model info (for both tts_models and vocoder_models): - - - Query by type/name: - The model_info_by_name uses the name as it from the --list_models. - ``` - $ tts --model_info_by_name "///" - ``` - For example: - ``` - $ tts --model_info_by_name tts_models/tr/common-voice/glow-tts - $ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2 - ``` - - Query by type/idx: - The model_query_idx uses the corresponding idx from --list_models. - - ``` - $ tts --model_info_by_idx "/" - ``` - For example: - - ``` - $ tts --model_info_by_idx tts_models/3 - ``` +- Get model information. Use the names obtained from `--list_models`. + ```sh + tts --model_info_by_name "///" + ``` + For example: + ```sh + tts --model_info_by_name tts_models/tr/common-voice/glow-tts + tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2 + ``` - - Query info for model info by full name: - ``` - $ tts --model_info_by_name "///" - ``` +#### Single speaker models -- Run TTS with default models: +- Run TTS with the default model (`tts_models/en/ljspeech/tacotron2-DDC`): - ``` - $ tts --text "Text for TTS" --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" --out_path output/path/speech.wav ``` - Run TTS and pipe out the generated TTS wav file data: - ``` - $ tts --text "Text for TTS" --pipe_out --out_path output/path/speech.wav | aplay + ```sh + tts --text "Text for TTS" --pipe_out --out_path output/path/speech.wav | aplay ``` - Run a TTS model with its default vocoder model: - ``` - $ tts --text "Text for TTS" --model_name "///" --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" \ + --model_name "///" \ + --out_path output/path/speech.wav ``` For example: - ``` - $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" \ + --model_name "tts_models/en/ljspeech/glow-tts" \ + --out_path output/path/speech.wav ``` -- Run with specific TTS and vocoder models from the list: +- Run with specific TTS and vocoder models from the list. Note that not every vocoder is compatible with every TTS model. 
- ``` - $ tts --text "Text for TTS" --model_name "///" --vocoder_name "///" --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" \ + --model_name "///" \ + --vocoder_name "///" \ + --out_path output/path/speech.wav ``` For example: - ``` - $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --vocoder_name "vocoder_models/en/ljspeech/univnet" --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" \ + --model_name "tts_models/en/ljspeech/glow-tts" \ + --vocoder_name "vocoder_models/en/ljspeech/univnet" \ + --out_path output/path/speech.wav ``` -- Run your own TTS model (Using Griffin-Lim Vocoder): +- Run your own TTS model (using Griffin-Lim Vocoder): - ``` - $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" \ + --model_path path/to/model.pth \ + --config_path path/to/config.json \ + --out_path output/path/speech.wav ``` - Run your own TTS and Vocoder models: - ``` - $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav - --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json + ```sh + tts --text "Text for TTS" \ + --model_path path/to/model.pth \ + --config_path path/to/config.json \ + --out_path output/path/speech.wav \ + --vocoder_path path/to/vocoder.pth \ + --vocoder_config_path path/to/vocoder_config.json ``` -#### Multi-speaker Models +#### Multi-speaker models -- List the available speakers and choose a among them: +- List the available speakers and choose a `` among them: - ``` - $ tts --model_name "//" --list_speaker_idxs + ```sh + tts --model_name "//" --list_speaker_idxs ``` - Run the multi-speaker TTS model with the target speaker ID: - ``` - $ tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "//" --speaker_idx + ```sh + tts --text "Text for TTS." --out_path output/path/speech.wav \ + --model_name "//" --speaker_idx ``` - Run your own multi-speaker TTS model: - ``` - $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx + ```sh + tts --text "Text for TTS" --out_path output/path/speech.wav \ + --model_path path/to/model.pth --config_path path/to/config.json \ + --speakers_file_path path/to/speaker.json --speaker_idx ``` -### Voice Conversion Models +#### Voice conversion models -``` -$ tts --out_path output/path/speech.wav --model_name "//" --source_wav --target_wav +```sh +tts --out_path output/path/speech.wav --model_name "//" \ + --source_wav --target_wav ``` diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index 885f6d6f0c..5fce93b7f4 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -14,123 +14,122 @@ logger = logging.getLogger(__name__) description = """ -Synthesize speech on command line. +Synthesize speech on the command line. You can either use your trained model or choose a model from the provided list. -If you don't specify any models, then it uses LJSpeech based English model. - -#### Single Speaker Models - - List provided models: + ```sh + tts --list_models ``` - $ tts --list_models - ``` - -- Get model info (for both tts_models and vocoder_models): - - - Query by type/name: - The model_info_by_name uses the name as it from the --list_models. 
- ``` - $ tts --model_info_by_name "///" - ``` - For example: - ``` - $ tts --model_info_by_name tts_models/tr/common-voice/glow-tts - $ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2 - ``` - - Query by type/idx: - The model_query_idx uses the corresponding idx from --list_models. - ``` - $ tts --model_info_by_idx "/" - ``` - - For example: - - ``` - $ tts --model_info_by_idx tts_models/3 - ``` +- Get model information. Use the names obtained from `--list_models`. + ```sh + tts --model_info_by_name "///" + ``` + For example: + ```sh + tts --model_info_by_name tts_models/tr/common-voice/glow-tts + tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2 + ``` - - Query info for model info by full name: - ``` - $ tts --model_info_by_name "///" - ``` +#### Single Speaker Models -- Run TTS with default models: +- Run TTS with the default model (`tts_models/en/ljspeech/tacotron2-DDC`): - ``` - $ tts --text "Text for TTS" --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" --out_path output/path/speech.wav ``` - Run TTS and pipe out the generated TTS wav file data: - ``` - $ tts --text "Text for TTS" --pipe_out --out_path output/path/speech.wav | aplay + ```sh + tts --text "Text for TTS" --pipe_out --out_path output/path/speech.wav | aplay ``` - Run a TTS model with its default vocoder model: - ``` - $ tts --text "Text for TTS" --model_name "///" --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" \\ + --model_name "///" \\ + --out_path output/path/speech.wav ``` For example: - ``` - $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" \\ + --model_name "tts_models/en/ljspeech/glow-tts" \\ + --out_path output/path/speech.wav ``` -- Run with specific TTS and vocoder models from the list: +- Run with specific TTS and vocoder models from the list. Note that not every vocoder is compatible with every TTS model. 
- ``` - $ tts --text "Text for TTS" --model_name "///" --vocoder_name "///" --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" \\ + --model_name "///" \\ + --vocoder_name "///" \\ + --out_path output/path/speech.wav ``` For example: - ``` - $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --vocoder_name "vocoder_models/en/ljspeech/univnet" --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" \\ + --model_name "tts_models/en/ljspeech/glow-tts" \\ + --vocoder_name "vocoder_models/en/ljspeech/univnet" \\ + --out_path output/path/speech.wav ``` -- Run your own TTS model (Using Griffin-Lim Vocoder): +- Run your own TTS model (using Griffin-Lim Vocoder): - ``` - $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav + ```sh + tts --text "Text for TTS" \\ + --model_path path/to/model.pth \\ + --config_path path/to/config.json \\ + --out_path output/path/speech.wav ``` - Run your own TTS and Vocoder models: - ``` - $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav - --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json + ```sh + tts --text "Text for TTS" \\ + --model_path path/to/model.pth \\ + --config_path path/to/config.json \\ + --out_path output/path/speech.wav \\ + --vocoder_path path/to/vocoder.pth \\ + --vocoder_config_path path/to/vocoder_config.json ``` #### Multi-speaker Models -- List the available speakers and choose a among them: +- List the available speakers and choose a `` among them: - ``` - $ tts --model_name "//" --list_speaker_idxs + ```sh + tts --model_name "//" --list_speaker_idxs ``` - Run the multi-speaker TTS model with the target speaker ID: - ``` - $ tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "//" --speaker_idx + ```sh + tts --text "Text for TTS." --out_path output/path/speech.wav \\ + --model_name "//" --speaker_idx ``` - Run your own multi-speaker TTS model: - ``` - $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx + ```sh + tts --text "Text for TTS" --out_path output/path/speech.wav \\ + --model_path path/to/model.pth --config_path path/to/config.json \\ + --speakers_file_path path/to/speaker.json --speaker_idx ``` -### Voice Conversion Models +#### Voice Conversion Models -``` -$ tts --out_path output/path/speech.wav --model_name "//" --source_wav --target_wav +```sh +tts --out_path output/path/speech.wav --model_name "//" \\ + --source_wav --target_wav ``` """ diff --git a/docs/source/index.md b/docs/source/index.md index ae34771c68..3a030b4f81 100644 --- a/docs/source/index.md +++ b/docs/source/index.md @@ -1,8 +1,11 @@ +--- +hide-toc: true +--- ```{include} ../../README.md :relative-images: +:end-before: ``` ----- ```{toctree} :maxdepth: 1 diff --git a/docs/source/inference.md b/docs/source/inference.md index ccce84b08b..cb7d01fca3 100644 --- a/docs/source/inference.md +++ b/docs/source/inference.md @@ -1,199 +1,21 @@ (synthesizing_speech)= # Synthesizing speech -First, you need to install TTS. We recommend using PyPi. You need to call the command below: +## Overview -```bash -$ pip install coqui-tts -``` - -After the installation, 2 terminal commands are available. - -1. TTS Command Line Interface (CLI). - `tts` -2. Local Demo Server. - `tts-server` -3. 
In 🐍Python. - `from TTS.api import TTS` - -## On the Commandline - `tts` -![cli.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/tts_cli.gif) - -After the installation, 🐸TTS provides a CLI interface for synthesizing speech using pre-trained models. You can either use your own model or the release models under 🐸TTS. - -Listing released 🐸TTS models. - -```bash -tts --list_models -``` +Coqui TTS provides three main methods for inference: -Run a TTS model, from the release models list, with its default vocoder. (Simply copy and paste the full model names from the list as arguments for the command below.) +1. 🐍Python API +2. TTS command line interface (CLI) +3. [Local demo server](server.md) -```bash -tts --text "Text for TTS" \ - --model_name "///" \ - --out_path folder/to/save/output.wav +```{include} ../../README.md +:start-after: ``` -Run a tts and a vocoder model from the released model list. Note that not every vocoder is compatible with every TTS model. - -```bash -tts --text "Text for TTS" \ - --model_name "tts_models///" \ - --vocoder_name "vocoder_models///" \ - --out_path folder/to/save/output.wav -``` - -Run your own TTS model (Using Griffin-Lim Vocoder) - -```bash -tts --text "Text for TTS" \ - --model_path path/to/model.pth \ - --config_path path/to/config.json \ - --out_path folder/to/save/output.wav -``` - -Run your own TTS and Vocoder models - -```bash -tts --text "Text for TTS" \ - --config_path path/to/config.json \ - --model_path path/to/model.pth \ - --out_path folder/to/save/output.wav \ - --vocoder_path path/to/vocoder.pth \ - --vocoder_config_path path/to/vocoder_config.json -``` - -Run a multi-speaker TTS model from the released models list. - -```bash -tts --model_name "tts_models///" --list_speaker_idxs # list the possible speaker IDs. -tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "tts_models///" --speaker_idx "" -``` - -Run a released voice conversion model - -```bash -tts --model_name "voice_conversion///" - --source_wav "my/source/speaker/audio.wav" - --target_wav "my/target/speaker/audio.wav" - --out_path folder/to/save/output.wav -``` - -**Note:** You can use ```./TTS/bin/synthesize.py``` if you prefer running ```tts``` from the TTS project folder. - -## On the Demo Server - `tts-server` - - -![server.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/demo_server.gif) - -You can boot up a demo 🐸TTS server to run an inference with your models (make -sure to install the additional dependencies with `pip install coqui-tts[server]`). -Note that the server is not optimized for performance and does not support all -Coqui models yet. - -The demo server provides pretty much the same interface as the CLI command. - -```bash -tts-server -h # see the help -tts-server --list_models # list the available models. -``` - -Run a TTS model, from the release models list, with its default vocoder. -If the model you choose is a multi-speaker TTS model, you can select different speakers on the Web interface and synthesize -speech. - -```bash -tts-server --model_name "///" -``` - -Run a TTS and a vocoder model from the released model list. Note that not every vocoder is compatible with every TTS model. 
- -```bash -tts-server --model_name "///" \ - --vocoder_name "///" -``` - -## Python 🐸TTS API - -You can run a multi-speaker and multi-lingual model in Python as - -```python -import torch -from TTS.api import TTS - -# Get device -device = "cuda" if torch.cuda.is_available() else "cpu" - -# List available 🐸TTS models -print(TTS().list_models()) - -# Init TTS -tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device) - -# Run TTS -# ❗ Since this model is multi-lingual voice cloning model, we must set the target speaker_wav and language -# Text to speech list of amplitude values as output -wav = tts.tts(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en") -# Text to speech to a file -tts.tts_to_file(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav") -``` - -### Single speaker model. - -```python -# Init TTS with the target model name -tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False) -# Run TTS -tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH) -``` - -### Voice cloning with YourTTS in English, French and Portuguese: - -```python -tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to("cuda") -tts.tts_to_file("This is voice cloning.", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav") -tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr", file_path="output.wav") -tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt", file_path="output.wav") -``` - -### Voice conversion from the speaker of `source_wav` to the speaker of `target_wav` - -```python -tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False).to("cuda") -tts.voice_conversion_to_file(source_wav="my/source.wav", target_wav="my/target.wav", file_path="output.wav") -``` - -### Voice cloning by combining single speaker TTS model with the voice conversion model. - -This way, you can clone voices by using any model in 🐸TTS. - -```python -tts = TTS("tts_models/de/thorsten/tacotron2-DDC") -tts.tts_with_vc_to_file( - "Wie sage ich auf Italienisch, dass ich dich liebe?", - speaker_wav="target/speaker.wav", - file_path="ouptut.wav" -) -``` - -### Text to speech using **Fairseq models in ~1100 languages** 🤯. -For these models use the following name format: `tts_models//fairseq/vits`. - -You can find the list of language ISO codes [here](https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html) and learn about the Fairseq models [here](https://github.com/facebookresearch/fairseq/tree/main/examples/mms). - -```python -from TTS.api import TTS -api = TTS(model_name="tts_models/eng/fairseq/vits").to("cuda") -api.tts_to_file("This is a test.", file_path="output.wav") - -# TTS with on the fly voice conversion -api = TTS("tts_models/deu/fairseq/vits") -api.tts_with_vc_to_file( - "Wie sage ich auf Italienisch, dass ich dich liebe?", - speaker_wav="target/speaker.wav", - file_path="ouptut.wav" -) -``` ```{toctree} :hidden: +server marytts ``` diff --git a/docs/source/installation.md b/docs/source/installation.md index 5becc28b70..1315395a59 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -1,36 +1,6 @@ # Installation -🐸TTS supports python >=3.9 <3.13.0 and was tested on Ubuntu 24.04, but should -also run on Mac and Windows. - -## Using `pip` - -`pip` is recommended if you want to use 🐸TTS only for inference. 
- -You can install from PyPI as follows: - -```bash -pip install coqui-tts # from PyPI -``` - -Or install from Github: - -```bash -pip install git+https://github.com/idiap/coqui-ai-TTS # from Github -``` - -## Installing From Source - -This is recommended for development and more control over 🐸TTS. - -```bash -git clone https://github.com/idiap/coqui-ai-TTS -cd coqui-ai-TTS -make system-deps # only on Linux systems. - -# Install package and optional extras -make install - -# Same as above + dev dependencies and pre-commit -make install_dev +```{include} ../../README.md +:start-after: +:end-before: ``` diff --git a/docs/source/server.md b/docs/source/server.md new file mode 100644 index 0000000000..3fa211d0d7 --- /dev/null +++ b/docs/source/server.md @@ -0,0 +1,30 @@ +# Demo server + +![server.gif](https://github.com/idiap/coqui-ai-TTS/raw/main/images/demo_server.gif) + +You can boot up a demo 🐸TTS server to run an inference with your models (make +sure to install the additional dependencies with `pip install coqui-tts[server]`). +Note that the server is not optimized for performance and does not support all +Coqui models yet. + +The demo server provides pretty much the same interface as the CLI command. + +```bash +tts-server -h # see the help +tts-server --list_models # list the available models. +``` + +Run a TTS model, from the release models list, with its default vocoder. +If the model you choose is a multi-speaker TTS model, you can select different speakers on the Web interface and synthesize +speech. + +```bash +tts-server --model_name "///" +``` + +Run a TTS and a vocoder model from the released model list. Note that not every vocoder is compatible with every TTS model. + +```bash +tts-server --model_name "///" \ + --vocoder_name "///" +``` diff --git a/scripts/sync_readme.py b/scripts/sync_readme.py index 584286814b..97256bca6d 100644 --- a/scripts/sync_readme.py +++ b/scripts/sync_readme.py @@ -22,8 +22,12 @@ def sync_readme(): new_content = replace_between_markers(orig_content, "tts-readme", description.strip()) if args.check: if orig_content != new_content: - print("README.md is out of sync; please edit TTS/bin/TTS_README.md and run scripts/sync_readme.py") + print( + "README.md is out of sync; please reconcile README.md and TTS/bin/synthesize.py and run scripts/sync_readme.py" + ) exit(42) + print("All good, files in sync") + exit(0) readme_path.write_text(new_content) print("Updated README.md") From a425ba599d93db96338dca86cd1e0e6c9fe34d2d Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 14 Dec 2024 00:28:01 +0100 Subject: [PATCH 249/255] feat: allow both Path and strings where possible and add type hints --- TTS/api.py | 2 +- TTS/config/__init__.py | 5 +- TTS/tts/utils/languages.py | 8 +- TTS/tts/utils/managers.py | 47 ++++--- TTS/tts/utils/speakers.py | 10 +- TTS/utils/audio/numpy_transforms.py | 9 +- TTS/utils/audio/processor.py | 9 +- TTS/utils/generic_utils.py | 7 +- TTS/utils/manage.py | 199 +++++++++++++++------------- TTS/utils/synthesizer.py | 58 ++++---- tests/zoo_tests/test_models.py | 13 +- 11 files changed, 204 insertions(+), 163 deletions(-) diff --git a/TTS/api.py b/TTS/api.py index 83189482cb..7720530823 100644 --- a/TTS/api.py +++ b/TTS/api.py @@ -157,7 +157,7 @@ def list_models() -> list[str]: def download_model_by_name( self, model_name: str, vocoder_name: Optional[str] = None - ) -> tuple[Optional[str], Optional[str], Optional[str]]: + ) -> tuple[Optional[Path], Optional[Path], Optional[Path]]: model_path, config_path, model_item = 
self.manager.download_model(model_name) if "fairseq" in model_name or (model_item is not None and isinstance(model_item["model_url"], list)): # return model directory if there are multiple files diff --git a/TTS/config/__init__.py b/TTS/config/__init__.py index 5103f200b0..e5f40c0296 100644 --- a/TTS/config/__init__.py +++ b/TTS/config/__init__.py @@ -1,7 +1,7 @@ import json import os import re -from typing import Dict +from typing import Any, Dict, Union import fsspec import yaml @@ -68,7 +68,7 @@ def _process_model_name(config_dict: Dict) -> str: return model_name -def load_config(config_path: str) -> Coqpit: +def load_config(config_path: Union[str, os.PathLike[Any]]) -> Coqpit: """Import `json` or `yaml` files as TTS configs. First, load the input file as a `dict` and check the model name to find the corresponding Config class. Then initialize the Config. @@ -81,6 +81,7 @@ def load_config(config_path: str) -> Coqpit: Returns: Coqpit: TTS config object. """ + config_path = str(config_path) config_dict = {} ext = os.path.splitext(config_path)[1] if ext in (".yml", ".yaml"): diff --git a/TTS/tts/utils/languages.py b/TTS/tts/utils/languages.py index f134daf58e..c72de2d4e6 100644 --- a/TTS/tts/utils/languages.py +++ b/TTS/tts/utils/languages.py @@ -1,5 +1,5 @@ import os -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union import fsspec import numpy as np @@ -27,8 +27,8 @@ class LanguageManager(BaseIDManager): def __init__( self, - language_ids_file_path: str = "", - config: Coqpit = None, + language_ids_file_path: Union[str, os.PathLike[Any]] = "", + config: Optional[Coqpit] = None, ): super().__init__(id_file_path=language_ids_file_path) @@ -76,7 +76,7 @@ def parse_ids_from_data(items: List, parse_key: str) -> Any: def set_ids_from_data(self, items: List, parse_key: str) -> Any: raise NotImplementedError - def save_ids_to_file(self, file_path: str) -> None: + def save_ids_to_file(self, file_path: Union[str, os.PathLike[Any]]) -> None: """Save language IDs to a json file. Args: diff --git a/TTS/tts/utils/managers.py b/TTS/tts/utils/managers.py index 6a2f7df67b..3a715dd75d 100644 --- a/TTS/tts/utils/managers.py +++ b/TTS/tts/utils/managers.py @@ -1,4 +1,5 @@ import json +import os import random from typing import Any, Dict, List, Tuple, Union @@ -12,7 +13,8 @@ from TTS.utils.generic_utils import is_pytorch_at_least_2_4 -def load_file(path: str): +def load_file(path: Union[str, os.PathLike[Any]]): + path = str(path) if path.endswith(".json"): with fsspec.open(path, "r") as f: return json.load(f) @@ -23,7 +25,8 @@ def load_file(path: str): raise ValueError("Unsupported file type") -def save_file(obj: Any, path: str): +def save_file(obj: Any, path: Union[str, os.PathLike[Any]]): + path = str(path) if path.endswith(".json"): with fsspec.open(path, "w") as f: json.dump(obj, f, indent=4) @@ -39,20 +42,20 @@ class BaseIDManager: It defines common `ID` manager specific functions. 
""" - def __init__(self, id_file_path: str = ""): + def __init__(self, id_file_path: Union[str, os.PathLike[Any]] = ""): self.name_to_id = {} if id_file_path: self.load_ids_from_file(id_file_path) @staticmethod - def _load_json(json_file_path: str) -> Dict: - with fsspec.open(json_file_path, "r") as f: + def _load_json(json_file_path: Union[str, os.PathLike[Any]]) -> Dict: + with fsspec.open(str(json_file_path), "r") as f: return json.load(f) @staticmethod - def _save_json(json_file_path: str, data: dict) -> None: - with fsspec.open(json_file_path, "w") as f: + def _save_json(json_file_path: Union[str, os.PathLike[Any]], data: dict) -> None: + with fsspec.open(str(json_file_path), "w") as f: json.dump(data, f, indent=4) def set_ids_from_data(self, items: List, parse_key: str) -> None: @@ -63,7 +66,7 @@ def set_ids_from_data(self, items: List, parse_key: str) -> None: """ self.name_to_id = self.parse_ids_from_data(items, parse_key=parse_key) - def load_ids_from_file(self, file_path: str) -> None: + def load_ids_from_file(self, file_path: Union[str, os.PathLike[Any]]) -> None: """Set IDs from a file. Args: @@ -71,7 +74,7 @@ def load_ids_from_file(self, file_path: str) -> None: """ self.name_to_id = load_file(file_path) - def save_ids_to_file(self, file_path: str) -> None: + def save_ids_to_file(self, file_path: Union[str, os.PathLike[Any]]) -> None: """Save IDs to a json file. Args: @@ -130,10 +133,10 @@ class EmbeddingManager(BaseIDManager): def __init__( self, - embedding_file_path: Union[str, List[str]] = "", - id_file_path: str = "", - encoder_model_path: str = "", - encoder_config_path: str = "", + embedding_file_path: Union[Union[str, os.PathLike[Any]], list[Union[str, os.PathLike[Any]]]] = "", + id_file_path: Union[str, os.PathLike[Any]] = "", + encoder_model_path: Union[str, os.PathLike[Any]] = "", + encoder_config_path: Union[str, os.PathLike[Any]] = "", use_cuda: bool = False, ): super().__init__(id_file_path=id_file_path) @@ -176,7 +179,7 @@ def embedding_names(self): """Get embedding names.""" return list(self.embeddings_by_names.keys()) - def save_embeddings_to_file(self, file_path: str) -> None: + def save_embeddings_to_file(self, file_path: Union[str, os.PathLike[Any]]) -> None: """Save embeddings to a json file. Args: @@ -185,7 +188,7 @@ def save_embeddings_to_file(self, file_path: str) -> None: save_file(self.embeddings, file_path) @staticmethod - def read_embeddings_from_file(file_path: str): + def read_embeddings_from_file(file_path: Union[str, os.PathLike[Any]]): """Load embeddings from a json file. Args: @@ -204,7 +207,7 @@ def read_embeddings_from_file(file_path: str): embeddings_by_names[x["name"]].append(x["embedding"]) return name_to_id, clip_ids, embeddings, embeddings_by_names - def load_embeddings_from_file(self, file_path: str) -> None: + def load_embeddings_from_file(self, file_path: Union[str, os.PathLike[Any]]) -> None: """Load embeddings from a json file. Args: @@ -214,7 +217,7 @@ def load_embeddings_from_file(self, file_path: str) -> None: file_path ) - def load_embeddings_from_list_of_files(self, file_paths: List[str]) -> None: + def load_embeddings_from_list_of_files(self, file_paths: list[Union[str, os.PathLike[Any]]]) -> None: """Load embeddings from a list of json files and don't allow duplicate keys. 
Args: @@ -313,7 +316,9 @@ def get_random_embedding(self) -> Any: def get_clips(self) -> List: return sorted(self.embeddings.keys()) - def init_encoder(self, model_path: str, config_path: str, use_cuda=False) -> None: + def init_encoder( + self, model_path: Union[str, os.PathLike[Any]], config_path: Union[str, os.PathLike[Any]], use_cuda=False + ) -> None: """Initialize a speaker encoder model. Args: @@ -325,11 +330,13 @@ def init_encoder(self, model_path: str, config_path: str, use_cuda=False) -> Non self.encoder_config = load_config(config_path) self.encoder = setup_encoder_model(self.encoder_config) self.encoder_criterion = self.encoder.load_checkpoint( - self.encoder_config, model_path, eval=True, use_cuda=use_cuda, cache=True + self.encoder_config, str(model_path), eval=True, use_cuda=use_cuda, cache=True ) self.encoder_ap = AudioProcessor(**self.encoder_config.audio) - def compute_embedding_from_clip(self, wav_file: Union[str, List[str]]) -> list: + def compute_embedding_from_clip( + self, wav_file: Union[Union[str, os.PathLike[Any]], List[Union[str, os.PathLike[Any]]]] + ) -> list: """Compute a embedding from a given audio file. Args: diff --git a/TTS/tts/utils/speakers.py b/TTS/tts/utils/speakers.py index 5229af81c5..89c56583f5 100644 --- a/TTS/tts/utils/speakers.py +++ b/TTS/tts/utils/speakers.py @@ -1,7 +1,7 @@ import json import logging import os -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Optional, Union import fsspec import numpy as np @@ -56,11 +56,11 @@ class SpeakerManager(EmbeddingManager): def __init__( self, - data_items: List[List[Any]] = None, + data_items: Optional[list[list[Any]]] = None, d_vectors_file_path: str = "", - speaker_id_file_path: str = "", - encoder_model_path: str = "", - encoder_config_path: str = "", + speaker_id_file_path: Union[str, os.PathLike[Any]] = "", + encoder_model_path: Union[str, os.PathLike[Any]] = "", + encoder_config_path: Union[str, os.PathLike[Any]] = "", use_cuda: bool = False, ): super().__init__( diff --git a/TTS/utils/audio/numpy_transforms.py b/TTS/utils/audio/numpy_transforms.py index 9c83009b0f..0cba7fc8a8 100644 --- a/TTS/utils/audio/numpy_transforms.py +++ b/TTS/utils/audio/numpy_transforms.py @@ -1,6 +1,7 @@ import logging +import os from io import BytesIO -from typing import Optional +from typing import Any, Optional, Union import librosa import numpy as np @@ -406,7 +407,9 @@ def rms_volume_norm(*, x: np.ndarray, db_level: float = -27.0, **kwargs) -> np.n return rms_norm(wav=x, db_level=db_level) -def load_wav(*, filename: str, sample_rate: Optional[int] = None, resample: bool = False, **kwargs) -> np.ndarray: +def load_wav( + *, filename: Union[str, os.PathLike[Any]], sample_rate: Optional[int] = None, resample: bool = False, **kwargs +) -> np.ndarray: """Read a wav file using Librosa and optionally resample, silence trim, volume normalize. Resampling slows down loading the file significantly. Therefore it is recommended to resample the file before. 
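A short usage sketch (not part of the diff) of what the widened type hints in this file allow — passing `pathlib.Path` objects where previously only `str` was accepted; the file names below are placeholders:

```python
from pathlib import Path

from TTS.utils.audio.numpy_transforms import load_wav, save_wav

# Both functions take keyword-only arguments; str or Path now works for the file locations.
wav = load_wav(filename=Path("clip.wav"), sample_rate=22050, resample=True)
save_wav(wav=wav, path=Path("clip_22050.wav"), sample_rate=22050)
```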
@@ -434,7 +437,7 @@ def load_wav(*, filename: str, sample_rate: Optional[int] = None, resample: bool def save_wav( *, wav: np.ndarray, - path: str, + path: Union[str, os.PathLike[Any]], sample_rate: int, pipe_out=None, do_rms_norm: bool = False, diff --git a/TTS/utils/audio/processor.py b/TTS/utils/audio/processor.py index 1d8fed8e39..bf07333aea 100644 --- a/TTS/utils/audio/processor.py +++ b/TTS/utils/audio/processor.py @@ -1,5 +1,6 @@ import logging -from typing import Optional +import os +from typing import Any, Optional, Union import librosa import numpy as np @@ -548,7 +549,7 @@ def sound_norm(x: np.ndarray) -> np.ndarray: return volume_norm(x=x) ### save and load ### - def load_wav(self, filename: str, sr: Optional[int] = None) -> np.ndarray: + def load_wav(self, filename: Union[str, os.PathLike[Any]], sr: Optional[int] = None) -> np.ndarray: """Read a wav file using Librosa and optionally resample, silence trim, volume normalize. Resampling slows down loading the file significantly. Therefore it is recommended to resample the file before. @@ -575,7 +576,9 @@ def load_wav(self, filename: str, sr: Optional[int] = None) -> np.ndarray: x = rms_volume_norm(x=x, db_level=self.db_level) return x - def save_wav(self, wav: np.ndarray, path: str, sr: Optional[int] = None, pipe_out=None) -> None: + def save_wav( + self, wav: np.ndarray, path: Union[str, os.PathLike[Any]], sr: Optional[int] = None, pipe_out=None + ) -> None: """Save a waveform to a file using Scipy. Args: diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index 087ae7d0e1..d7397f673d 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -4,7 +4,7 @@ import logging import re from pathlib import Path -from typing import Callable, Dict, Optional, TypeVar, Union +from typing import Any, Callable, Dict, Optional, TypeVar, Union import torch from packaging.version import Version @@ -133,3 +133,8 @@ def setup_logger( def is_pytorch_at_least_2_4() -> bool: """Check if the installed Pytorch version is 2.4 or higher.""" return Version(torch.__version__) >= Version("2.4") + + +def optional_to_str(x: Optional[Any]) -> str: + """Convert input to string, using empty string if input is None.""" + return "" if x is None else str(x) diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index 38fcfd60e9..b33243ffa9 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -6,17 +6,35 @@ import zipfile from pathlib import Path from shutil import copyfile, rmtree -from typing import Dict, Tuple +from typing import Any, Optional, TypedDict, Union import fsspec import requests from tqdm import tqdm from trainer.io import get_user_data_dir +from typing_extensions import Required from TTS.config import load_config, read_json_with_comments logger = logging.getLogger(__name__) + +class ModelItem(TypedDict, total=False): + model_name: Required[str] + model_type: Required[str] + description: str + license: str + author: str + contact: str + commit: Optional[str] + model_hash: str + tos_required: bool + default_vocoder: Optional[str] + model_url: Union[str, list[str]] + github_rls_url: Union[str, list[str]] + hf_url: list[str] + + LICENSE_URLS = { "cc by-nc-nd 4.0": "https://creativecommons.org/licenses/by-nc-nd/4.0/", "mpl": "https://www.mozilla.org/en-US/MPL/2.0/", @@ -40,19 +58,24 @@ class ModelManager(object): home path. Args: - models_file (str): path to .model.json file. Defaults to None. - output_prefix (str): prefix to `tts` to download models. 
Defaults to None + models_file (str or Path): path to .model.json file. Defaults to None. + output_prefix (str or Path): prefix to `tts` to download models. Defaults to None progress_bar (bool): print a progress bar when donwloading a file. Defaults to False. """ - def __init__(self, models_file=None, output_prefix=None, progress_bar=False): + def __init__( + self, + models_file: Optional[Union[str, os.PathLike[Any]]] = None, + output_prefix: Optional[Union[str, os.PathLike[Any]]] = None, + progress_bar: bool = False, + ) -> None: super().__init__() self.progress_bar = progress_bar if output_prefix is None: self.output_prefix = get_user_data_dir("tts") else: - self.output_prefix = os.path.join(output_prefix, "tts") - self.models_dict = None + self.output_prefix = Path(output_prefix) / "tts" + self.models_dict = {} if models_file is not None: self.read_models_file(models_file) else: @@ -60,7 +83,7 @@ def __init__(self, models_file=None, output_prefix=None, progress_bar=False): path = Path(__file__).parent / "../.models.json" self.read_models_file(path) - def read_models_file(self, file_path): + def read_models_file(self, file_path: Union[str, os.PathLike[Any]]) -> None: """Read .models.json as a dict Args: @@ -68,7 +91,7 @@ def read_models_file(self, file_path): """ self.models_dict = read_json_with_comments(file_path) - def _list_models(self, model_type, model_count=0): + def _list_models(self, model_type: str, model_count: int = 0) -> list[str]: logger.info("") logger.info("Name format: type/language/dataset/model") model_list = [] @@ -83,13 +106,13 @@ def _list_models(self, model_type, model_count=0): model_count += 1 return model_list - def _list_for_model_type(self, model_type): + def _list_for_model_type(self, model_type: str) -> list[str]: models_name_list = [] model_count = 1 models_name_list.extend(self._list_models(model_type, model_count)) return models_name_list - def list_models(self): + def list_models(self) -> list[str]: models_name_list = [] model_count = 1 for model_type in self.models_dict: @@ -97,7 +120,7 @@ def list_models(self): models_name_list.extend(model_list) return models_name_list - def log_model_details(self, model_type, lang, dataset, model): + def log_model_details(self, model_type: str, lang: str, dataset: str, model: str) -> None: logger.info("Model type: %s", model_type) logger.info("Language supported: %s", lang) logger.info("Dataset used: %s", dataset) @@ -112,7 +135,7 @@ def log_model_details(self, model_type, lang, dataset, model): self.models_dict[model_type][lang][dataset][model]["default_vocoder"], ) - def model_info_by_idx(self, model_query): + def model_info_by_idx(self, model_query: str) -> None: """Print the description of the model from .models.json file using model_query_idx Args: @@ -144,7 +167,7 @@ def model_info_by_idx(self, model_query): model_type, lang, dataset, model = model_name_list[model_query_idx - 1].split("/") self.log_model_details(model_type, lang, dataset, model) - def model_info_by_full_name(self, model_query_name): + def model_info_by_full_name(self, model_query_name: str) -> None: """Print the description of the model from .models.json file using model_full_name Args: @@ -165,35 +188,35 @@ def model_info_by_full_name(self, model_query_name): return self.log_model_details(model_type, lang, dataset, model) - def list_tts_models(self): + def list_tts_models(self) -> list[str]: """Print all `TTS` models and return a list of model names Format is `language/dataset/model` """ return self._list_for_model_type("tts_models") - def 
list_vocoder_models(self): + def list_vocoder_models(self) -> list[str]: """Print all the `vocoder` models and return a list of model names Format is `language/dataset/model` """ return self._list_for_model_type("vocoder_models") - def list_vc_models(self): + def list_vc_models(self) -> list[str]: """Print all the voice conversion models and return a list of model names Format is `language/dataset/model` """ return self._list_for_model_type("voice_conversion_models") - def list_langs(self): + def list_langs(self) -> None: """Print all the available languages""" logger.info("Name format: type/language") for model_type in self.models_dict: for lang in self.models_dict[model_type]: logger.info(" %s/%s", model_type, lang) - def list_datasets(self): + def list_datasets(self) -> None: """Print all the datasets""" logger.info("Name format: type/language/dataset") for model_type in self.models_dict: @@ -202,7 +225,7 @@ def list_datasets(self): logger.info(" %s/%s/%s", model_type, lang, dataset) @staticmethod - def print_model_license(model_item: Dict): + def print_model_license(model_item: ModelItem) -> None: """Print the license of a model Args: @@ -217,27 +240,27 @@ def print_model_license(model_item: Dict): else: logger.info("Model's license - No license information available") - def _download_github_model(self, model_item: Dict, output_path: str): + def _download_github_model(self, model_item: ModelItem, output_path: Path) -> None: if isinstance(model_item["github_rls_url"], list): self._download_model_files(model_item["github_rls_url"], output_path, self.progress_bar) else: self._download_zip_file(model_item["github_rls_url"], output_path, self.progress_bar) - def _download_hf_model(self, model_item: Dict, output_path: str): + def _download_hf_model(self, model_item: ModelItem, output_path: Path) -> None: if isinstance(model_item["hf_url"], list): self._download_model_files(model_item["hf_url"], output_path, self.progress_bar) else: self._download_zip_file(model_item["hf_url"], output_path, self.progress_bar) - def download_fairseq_model(self, model_name, output_path): + def download_fairseq_model(self, model_name: str, output_path: Path) -> None: URI_PREFIX = "https://dl.fbaipublicfiles.com/mms/tts/" _, lang, _, _ = model_name.split("/") model_download_uri = os.path.join(URI_PREFIX, f"{lang}.tar.gz") self._download_tar_file(model_download_uri, output_path, self.progress_bar) @staticmethod - def set_model_url(model_item: Dict): - model_item["model_url"] = None + def set_model_url(model_item: ModelItem) -> ModelItem: + model_item["model_url"] = "" if "github_rls_url" in model_item: model_item["model_url"] = model_item["github_rls_url"] elif "hf_url" in model_item: @@ -248,18 +271,18 @@ def set_model_url(model_item: Dict): model_item["model_url"] = "https://huggingface.co/coqui/" return model_item - def _set_model_item(self, model_name): + def _set_model_item(self, model_name: str) -> tuple[ModelItem, str, str, Optional[str]]: # fetch model info from the dict if "fairseq" in model_name: model_type, lang, dataset, model = model_name.split("/") - model_item = { + model_item: ModelItem = { + "model_name": model_name, "model_type": "tts_models", "license": "CC BY-NC 4.0", "default_vocoder": None, "author": "fairseq", "description": "this model is released by Meta under Fairseq repo. 
Visit https://github.com/facebookresearch/fairseq/tree/main/examples/mms for more info.", } - model_item["model_name"] = model_name elif "xtts" in model_name and len(model_name.split("/")) != 4: # loading xtts models with only model name (e.g. xtts_v2.0.2) # check model name has the version number with regex @@ -273,6 +296,8 @@ def _set_model_item(self, model_name): dataset = "multi-dataset" model = model_name model_item = { + "model_name": model_name, + "model_type": model_type, "default_vocoder": None, "license": "CPML", "contact": "info@coqui.ai", @@ -297,9 +322,9 @@ def _set_model_item(self, model_name): return model_item, model_full_name, model, md5hash @staticmethod - def ask_tos(model_full_path): + def ask_tos(model_full_path: Path) -> bool: """Ask the user to agree to the terms of service""" - tos_path = os.path.join(model_full_path, "tos_agreed.txt") + tos_path = model_full_path / "tos_agreed.txt" print(" > You must confirm the following:") print(' | > "I have purchased a commercial license from Coqui: licensing@coqui.ai"') print(' | > "Otherwise, I agree to the terms of the non-commercial CPML: https://coqui.ai/cpml" - [y/n]') @@ -311,7 +336,7 @@ def ask_tos(model_full_path): return False @staticmethod - def tos_agreed(model_item, model_full_path): + def tos_agreed(model_item: ModelItem, model_full_path: Path) -> bool: """Check if the user has agreed to the terms of service""" if "tos_required" in model_item and model_item["tos_required"]: tos_path = os.path.join(model_full_path, "tos_agreed.txt") @@ -320,12 +345,12 @@ def tos_agreed(model_item, model_full_path): return False return True - def create_dir_and_download_model(self, model_name, model_item, output_path): - os.makedirs(output_path, exist_ok=True) + def create_dir_and_download_model(self, model_name: str, model_item: ModelItem, output_path: Path) -> None: + output_path.mkdir(exist_ok=True, parents=True) # handle TOS if not self.tos_agreed(model_item, output_path): if not self.ask_tos(output_path): - os.rmdir(output_path) + output_path.rmdir() raise Exception(" [!] You must agree to the terms of service to use this model.") logger.info("Downloading model to %s", output_path) try: @@ -342,7 +367,7 @@ def create_dir_and_download_model(self, model_name, model_item, output_path): raise e self.print_model_license(model_item=model_item) - def check_if_configs_are_equal(self, model_name, model_item, output_path): + def check_if_configs_are_equal(self, model_name: str, model_item: ModelItem, output_path: Path) -> None: with fsspec.open(self._find_files(output_path)[1], "r", encoding="utf-8") as f: config_local = json.load(f) remote_url = None @@ -358,7 +383,7 @@ def check_if_configs_are_equal(self, model_name, model_item, output_path): logger.info("%s is already downloaded however it has been changed. Redownloading it...", model_name) self.create_dir_and_download_model(model_name, model_item, output_path) - def download_model(self, model_name): + def download_model(self, model_name: str) -> tuple[Path, Optional[Path], ModelItem]: """Download model files given the full model name. 
Model name is in the format 'type/language/dataset/model' @@ -374,12 +399,12 @@ def download_model(self, model_name): """ model_item, model_full_name, model, md5sum = self._set_model_item(model_name) # set the model specific output path - output_path = os.path.join(self.output_prefix, model_full_name) - if os.path.exists(output_path): + output_path = Path(self.output_prefix) / model_full_name + if output_path.is_dir(): if md5sum is not None: - md5sum_file = os.path.join(output_path, "hash.md5") - if os.path.isfile(md5sum_file): - with open(md5sum_file, mode="r") as f: + md5sum_file = output_path / "hash.md5" + if md5sum_file.is_file(): + with md5sum_file.open() as f: if not f.read() == md5sum: logger.info("%s has been updated, clearing model cache...", model_name) self.create_dir_and_download_model(model_name, model_item, output_path) @@ -407,12 +432,14 @@ def download_model(self, model_name): model not in ["tortoise-v2", "bark"] and "fairseq" not in model_name and "xtts" not in model_name ): # TODO:This is stupid but don't care for now. output_model_path, output_config_path = self._find_files(output_path) + else: + output_config_path = output_model_path / "config.json" # update paths in the config.json self._update_paths(output_path, output_config_path) return output_model_path, output_config_path, model_item @staticmethod - def _find_files(output_path: str) -> Tuple[str, str]: + def _find_files(output_path: Path) -> tuple[Path, Path]: """Find the model and config files in the output path Args: @@ -423,11 +450,11 @@ def _find_files(output_path: str) -> Tuple[str, str]: """ model_file = None config_file = None - for file_name in os.listdir(output_path): - if file_name in ["model_file.pth", "model_file.pth.tar", "model.pth", "checkpoint.pth"]: - model_file = os.path.join(output_path, file_name) - elif file_name == "config.json": - config_file = os.path.join(output_path, file_name) + for f in output_path.iterdir(): + if f.name in ["model_file.pth", "model_file.pth.tar", "model.pth", "checkpoint.pth"]: + model_file = f + elif f.name == "config.json": + config_file = f if model_file is None: raise ValueError(" [!] Model file not found in the output path") if config_file is None: @@ -435,7 +462,7 @@ def _find_files(output_path: str) -> Tuple[str, str]: return model_file, config_file @staticmethod - def _find_speaker_encoder(output_path: str) -> str: + def _find_speaker_encoder(output_path: Path) -> Optional[Path]: """Find the speaker encoder file in the output path Args: @@ -445,24 +472,24 @@ def _find_speaker_encoder(output_path: str) -> str: str: path to the speaker encoder file """ speaker_encoder_file = None - for file_name in os.listdir(output_path): - if file_name in ["model_se.pth", "model_se.pth.tar"]: - speaker_encoder_file = os.path.join(output_path, file_name) + for f in output_path.iterdir(): + if f.name in ["model_se.pth", "model_se.pth.tar"]: + speaker_encoder_file = f return speaker_encoder_file - def _update_paths(self, output_path: str, config_path: str) -> None: + def _update_paths(self, output_path: Path, config_path: Path) -> None: """Update paths for certain files in config.json after download. Args: output_path (str): local path the model is downloaded to. config_path (str): local config.json path. 
""" - output_stats_path = os.path.join(output_path, "scale_stats.npy") - output_d_vector_file_path = os.path.join(output_path, "speakers.json") - output_d_vector_file_pth_path = os.path.join(output_path, "speakers.pth") - output_speaker_ids_file_path = os.path.join(output_path, "speaker_ids.json") - output_speaker_ids_file_pth_path = os.path.join(output_path, "speaker_ids.pth") - speaker_encoder_config_path = os.path.join(output_path, "config_se.json") + output_stats_path = output_path / "scale_stats.npy" + output_d_vector_file_path = output_path / "speakers.json" + output_d_vector_file_pth_path = output_path / "speakers.pth" + output_speaker_ids_file_path = output_path / "speaker_ids.json" + output_speaker_ids_file_pth_path = output_path / "speaker_ids.pth" + speaker_encoder_config_path = output_path / "config_se.json" speaker_encoder_model_path = self._find_speaker_encoder(output_path) # update the scale_path.npy file path in the model config.json @@ -487,10 +514,10 @@ def _update_paths(self, output_path: str, config_path: str) -> None: self._update_path("model_args.speaker_encoder_config_path", speaker_encoder_config_path, config_path) @staticmethod - def _update_path(field_name, new_path, config_path): + def _update_path(field_name: str, new_path: Optional[Path], config_path: Path) -> None: """Update the path in the model config.json for the current environment after download""" - if new_path and os.path.exists(new_path): - config = load_config(config_path) + if new_path is not None and new_path.is_file(): + config = load_config(str(config_path)) field_names = field_name.split(".") if len(field_names) > 1: # field name points to a sub-level field @@ -515,7 +542,7 @@ def _update_path(field_name, new_path, config_path): config.save_json(config_path) @staticmethod - def _download_zip_file(file_url, output_folder, progress_bar): + def _download_zip_file(file_url: str, output_folder: Path, progress_bar: bool) -> None: """Download the github releases""" # download the file r = requests.get(file_url, stream=True) @@ -525,7 +552,7 @@ def _download_zip_file(file_url, output_folder, progress_bar): block_size = 1024 # 1 Kibibyte if progress_bar: ModelManager.tqdm_progress = tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) - temp_zip_name = os.path.join(output_folder, file_url.split("/")[-1]) + temp_zip_name = output_folder / file_url.split("/")[-1] with open(temp_zip_name, "wb") as file: for data in r.iter_content(block_size): if progress_bar: @@ -533,24 +560,24 @@ def _download_zip_file(file_url, output_folder, progress_bar): file.write(data) with zipfile.ZipFile(temp_zip_name) as z: z.extractall(output_folder) - os.remove(temp_zip_name) # delete zip after extract + temp_zip_name.unlink() # delete zip after extract except zipfile.BadZipFile: logger.exception("Bad zip file - %s", file_url) raise zipfile.BadZipFile # pylint: disable=raise-missing-from # move the files to the outer path for file_path in z.namelist(): - src_path = os.path.join(output_folder, file_path) - if os.path.isfile(src_path): - dst_path = os.path.join(output_folder, os.path.basename(file_path)) + src_path = output_folder / file_path + if src_path.is_file(): + dst_path = output_folder / os.path.basename(file_path) if src_path != dst_path: copyfile(src_path, dst_path) # remove redundant (hidden or not) folders for file_path in z.namelist(): - if os.path.isdir(os.path.join(output_folder, file_path)): - rmtree(os.path.join(output_folder, file_path)) + if (output_folder / file_path).is_dir(): + rmtree(output_folder / 
file_path) @staticmethod - def _download_tar_file(file_url, output_folder, progress_bar): + def _download_tar_file(file_url: str, output_folder: Path, progress_bar: bool) -> None: """Download the github releases""" # download the file r = requests.get(file_url, stream=True) @@ -560,7 +587,7 @@ def _download_tar_file(file_url, output_folder, progress_bar): block_size = 1024 # 1 Kibibyte if progress_bar: ModelManager.tqdm_progress = tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) - temp_tar_name = os.path.join(output_folder, file_url.split("/")[-1]) + temp_tar_name = output_folder / file_url.split("/")[-1] with open(temp_tar_name, "wb") as file: for data in r.iter_content(block_size): if progress_bar: @@ -569,43 +596,37 @@ def _download_tar_file(file_url, output_folder, progress_bar): with tarfile.open(temp_tar_name) as t: t.extractall(output_folder) tar_names = t.getnames() - os.remove(temp_tar_name) # delete tar after extract + temp_tar_name.unlink() # delete tar after extract except tarfile.ReadError: logger.exception("Bad tar file - %s", file_url) raise tarfile.ReadError # pylint: disable=raise-missing-from # move the files to the outer path - for file_path in os.listdir(os.path.join(output_folder, tar_names[0])): - src_path = os.path.join(output_folder, tar_names[0], file_path) - dst_path = os.path.join(output_folder, os.path.basename(file_path)) + for file_path in (output_folder / tar_names[0]).iterdir(): + src_path = file_path + dst_path = output_folder / file_path.name if src_path != dst_path: copyfile(src_path, dst_path) # remove the extracted folder - rmtree(os.path.join(output_folder, tar_names[0])) + rmtree(output_folder / tar_names[0]) @staticmethod - def _download_model_files(file_urls, output_folder, progress_bar): + def _download_model_files( + file_urls: list[str], output_folder: Union[str, os.PathLike[Any]], progress_bar: bool + ) -> None: """Download the github releases""" + output_folder = Path(output_folder) for file_url in file_urls: # download the file r = requests.get(file_url, stream=True) # extract the file - bease_filename = file_url.split("/")[-1] - temp_zip_name = os.path.join(output_folder, bease_filename) + base_filename = file_url.split("/")[-1] + file_path = output_folder / base_filename total_size_in_bytes = int(r.headers.get("content-length", 0)) block_size = 1024 # 1 Kibibyte - with open(temp_zip_name, "wb") as file: + with open(file_path, "wb") as f: if progress_bar: ModelManager.tqdm_progress = tqdm(total=total_size_in_bytes, unit="iB", unit_scale=True) for data in r.iter_content(block_size): if progress_bar: ModelManager.tqdm_progress.update(len(data)) - file.write(data) - - @staticmethod - def _check_dict_key(my_dict, key): - if key in my_dict.keys() and my_dict[key] is not None: - if not isinstance(key, str): - return True - if isinstance(key, str) and len(my_dict[key]) > 0: - return True - return False + f.write(data) diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index a9b9feffc1..52f5a86de5 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -2,7 +2,7 @@ import os import time from pathlib import Path -from typing import List +from typing import Any, List, Optional, Union import numpy as np import pysbd @@ -16,6 +16,7 @@ from TTS.tts.utils.synthesis import synthesis, transfer_voice, trim_silence from TTS.utils.audio import AudioProcessor from TTS.utils.audio.numpy_transforms import save_wav +from TTS.utils.generic_utils import optional_to_str from TTS.vc.configs.openvoice_config import 
OpenVoiceConfig from TTS.vc.models import setup_model as setup_vc_model from TTS.vc.models.openvoice import OpenVoice @@ -29,18 +30,18 @@ class Synthesizer(nn.Module): def __init__( self, *, - tts_checkpoint: str = "", - tts_config_path: str = "", - tts_speakers_file: str = "", - tts_languages_file: str = "", - vocoder_checkpoint: str = "", - vocoder_config: str = "", - encoder_checkpoint: str = "", - encoder_config: str = "", - vc_checkpoint: str = "", - vc_config: str = "", - model_dir: str = "", - voice_dir: str = None, + tts_checkpoint: Optional[Union[str, os.PathLike[Any]]] = None, + tts_config_path: Optional[Union[str, os.PathLike[Any]]] = None, + tts_speakers_file: Optional[Union[str, os.PathLike[Any]]] = None, + tts_languages_file: Optional[Union[str, os.PathLike[Any]]] = None, + vocoder_checkpoint: Optional[Union[str, os.PathLike[Any]]] = None, + vocoder_config: Optional[Union[str, os.PathLike[Any]]] = None, + encoder_checkpoint: Optional[Union[str, os.PathLike[Any]]] = None, + encoder_config: Optional[Union[str, os.PathLike[Any]]] = None, + vc_checkpoint: Optional[Union[str, os.PathLike[Any]]] = None, + vc_config: Optional[Union[str, os.PathLike[Any]]] = None, + model_dir: Optional[Union[str, os.PathLike[Any]]] = None, + voice_dir: Optional[Union[str, os.PathLike[Any]]] = None, use_cuda: bool = False, ) -> None: """General 🐸 TTS interface for inference. It takes a tts and a vocoder @@ -66,16 +67,17 @@ def __init__( use_cuda (bool, optional): enable/disable cuda. Defaults to False. """ super().__init__() - self.tts_checkpoint = tts_checkpoint - self.tts_config_path = tts_config_path - self.tts_speakers_file = tts_speakers_file - self.tts_languages_file = tts_languages_file - self.vocoder_checkpoint = vocoder_checkpoint - self.vocoder_config = vocoder_config - self.encoder_checkpoint = encoder_checkpoint - self.encoder_config = encoder_config - self.vc_checkpoint = vc_checkpoint - self.vc_config = vc_config + self.tts_checkpoint = optional_to_str(tts_checkpoint) + self.tts_config_path = optional_to_str(tts_config_path) + self.tts_speakers_file = optional_to_str(tts_speakers_file) + self.tts_languages_file = optional_to_str(tts_languages_file) + self.vocoder_checkpoint = optional_to_str(vocoder_checkpoint) + self.vocoder_config = optional_to_str(vocoder_config) + self.encoder_checkpoint = optional_to_str(encoder_checkpoint) + self.encoder_config = optional_to_str(encoder_config) + self.vc_checkpoint = optional_to_str(vc_checkpoint) + self.vc_config = optional_to_str(vc_config) + model_dir = optional_to_str(model_dir) self.use_cuda = use_cuda self.tts_model = None @@ -89,18 +91,18 @@ def __init__( self.d_vector_dim = 0 self.seg = self._get_segmenter("en") self.use_cuda = use_cuda - self.voice_dir = voice_dir + self.voice_dir = optional_to_str(voice_dir) if self.use_cuda: assert torch.cuda.is_available(), "CUDA is not availabe on this machine." 
if tts_checkpoint: - self._load_tts(tts_checkpoint, tts_config_path, use_cuda) + self._load_tts(self.tts_checkpoint, self.tts_config_path, use_cuda) if vocoder_checkpoint: - self._load_vocoder(vocoder_checkpoint, vocoder_config, use_cuda) + self._load_vocoder(self.vocoder_checkpoint, self.vocoder_config, use_cuda) - if vc_checkpoint and model_dir is None: - self._load_vc(vc_checkpoint, vc_config, use_cuda) + if vc_checkpoint and model_dir == "": + self._load_vc(self.vc_checkpoint, self.vc_config, use_cuda) if model_dir: if "fairseq" in model_dir: diff --git a/tests/zoo_tests/test_models.py b/tests/zoo_tests/test_models.py index f38880b51f..461b4fbe12 100644 --- a/tests/zoo_tests/test_models.py +++ b/tests/zoo_tests/test_models.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3` -import glob import os import shutil @@ -30,22 +29,22 @@ def run_models(offset=0, step=1): print(f"\n > Run - {model_name}") model_path, _, _ = manager.download_model(model_name) if "tts_models" in model_name: - local_download_dir = os.path.dirname(model_path) + local_download_dir = model_path.parent # download and run the model - speaker_files = glob.glob(local_download_dir + "/speaker*") - language_files = glob.glob(local_download_dir + "/language*") + speaker_files = list(local_download_dir.glob("speaker*")) + language_files = list(local_download_dir.glob("language*")) speaker_arg = "" language_arg = "" if len(speaker_files) > 0: # multi-speaker model - if "speaker_ids" in speaker_files[0]: + if "speaker_ids" in speaker_files[0].stem: speaker_manager = SpeakerManager(speaker_id_file_path=speaker_files[0]) - elif "speakers" in speaker_files[0]: + elif "speakers" in speaker_files[0].stem: speaker_manager = SpeakerManager(d_vectors_file_path=speaker_files[0]) speakers = list(speaker_manager.name_to_id.keys()) if len(speakers) > 1: speaker_arg = f'--speaker_idx "{speakers[0]}"' - if len(language_files) > 0 and "language_ids" in language_files[0]: + if len(language_files) > 0 and "language_ids" in language_files[0].stem: # multi-lingual model language_manager = LanguageManager(language_ids_file_path=language_files[0]) languages = language_manager.language_names From 0df04cc259c7094f2b0f64841da634045b3f6894 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Sat, 14 Dec 2024 15:52:13 +0100 Subject: [PATCH 250/255] docs: add notes about xtts fine-tuning --- TTS/bin/synthesize.py | 6 +++--- docs/source/faq.md | 8 +++++++- docs/source/training/finetuning.md | 3 +++ docs/source/training/index.md | 3 +++ docs/source/tutorial_for_nervous_beginners.md | 3 +++ 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index 5fce93b7f4..47b442e266 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -34,7 +34,7 @@ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2 ``` -#### Single Speaker Models +#### Single speaker models - Run TTS with the default model (`tts_models/en/ljspeech/tacotron2-DDC`): @@ -102,7 +102,7 @@ --vocoder_config_path path/to/vocoder_config.json ``` -#### Multi-speaker Models +#### Multi-speaker models - List the available speakers and choose a `` among them: @@ -125,7 +125,7 @@ --speakers_file_path path/to/speaker.json --speaker_idx ``` -#### Voice Conversion Models +#### Voice conversion models ```sh tts --out_path output/path/speech.wav --model_name "//" \\ diff --git a/docs/source/faq.md b/docs/source/faq.md index 1dd5c1847b..a0eb5bbee4 100644 --- a/docs/source/faq.md +++ b/docs/source/faq.md @@ -16,13 +16,19 @@ We tried to collect common 
issues and questions we receive about 🐸TTS. It is - If you need faster models, consider SpeedySpeech, GlowTTS or AlignTTS. Keep in mind that SpeedySpeech requires a pre-trained Tacotron or Tacotron2 model to compute text-to-speech alignments. ## How can I train my own `tts` model? + +```{note} XTTS has separate fine-tuning scripts, see [here](models/xtts.md#training). +``` + 0. Check your dataset with notebooks in [dataset_analysis](https://github.com/idiap/coqui-ai-TTS/tree/main/notebooks/dataset_analysis) folder. Use [this notebook](https://github.com/idiap/coqui-ai-TTS/blob/main/notebooks/dataset_analysis/CheckSpectrograms.ipynb) to find the right audio processing parameters. A better set of parameters results in a better audio synthesis. 1. Write your own dataset `formatter` in `datasets/formatters.py` or [format](datasets/formatting_your_dataset) your dataset as one of the supported datasets, like LJSpeech. A `formatter` parses the metadata file and converts a list of training samples. 2. If you have a dataset with a different alphabet than English, you need to set your own character list in the ```config.json```. - - If you use phonemes for training and your language is supported [here](https://github.com/rhasspy/gruut#supported-languages), you don't need to set your character list. + - If you use phonemes for training and your language is supported by + [Espeak](https://github.com/espeak-ng/espeak-ng/blob/master/docs/languages.md) + or [Gruut](https://github.com/rhasspy/gruut#supported-languages), you don't need to set your character list. - You can use `TTS/bin/find_unique_chars.py` to get characters used in your dataset. 3. Write your own text cleaner in ```utils.text.cleaners```. It is not always necessary, except when you have a different alphabet or language-specific requirements. diff --git a/docs/source/training/finetuning.md b/docs/source/training/finetuning.md index 1fe54fbcde..fa2ed34a54 100644 --- a/docs/source/training/finetuning.md +++ b/docs/source/training/finetuning.md @@ -29,6 +29,9 @@ them and fine-tune it for your own dataset. This will help you in two main ways: ## Steps to fine-tune a 🐸 TTS model +```{note} XTTS has separate fine-tuning scripts, see [here](../models/xtts.md#training). +``` + 1. Setup your dataset. You need to format your target dataset in a certain way so that 🐸TTS data loader will be able to load it for the diff --git a/docs/source/training/index.md b/docs/source/training/index.md index bb76a705df..b09f9cadcb 100644 --- a/docs/source/training/index.md +++ b/docs/source/training/index.md @@ -8,3 +8,6 @@ The following pages show you how to train and fine-tune Coqui models: training_a_model finetuning ``` + +Also see the [XTTS page](../models/xtts.md#training) if you want to fine-tune +that model. diff --git a/docs/source/tutorial_for_nervous_beginners.md b/docs/source/tutorial_for_nervous_beginners.md index a8a64410c4..5e5eac0e0a 100644 --- a/docs/source/tutorial_for_nervous_beginners.md +++ b/docs/source/tutorial_for_nervous_beginners.md @@ -29,6 +29,9 @@ CLI, server or Python API. ## Training a `tts` Model +```{note} XTTS has separate fine-tuning scripts, see [here](models/xtts.md#training). +``` + A breakdown of a simple script that trains a GlowTTS model on the LJspeech dataset. For a more in-depth guide to training and fine-tuning also see [this page](training/index.md). 
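The FAQ steps added above mention writing your own dataset `formatter` in `datasets/formatters.py`. A minimal sketch of such a formatter, assuming a pipe-separated `metadata.csv` (`wav_id|transcript`) and field names matching the built-in formatters such as `ljspeech`; the `my_dataset` name, file layout and speaker name are placeholders for illustration:

```python
import os


def my_dataset(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
    """Hypothetical formatter: one `wav_id|transcript` pair per line in `meta_file`."""
    items = []
    with open(os.path.join(root_path, meta_file), encoding="utf-8") as f:
        for line in f:
            wav_id, text = line.strip().split("|", maxsplit=1)
            items.append(
                {
                    "text": text,
                    "audio_file": os.path.join(root_path, "wavs", f"{wav_id}.wav"),
                    "speaker_name": "my_speaker",
                    "root_path": root_path,
                }
            )
    return items
```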
From 9d5fc60a5d66f39071c32d33800f767d160f7af7 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Mon, 16 Dec 2024 10:28:25 +0100 Subject: [PATCH 251/255] feat(manager): print download location when listing models (#213) --- TTS/utils/manage.py | 2 ++ docs/source/faq.md | 54 +++++++++++++++++++++++++++++++-------------- 2 files changed, 40 insertions(+), 16 deletions(-) diff --git a/TTS/utils/manage.py b/TTS/utils/manage.py index b33243ffa9..d7d4deab9d 100644 --- a/TTS/utils/manage.py +++ b/TTS/utils/manage.py @@ -118,6 +118,8 @@ def list_models(self) -> list[str]: for model_type in self.models_dict: model_list = self._list_models(model_type, model_count) models_name_list.extend(model_list) + logger.info("") + logger.info("Path to downloaded models: %s", self.output_prefix) return models_name_list def log_model_details(self, model_type: str, lang: str, dataset: str, model: str) -> None: diff --git a/docs/source/faq.md b/docs/source/faq.md index a0eb5bbee4..4fbd149f00 100644 --- a/docs/source/faq.md +++ b/docs/source/faq.md @@ -1,21 +1,43 @@ # FAQ -We tried to collect common issues and questions we receive about 🐸TTS. It is worth checking before going deeper. +We tried to collect common issues and questions we receive about 🐸TTS. It is +worth checking before going deeper. -## Errors with a pre-trained model. How can I resolve this? -- Make sure you use the right commit version of 🐸TTS. Each pre-trained model has its corresponding version that needs to be used. It is defined on the model table. -- If it is still problematic, post your problem on [Discussions](https://github.com/idiap/coqui-ai-TTS/discussions). Please give as many details as possible (error message, your TTS version, your TTS model and config.json etc.) -- If you feel like it's a bug to be fixed, then prefer Github issues with the same level of scrutiny. +## Using Coqui -## What are the requirements of a good 🐸TTS dataset? +### Where does Coqui store downloaded models? + +The path to downloaded models is printed when running `tts --list_models`. +Default locations are: + +- **Linux:** `~/.local/share/tts` +- **Mac:** `~/Library/Application Support/tts` +- **Windows:** `C:\Users\\AppData\Local\tts` + +You can change the prefix of this `tts/` folder by setting the `XDG_DATA_HOME` +or `TTS_HOME` environment variables. + +### Errors with a pre-trained model. How can I resolve this? +- Make sure you use the latest version of 🐸TTS. Each pre-trained model is only + supported from a certain minimum version. +- If it is still problematic, post your problem on + [Discussions](https://github.com/idiap/coqui-ai-TTS/discussions). Please give + as many details as possible (error message, your TTS version, your TTS model + and config.json etc.) +- If you feel like it's a bug to be fixed, then prefer Github issues with the + same level of scrutiny. + +## Training Coqui models + +### What are the requirements of a good 🐸TTS dataset? - [See this page](datasets/what_makes_a_good_dataset.md) -## How should I choose the right model? +### How should I choose the right model? - First, train Tacotron. It is smaller and faster to experiment with. If it performs poorly, try Tacotron2. - Tacotron models produce the most natural voice if your dataset is not too noisy. - If both models do not perform well and especially the attention does not align, then try AlignTTS or GlowTTS. - If you need faster models, consider SpeedySpeech, GlowTTS or AlignTTS. 
Keep in mind that SpeedySpeech requires a pre-trained Tacotron or Tacotron2 model to compute text-to-speech alignments. -## How can I train my own `tts` model? +### How can I train my own `tts` model? ```{note} XTTS has separate fine-tuning scripts, see [here](models/xtts.md#training). ``` @@ -70,13 +92,13 @@ We tried to collect common issues and questions we receive about 🐸TTS. It is **Note:** You can also train your model using pure 🐍 python. Check the [tutorial](tutorial_for_nervous_beginners.md). -## How can I train in a different language? +### How can I train in a different language? - Check steps 2, 3, 4, 5 above. -## How can I train multi-GPUs? +### How can I train multi-GPUs? - Check step 5 above. -## How can I check model performance? +### How can I check model performance? - You can inspect model training and performance using ```tensorboard```. It will show you loss, attention alignment, model output. Go with the order below to measure the model performance. 1. Check ground truth spectrograms. If they do not look as they are supposed to, then check audio processing parameters in ```config.json```. 2. Check train and eval losses and make sure that they all decrease smoothly in time. @@ -91,7 +113,7 @@ We tried to collect common issues and questions we receive about 🐸TTS. It is - 'bidirectional_decoder' is your ultimate savior, but it trains 2x slower and demands 1.5x more GPU memory. - You can also try the other models like AlignTTS or GlowTTS. -## How do I know when to stop training? +### How do I know when to stop training? There is no single objective metric to decide the end of a training since the voice quality is a subjective matter. In our model trainings, we follow these steps; @@ -104,17 +126,17 @@ In our model trainings, we follow these steps; Keep in mind that the approach above only validates the model robustness. It is hard to estimate the voice quality without asking the actual people. The best approach is to pick a set of promising models and run a Mean-Opinion-Score study asking actual people to score the models. -## My model does not learn. How can I debug? +### My model does not learn. How can I debug? - Go over the steps under "How can I check model performance?" -## Attention does not align. How can I make it work? +### Attention does not align. How can I make it work? - Check the 4th step under "How can I check model performance?" -## How can I test a trained model? +### How can I test a trained model? - The best way is to use `tts` or `tts-server` commands. For details check [here](inference.md). - If you need to code your own ```TTS.utils.synthesizer.Synthesizer``` class. -## My Tacotron model does not stop - I see "Decoder stopped with 'max_decoder_steps" - Stopnet does not work. +### My Tacotron model does not stop - I see "Decoder stopped with 'max_decoder_steps" - Stopnet does not work. - In general, all of the above relates to the `stopnet`. It is the part of the model telling the `decoder` when to stop. - In general, a poor `stopnet` relates to something else that is broken in your model or dataset. Especially the attention module. - One common reason is the silent parts in the audio clips at the beginning and the ending. Check ```trim_db``` value in the config. You can find a better value for your dataset by using ```CheckSpectrogram``` notebook. If this value is too small, too much of the audio will be trimmed. If too big, then too much silence will remain. Both will curtail the `stopnet` performance. 
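The download location that `list_models()` now logs is also reachable from Python. A small sketch using the `ModelManager` API as it looks after this patch (only attributes and methods shown in the hunks above are used):

```python
from TTS.utils.manage import ModelManager

manager = ModelManager(progress_bar=False)
model_names = manager.list_models()  # also logs "Path to downloaded models: ..."
print(f"{len(model_names)} models available")
print(f"Models are cached under: {manager.output_prefix}")
```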
From 1f9dda65adae5a6327b8c7c210d99aaae89983f6 Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 17 Dec 2024 10:41:49 +0100 Subject: [PATCH 252/255] docs(xtts): show manual inference with default speakers --- docs/source/models/xtts.md | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/docs/source/models/xtts.md b/docs/source/models/xtts.md index 96f5bb7cd5..91d4b4078c 100644 --- a/docs/source/models/xtts.md +++ b/docs/source/models/xtts.md @@ -163,12 +163,13 @@ from TTS.api import TTS tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to("cuda") # generate speech by cloning a voice using default settings -tts.tts_to_file(text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", - file_path="output.wav", - speaker="Ana Florence", - language="en", - split_sentences=True - ) +tts.tts_to_file( + text="It took me quite a long time to develop a voice, and now that I have it I'm not going to be silent.", + file_path="output.wav", + speaker="Ana Florence", + language="en", + split_sentences=True +) ``` @@ -230,6 +231,11 @@ out = model.inference( torchaudio.save("xtts.wav", torch.tensor(out["wav"]).unsqueeze(0), 24000) ``` +You can also use the Coqui speakers: + +```python +gpt_cond_latent, speaker_embedding = model.speaker_manager.speakers["Ana Florence"].values() +``` #### Streaming manually From 6a52c8a855a787435472f7b7ec0b9d0dc3b9869a Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Tue, 17 Dec 2024 11:38:39 +0100 Subject: [PATCH 253/255] fix(bin): log to stdout in cli tools, unless pipe_out is set This way the outputs are available for further downstream processing, e.g. with grep. For TTS/bin/synthesize.py, if --pipe_out is set, log to stderr because then only the output audio stream should be on stdout, e.g. to pipe it to aplay. 
--- TTS/bin/compute_attention_masks.py | 3 ++- TTS/bin/compute_embeddings.py | 3 ++- TTS/bin/compute_statistics.py | 3 ++- TTS/bin/eval_encoder.py | 3 ++- TTS/bin/extract_tts_spectrograms.py | 3 ++- TTS/bin/find_unique_chars.py | 3 ++- TTS/bin/find_unique_phonemes.py | 3 ++- TTS/bin/remove_silence_using_vad.py | 3 ++- TTS/bin/synthesize.py | 3 ++- TTS/bin/train_encoder.py | 2 +- TTS/bin/train_tts.py | 3 ++- TTS/bin/train_vocoder.py | 3 ++- TTS/bin/tune_wavegrad.py | 3 ++- TTS/encoder/utils/prepare_voxceleb.py | 2 +- TTS/server/server.py | 2 +- TTS/utils/generic_utils.py | 24 +++++++++++++++++------- 16 files changed, 44 insertions(+), 22 deletions(-) diff --git a/TTS/bin/compute_attention_masks.py b/TTS/bin/compute_attention_masks.py index 535182d214..b8f69b54e5 100644 --- a/TTS/bin/compute_attention_masks.py +++ b/TTS/bin/compute_attention_masks.py @@ -2,6 +2,7 @@ import importlib import logging import os +import sys from argparse import RawTextHelpFormatter import numpy as np @@ -18,7 +19,7 @@ from TTS.utils.generic_utils import ConsoleFormatter, setup_logger if __name__ == "__main__": - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) # pylint: disable=bad-option-value parser = argparse.ArgumentParser( diff --git a/TTS/bin/compute_embeddings.py b/TTS/bin/compute_embeddings.py index 1bdb8d733c..dc0ce5b18b 100644 --- a/TTS/bin/compute_embeddings.py +++ b/TTS/bin/compute_embeddings.py @@ -1,6 +1,7 @@ import argparse import logging import os +import sys from argparse import RawTextHelpFormatter import torch @@ -102,7 +103,7 @@ def compute_embeddings( if __name__ == "__main__": - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) parser = argparse.ArgumentParser( description="""Compute embedding vectors for each audio file in a dataset and store them keyed by `{dataset_name}#{file_path}` in a .pth file\n\n""" diff --git a/TTS/bin/compute_statistics.py b/TTS/bin/compute_statistics.py index dc5423a691..acec91c369 100755 --- a/TTS/bin/compute_statistics.py +++ b/TTS/bin/compute_statistics.py @@ -5,6 +5,7 @@ import glob import logging import os +import sys import numpy as np from tqdm import tqdm @@ -18,7 +19,7 @@ def main(): """Run preprocessing process.""" - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stderr, formatter=ConsoleFormatter()) parser = argparse.ArgumentParser(description="Compute mean and variance of spectrogtram features.") parser.add_argument("config_path", type=str, help="TTS config file path to define audio processin parameters.") diff --git a/TTS/bin/eval_encoder.py b/TTS/bin/eval_encoder.py index 711c8221db..701c7d8e82 100644 --- a/TTS/bin/eval_encoder.py +++ b/TTS/bin/eval_encoder.py @@ -1,5 +1,6 @@ import argparse import logging +import sys from argparse import RawTextHelpFormatter import torch @@ -53,7 +54,7 @@ def compute_encoder_accuracy(dataset_items, encoder_manager): if __name__ == "__main__": - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) parser = argparse.ArgumentParser( description="""Compute the accuracy of the encoder.\n\n""" diff --git a/TTS/bin/extract_tts_spectrograms.py 
b/TTS/bin/extract_tts_spectrograms.py index 86a4dce177..a04005ce39 100755 --- a/TTS/bin/extract_tts_spectrograms.py +++ b/TTS/bin/extract_tts_spectrograms.py @@ -4,6 +4,7 @@ import argparse import logging import os +import sys import numpy as np import torch @@ -273,7 +274,7 @@ def main(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) parser = argparse.ArgumentParser() parser.add_argument("--config_path", type=str, help="Path to config file for training.", required=True) diff --git a/TTS/bin/find_unique_chars.py b/TTS/bin/find_unique_chars.py index 0519d43769..7a7fdf5dd4 100644 --- a/TTS/bin/find_unique_chars.py +++ b/TTS/bin/find_unique_chars.py @@ -2,6 +2,7 @@ import argparse import logging +import sys from argparse import RawTextHelpFormatter from TTS.config import load_config @@ -10,7 +11,7 @@ def main(): - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) # pylint: disable=bad-option-value parser = argparse.ArgumentParser( diff --git a/TTS/bin/find_unique_phonemes.py b/TTS/bin/find_unique_phonemes.py index d99acb9893..7c68fdb070 100644 --- a/TTS/bin/find_unique_phonemes.py +++ b/TTS/bin/find_unique_phonemes.py @@ -3,6 +3,7 @@ import argparse import logging import multiprocessing +import sys from argparse import RawTextHelpFormatter from tqdm.contrib.concurrent import process_map @@ -20,7 +21,7 @@ def compute_phonemes(item): def main(): - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) # pylint: disable=W0601 global c, phonemizer diff --git a/TTS/bin/remove_silence_using_vad.py b/TTS/bin/remove_silence_using_vad.py index edab882db8..f9121d7f77 100755 --- a/TTS/bin/remove_silence_using_vad.py +++ b/TTS/bin/remove_silence_using_vad.py @@ -4,6 +4,7 @@ import multiprocessing import os import pathlib +import sys import torch from tqdm import tqdm @@ -77,7 +78,7 @@ def preprocess_audios(): if __name__ == "__main__": - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) parser = argparse.ArgumentParser( description="python TTS/bin/remove_silence_using_vad.py -i=VCTK-Corpus/ -o=VCTK-Corpus-removed-silence/ -g=wav48_silence_trimmed/*/*_mic1.flac --trim_just_beginning_and_end" diff --git a/TTS/bin/synthesize.py b/TTS/bin/synthesize.py index 47b442e266..5d20db6a59 100755 --- a/TTS/bin/synthesize.py +++ b/TTS/bin/synthesize.py @@ -311,8 +311,9 @@ def parse_args() -> argparse.Namespace: def main() -> None: """Entry point for `tts` command line interface.""" - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) args = parse_args() + stream = sys.stderr if args.pipe_out else sys.stdout + setup_logger("TTS", level=logging.INFO, stream=stream, formatter=ConsoleFormatter()) pipe_out = sys.stdout if args.pipe_out else None diff --git a/TTS/bin/train_encoder.py b/TTS/bin/train_encoder.py index ba03c42b6d..84123d2db3 100644 --- a/TTS/bin/train_encoder.py +++ b/TTS/bin/train_encoder.py @@ -322,7 +322,7 @@ def main(args): # pylint: disable=redefined-outer-name if __name__ == "__main__": - setup_logger("TTS", 
level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) args, c, OUT_PATH, AUDIO_PATH, c_logger, dashboard_logger = init_training() diff --git a/TTS/bin/train_tts.py b/TTS/bin/train_tts.py index 6d6342a762..e93b1c9d24 100644 --- a/TTS/bin/train_tts.py +++ b/TTS/bin/train_tts.py @@ -1,5 +1,6 @@ import logging import os +import sys from dataclasses import dataclass, field from trainer import Trainer, TrainerArgs @@ -17,7 +18,7 @@ class TrainTTSArgs(TrainerArgs): def main(): """Run `tts` model training directly by a `config.json` file.""" - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) # init trainer args train_args = TrainTTSArgs() diff --git a/TTS/bin/train_vocoder.py b/TTS/bin/train_vocoder.py index 221ff4cff0..aa04177068 100644 --- a/TTS/bin/train_vocoder.py +++ b/TTS/bin/train_vocoder.py @@ -1,5 +1,6 @@ import logging import os +import sys from dataclasses import dataclass, field from trainer import Trainer, TrainerArgs @@ -18,7 +19,7 @@ class TrainVocoderArgs(TrainerArgs): def main(): """Run `tts` model training directly by a `config.json` file.""" - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) # init trainer args train_args = TrainVocoderArgs() diff --git a/TTS/bin/tune_wavegrad.py b/TTS/bin/tune_wavegrad.py index df2923952d..d05ae14b7f 100644 --- a/TTS/bin/tune_wavegrad.py +++ b/TTS/bin/tune_wavegrad.py @@ -2,6 +2,7 @@ import argparse import logging +import sys from itertools import product as cartesian_product import numpy as np @@ -17,7 +18,7 @@ from TTS.vocoder.models import setup_model if __name__ == "__main__": - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) parser = argparse.ArgumentParser() parser.add_argument("--model_path", type=str, help="Path to model checkpoint.") diff --git a/TTS/encoder/utils/prepare_voxceleb.py b/TTS/encoder/utils/prepare_voxceleb.py index da7522a512..37619ed0f8 100644 --- a/TTS/encoder/utils/prepare_voxceleb.py +++ b/TTS/encoder/utils/prepare_voxceleb.py @@ -216,7 +216,7 @@ def processor(directory, subset, force_process): if __name__ == "__main__": - setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) + setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) if len(sys.argv) != 4: print("Usage: python prepare_data.py save_directory user password") sys.exit() diff --git a/TTS/server/server.py b/TTS/server/server.py index f410fb7539..6a4642f9a2 100644 --- a/TTS/server/server.py +++ b/TTS/server/server.py @@ -25,7 +25,7 @@ from TTS.utils.synthesizer import Synthesizer logger = logging.getLogger(__name__) -setup_logger("TTS", level=logging.INFO, screen=True, formatter=ConsoleFormatter()) +setup_logger("TTS", level=logging.INFO, stream=sys.stdout, formatter=ConsoleFormatter()) def create_argparser() -> argparse.ArgumentParser: diff --git a/TTS/utils/generic_utils.py b/TTS/utils/generic_utils.py index d7397f673d..54bb5ba825 100644 --- a/TTS/utils/generic_utils.py +++ b/TTS/utils/generic_utils.py @@ -2,9 +2,10 @@ import datetime import importlib import logging +import os import re from pathlib import Path -from typing 
import Any, Callable, Dict, Optional, TypeVar, Union +from typing import Any, Callable, Dict, Optional, TextIO, TypeVar, Union import torch from packaging.version import Version @@ -107,25 +108,34 @@ def setup_logger( level: int = logging.INFO, *, formatter: Optional[logging.Formatter] = None, - screen: bool = False, - tofile: bool = False, - log_dir: str = "logs", + stream: Optional[TextIO] = None, + log_dir: Optional[Union[str, os.PathLike[Any]]] = None, log_name: str = "log", ) -> None: + """Set up a logger. + + Args: + logger_name: Name of the logger to set up + level: Logging level + formatter: Formatter for the logger + stream: Add a StreamHandler for the given stream, e.g. sys.stderr or sys.stdout + log_dir: Folder to write the log file (no file created if None) + log_name: Prefix of the log file name + """ lg = logging.getLogger(logger_name) if formatter is None: formatter = logging.Formatter( "%(asctime)s.%(msecs)03d - %(levelname)-8s - %(name)s: %(message)s", datefmt="%y-%m-%d %H:%M:%S" ) lg.setLevel(level) - if tofile: + if log_dir is not None: Path(log_dir).mkdir(exist_ok=True, parents=True) log_file = Path(log_dir) / f"{log_name}_{get_timestamp()}.log" fh = logging.FileHandler(log_file, mode="w") fh.setFormatter(formatter) lg.addHandler(fh) - if screen: - sh = logging.StreamHandler() + if stream is not None: + sh = logging.StreamHandler(stream) sh.setFormatter(formatter) lg.addHandler(sh) From f89ce41924cbeaa96469b87ba855094d82c2cbcf Mon Sep 17 00:00:00 2001 From: Enno Hermann Date: Thu, 19 Dec 2024 17:22:23 +0100 Subject: [PATCH 254/255] fix(xtts): voice_dir should remain None if not specified (#224) --- TTS/utils/synthesizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py index 52f5a86de5..517cb7d2b2 100644 --- a/TTS/utils/synthesizer.py +++ b/TTS/utils/synthesizer.py @@ -91,7 +91,7 @@ def __init__( self.d_vector_dim = 0 self.seg = self._get_segmenter("en") self.use_cuda = use_cuda - self.voice_dir = optional_to_str(voice_dir) + self.voice_dir = voice_dir if self.use_cuda: assert torch.cuda.is_available(), "CUDA is not availabe on this machine." 
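For reference, a usage sketch of the reworked `setup_logger()` from the logging patch above: a stream handler goes to stderr when audio is piped to stdout (mirroring `synthesize.py`), and a timestamped log file is only written when `log_dir` is passed; the `logs` directory name and the `pipe_out` flag value are illustrative:

```python
import logging
import sys

from TTS.utils.generic_utils import ConsoleFormatter, setup_logger

pipe_out = True  # e.g. the parsed --pipe_out CLI flag
stream = sys.stderr if pipe_out else sys.stdout

# StreamHandler on the chosen stream plus a FileHandler under logs/ (created on demand).
setup_logger("TTS", level=logging.INFO, formatter=ConsoleFormatter(), stream=stream, log_dir="logs")
logging.getLogger("TTS").info("logging configured")
```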
From 98080e282c42ebe0835117017d7628715e2dcda9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Z=C3=ADpek?= Date: Sat, 28 Dec 2024 13:25:46 +0100 Subject: [PATCH 255/255] fix(xtts): use correct language code for Czech num2words call (#237) * Fix num2words call using non-standard lang code * build: update minimum num2words version --------- Co-authored-by: Enno Hermann --- TTS/tts/layers/xtts/tokenizer.py | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/TTS/tts/layers/xtts/tokenizer.py b/TTS/tts/layers/xtts/tokenizer.py index 076727239c..fec8358deb 100644 --- a/TTS/tts/layers/xtts/tokenizer.py +++ b/TTS/tts/layers/xtts/tokenizer.py @@ -501,12 +501,12 @@ def _remove_dots(m): def _expand_decimal_point(m, lang="en"): amount = m.group(1).replace(",", ".") - return num2words(float(amount), lang=lang if lang != "cs" else "cz") + return num2words(float(amount), lang=lang) def _expand_currency(m, lang="en", currency="USD"): amount = float((re.sub(r"[^\d.]", "", m.group(0).replace(",", ".")))) - full_amount = num2words(amount, to="currency", currency=currency, lang=lang if lang != "cs" else "cz") + full_amount = num2words(amount, to="currency", currency=currency, lang=lang) and_equivalents = { "en": ", ", @@ -535,11 +535,11 @@ def _expand_currency(m, lang="en", currency="USD"): def _expand_ordinal(m, lang="en"): - return num2words(int(m.group(1)), ordinal=True, lang=lang if lang != "cs" else "cz") + return num2words(int(m.group(1)), ordinal=True, lang=lang) def _expand_number(m, lang="en"): - return num2words(int(m.group(0)), lang=lang if lang != "cs" else "cz") + return num2words(int(m.group(0)), lang=lang) def expand_numbers_multilingual(text, lang="en"): diff --git a/pyproject.toml b/pyproject.toml index 16d990c169..a7baf29e31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -86,7 +86,7 @@ dependencies = [ # Bark "encodec>=0.1.1", # XTTS - "num2words>=0.5.11", + "num2words>=0.5.14", "spacy[ja]>=3,<3.8", ]
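A quick way to exercise the fixed code path is the module-level helper from the tokenizer hunk above; with `num2words>=0.5.14` the standard `cs` code is passed straight through (the Czech sample sentence is arbitrary):

```python
from TTS.tts.layers.xtts.tokenizer import expand_numbers_multilingual

# "3" is expanded via num2words(3, lang="cs") rather than the non-standard "cz" code.
print(expand_numbers_multilingual("Mám 3 kočky.", lang="cs"))
```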