From 9bae7e0f8166ced08930e6cb1ed11768c108cc29 Mon Sep 17 00:00:00 2001
From: Alex Iribarren
Date: Sat, 8 Jul 2023 18:25:03 +0000
Subject: [PATCH] Configurable verbose output

---
 TTS/utils/synthesizer.py | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/TTS/utils/synthesizer.py b/TTS/utils/synthesizer.py
index 760738467e..cd5175b2b9 100644
--- a/TTS/utils/synthesizer.py
+++ b/TTS/utils/synthesizer.py
@@ -257,6 +257,7 @@ def tts(
         style_text=None,
         reference_wav=None,
         reference_speaker_name=None,
+        verbose: bool = True,
         **kwargs,
     ) -> List[int]:
         """🐸 TTS magic. Run all the models and generate speech.
@@ -270,6 +271,7 @@ def tts(
             style_text ([type], optional): transcription of style_wav for Capacitron. Defaults to None.
             reference_wav ([type], optional): reference waveform for voice conversion. Defaults to None.
             reference_speaker_name ([type], optional): speaker id of reference waveform. Defaults to None.
+            verbose (bool, optional): print verbose output. Defaults to True.
         Returns:
             List[int]: [description]
         """
@@ -283,8 +285,9 @@ def tts(
 
         if text:
             sens = self.split_into_sentences(text)
-            print(" > Text splitted to sentences.")
-            print(sens)
+            if verbose:
+                print(" > Text splitted to sentences.")
+                print(sens)
 
         # handle multi-speaker
         if "voice_dir" in kwargs:
@@ -397,7 +400,8 @@ def tts(
                         self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate,
                     ]
                     if scale_factor[1] != 1:
-                        print(" > interpolating tts model output.")
+                        if verbose:
+                            print(" > interpolating tts model output.")
                         vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input)
                     else:
                         vocoder_input = torch.tensor(vocoder_input).unsqueeze(0)  # pylint: disable=not-callable
@@ -462,7 +466,8 @@ def tts(
                     self.vocoder_config["audio"]["sample_rate"] / self.tts_model.ap.sample_rate,
                 ]
                 if scale_factor[1] != 1:
-                    print(" > interpolating tts model output.")
+                    if verbose:
+                        print(" > interpolating tts model output.")
                     vocoder_input = interpolate_vocoder_input(scale_factor, vocoder_input)
                 else:
                     vocoder_input = torch.tensor(vocoder_input).unsqueeze(0)  # pylint: disable=not-callable
@@ -475,9 +480,10 @@ def tts(
             waveform = waveform.numpy()
         wavs = waveform.squeeze()
 
-        # compute stats
-        process_time = time.time() - start_time
-        audio_time = len(wavs) / self.tts_config.audio["sample_rate"]
-        print(f" > Processing time: {process_time}")
-        print(f" > Real-time factor: {process_time / audio_time}")
+        if verbose:
+            # compute stats
+            process_time = time.time() - start_time
+            audio_time = len(wavs) / self.tts_config.audio["sample_rate"]
+            print(f" > Processing time: {process_time}")
+            print(f" > Real-time factor: {process_time / audio_time}")
         return wavs
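
Usage note (not part of the patch): a minimal sketch of how a caller could exercise the new flag, assuming this patch is applied and a Synthesizer has been constructed from an already-downloaded checkpoint and config. The file paths below are placeholders, not files referenced by this change; the only behavior this PR touches is the effect of verbose=False.

    from TTS.utils.synthesizer import Synthesizer

    # Placeholder paths: point these at a real checkpoint/config pair,
    # e.g. one obtained via TTS.utils.manage.ModelManager.
    synthesizer = Synthesizer(
        tts_checkpoint="path/to/model_file.pth",
        tts_config_path="path/to/config.json",
    )

    # Default is unchanged: verbose=True keeps the existing prints
    # (sentence split, interpolation notice, processing time, real-time factor).
    wav = synthesizer.tts("Hello world.")

    # With this patch applied, the same call can run quietly.
    wav_quiet = synthesizer.tts("Hello world.", verbose=False)

    synthesizer.save_wav(wav_quiet, "output.wav")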