Skip to content

Commit

Permalink
Uses mutagen for all audio formats
Browse files Browse the repository at this point in the history
  • Loading branch information
Edresson committed Nov 7, 2023
1 parent bfe584e commit 8cc2dc0
Showing 1 changed file with 10 additions and 6 deletions.
16 changes: 10 additions & 6 deletions TTS/tts/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
from TTS.utils.audio.numpy_transforms import compute_energy as calculate_energy

 from mutagen.mp3 import MP3
+from mutagen.flac import FLAC
+from mutagen.wave import WAVE

# to prevent too many open files error as suggested here
# https://github.com/pytorch/pytorch/issues/11201#issuecomment-421146936
Expand Down Expand Up @@ -48,12 +50,14 @@ def get_audio_size(audiopath):
     extension = audiopath.rpartition(".")[-1].lower()
     if extension == "mp3":
         audio_info = MP3(audiopath).info
-        return int(audio_info.length * audio_info.sample_rate)
-    if extension in ("wav", "flac"):
-        compress_factor = 8
-        bitrate = 16 # assuming 16bit audio
-        return int(os.path.getsize(audiopath) / bitrate * compress_factor)
-    raise RuntimeError(f"The audio format {extension} is not supported, please convert the audio files for mp3, flac or wav format!")
+    elif extension == "wav":
+        audio_info = WAVE(audiopath).info
+    elif extension == "flac":
+        audio_info = FLAC(audiopath).info
+    else:
+        raise RuntimeError(f"The audio format {extension} is not supported, please convert the audio files for mp3, flac or wav format!")
+
+    return int(audio_info.length * audio_info.sample_rate)


class TTSDataset(Dataset):
Expand Down

0 comments on commit 8cc2dc0

Please sign in to comment.