Skip to content

Commit

Permalink
refactor(dataset): get audio length with torchaudio
Browse files Browse the repository at this point in the history
Removes a (GPL) dependency
  • Loading branch information
eginhard committed Mar 14, 2024
1 parent e5c6da1 commit adbcba0
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 5 deletions.
8 changes: 4 additions & 4 deletions TTS/tts/datasets/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
import random
from typing import Dict, List, Union

-import mutagen
 import numpy as np
 import torch
+import torchaudio
import tqdm
from torch.utils.data import Dataset

Expand Down Expand Up @@ -43,15 +43,15 @@ def string2filename(string):
return filename


-def get_audio_size(audiopath):
+def get_audio_size(audiopath) -> int:
+    """Return the number of samples in the audio file."""
     extension = audiopath.rpartition(".")[-1].lower()
     if extension not in {"mp3", "wav", "flac"}:
         raise RuntimeError(
             f"The audio format {extension} is not supported, please convert the audio files to mp3, flac, or wav format!"
         )

-    audio_info = mutagen.File(audiopath).info
-    return int(audio_info.length * audio_info.sample_rate)
+    return torchaudio.info(audiopath).num_frames


class TTSDataset(Dataset):
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ anyascii>=0.3.0
pyyaml>=6.0
fsspec[http]>=2023.6.0 # <= 2023.9.1 makes aux tests fail
packaging>=23.1
-mutagen==1.47.0
# deps for inference
pysbd>=0.3.4
# deps for notebooks
Expand Down

0 comments on commit adbcba0

Please sign in to comment.