Fix fluent test for windows (pytorch#2510)
Summary:
The Fluent Speech Commands dataset test currently fails on Windows, due to newline generation in the csv writer used by the test and to incorrect path parsing in the dataset implementation.
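For background, a minimal sketch (not taken from this PR) of the csv newline pitfall: the csv module writes its own "\r\n" row terminators, so opening the file in text mode without newline="" lets Windows newline translation turn them into "\r\r\n", which shows up as spurious blank rows on read-back. File names below are placeholders.

import csv

rows = [["path", "speakerId"], ["wavs/speakers/spk1/sample01.wav", "spk1"]]

# Problematic on Windows: text-mode translation rewrites the writer's "\r\n" as "\r\r\n".
with open("demo_bad.csv", "w") as f:
    csv.writer(f).writerows(rows)

# Portable: newline="" hands row-terminator handling entirely to the csv module.
with open("demo_good.csv", "w", newline="") as f:
    csv.writer(f).writerows(rows)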

Pull Request resolved: pytorch#2510

Reviewed By: carolineechen

Differential Revision: D37573203

Pulled By: mthrok

fbshipit-source-id: 4868bc649690c7e596b002686c6128ce735d3564
Caroline Chen authored and facebook-github-bot committed Jul 6, 2022
Parent: ef8bd7b · Commit: 09daa43
Showing 2 changed files with 20 additions and 14 deletions.
test/torchaudio_unittest/datasets/fluentcommands_test.py (9 additions, 8 deletions)
@@ -2,7 +2,6 @@
 import os
 import random
 import string
-from pathlib import Path

 from torchaudio.datasets import fluentcommands
 from torchaudio_unittest.common_utils import get_whitenoise, save_wav, TempDirMixin, TorchaudioTestCase
@@ -49,7 +48,7 @@ def _gen_csv(dataset_dir: str, subset: str, init_seed: int):
        idx += 1

    csv_path = os.path.join(dataset_dir, "data", f"{subset}_data.csv")
-    with open(csv_path, "w") as csv_file:
+    with open(csv_path, "w", newline="") as csv_file:
        file_writer = csv.writer(csv_file)
        file_writer.writerows(data)

@@ -73,14 +72,15 @@ def _save_samples(dataset_dir: str, subset: str, seed: int):
            n_channels=1,
            seed=seed,
        )
-        filename = row[path_idx]
-        wav_file = os.path.join(dataset_dir, filename)
-        save_wav(wav_file, wav, SAMPLE_RATE)
-
-        path = Path(wav_file).stem
+        path = row[path_idx]
+        filename = path.split("/")[-1]
+        filename = filename.split(".")[0]
        speaker_id, transcription, act, obj, loc = row[2:]

-        sample = wav, SAMPLE_RATE, path, speaker_id, transcription, act, obj, loc
+        wav_file = os.path.join(dataset_dir, "wavs", "speakers", speaker_id, f"{filename}.wav")
+        save_wav(wav_file, wav, SAMPLE_RATE)
+
+        sample = wav, SAMPLE_RATE, filename, speaker_id, transcription, act, obj, loc
        samples.append(sample)

        seed += 1
@@ -91,6 +91,7 @@ def _save_samples(dataset_dir: str, subset: str, seed: int):
 def get_mock_dataset(dataset_dir: str):
    data_folder = os.path.join(dataset_dir, "data")
    wav_folder = os.path.join(dataset_dir, "wavs", "speakers")
+
    os.makedirs(data_folder, exist_ok=True)
    os.makedirs(wav_folder, exist_ok=True)
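As the rewritten test above suggests, the CSV's "path" column stores POSIX-style values such as "wavs/speakers/<speaker_id>/<file>.wav", so the portable approach is to split on "/" for the bare file name and rebuild the on-disk path with os.path.join. A small illustrative sketch (the row value below is hypothetical):

import os
from pathlib import PurePosixPath

csv_path_field = "wavs/speakers/spk1/sample01.wav"  # hypothetical CSV "path" value

# Splitting on "/" is safe on every platform because the CSV always uses "/".
file_name = csv_path_field.split("/")[-1].split(".")[0]  # "sample01"

# PurePosixPath yields the same stem and reads a bit more clearly.
assert file_name == PurePosixPath(csv_path_field).stem

# Rebuilding the location with os.path.join keeps the separators native to the OS.
wav_file = os.path.join("dataset_root", "wavs", "speakers", "spk1", f"{file_name}.wav")
print(wav_file)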
torchaudio/datasets/fluentcommands.py (11 additions, 6 deletions)
@@ -22,6 +22,9 @@ def __init__(self, root: Union[str, Path], subset: str = "train"):
        root = os.fspath(root)
        self._path = os.path.join(root, "fluent_speech_commands_dataset")

+        if not os.path.isdir(self._path):
+            raise RuntimeError("Dataset not found.")
+
        subset_path = os.path.join(self._path, "data", f"{subset}_data.csv")
        with open(subset_path) as subset_csv:
            subset_reader = csv.reader(subset_csv)
@@ -40,15 +43,17 @@ def __getitem__(self, n: int):
            n (int): The index of the sample to be loaded
        Returns:
-            (Tensor, int, Path, int, str, str, str, str):
-            ``(waveform, sample_rate, path, speaker_id, transcription, action, object, location)``
+            (Tensor, int, str, int, str, str, str, str):
+            ``(waveform, sample_rate, file_name, speaker_id, transcription, action, object, location)``
        """

        sample = self.data[n]
-        wav_path = os.path.join(self._path, sample[self.header.index("path")])
-        wav, sample_rate = torchaudio.load(wav_path)

-        path = Path(wav_path).stem
+        file_name = sample[self.header.index("path")].split("/")[-1]
+        file_name = file_name.split(".")[0]
        speaker_id, transcription, action, obj, location = sample[2:]

-        return wav, sample_rate, path, speaker_id, transcription, action, obj, location
+        wav_path = os.path.join(self._path, "wavs", "speakers", speaker_id, f"{file_name}.wav")
+        wav, sample_rate = torchaudio.load(wav_path)
+
+        return wav, sample_rate, file_name, speaker_id, transcription, action, obj, location
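For context, a minimal usage sketch of the dataset after this change, assuming the public class torchaudio.datasets.FluentSpeechCommands that wraps this module and a root directory containing fluent_speech_commands_dataset/ (the root path below is a placeholder):

from torchaudio.datasets import FluentSpeechCommands

dataset = FluentSpeechCommands("path/to/root", subset="train")

# Each item now carries the bare file name (no directory, no extension) instead of a Path.
waveform, sample_rate, file_name, speaker_id, transcription, action, obj, location = dataset[0]
print(file_name, speaker_id, transcription, action, obj, location)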
