-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjson_generator.py
52 lines (44 loc) · 1.79 KB
/
json_generator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from __future__ import absolute_import, division, print_function
import argparse
import json
import os
import wave
def _wav_duration(audio_file):
    """Return the length of the wav file at *audio_file* in seconds."""
    audio = wave.open(audio_file)
    # try/finally rather than `with`: keeps the handle from leaking on error
    # while staying compatible with Python 2 (the file targets both).
    try:
        return float(audio.getnframes()) / audio.getframerate()
    finally:
        audio.close()


def main(data_directory, output_file):
    """Write a JSON-lines manifest of the audio clips under a data directory.

    The directory is walked two levels deep (``<group>/<speaker>/``); entries
    whose name starts with ``.`` are skipped at both levels. Each leaf
    directory must contain a ``<group>-<speaker>.trans.txt`` transcript whose
    lines read ``<file_id> <word> <word> ...`` and one ``<file_id>.wav`` per
    transcript line. Blank transcript lines are ignored.

    NOTE(review): this layout looks like LibriSpeech converted to wav --
    confirm against the dataset actually used.

    Args:
        data_directory: Root directory holding the group sub-directories.
        output_file: Path of the manifest to write (truncated if it exists).
            One JSON object is written per line with the keys ``key`` (path
            to the wav file), ``duration`` (frame count / frame rate) and
            ``text`` (transcript joined by single spaces, lower-cased).

    Errors raised by ``os.listdir``, ``open`` and ``wave.open`` (missing or
    unreadable directories/files) propagate to the caller unchanged.
    """
    records = []
    for group in os.listdir(data_directory):
        if group.startswith('.'):
            continue
        group_path = os.path.join(data_directory, group)
        for speaker in os.listdir(group_path):
            if speaker.startswith('.'):
                continue
            leaf_path = os.path.join(group_path, speaker)
            labels_file = os.path.join(
                leaf_path, '{}-{}.trans.txt'.format(group, speaker))
            # Context manager so the transcript handle is closed promptly
            # instead of lingering until garbage collection.
            with open(labels_file) as transcript:
                for line in transcript:
                    fields = line.split()
                    if not fields:
                        # Blank line: nothing to index (would otherwise
                        # fail on fields[0]).
                        continue
                    audio_file = os.path.join(leaf_path, fields[0]) + '.wav'
                    records.append({
                        'key': audio_file,
                        'duration': _wav_duration(audio_file),
                        'text': ' '.join(fields[1:]).lower(),
                    })

    # Output is opened only after the scan has finished, so a failure while
    # reading the data never leaves a partially written manifest behind.
    with open(output_file, 'w') as out_file:
        for record in records:
            out_file.write(json.dumps(record) + '\n')
if __name__ == '__main__':
    # Script entry point: read the two required positional paths from the
    # command line and hand them to main().
    cli = argparse.ArgumentParser()
    cli.add_argument('data_directory', help='Path to data directory',
                     type=str)
    cli.add_argument('output_file', help='Path to output file', type=str)
    # The parsed namespace holds exactly main()'s two parameters, so it can
    # be expanded straight into keyword arguments.
    main(**vars(cli.parse_args()))