-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathwav2json.py
172 lines (147 loc) · 5.3 KB
/
wav2json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
#!/usr/bin/env python
"""Generate json representations of audio files.
Simple Python 3 script that computes a json data representation of a single
waveform by first taking the average of the N-channels of the input and then
using linear interpolation to shrink/expand the original audio data to the
requested number of output samples.
Note that this type of interpolation is NOT suitable for audio resampling in
general, but serves to reduce/expand the amount of audio data for visualization
purposes.
"""
import argparse
import os.path
import scipy.io.wavfile
import numpy
import json
import decimal
import math
# parse input arguments
def parseArgs(argv=None):
    """Parse and validate the command line arguments.

    Args:
        argv: Optional list of argument strings. When None (the default)
            argparse reads the arguments from sys.argv[1:], which is what
            the command line entry point relies on.

    Returns:
        argparse.Namespace with the attributes ifile, ofile, samples,
        precision, normalize and logarithmic. When no output file is given,
        ofile is the input path with its extension replaced by ".json".

    Invalid arguments are reported through parser.error(), which prints a
    usage message and raises SystemExit.
    """

    def check_audio_file_ext(allowed):
        """Build an argparse action accepting only the given extensions."""

        class Action(argparse.Action):
            def __call__(self, parser, namespace, fname, option_string=None):
                # Compare case-insensitively so that e.g. "take1.WAV" is
                # accepted just like "take1.wav".
                ext = os.path.splitext(fname)[1][1:].lower()
                if ext not in allowed:
                    option_string = '({})'.format(option_string) if \
                        option_string else ''
                    parser.error(
                        "file extension is not one of {}{}"
                        .format(allowed, option_string)
                    )
                setattr(namespace, self.dest, fname)

        return Action

    def check_precision_range(prec_range):
        """Build an argparse action accepting values in range(*prec_range)."""

        class Action(argparse.Action):
            def __call__(self, parser, namespace, prec, option_string=None):
                if prec not in range(*prec_range):
                    option_string = '({})'.format(option_string) if \
                        option_string else ''
                    parser.error(
                        "float precision is not in range [{}, {}]{}"
                        .format(
                            prec_range[0], prec_range[1] - 1, option_string
                        )
                    )
                setattr(namespace, self.dest, prec)

        return Action

    def positive_int(text):
        """argparse type: an integer that is strictly greater than zero."""
        value = int(text)  # a ValueError here is reported by argparse
        if value < 1:
            raise argparse.ArgumentTypeError(
                "must be a positive integer, got {}".format(value)
            )
        return value

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-i", "--ifile",
        action=check_audio_file_ext({'wav'}),
        help="Path to input file",
        required=True
    )
    parser.add_argument(
        "-o", "--ofile",
        action=check_audio_file_ext({'json'}),
        help="Path to output file in JSON format"
    )
    parser.add_argument(
        "-s", "--samples",
        # The sample count is used as a divisor when resampling, so zero
        # and negative values are rejected up front.
        type=positive_int,
        help="Number of sample points for the waveform representation",
        default=800
    )
    parser.add_argument(
        "-p", "--precision",
        action=check_precision_range((1, 7)),
        type=int,
        help="Precision of the floats representing the waveform amplitude "
             "[1, 6]",
        default=6
    )
    parser.add_argument(
        "-n", "--normalize",
        action="store_true",
        help="If set, waveform amplitudes will be normalized to unity"
    )
    parser.add_argument(
        "-l", "--logarithmic",
        action="store_true",
        help="If set, use a logarithmic (e.g. decibel) scale for the "
             "waveform amplitudes"
    )

    args = parser.parse_args(argv)
    if args.ofile is None:  # use path of input if no output path is specified
        args.ofile = os.path.splitext(args.ifile)[0] + ".json"
    return args
def lin2log(val):
    """Map a linear amplitude onto a signed logarithmic scale.

    The magnitude of val is clipped to the interval 0.001 .. 1.0 (that is
    -60dB .. 0dB), converted with log10 and rescaled so that 0.001 maps to
    0.0 and 1.0 maps to 1.0. The sign of val is carried over to the result,
    giving an overall output range of -1.0 to 1.0.
    """
    magnitude = abs(val)
    # Clip to the -60dB .. 0dB window before taking the logarithm.
    if magnitude < 0.001:
        clipped = 0.001
    elif magnitude > 1.0:
        clipped = 1.0
    else:
        clipped = magnitude
    scaled = (3.0 + math.log10(clipped)) / 3.0
    return -scaled if val < 0 else scaled
def compute_waveform(data, num_points, logarithmic=False, normalize=False):
    """Reduce/expand audio samples to a fixed number of waveform points.

    Args:
        data: Sample array, either 1-D (mono) or 2-D with shape
            (samples, channels), as returned by scipy.io.wavfile.read.
        num_points: Number of points in the returned waveform (>= 1).
        logarithmic: If True, convert the amplitudes with lin2log().
        normalize: If True, scale the result so its peak magnitude is 1.0.

    Returns:
        1-D numpy float array with exactly num_points values. Multi-channel
        input is interpolated per channel and then averaged.

    Raises:
        ValueError: If num_points is smaller than 1 or data has no samples.

    Note: linear interpolation is NOT proper audio resampling, but it is
    sufficient for visualization purposes.
    """
    if num_points < 1:
        raise ValueError("number of output samples must be at least 1")

    data = numpy.asarray(data)
    if data.ndim == 1:
        total, num_channels = data.size, 1
    else:
        total, num_channels = data.shape
    if total == 0:
        raise ValueError("input contains no audio samples")

    # Convert fixed point audio data to floating point range -1. to 1.
    if data.dtype == numpy.uint8:
        # 8-bit PCM is unsigned with its zero level at 128.
        data = (data.astype(numpy.float64) - 128.0) / 128.0
    elif data.dtype == numpy.int16:
        data = data / (2. ** 15)
    elif data.dtype == numpy.int32:
        data = data / (2. ** 31)

    # Build the sampling positions from an integer range: numpy.arange with
    # a fractional step can produce one element too many due to rounding.
    positions = numpy.arange(num_points) * (float(total) / num_points)
    source_index = numpy.arange(total)

    if data.ndim == 1:
        out = numpy.interp(positions, source_index, data)
    else:
        # First interpolate every channel, then average the channels.
        per_channel = numpy.array([
            numpy.interp(positions, source_index, data[:, channel])
            for channel in range(num_channels)
        ])
        out = numpy.sum(per_channel, 0) / num_channels

    if logarithmic:
        out = numpy.array([lin2log(value) for value in out])

    if normalize:
        peak = numpy.max(numpy.abs(out))
        # A silent signal has no peak; dividing would fill the output with
        # NaN, which is not valid JSON.
        if peak > 0:
            out = out / peak

    return out


if __name__ == "__main__":
    args = parseArgs()

    # The sample rate is not needed for the waveform representation.
    _, data = scipy.io.wavfile.read(args.ifile)

    out = compute_waveform(
        data,
        args.samples,
        logarithmic=args.logarithmic,
        normalize=args.normalize
    )

    # Round every value to the requested number of decimals.
    fmt = "%.{}f".format(args.precision)

    # dump the waveform data as JSON file
    with open(args.ofile, 'w') as outfile:
        json.dump(
            {'data': [float(fmt % item) for item in out.tolist()]},
            outfile
        )
    print("JSON file written to disk.")