-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
First draft of timeit loading #13
base: master
Are you sure you want to change the base?
Changes from all commits
2f84e55
5dc5131
b2155e2
e162cff
fe4df4f
b74f461
b4be19c
0c9a78a
e943535
312116b
b39094a
09b93e8
14bce1d
ee348ff
b5c083a
92a728d
4942dea
dcf79a7
5642f8a
d71fbe6
967bece
493d4a4
48b9b21
ce5929f
503ef5e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -61,7 +61,6 @@ def __len__(self): | |
'soundfile', | ||
'sox', | ||
'audioread', | ||
# 'pydub', # too slow | ||
] | ||
|
||
for lib in libs: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,11 @@ | ||
import matplotlib | ||
matplotlib.use('Agg') | ||
import os | ||
import os.path | ||
import random | ||
import time | ||
import timeit | ||
import argparse | ||
import utils | ||
import loaders | ||
import numpy as np | ||
import functools | ||
|
||
|
||
def get_files(dir, extension): | ||
|
@@ -26,24 +24,31 @@ class AudioFolder(object): | |
def __init__( | ||
self, | ||
root, | ||
download=True, | ||
extension='wav', | ||
lib="librosa", | ||
extension='wav' | ||
): | ||
self.root = os.path.expanduser(root) | ||
self.data = [] | ||
self.audio_files = get_files(dir=self.root, extension=extension) | ||
self.loader_function = getattr(loaders, lib) | ||
|
||
def __getitem__(self, index): | ||
return self.loader_function(self.audio_files[index]) | ||
return self.audio_files[index] | ||
|
||
def __len__(self): | ||
return len(self.audio_files) | ||
|
||
|
||
def test_np_loading(fp, lib):
    """Load one audio file with the given library and check its peak.

    The loader is looked up by name on the ``loaders`` module as
    ``load_<lib>`` and called with ``fp`` as its only argument.

    Parameters
    ----------
    fp
        Value handed to the loader function (the audio file to load).
    lib : str
        Suffix of the loader name; ``'load_' + lib`` must be an attribute
        of ``loaders``.

    Returns
    -------
    bool
        ``True`` if the maximum of the loaded audio is greater than zero,
        ``False`` otherwise.
    """
    import loaders

    loader = getattr(loaders, 'load_' + lib)
    samples = loader(fp)
    # A strictly positive peak is taken as the sign of a successful load.
    return bool(np.max(samples) > 0)
|
||
|
||
if __name__ == "__main__": | ||
|
||
parser = argparse.ArgumentParser(description='Process some integers.') | ||
parser.add_argument('--ext', type=str, default="wav") | ||
args = parser.parse_args() | ||
|
@@ -62,11 +67,10 @@ def __len__(self): | |
libs = [ | ||
'ar_gstreamer', | ||
'ar_ffmpeg', | ||
'ar_mad', | ||
'aubio', | ||
'pydub', | ||
'soundfile', | ||
'librosa', | ||
'soundfile', | ||
'librosa', | ||
'scipy', | ||
'scipy_mmap' | ||
] | ||
|
@@ -75,29 +79,24 @@ def __len__(self): | |
print("Testing: %s" % lib) | ||
for root, dirs, fnames in sorted(os.walk('AUDIO')): | ||
for audio_dir in dirs: | ||
try: | ||
duration = int(audio_dir) | ||
dataset = AudioFolder( | ||
os.path.join(root, audio_dir), | ||
lib='load_' + lib, | ||
extension=args.ext | ||
) | ||
|
||
|
||
start = time.time() | ||
|
||
for fp in dataset.audio_files: | ||
audio = dataset.loader_function(fp) | ||
np.max(audio) | ||
|
||
end = time.time() | ||
store.append( | ||
ext=args.ext, | ||
lib=lib, | ||
duration=duration, | ||
time=float(end-start) / len(dataset), | ||
duration = int(audio_dir) | ||
dataset = AudioFolder( | ||
os.path.join(root, audio_dir), | ||
extension=args.ext | ||
) | ||
|
||
# for fp in dataset.audio_files: | ||
for fp in dataset.audio_files: | ||
time = timeit.timeit( | ||
functools.partial(test_np_loading, fp, lib), | ||
number=10 | ||
Comment on lines
+89
to
+92
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Now you're iterating over the files, loading each one 10 times in a row, then storing the time it took to load the last file 10 times. I think you'd want to divide the return value by 10, and accumulate the time over all files? Also you may want to use […]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks Jan. Will have some time this weekend to finish this up |
||
) | ||
except: | ||
continue | ||
|
||
store.df.to_pickle("results/benchmark_%s_%s.pickle" % ("np", args.ext)) | ||
store.append( | ||
ext=args.ext, | ||
lib=lib, | ||
duration=duration, | ||
time=time, | ||
) | ||
|
||
store.df.to_pickle("results/benchmark_%s_%s.pickle" % ("np", args.ext)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@bmcfee do you know a way to ignore measurements when using timeit?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hrm, never thought about it; not seeing an obvious workaround in the timeit API.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
OK, so I guess the only alternative then is to do a dry run at the beginning, just to check whether the functions return valid output for a given test file...