-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun.py
174 lines (158 loc) · 7.99 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import os
import argparse
import subprocess
import time
class Benchmark:
    """Description of a single k-means benchmark executable invocation.

    Holds everything needed to launch one benchmark binary: where to run it
    from, which program to start, and the CLI arguments it expects.
    """

    def __init__(self, working_dir, name, program,
                 input_points_path,
                 num_of_iterations,
                 num_of_clusters,
                 num_of_threads,
                 output_dir,
                 benchmark_csv_path):
        # Plain value-object init: store every argument on the instance.
        self.working_dir = working_dir
        self.name = name
        self.program = program
        self.input_points_path = input_points_path
        self.num_of_iterations = num_of_iterations
        self.num_of_clusters = num_of_clusters
        self.num_of_threads = num_of_threads
        self.output_dir = output_dir
        self.benchmark_csv_path = benchmark_csv_path

    def run(self):
        """Launch the benchmark program from its working directory.

        Temporarily switches the process CWD to ``working_dir`` (the
        binaries resolve their paths relative to it) and restores the
        previous CWD afterwards.
        """
        saved_cwd = os.getcwd()
        os.chdir(self.working_dir)
        print(self.input_points_path)
        command = [
            self.program,
            '--input_points_path', self.input_points_path,
            '--num_of_iterations', str(self.num_of_iterations),
            '--num_of_clusters', str(self.num_of_clusters),
            '--num_of_threads', str(self.num_of_threads),
            '--output_dir', self.output_dir,
            '--benchmark_output_csv', self.benchmark_csv_path,
        ]
        subprocess.run(command)
        os.chdir(saved_cwd)
if __name__ == '__main__':
    # Build the k-means binaries with CMake, then benchmark every
    # implementation (C++ sequential, taskflow, C++ parallel, sklearn)
    # against each CSV file found in data/clusters/, appending timings to a
    # shared benchmark CSV.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--num_of_iterations", type=int, default=32)
    arg_parser.add_argument("--num_of_clusters", type=int, default=12)
    arg_parser.add_argument("--num_of_threads", type=int, default=16)
    # BUG FIX: argparse's type=bool converts ANY non-empty string to True
    # (bool("False") is True), so "--generate_data False" used to enable the
    # flag.  store_true keeps the default of False and parses correctly.
    arg_parser.add_argument("--generate_data", action='store_true')
    arg_parser.add_argument("--benchmark_name", type=str, default='benchmark')
    arg_parser.add_argument("--output_path", type=str, default='.')
    # BUG FIX: same type=bool pitfall as --generate_data above.
    arg_parser.add_argument("--append", action='store_true')
    args = arg_parser.parse_args()

    num_of_iterations = args.num_of_iterations
    num_of_clusters = args.num_of_clusters
    num_of_threads = args.num_of_threads
    generate_data = args.generate_data  # NOTE(review): parsed but never used below
    benchmark_name = args.benchmark_name
    output_path = args.output_path
    append_to_previous_benchmark = args.append

    # Directory tree the benchmark executables write their outputs into.
    dirs_to_make = ['data',
                    'data/clusters',
                    'data/benchmarks',
                    'data/labels',
                    'data/output',
                    'data/output/cpp_parallel',
                    'data/output/cpp_sequential',
                    'data/output/python_sklearn']
    print('Creating directories')
    for dir_to_make in dirs_to_make:
        # exist_ok replaces the old try/except FileExistsError dance.
        os.makedirs(os.path.join(output_path, dir_to_make), exist_ok=True)

    # Create (or append to) the benchmark CSV; the header row is written only
    # for a fresh file.  NOTE(review): this path ignores --output_path while
    # the directories above honour it -- confirm intent when output_path != '.'.
    benchmark_output_csv = os.path.join('data/benchmarks', benchmark_name + ".csv")
    mode = 'a' if append_to_previous_benchmark else 'w'
    with open(benchmark_output_csv, mode) as f:  # with-statement closes f; no f.close() needed
        if not append_to_previous_benchmark:
            f.write("file,type,num_of_samples,num_of_features,num_of_iterations,num_of_clusters,num_of_threads,"
                    "load_csv,fit,save_to_csv\n")

    print("num_of_iterations: ", num_of_iterations)
    print("num_of_clusters: ", num_of_clusters)
    print("num_of_threads: ", num_of_threads)

    # Work from the script's own directory so the relative paths below resolve.
    os.chdir(os.path.dirname(__file__))
    print('Generating cmake...')
    subprocess.run(['cmake',
                    '-DCMAKE_BUILD_TYPE=Release',
                    '-DCMAKE_DEPENDS_USE_COMPILER=FALSE',
                    '-GUnix Makefiles',
                    '-S .',
                    '-B cmake-build-release/'])
    os.chdir('cmake-build-release/')
    print('Building binaries...')
    subprocess.run(['make', 'k_means_cpp_cpu_parallel'])
    subprocess.run(['make', 'k_means_cpp_cpu_sequential'])
    subprocess.run(['make', 'k_means_taskflow_cpu_parallel'])
    os.chdir('..')

    data_path = 'data/clusters/'
    # Sort so every implementation processes the files in the same order.
    data_files = sorted(os.listdir(data_path))

    # Shared CSV path argument, hoisted out of the loops.  (The cpu-parallel
    # loop previously built it with string concatenation while the others
    # used os.path.join; the join form is used consistently now.)
    benchmark_csv_arg = str(os.path.join('../data/benchmarks/', benchmark_name + ".csv"))

    os.chdir('bin/')
    print('Benchmarking cpu sequential...')
    start = time.time()
    for csv_file in data_files:
        path_to_input_points_from_exe_working_dir = str(os.path.join('..', data_path, csv_file))
        print(path_to_input_points_from_exe_working_dir)
        # The sequential binary takes no --num_of_threads flag.
        subprocess.run(['./k_means_cpp_cpu_sequential',
                        '--input_points_path', path_to_input_points_from_exe_working_dir,
                        '--num_of_iterations', str(num_of_iterations),
                        '--num_of_clusters', str(num_of_clusters),
                        '--output_dir', '../data/output/cpp_sequential/',
                        '--benchmark_output_csv', benchmark_csv_arg])
    print('Total elapsed time for cpu sequential: ', time.time() - start)

    print('Benchmarking taskflow cpu parallel...')
    start = time.time()
    for csv_file in data_files:
        path_to_input_points_from_exe_working_dir = str(os.path.join('..', data_path, csv_file))
        print(path_to_input_points_from_exe_working_dir)
        # NOTE(review): 'data/output/taskflow_cpu' is not in dirs_to_make
        # above -- confirm the binary creates it, or add it to the list.
        subprocess.run(['./k_means_taskflow_cpu_parallel',
                        '--input_points_path', path_to_input_points_from_exe_working_dir,
                        '--num_of_iterations', str(num_of_iterations),
                        '--num_of_clusters', str(num_of_clusters),
                        '--num_of_threads', str(num_of_threads),
                        '--output_dir', '../data/output/taskflow_cpu/',
                        '--benchmark_output_csv', benchmark_csv_arg])
    print('Total elapsed time for taskflow: ', time.time() - start)

    print('Benchmarking cpu parallel...')
    start = time.time()
    for csv_file in data_files:
        path_to_input_points_from_exe_working_dir = str(os.path.join('..', data_path, csv_file))
        print(path_to_input_points_from_exe_working_dir)
        subprocess.run(['./k_means_cpp_cpu_parallel',
                        '--input_points_path', path_to_input_points_from_exe_working_dir,
                        '--num_of_iterations', str(num_of_iterations),
                        '--num_of_clusters', str(num_of_clusters),
                        '--num_of_threads', str(num_of_threads),
                        '--output_dir', '../data/output/cpp_parallel/',
                        '--benchmark_output_csv', benchmark_csv_arg])
    print('Total elapsed time for cpu parallel: ', time.time() - start)

    print('Benchmarking python sklearn...')
    os.chdir('../k_means_python_sklearn')
    start = time.time()
    for csv_file in data_files:
        path_to_input_points_from_exe_working_dir = str(os.path.join('..', data_path, csv_file))
        print(path_to_input_points_from_exe_working_dir)
        # NOTE(review): interpreter hard-coded to python3.9 -- consider
        # sys.executable if the sklearn script runs under the same Python.
        subprocess.run(['python3.9', "k_means_sklearn.py",
                        '--input_points_path', path_to_input_points_from_exe_working_dir,
                        '--num_of_iterations', str(num_of_iterations),
                        '--num_of_clusters', str(num_of_clusters),
                        '--output_dir', '../data/output/python_sklearn/',
                        '--benchmark_output_csv', benchmark_csv_arg])
    print('Total elapsed time for python sklearn: ', time.time() - start)