-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcalculateOffloadThreshold.py
204 lines (184 loc) · 7.67 KB
/
calculateOffloadThreshold.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
import os
import sys
import csv
from enum import Enum
from texttable import Texttable
#### Supported Kernel enum
class kernels(Enum):
    """Kernels this script can report an offload threshold for.

    Each member has a fixed integer value. ``NONE`` (0) stands for "no
    recognised kernel"; it is what ``strToKernel`` returns for an unknown name.
    """

    SGEMM = 1
    DGEMM = 2
    SGEMV = 3
    DGEMV = 4
    NONE = 0
#### Function to convert from string to kernels ENUM
def strToKernel(kernel:str) -> kernels:
    """Translate a kernel name into the matching ``kernels`` member.

    Args:
        kernel: Kernel name. Matching is exact and case-sensitive; the
            recognised names are "sgemm", "dgemm", "sgemv" and "dgemv".

    Returns:
        The corresponding ``kernels`` member, or ``kernels.NONE`` when the
        name is not recognised.
    """
    knownKernels = {
        "sgemm": kernels.SGEMM,
        "dgemm": kernels.DGEMM,
        "sgemv": kernels.SGEMV,
        "dgemv": kernels.DGEMV,
    }
    return knownKernels.get(kernel, kernels.NONE)
#### Supported Kernels table headers
gemmHeaders = [
    "Device",
    "M",
    "N",
    "K",
    "Total Prob. Size (KiB)",
    "GFLOP/s",
    "CPU GFLOP/s",
]
# Same columns as the GEMM table, minus "K".
gemvHeaders = [
    "Device",
    "M",
    "N",
    "Total Prob. Size (KiB)",
    "GFLOP/s",
    "CPU GFLOP/s",
]
#### Offload threshold class
class offloadThreshold:
    """Problem size and performance figures recorded for one offload threshold.

    Every field starts at zero. The script below treats ``M == 0`` as
    "no threshold recorded yet".
    """

    def __init__(self, kernel:kernels) -> None:
        """Create an empty (all-zero) threshold record.

        Args:
            kernel: Accepted for callers' convenience; it is neither stored
                nor used by this class.
        """
        # GFLOP/s figures taken from the CPU and GPU records.
        self.cpuGflops = 0.0
        self.gpuGflops = 0.0
        # Total problem size in KiB.
        self.totalKib = 0.0
        # Problem dimensions.
        self.M = 0
        self.N = 0
        self.K = 0
#### Function to print table to console
def printResults(once:offloadThreshold, always:offloadThreshold, unified:offloadThreshold, kernel:kernels):
    """Print the three offload thresholds as a text table on stdout.

    Args:
        once: Threshold for the "GPU (Offload Once)" row.
        always: Threshold for the "GPU (Offload Always)" row.
        unified: Threshold for the "GPU (Unified Memory)" row.
        kernel: Selects the table layout. SGEMM/DGEMM use ``gemmHeaders``
            (with a K column); SGEMV/DGEMV use ``gemvHeaders`` (no K column).

    Raises:
        SystemExit: With status 1 when ``kernel`` is none of the four
            supported kernels (an error message is printed first).
    """
    if kernel in (kernels.DGEMM, kernels.SGEMM):
        headers = gemmHeaders
        includeK = True
    elif kernel in (kernels.DGEMV, kernels.SGEMV):
        headers = gemvHeaders
        includeK = False
    else:
        # Explain the failure instead of exiting silently.
        print("ERROR - Unsupported kernel; cannot print offload thresholds.")
        sys.exit(1)

    rows = [headers]
    labelled = (
        ("GPU (Offload Once)", once),
        ("GPU (Offload Always)", always),
        ("GPU (Unified Memory)", unified),
    )
    for label, threshold in labelled:
        row = [label, threshold.M, threshold.N]
        if includeK:
            row.append(threshold.K)
        # Performance and size figures are shown to three decimal places.
        row.extend([
            round(threshold.totalKib, 3),
            round(threshold.gpuGflops, 3),
            round(threshold.cpuGflops, 3),
        ])
        rows.append(row)

    table = Texttable()
    table.add_rows(rows)
    print(table.draw())
#########################################################################################
cpuCSV_path = ""
gpuCSV_path = ""

# Define offload threshold classes
gpuOnce = offloadThreshold(kernels.NONE)
gpuAlways = offloadThreshold(kernels.NONE)
gpuUnified = offloadThreshold(kernels.NONE)

# CSV header format:
#   Device,Kernel,M,N,K,Total Problem Size (KiB),Iterations,Total Seconds,GFLOP/s
# Indices of the columns this script reads.
_DEVICE_COL = 0
_KERNEL_COL = 1
_M_COL = 2
_N_COL = 3
_K_COL = 4
_KIB_COL = 5
_ITERATIONS_COL = 6
_GFLOPS_COL = 8

# Columns whose text must be identical in a CPU record and its three GPU
# records, with the error reported on a mismatch. Checked in this order.
_MATCHED_COLUMNS = (
    (_KERNEL_COL, "ERROR - kernel mismatch."),
    (_M_COL, "ERROR - values of M mismatch."),
    (_N_COL, "ERROR - values of N mismatch."),
    (_K_COL, "ERROR - values of K mismatch."),
    (_ITERATIONS_COL, "ERROR - number of iterations mismatch."),
)


def _clearThreshold(threshold):
    """Reset `threshold` to its all-zero "nothing recorded" state."""
    threshold.cpuGflops = 0.0
    threshold.gpuGflops = 0.0
    threshold.totalKib = 0.0
    threshold.M = 0
    threshold.N = 0
    threshold.K = 0


def _recordThreshold(threshold, cpuGflops, gpuRecord):
    """Store the problem size and GFLOP/s figures of `gpuRecord` in `threshold`."""
    threshold.cpuGflops = cpuGflops
    threshold.gpuGflops = float(gpuRecord[_GFLOPS_COL])
    threshold.totalKib = float(gpuRecord[_KIB_COL])
    threshold.M = int(gpuRecord[_M_COL])
    threshold.N = int(gpuRecord[_N_COL])
    threshold.K = int(gpuRecord[_K_COL])


def _updateThreshold(threshold, cpuGflops, gpuRecord, prevGpuGflops, nextGpuRecord):
    """Apply one CPU/GPU record pair to `threshold`.

    A recorded threshold is cleared when the CPU matches or beats the GPU on
    this record, unless that is only a momentary drop: the clear happens only
    if the previous GPU figure and the next GPU figure are also no better than
    the current CPU figure. `nextGpuRecord` is None on the final record; there
    is then nothing to look ahead to, so the previous figure alone decides.
    An empty threshold (M == 0) is filled in when the GPU beats the CPU.
    """
    gpuGflops = float(gpuRecord[_GFLOPS_COL])
    if threshold.M != 0 and cpuGflops >= gpuGflops:
        # Do check to see if this is a momentary drop that we should ignore
        if prevGpuGflops <= cpuGflops and (
                nextGpuRecord is None
                or float(nextGpuRecord[_GFLOPS_COL]) <= cpuGflops):
            _clearThreshold(threshold)
    if threshold.M == 0 and cpuGflops < gpuGflops:
        _recordThreshold(threshold, cpuGflops, gpuRecord)


# Check both filenames have been passed in
if len(sys.argv) == 3:
    cpuCSV_path = sys.argv[1]
    gpuCSV_path = sys.argv[2]
else:
    print("ERROR - Must provide the CPU and GPU CSV file paths:")
    print("\t\tpython calculateOffloadThreshold.py path/to/cpu.csv path/to/gpu.csv")
    sys.exit(1)

# Check both CSV files exist
if not os.path.isfile(cpuCSV_path):
    print("ERROR - CPU CSV file \"{}\" cannot be found.".format(cpuCSV_path))
    sys.exit(1)
if not os.path.isfile(gpuCSV_path):
    print("ERROR - GPU CSV file \"{}\" cannot be found.".format(gpuCSV_path))
    sys.exit(1)

# Read both files; the context managers close them once the lines are loaded.
with open(cpuCSV_path, 'r') as cpuFile:
    cpuLines = cpuFile.readlines()
with open(gpuCSV_path, 'r') as gpuFile:
    gpuLines = gpuFile.readlines()

# Remove first line of file - header names
if not cpuLines or not gpuLines:
    print("ERROR - CSV file is empty (expected a header line).")
    sys.exit(1)
cpuLines.pop(0)
gpuLines.pop(0)

### For every 1 CPU entry, there should be 3 GPU entries
# Check that there are 3x as many GPU lines as CPU lines
if len(cpuLines) * 3 != len(gpuLines):
    print("ERROR - Number of GPU records does not match number of CPU records (There should be 3 GPU entries for each CPU entry).")
    sys.exit(1)

# Go through all entries and find offload threshold
kernel = ""
prevGpuOgflops = 0.0
prevGpuAgflops = 0.0
prevGpuUgflops = 0.0
for entry, cpuLine in enumerate(cpuLines):
    cpu = cpuLine.split(',')
    # The GPU file holds three consecutive records per CPU record, in the
    # order offloadOnce, offloadAlways, unified. Index rather than pop(0),
    # which would shift the whole list on every record.
    first = 3 * entry
    gpuO = gpuLines[first].split(',')
    gpuA = gpuLines[first + 1].split(',')
    gpuU = gpuLines[first + 2].split(',')
    # Records of the following problem size, used for the look-ahead check.
    # The final CPU record has none.
    if first + 3 < len(gpuLines):
        nextGpuO = gpuLines[first + 3].split(',')
        nextGpuA = gpuLines[first + 4].split(',')
        nextGpuU = gpuLines[first + 5].split(',')
    else:
        nextGpuO = nextGpuA = nextGpuU = None

    # Make sure entries are correct
    if cpu[_DEVICE_COL] != "cpu":
        print("ERROR - Non-CPU entry in CPU CSV file.")
        sys.exit(1)
    if (gpuO[_DEVICE_COL] != "gpu_offloadOnce" or
            gpuA[_DEVICE_COL] != "gpu_offloadAlways" or
            gpuU[_DEVICE_COL] != "gpu_unified"):
        print("ERROR - Non-GPU entry in GPU CSV file.")
        sys.exit(1)

    # Make sure kernel, M, N, K and iteration count agree across all four records
    for column, message in _MATCHED_COLUMNS:
        if (cpu[column] != gpuO[column] or
                cpu[column] != gpuA[column] or
                cpu[column] != gpuU[column]):
            print(message)
            sys.exit(1)
    kernel = cpu[_KERNEL_COL]

    # Reset (CPU.gflops >= GPU.gflops) or record (GPU.gflops > CPU.gflops)
    # each of the three thresholds independently.
    cpuGflops = float(cpu[_GFLOPS_COL])
    _updateThreshold(gpuOnce, cpuGflops, gpuO, prevGpuOgflops, nextGpuO)
    _updateThreshold(gpuAlways, cpuGflops, gpuA, prevGpuAgflops, nextGpuA)
    _updateThreshold(gpuUnified, cpuGflops, gpuU, prevGpuUgflops, nextGpuU)

    prevGpuAgflops = float(gpuA[_GFLOPS_COL])
    prevGpuOgflops = float(gpuO[_GFLOPS_COL])
    prevGpuUgflops = float(gpuU[_GFLOPS_COL])

printResults(gpuOnce, gpuAlways, gpuUnified, strToKernel(kernel))