-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmatrix.hpp
169 lines (149 loc) · 4.54 KB
/
matrix.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#pragma once
#include <device_launch_parameters.h>
#include "cuda_runtime.h"
#include "cublas_v2.h"
#include <iostream>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <chrono>
#include <memory>
using namespace std;
// Number of test repetitions.
#define N 500
// Hand-written CUDA kernels (the suffix presumably selects the element type — confirm against the users of these macros).
#define USE_MY_INT
//#define USE_MY_FLOAT
//#define USE_MY_DOUBLE
// cuBLAS matrix computation (NOTE(review): _T / _N presumably mean transposed / non-transposed — confirm).
#define USE_INT8_T
//#define USE_INT8_N
//#define USE_FLOAT_T
//#define USE_FLOAT_N
//#define USE_DOUBLE_T
//#define USE_DOUBLE_N
// ISPC matrix computation.
#define USE_ISPC_INT
//#define USE_ISPC_FLOAT
//#define USE_ISPC_DOUBLE
// Whether to verify the matrix result and measure the CPU computation time.
#define USE_CPU_COST
// Row and column counts of matrices A, B and C.
int const A_ROW = 512;
int const A_COL = 512;
int const B_ROW = 512;
int const B_COL = 512;
// TIMER_START(name): declares name_start and name_stop, both set to the current time.
// steady_clock is used because it is monotonic: an interval measured with it cannot be
// distorted by adjustments of the system (wall) clock.
#define TIMER_START(name) auto name##_start = std::chrono::steady_clock::now(), name##_stop = name##_start
// TIMER_STOP(name): stores the current time in name_stop.
#define TIMER_STOP(name) name##_stop = std::chrono::steady_clock::now()
// TIMER_MSEC(name): time elapsed between name_start and name_stop, in milliseconds
// (a double derived from a whole microsecond count).
#define TIMER_MSEC(name) (1e-3 * std::chrono::duration_cast<std::chrono::microseconds>(name##_stop - name##_start).count())
// Fills a ROW x COL matrix, stored contiguously row by row, with pseudo-random
// values in the range [1, 100] taken from rand().
//   ROW, COL - matrix dimensions; nothing is written if either is <= 0
//   Matrix   - destination buffer with room for ROW * COL elements
template <typename T>
void MatrixINIT(int ROW, int COL, T* Matrix)
{
    // Elements are visited in storage order, so a running pointer replaces the index arithmetic.
    T* cursor = Matrix;
    for (int row = 0; row < ROW; ++row) {
        for (int col = 0; col < COL; ++col) {
            *cursor = static_cast<T>(rand() % 100 + 1);
            ++cursor;
        }
    }
}
// Prints a ROW x COL matrix to cout, one row per line, when show is non-zero.
//   matrix  - label printed in the heading line
//   Matrix  - element buffer
//   show    - 0: print nothing; non-zero: print the matrix
//   T_OR_N  - 0: element (i, j) is read from Matrix[i * COL + j] (not transposed);
//             non-zero: element (i, j) is read from Matrix[j * ROW + i] (transposed)
//   T_      - when equal to "char", every element is converted to int before printing
template <typename T>
void Matrixshow(string matrix, int ROW, int COL, T* Matrix, int show = 0, int T_OR_N = 0, string T_ = "None")
{
    if (!show) {
        return;
    }

    const bool transposed = (T_OR_N != 0);
    const bool as_integer = (T_ == "char");

    cout << "矩阵" << matrix << ":" << endl << endl;
    for (int r = 0; r < ROW; ++r) {
        for (int c = 0; c < COL; ++c) {
            // Select the storage position of logical element (r, c).
            const int offset = transposed ? c * ROW + r : r * COL + c;
            if (as_integer) {
                cout << static_cast<int>(Matrix[offset]) << " ";
            } else {
                cout << static_cast<T>(Matrix[offset]) << " ";
            }
        }
        cout << endl;
    }
    cout << endl;
}
// CPU reference matrix product, used to time the host and to check another result.
//
// Computes h_CC = h_a * h_b with h_a of size m x n, h_b of size n x k and h_CC of
// size m x k, all indexed row by row. The product is recomputed N times and the
// total elapsed time is printed. Every element of h_CC is then compared with h_C;
// each mismatch is printed and a final "Pass!!!" / "Error!!!" line reports the outcome.
//
//   h_a     - left operand, m * n elements
//   h_b     - right operand, n * k elements
//   m, n, k - dimensions as described above
//   h_C     - result to be checked, m * k elements
//   h_CC    - output buffer receiving the CPU result, m * k elements
//   T_OR_N  - 0: h_C is read as h_C[i * k + j] (not transposed);
//             non-zero: h_C is read as h_C[j * m + i] (transposed)
template <typename T1, typename T2>
void cpu_matrix_mult(T1* h_a, T1* h_b, int m, int n, int k, T2* h_C, T2* h_CC, int T_OR_N = 0)
{
    TIMER_START(_X);
    // Repeat the full product N times; the reported time covers all repetitions.
    for (int rep = 0; rep < N; ++rep)
    {
        // The result has m * k elements. Clearing m * n elements (as the code did
        // before) writes past the end of h_CC whenever n > k.
        memset(h_CC, 0, sizeof(T2) * static_cast<size_t>(m) * static_cast<size_t>(k));
        for (int i = 0; i < m; ++i)
        {
            for (int j = 0; j < k; ++j)
            {
                // Compensated (Kahan-style) accumulation: temp is the running sum and
                // comp carries the correction for the rounding error of that sum.
                T2 temp = 0.0f;
                T2 comp = 0.0f;
                for (int h = 0; h < n; ++h)
                {
                    comp -= h_a[i * n + h] * h_b[h * k + j];
                    const T2 t = temp - comp;
                    comp = (t - temp) + comp;
                    temp = t;
                }
                h_CC[i * k + j] = temp;
            }
        }
    }
    TIMER_STOP(_X);
    std::cout << "CPU耗费了: " << TIMER_MSEC(_X) << " ms " << "\n";

    // Compare the CPU result with h_C element by element and report every difference.
    bool ok = true;
    for (int i = 0; i < m; ++i)
    {
        for (int j = 0; j < k; ++j)
        {
            const T2 cpu_value = h_CC[i * k + j];
            const T2 other_value = T_OR_N ? h_C[j * m + i] : h_C[i * k + j];
            if (fabs(cpu_value - other_value) > (1.0e-10))
            {
                ok = false;
                std::cout << cpu_value << " - " << other_value << " = " << cpu_value - other_value << "\n";
            }
        }
    }
    if (ok)
    {
        std::cout << "Pass!!!\n \n";
    }
    else
    {
        std::cout << "Error!!!\n \n";
    }
}