-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathminmax.cu
61 lines (47 loc) · 1.49 KB
/
minmax.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#ifndef _MINMAX_
#define _MINMAX_
#include<float.h>
/*
 * Host-side reference maximum.
 *
 * Scans the first n floats of x and returns the largest one.
 *
 *   x : pointer to at least n readable floats
 *   n : number of elements to examine
 *
 * Returns the largest element found, or -FLT_MAX when n <= 0 (the loop
 * body never runs and the seed value is returned unchanged).
 */
float cpu_maxf(float *x, int n){
    /* Seed with the most negative finite float. -FLT_MIN would be wrong
       here: FLT_MIN is the smallest positive normalized value, so
       -FLT_MIN is a tiny negative number just below zero and would beat
       every element of an array whose values are all more negative. */
    float m = -FLT_MAX;
    for (int i = 0; i < n; i++) {
        if (x[i] > m) m = x[i];
    }
    return m;
}
/*
 * Atomic floating-point maximum built on a 32-bit integer compare-and-swap.
 *
 * Raises the float stored at `address` to `val` if `val` is greater than
 * the value currently seen there; otherwise leaves memory untouched.
 *
 *   address : location of the float to update (accessed as a 32-bit int)
 *   val     : candidate maximum
 *
 * Returns the last value this thread observed at `address`. When no swap
 * is attempted that is the value read on entry. After a successful swap
 * the loop condition still holds for the stale observation, so one more
 * CAS is issued and its result (the freshly stored contents) is returned.
 */
__device__ float atomicMaxf(float* address, float val)
{
    int *slot = (int*)address;
    const int desired = __float_as_int(val);

    /* Plain read to get a first guess at the current contents. */
    int seen = *slot;

    for (;;) {
        /* Stop once the observed value is not smaller than val (this is
           also the exit taken when the comparison is false due to NaN). */
        if (!(val > __int_as_float(seen)))
            break;
        /* Try to install val; atomicCAS hands back what was actually in
           memory, which becomes the next observation. */
        seen = atomicCAS(slot, seen, desired);
    }

    return __int_as_float(seen);
}
/*
 * Per-block maximum reduction.
 *
 * Each thread walks d_array starting at its global index and advancing by
 * the total number of launched threads, keeping a private running maximum.
 * The per-thread results are then combined in shared memory and thread 0
 * writes the block's maximum to d_max[blockIdx.x].
 *
 *   d_array  : input values; indices [0, elements) are read
 *   d_max    : output, one float per block (indexed by blockIdx.x)
 *   elements : number of valid entries in d_array
 *
 * Launch requirements (from how the kernel indexes):
 *   - only the x dimension of grid and block is used;
 *   - dynamic shared memory of at least blockDim.x * sizeof(float) bytes
 *     must be supplied as the third launch parameter.
 *
 * A block that sees no elements writes -FLT_MAX. Any blockDim.x >= 1 is
 * handled, not only powers of two.
 */
__global__ void max_reduce( float* d_array,
                            float *d_max,
                            const size_t elements)
{
    extern __shared__ float block_max[];

    const unsigned int tid = threadIdx.x;

    /* Index arithmetic is done in size_t so it cannot overflow a 32-bit
       int when elements (or gridDim.x * blockDim.x) is large. */
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    /* Accumulate in a register; shared memory is written once afterwards. */
    float local_max = -FLT_MAX;
    for (size_t gid = (size_t)blockDim.x * blockIdx.x + tid;
         gid < elements;
         gid += stride)
    {
        local_max = max(local_max, d_array[gid]);
    }

    block_max[tid] = local_max;
    __syncthreads();

    /* Tree reduction over the first `active` slots. Using the rounded-up
       half keeps the middle slot alive when `active` is odd, so no value
       is dropped for block sizes that are not a power of two. Every slot
       already holds either real data or -FLT_MAX, so no bounds check on
       the global index is needed here. */
    for (unsigned int active = blockDim.x; active > 1; )
    {
        const unsigned int half = (active + 1) / 2;
        if (tid + half < active)
            block_max[tid] = max(block_max[tid], block_max[tid + half]);
        /* Barrier sits outside the branch: all threads must reach it. */
        __syncthreads();
        active = half;
    }

    if (tid == 0) d_max[blockIdx.x] = block_max[0];
}
#endif