Skip to content

Commit

Permalink
exclusive scan: avoid reading shared mem from multiple threads at the same time
Browse files Browse the repository at this point in the history
  • Loading branch information
ryichando committed Jan 12, 2025
1 parent 8df4262 commit ccd9fba
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion src/cpp/csrmat/csrmat.cu
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,10 @@ __global__ void block_scan_kernel(unsigned *d_data, unsigned *d_block_sums,
unsigned idx = blockIdx.x * blockDim.x + threadIdx.x;
temp[tid] = (idx < n) ? d_data[idx] : 0;
__syncthreads();
unsigned last_element = temp[blockDim.x - 1];
unsigned last_element = 0;
if (tid == 0 && d_block_sums != nullptr) {
last_element = temp[blockDim.x - 1];
}
__syncthreads();
for (int offset = 1; offset < blockDim.x; offset *= 2) {
int index = (tid + 1) * offset * 2 - 1;
Expand Down

0 comments on commit ccd9fba

Please sign in to comment.