-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy pathseed_pos_table.cu
109 lines (79 loc) · 3.56 KB
/
seed_pos_table.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#include "seed_filter_interface.h"
#include "cuda_utils.h"
#include "tbb/parallel_sort.h"
#include "ntcoding.h"
#include "store_gpu.h"
#include <thrust/scan.h>
void InclusivePrefixScan (uint32_t* data, uint32_t len) {
int g;
{
std::unique_lock<std::mutex> locker(mu);
if (available_gpus.empty()) {
cv.wait(locker, [](){return !available_gpus.empty();});
}
g = available_gpus.back();
available_gpus.pop_back();
locker.unlock();
check_cuda_setDevice(g, "InclusivePrefixScan");
}
thrust::inclusive_scan(thrust::host, data, data + len, data);
{
std::unique_lock<std::mutex> locker(mu);
available_gpus.push_back(g);
locker.unlock();
cv.notify_one();
}
}
void SendSeedPosTable (uint32_t* index_table, uint32_t index_table_size, uint32_t* pos_table, uint32_t num_index){
for(int g = 0; g < NUM_DEVICES; g++){
check_cuda_setDevice(g, "SendSeedPosTable");
check_cuda_malloc((void**)&d_index_table[g], index_table_size*sizeof(uint32_t), "index_table");
check_cuda_memcpy((void*)d_index_table[g], (void*)index_table, index_table_size*sizeof(uint32_t), cudaMemcpyHostToDevice, "index_table");
check_cuda_malloc((void**)&d_pos_table[g], num_index*sizeof(uint32_t), "pos_table");
check_cuda_memcpy((void*)d_pos_table[g], (void*)pos_table, num_index*sizeof(uint32_t), cudaMemcpyHostToDevice, "pos_table");
}
}
void GenerateSeedPosTable(char* ref_str, size_t start_addr, uint32_t ref_length, uint32_t step, int shape_size, int kmer_size) {
assert(kmer_size <= 15);
assert(kmer_size > 3);
uint32_t *index_table;
uint32_t *pos_table;
uint32_t index_table_size;
uint32_t offset = (shape_size+1)%step;
uint32_t start_offset = step - offset;
index_table_size = ((uint32_t)1 << 2*kmer_size) + 1;
index_table = (uint32_t*) calloc(index_table_size, sizeof(uint32_t));
uint32_t num_steps = (ref_length - shape_size + offset) / step;
uint32_t* tmp_index_arr = (uint32_t*) malloc(num_steps * sizeof(uint32_t));
uint32_t* tmp_off_arr = (uint32_t*) malloc(num_steps * sizeof(uint32_t));
tbb::parallel_for( tbb::blocked_range<uint32_t>(0, num_steps, GRAIN_SIZE),
[&](tbb::blocked_range<uint32_t> r){
for (uint32_t i=r.begin(); i<r.end(); ++i){
uint32_t index = GetKmerIndexAtPos(ref_str, start_addr+start_offset+(i*step), shape_size);
tmp_index_arr[i] = index;
// valid index
if (index != (uint32_t) INVALID_KMER) {
tmp_off_arr[i] = __sync_fetch_and_add( &index_table[index+1], 1);
}
}
});
InclusivePrefixScan(index_table, index_table_size);
uint32_t num_index = index_table[index_table_size-1];
pos_table = (uint32_t*) malloc(num_index * sizeof(uint32_t));
tbb::parallel_for( tbb::blocked_range<uint32_t>(0, num_steps, GRAIN_SIZE),
[&](tbb::blocked_range<uint32_t> r){
for (uint32_t i=r.begin(); i<r.end(); ++i){
uint32_t index = tmp_index_arr[i];
// valid index
if (index != (uint32_t) INVALID_KMER) {
uint32_t curr_idx = index_table[index] + tmp_off_arr[i];
pos_table[curr_idx] = start_offset+(i*step);
}
}
});
SendSeedPosTable(index_table+1, index_table_size-1, pos_table, num_index);
free(tmp_index_arr);
free(tmp_off_arr);
free(index_table);
free(pos_table);
}