Skip to content

Commit

Permalink
Improve alignment rate slightly.
Browse files Browse the repository at this point in the history
  • Loading branch information
“chhylp123” committed Sep 15, 2019
1 parent 800a363 commit 699a931
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 21 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,10 @@ The output file of BitMapperBS must be first sorted into a coordinate-sorted BAM

### Changelog ###

(17) September 15, 2019: version 1.0.2.3 released.

>> Improve the alignment rate slightly.

(16) September 9, 2019: version 1.0.2.2 released.

>> Revise the output option of methylation extraction.
Expand Down
65 changes: 44 additions & 21 deletions Schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ int8_t mat_diff[25] = {
_rg_name_l *_ih_refGenName;
int refChromeCont;

char *versionN = "1.0.2.2";
char *versionN = "1.0.2.3";
long long mappingCnt[MAX_Thread];
unsigned int done;
long long mappedSeqCnt[MAX_Thread];
Expand Down Expand Up @@ -16513,7 +16513,8 @@ int inline process_rest_seed_filter_debug(

if (match_length1 == 0)
{
total_match_length1 = total_match_length1 + match_step;
///total_match_length1 = total_match_length1 + match_step;
total_match_length1 = determine_seed_offset_unmatch(current_read->length, total_match_length1, current_read->seq, match_step);
}
else
{
Expand Down Expand Up @@ -17459,7 +17460,8 @@ int inline process_rest_seed_filter_muti_thread(

if (match_length1 == 0)
{
total_match_length1 = total_match_length1 + match_step;
///total_match_length1 = total_match_length1 + match_step;
total_match_length1 = determine_seed_offset_unmatch((*current_read).length, total_match_length1, (*current_read).seq, match_step);
}
else
{
Expand Down Expand Up @@ -17760,7 +17762,8 @@ int inline process_rest_seed_debug(

if (match_length1 == 0)
{
total_match_length1 = total_match_length1 + match_step;
///total_match_length1 = total_match_length1 + match_step;
total_match_length1 = determine_seed_offset_unmatch(current_read->length, total_match_length1, current_read->seq, match_step);
}
else
{
Expand Down Expand Up @@ -18038,7 +18041,8 @@ int inline process_rest_seed_muti_thread(

if (match_length1 == 0)
{
total_match_length1 = total_match_length1 + match_step;
///total_match_length1 = total_match_length1 + match_step;
total_match_length1 = determine_seed_offset_unmatch((*current_read).length, total_match_length1, (*current_read).seq, match_step);
}
else
{
Expand Down Expand Up @@ -18334,7 +18338,8 @@ inline void get_candidates(

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
///total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(current_read->length, total_match_length, current_read->seq, match_step);
}
else
{
Expand Down Expand Up @@ -18478,7 +18483,8 @@ inline void get_candidates(

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
///total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(current_read->length, total_match_length, current_read->seq, match_step);
}
else
{
Expand Down Expand Up @@ -19711,7 +19717,11 @@ inline void get_candidates_muti_thread(

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
///total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(current_read->length, total_match_length, current_read->seq, match_step);



}
else
{
Expand Down Expand Up @@ -19855,7 +19865,8 @@ inline void get_candidates_muti_thread(

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
///total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(current_read->length, total_match_length, current_read->seq, match_step);
}
else
{
Expand Down Expand Up @@ -20632,7 +20643,8 @@ int Map_Pair_Seq_end_to_end(int thread_id)

if (match_length1 == 0)
{
total_match_length1 = total_match_length1 + match_step;
///total_match_length1 = total_match_length1 + match_step;
total_match_length1 = determine_seed_offset_unmatch(current_read1.length, total_match_length1, current_read1.seq, match_step);
}
else
{
Expand Down Expand Up @@ -20836,7 +20848,8 @@ int Map_Pair_Seq_end_to_end(int thread_id)

if (match_length2 == 0)
{
total_match_length2 = total_match_length2 + match_step;
///total_match_length2 = total_match_length2 + match_step;
total_match_length2 = determine_seed_offset_unmatch(current_read2.length, total_match_length2, current_read2.seq, match_step);
}
else
{
Expand Down Expand Up @@ -23141,7 +23154,8 @@ void* Map_Pair_Seq_split(void* arg)

if (match_length1 == 0)
{
total_match_length1 = total_match_length1 + match_step;
///total_match_length1 = total_match_length1 + match_step;
total_match_length1 = determine_seed_offset_unmatch(read_batch1[i].length, total_match_length1, read_batch1[i].seq, match_step);
}
else
{
Expand Down Expand Up @@ -23328,7 +23342,8 @@ void* Map_Pair_Seq_split(void* arg)

if (match_length2 == 0)
{
total_match_length2 = total_match_length2 + match_step;
///total_match_length2 = total_match_length2 + match_step;
total_match_length2 = determine_seed_offset_unmatch(read_batch2[i].length, total_match_length2, read_batch2[i].seq, match_step);
}
else
{
Expand Down Expand Up @@ -24697,7 +24712,8 @@ int Map_Single_Seq_end_to_end(int thread_id)

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(current_read.length, total_match_length, current_read.seq, match_step);
///total_match_length = total_match_length + match_step;
}
else
{
Expand Down Expand Up @@ -24855,7 +24871,8 @@ int Map_Single_Seq_end_to_end(int thread_id)

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(current_read.length, total_match_length, current_read.seq, match_step);
///total_match_length = total_match_length + match_step;
}
else
{
Expand Down Expand Up @@ -25777,7 +25794,8 @@ int Map_Single_Seq_end_to_end_pbat(int thread_id)

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
///total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(current_read.length, total_match_length, current_read.seq, match_step);
}
else
{
Expand Down Expand Up @@ -25984,7 +26002,8 @@ int Map_Single_Seq_end_to_end_pbat(int thread_id)

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
///total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(current_read.length, total_match_length, current_read.seq, match_step);
}
else
{
Expand Down Expand Up @@ -27288,7 +27307,8 @@ void* Map_Single_Seq_split(void* arg)

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
///total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(read_batch[i].length, total_match_length, read_batch[i].seq, match_step);
}
else
{
Expand Down Expand Up @@ -27480,7 +27500,8 @@ void* Map_Single_Seq_split(void* arg)

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
///total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(read_batch[i].length, total_match_length, read_batch[i].seq, match_step);
}
else
{
Expand Down Expand Up @@ -28424,7 +28445,8 @@ void* Map_Single_Seq_split_pbat(void* arg)

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
///total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(read_batch[i].length, total_match_length, read_batch[i].seq, match_step);
}
else
{
Expand Down Expand Up @@ -28614,7 +28636,8 @@ void* Map_Single_Seq_split_pbat(void* arg)

if (match_length == 0)
{
total_match_length = total_match_length + match_step;
///total_match_length = total_match_length + match_step;
total_match_length = determine_seed_offset_unmatch(read_batch[i].length, total_match_length, read_batch[i].seq, match_step);
}
else
{
Expand Down
27 changes: 27 additions & 0 deletions Schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -1503,6 +1503,33 @@ inline void C_to_T(char *Seq, char *bsSeq, int length, int* C_site)
bsSeq[length] = '\0';
}

/// Pick the read offset at which seeding resumes after a seed attempt that
/// started at pre_seedOffset produced no match.
///
/// readLen        : total number of bases in the read
/// pre_seedOffset : offset at which the unmatched seed attempt started
/// read           : the read's bases; only [pre_seedOffset, pre_seedOffset + match_step) is inspected
/// match_step     : default distance to advance when nothing else applies
///
/// Returns:
///   - readLen, if fewer than 18 bases or fewer than match_step bases remain
///     (the caller's offset then sits at the end of the read);
///   - otherwise the offset just past the first 'N' found in the next
///     match_step bases, if there is one;
///   - otherwise pre_seedOffset + match_step.
inline int determine_seed_offset_unmatch(int readLen, int pre_seedOffset, char* read, int match_step)
{
    /// bases that have not been covered by any seed yet
    const int remaining = readLen - pre_seedOffset;

    /// too little left to start another seed: jump straight to the end of the read
    if (remaining < 18 || remaining < match_step)
    {
        return readLen;
    }

    /// one past the last base of the window we are allowed to skip over
    const int window_end = pre_seedOffset + match_step;

    /// restart immediately after the first 'N' inside the window, if any
    int pos = pre_seedOffset;
    while (pos < window_end)
    {
        if (read[pos] == 'N')
        {
            return pos + 1;
        }
        pos++;
    }

    /// no 'N' in the window: advance by the full step
    return window_end;
}


inline void C_to_T_forward(char *Seq, char *bsSeq, int length, int* C_site)
{
Expand Down

0 comments on commit 699a931

Please sign in to comment.