From 73dd5eeb5beb24a388a44ac71a3237eef1de82b4 Mon Sep 17 00:00:00 2001 From: chhylp123 Date: Sun, 28 Apr 2024 15:11:21 -0400 Subject: [PATCH] gen_telo_end_t --- CommandLines.h | 2 +- Overlaps.cpp | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 1 deletion(-) diff --git a/CommandLines.h b/CommandLines.h index 9591e45..bf10db1 100644 --- a/CommandLines.h +++ b/CommandLines.h @@ -5,7 +5,7 @@ #include #include -#define HA_VERSION "0.19.9-r605" +#define HA_VERSION "0.19.9-r606" #define VERBOSE 0 diff --git a/Overlaps.cpp b/Overlaps.cpp index a7380b9..cecf95b 100644 --- a/Overlaps.cpp +++ b/Overlaps.cpp @@ -18,7 +18,9 @@ #include "inter.h" #include "gfa_ut.h" #include "assert.h" +#include "khash.h" +KHASH_SET_INIT_INT64(64) uint32_t debug_purge_dup = 0; @@ -167,6 +169,20 @@ typedef struct { ma_ug_t *ctg; } kvect_sec_t; + +typedef struct { /* telomere-motif descriptor built and returned by gen_telo_end_t() */ + uint64_t n, mask; /* n: copied from in->total_reads; mask: low 2*tlen bits set */ + uint8_t *hh; /* allocated via CALLOC(p->hh, p->n) */ + uint64_t tlen, tm; /* tlen: strlen(motif); tm: motif packed 2 bits per base */ +} telo_end_t; + +typedef struct { /* working context assembled inside gen_telo_end_t() */ + All_reads *Rinf; /* the read set passed in */ + UC_Read *aux; /* n_thread entries, each set up with init_UC_Read() */ + telo_end_t *u; /* the descriptor under construction */ + khash_t(64) *h; /* set of 64-bit keys holding every rotation of tm */ +} telo_end_pip_t; + ///this value has been updated at the first line of build_string_graph_without_clean long long min_thres; @@ -177,6 +193,36 @@ kv_u_trans_t *get_utg_ovlp(ma_ug_t **ug, asg_t* read_g, ma_hit_t_alloc* sources, R_to_U* ruIndex, int max_hang, int min_ovlp, kvec_asg_arc_t_warp* new_rtg_edges, bub_label_t* b_mask_t, uint8_t* r_het); void delete_useless_nodes(ma_ug_t **ug); +telo_end_t* gen_telo_end_t(All_reads *in, const char* motif, uint64_t motif_len, uint64_t n_thread) +{ /* Builds and returns a telo_end_t for `motif`: records its length, packs it into tm, and sizes hh by in->total_reads. It also sets up a telo_end_pip_t with n_thread UC_Read entries and a hash set of all tlen rotations of tm. NOTE(review): motif_len is never read; the length comes from strlen(motif). */ + uint64_t j, k, c, x; int absent; + telo_end_t* p = NULL; CALLOC(p, 1); + p->tlen = strlen(motif); p->mask = (1ULL<<(p->tlen<<1)) - 1; /* NOTE(review): this shift is only defined while tlen < 32 */ + + for (j = 0, p->tm = 0; j < p->tlen; ++j) { + c = seq_nt6_table[(uint8_t)motif[j]]; + assert(c >= 0 && c <= 3); /* c is uint64_t, so only the c <= 3 half can fail: every motif character must map to a 2-bit code */ + p->tm = (p->tm<<2)|(c); /* append the code; the first motif character ends up in the highest used bits */ + } + p->n = in->total_reads; CALLOC(p->hh, p->n); + + telo_end_pip_t *aux; CALLOC(aux, 1); /* NOTE(review): aux, aux->aux and aux->h are neither returned nor released before `return p` in this version; looks unfinished, confirm against later revisions */ + aux->Rinf = in; aux->u = p; CALLOC(aux->aux, n_thread); + for (k = 0; k < n_thread; k++) 
init_UC_Read(&(aux->aux[k])); /* one UC_Read per thread index */ aux->h = kh_init(64); // hash table for all rotations of the telomere motif + kh_resize(64, aux->h, (p->tlen*2)); + for (k = 0, x = p->tm; k < p->tlen; k++) { + kh_put(64, aux->h, x, &absent); /* insert the current rotation; the status written to `absent` is not inspected */ + x = (((x>>((p->tlen-1)<<1))&(3ULL))|(x<<2))&p->mask; /* rotate left by one base: the top 2-bit code wraps around to the bottom */ + } + assert(x == p->tm); /* tlen single-base rotations must return to the starting value */ + + + + + return p; +} + void init_bub_label_t(bub_label_t* x, uint32_t n_thres, uint32_t n_reads) { uint32_t i;