Skip to content

Commit

Permalink
r191: added -S to reject overlap between strands
Browse files Browse the repository at this point in the history
  • Loading branch information
lh3 committed Jan 10, 2024
1 parent ed8ae62 commit 1238a53
Show file tree
Hide file tree
Showing 6 changed files with 17 additions and 13 deletions.
8 changes: 4 additions & 4 deletions graph.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

void pg_post_process(const pg_opt_t *opt, pg_data_t *d)
{
int32_t i, j;
int32_t i, j, check_strand = !!(opt->flag&PG_F_CHECK_STRAND);
if (pg_verbose >= 3)
fprintf(stderr, "[M::%s::%s] %d genes and %d proteins\n", __func__, pg_timestamp(), d->n_gene, d->n_prot);
pg_cap_score_dom(d);
Expand All @@ -22,7 +22,7 @@ void pg_post_process(const pg_opt_t *opt, pg_data_t *d)
int32_t n_shadow, tot;
for (i = 0, tot = 0; i < g->n_hit; ++i)
if (!g->hit[i].flt) ++tot;
n_shadow = pg_shadow(opt, d, j, 0);
n_shadow = pg_shadow(opt, d, j, 0, check_strand);
fprintf(stderr, "[M::%s::%s] genome[%d]: %s; %d hits remain, of which %d are shadowed\n",
__func__, pg_timestamp(), j, g->label, tot, n_shadow);
}
Expand Down Expand Up @@ -86,7 +86,7 @@ static inline int32_t pg_get_score(const pg_graph_t *q, const pg_hit_t *a, int32

void pg_gen_arc(const pg_opt_t *opt, pg_graph_t *q)
{
int32_t j, i, i0, *seg_cnt = 0, use_ori = !!(opt->flag&PG_F_ORI_FOR_BRANCH);
int32_t j, i, i0, *seg_cnt = 0, use_ori = !!(opt->flag&PG_F_ORI_FOR_BRANCH), check_strand = !!(opt->flag&PG_F_CHECK_STRAND);
int64_t n_arc = 0, m_arc = 0, n_arc1 = 0, m_arc1 = 0;
pg_tmparc_t *p, *arc = 0, *arc1 = 0;

Expand All @@ -99,7 +99,7 @@ void pg_gen_arc(const pg_opt_t *opt, pg_graph_t *q)
uint32_t w, v = (uint32_t)-1;
int64_t vpos = -1;
int32_t vcid = -1, si = -1;
pg_shadow(opt, q->d, j, 0);
pg_shadow(opt, q->d, j, 0, check_strand);
pg_hit_sort(g, 1); // sort by pg_hit_t::cm
n_arc1 = 0;
memset(seg_cnt, 0, q->n_seg * sizeof(int32_t));
Expand Down
3 changes: 2 additions & 1 deletion main.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ int main(int argc, char *argv[])
pg_data_t *d;

pg_opt_init(&opt);
while ((c = ketopt(&o, argc, argv, 1, "d:e:l:f:g:p:b:B:y:Fr:c:a:wv:GD:C:T:X:I:P:m:JO", long_options)) >= 0) {
while ((c = ketopt(&o, argc, argv, 1, "d:e:l:f:g:p:b:B:y:Fr:c:a:wv:GD:C:T:X:I:P:m:JOS", long_options)) >= 0) {
// input options
if (c == 'd') opt.gene_delim = *o.arg;
else if (c == 'X') opt.excl = pg_read_list_dict(o.arg);
Expand All @@ -90,6 +90,7 @@ int main(int argc, char *argv[])
else if (c == 'F') opt.flag |= PG_F_FRAG_MODE;
else if (c == 'D') opt.local_dist = pg_parse_num(o.arg);
else if (c == 'C') opt.local_count = atoi(o.arg);
else if (c == 'S') opt.flag |= PG_F_CHECK_STRAND;
// output options
else if (c == 'w') opt.flag |= PG_F_WRITE_NO_WALK;
else if (c == 'G') opt.flag |= PG_F_WRITE_VTX_SEL;
Expand Down
6 changes: 4 additions & 2 deletions overlap.c
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ static inline int32_t pg_cds_len(const pg_hit_t *a, const pg_exon_t *e)
*/

// select among overlapping isoforms of the same gene
int32_t pg_flt_ov_isoform(const pg_opt_t *opt, pg_data_t *d, int32_t id)
int32_t pg_flt_ov_isoform(const pg_opt_t *opt, pg_data_t *d, int32_t id, int32_t check_strand)
{
const pg_prot_t *prot = d->prot;
pg_genome_t *g = &d->genome[id];
Expand All @@ -75,6 +75,7 @@ int32_t pg_flt_ov_isoform(const pg_opt_t *opt, pg_data_t *d, int32_t id)
if (aj->flt || aj->ce <= ai->cs) continue; // no overlap
gj = prot[aj->pid].gid;
if (gi != gj) continue; // ignore isoforms from different genes
if (check_strand && ai->rev != aj->rev) continue;
hj = pg_hash_uint32(aj->pid);
x = pg_hit_overlap(g, aj, ai);
if (x>>32 == 0) continue; // no overlap on CDS
Expand All @@ -97,7 +98,7 @@ typedef struct {
} shadow_aux_t;

// test overlap between same or different genes
int32_t pg_shadow(const pg_opt_t *opt, pg_data_t *d, int32_t id, int32_t cal_dom_sc)
int32_t pg_shadow(const pg_opt_t *opt, pg_data_t *d, int32_t id, int32_t cal_dom_sc, int32_t check_strand)
{
const pg_prot_t *prot = d->prot;
pg_genome_t *g = &d->genome[id];
Expand All @@ -124,6 +125,7 @@ int32_t pg_shadow(const pg_opt_t *opt, pg_data_t *d, int32_t id, int32_t cal_dom
pg_hit_t *aj = &g->hit[j];
if (aj->ce <= ai->cs) continue; // no overlap
if (aj->flt) continue;
if (check_strand && ai->rev != aj->rev) continue;
gj = prot[aj->pid].gid;
hj = pg_hash_uint32(aj->pid);
x = pg_hit_overlap(g, aj, ai);
Expand Down
3 changes: 2 additions & 1 deletion pangene.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

#include <stdint.h>

#define PG_VERSION "1.0-r186-dirty"
#define PG_VERSION "1.0-r191-dirty"

#define PG_F_WRITE_BED_RAW 0x1
#define PG_F_WRITE_BED_WALK 0x2
Expand All @@ -13,6 +13,7 @@
#define PG_F_FRAG_MODE 0x20
#define PG_F_NO_JOINT_PSEUDO 0x40
#define PG_F_ORI_FOR_BRANCH 0x80
#define PG_F_CHECK_STRAND 0x100

typedef struct {
uint64_t x, y;
Expand Down
4 changes: 2 additions & 2 deletions pgpriv.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ void pg_gen_vtx(const pg_opt_t *opt, pg_graph_t *q);
int32_t pg_mark_branch_flt_arc(const pg_opt_t *opt, pg_graph_t *q);
int32_t pg_mark_branch_flt_hit(const pg_opt_t *opt, pg_graph_t *q);

int32_t pg_flt_ov_isoform(const pg_opt_t *opt, pg_data_t *d, int32_t id);
int32_t pg_shadow(const pg_opt_t *opt, pg_data_t *d, int32_t id, int32_t cal_dom_sc);
int32_t pg_flt_ov_isoform(const pg_opt_t *opt, pg_data_t *d, int32_t id, int32_t check_stand);
int32_t pg_shadow(const pg_opt_t *opt, pg_data_t *d, int32_t id, int32_t cal_dom_sc, int32_t check_strand);

char *pg_strdup(const char *src);

Expand Down
6 changes: 3 additions & 3 deletions read.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ int32_t pg_read_paf(const pg_opt_t *opt, pg_data_t *d, const char *fn)
gzFile fp;
kstream_t *ks;
kstring_t str = {0,0,0};
int32_t dret, absent, n_tot = 0;
int32_t dret, absent, n_tot = 0, check_strand = !!(opt->flag&PG_F_CHECK_STRAND);
void *d_ctg, *hit_rank;
pg_genome_t *g;
pg_exons_t buf = {0,0,0};
Expand Down Expand Up @@ -245,13 +245,13 @@ int32_t pg_read_paf(const pg_opt_t *opt, pg_data_t *d, const char *fn)
n_pseudo = pg_flag_pseudo(d->prot, g);
PG_SET_FILTER(d, pseudo == 1);
pg_hit_sort(g, 0);
pg_shadow(opt, d, d->n_genome - 1, 1);
pg_shadow(opt, d, d->n_genome - 1, 1, check_strand);
for (i = 0; i < g->n_hit; ++i) {
pg_hit_t *a = &g->hit[i];
a->pid_dom0 = a->pid_dom;
a->pid_dom = -1, a->shadow = 0; // reset
}
n_flt_ov_iso = pg_flt_ov_isoform(opt, d, d->n_genome - 1);
n_flt_ov_iso = pg_flt_ov_isoform(opt, d, d->n_genome - 1, check_strand);
n_flt_chain = pg_flt_chain_shadow(d->prot, d->n_prot, g);
n_flt_subopt = pg_flt_subopt_isoform(d->prot, d->n_gene, g);
if (pg_verbose >= 3)
Expand Down

0 comments on commit 1238a53

Please sign in to comment.