Skip to content

Commit

Permalink
Release 1.16
Browse files Browse the repository at this point in the history
  • Loading branch information
daviesrob committed Aug 18, 2022
2 parents c28190b + bad8b54 commit e7f638b
Show file tree
Hide file tree
Showing 86 changed files with 8,269 additions and 7,358 deletions.
10 changes: 5 additions & 5 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ gcc_task:
ubuntu_task:
name: ubuntu-clang
container:
image: ubuntu:devel
image: ubuntu:latest
cpu: 2
memory: 1G

Expand All @@ -103,7 +103,7 @@ ubuntu_task:
apt-get install -y --no-install-suggests --no-install-recommends \
ca-certificates clang git autoconf automake \
make zlib1g-dev libbz2-dev liblzma-dev libcurl4-gnutls-dev \
libssl-dev libdeflate-dev libncurses5-dev
libssl-dev libdeflate-dev
<< : *COMPILE
<< : *TEST
Expand All @@ -113,7 +113,7 @@ ubuntu_task:
rockylinux_task:
name: rockylinux-gcc
container:
image: rockylinux:latest
image: rockylinux:9
cpu: 2
memory: 1G

Expand All @@ -126,8 +126,8 @@ rockylinux_task:
# NB: we could consider building a docker image with these
# preinstalled and specifying that instead, to speed up testing.
install_script: |
yum install -y autoconf automake make gcc perl-Data-Dumper zlib-devel \
bzip2 bzip2-devel xz-devel curl-devel openssl-devel ncurses-devel \
yum install -y autoconf automake make gcc perl-Data-Dumper perl-FindBin \
zlib-devel bzip2 bzip2-devel xz-devel curl-devel openssl-devel \
git diffutils
<< : *COMPILE
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ endif

include config.mk

PACKAGE_VERSION = 1.15.1
PACKAGE_VERSION = 1.16

# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
# description of the working tree: either a release tag with the same value
Expand Down
48 changes: 47 additions & 1 deletion NEWS
Original file line number Diff line number Diff line change
@@ -1,3 +1,50 @@
## Release 1.16 (18th August 2022)



* New plugin `bcftools +variant-distance` to annotate records with distance to the
nearest variant (#1690)


Changes affecting the whole of bcftools, or multiple commands:

* The -i/-e filtering expressions

- Added support for querying of multiple filters, for example `-i 'FILTER="A;B"'`
can be used to select sites with two filters "A" and "B" set. See the documentation
for more examples.

- Added modulo arithmetic operator

Changes affecting specific commands:

* bcftools annotate

- Fixed a bug introduced in 1.14 where records with an INFO/END annotation would
incorrectly trigger the `-c ~INFO/END` mode of comparison even when not explicitly
requested, with the result that the annotation was not transferred from a
tab-delimited file (#1733)

* bcftools merge

- New `-m snp-ins-del` switch to merge SNVs, insertions and deletions separately (#1704)

* bcftools mpileup

- New NMBZ annotation for Mann-Whitney U-z test on number of mismatches within
supporting reads

- Suppress the output of MQSBZ and FS annotations in the absence of an alternate allele

* bcftools +scatter

- Fix erroneous addition of duplicate PG lines

* bcftools +setGT

- Custom genotypes (e.g. `-n c:1/1`) now correctly override ploidy


## Release 1.15.1 (7th April 2022)


Expand Down Expand Up @@ -44,7 +91,6 @@

## Release 1.15 (21st February 2022)


* New `bcftools head` subcommand for conveniently displaying the headers
of a VCF or BCF file. Without any options, this is equivalent to
`bcftools view --header-only --no-version` but more succinct and memorable.
Expand Down
156 changes: 120 additions & 36 deletions bam2bcf.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/* bam2bcf.c -- variant calling.
Copyright (C) 2010-2012 Broad Institute.
Copyright (C) 2012-2021 Genome Research Ltd.
Copyright (C) 2012-2022 Genome Research Ltd.
Author: Heng Li <[email protected]>
Expand Down Expand Up @@ -89,6 +89,39 @@ void bcf_call_destroy(bcf_callaux_t *bca)
free(bca->bases); free(bca->inscns); free(bca);
}

/*
 * get_aux_nm() - turn a read's NM aux tag into a bounded mismatch-count bin.
 *
 *  rec:    alignment record whose "NM" tag and CIGAR are inspected
 *  qpos:   query position (accepted for interface compatibility; not used here)
 *  is_ref: non-zero when the read carries the reference allele
 *
 * Returns -1 when the record has no NM tag, otherwise a value in the
 * range [0, B2B_N_NM-1].
 */
static int get_aux_nm(bam1_t *rec, int32_t qpos, int is_ref)
{
    uint8_t *tag = bam_aux_get(rec, "NM");
    if ( tag == NULL ) return -1;

    int64_t n_mismatch = bam_aux2i(tag);

    // NM counts every inserted/deleted base; here each indel is treated as
    // one event instead, and soft-clipped bases are added as mismatches.
    int icig;
    for (icig = 0; icig < rec->core.n_cigar; icig++)
    {
        int op  = bam_get_cigar(rec)[icig] & BAM_CIGAR_MASK;
        int len = bam_get_cigar(rec)[icig] >> BAM_CIGAR_SHIFT;

        switch (op)
        {
            case BAM_CSOFT_CLIP:
                n_mismatch += len;
                break;

            case BAM_CINS:
            case BAM_CDEL:
                // an indel of length len contributes 1, not len
                if ( len > 1 ) n_mismatch -= len - 1;
                break;

            default:
                break;
        }
    }

    // Take into account MNPs, 2% of de novo SNVs appear within 20bp of another de novo SNV
    // http://www.genome.org/cgi/doi/10.1101/gr.239756.118
    if ( is_ref )
        n_mismatch -= 1;
    else
        n_mismatch -= 2;

    // Clamp into the valid bin range
    if ( n_mismatch < 0 ) n_mismatch = 0;
    if ( n_mismatch >= B2B_N_NM ) n_mismatch = B2B_N_NM - 1;

    return n_mismatch;
}

// position in the sequence with respect to the aligned part of the read
static int get_position(const bam_pileup1_t *p, int *len,
int *sc_len, int *sc_dist) {
Expand Down Expand Up @@ -158,6 +191,17 @@ void bcf_callaux_clean(bcf_callaux_t *bca, bcf_call_t *call)
if ( call->ADF ) memset(call->ADF,0,sizeof(int32_t)*(call->n+1)*B2B_MAX_ALLELES);
if ( call->ADR ) memset(call->ADR,0,sizeof(int32_t)*(call->n+1)*B2B_MAX_ALLELES);
if ( call->SCR ) memset(call->SCR,0,sizeof(*call->SCR)*(call->n+1));
if ( call->SCR ) memset(call->SCR,0,sizeof(*call->SCR)*(call->n+1));
if ( bca->fmt_flag&B2B_FMT_NMBZ )
{
memset(call->ref_nm,0,sizeof(*call->ref_nm)*(call->n+1)*B2B_N_NM);
memset(call->alt_nm,0,sizeof(*call->alt_nm)*(call->n+1)*B2B_N_NM);
}
else
{
memset(call->ref_nm,0,sizeof(*call->ref_nm)*B2B_N_NM);
memset(call->alt_nm,0,sizeof(*call->alt_nm)*B2B_N_NM);
}
memset(call->QS,0,sizeof(*call->QS)*call->n*B2B_MAX_ALLELES);
memset(bca->ref_scl, 0, 100*sizeof(int));
memset(bca->alt_scl, 0, 100*sizeof(int));
Expand Down Expand Up @@ -309,28 +353,38 @@ int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t
if (sc_len > 99) sc_len = 99;
}
}

int imq = mapQ * nqual_over_60;
int ibq = baseQ * nqual_over_60;
int inm = get_aux_nm(p->b,p->qpos,is_diff?0:1);

if ( bam_is_rev(p->b) )
bca->rev_mqs[imq]++;
else
bca->fwd_mqs[imq]++;

if ( bam_seqi(bam_get_seq(p->b),p->qpos) == ref_base )
if ( !is_diff )
{
bca->ref_pos[epos]++;
bca->ref_bq[ibq]++;
bca->ref_mq[imq]++;
bca->ref_scl[sc_len]++;
if ( inm>=0 )
{
bca->ref_nm[inm]++;
if ( r->ref_nm ) r->ref_nm[inm]++;
}
}
else
{
bca->alt_pos[epos]++;
bca->alt_bq[ibq]++;
bca->alt_mq[imq]++;
bca->alt_scl[sc_len]++;
if ( inm>=0 )
{
bca->alt_nm[inm]++;
if ( r->alt_nm ) r->alt_nm[inm]++;
}
}
}

Expand Down Expand Up @@ -798,6 +852,7 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
call->n_alleles = j;
if (call->n_alleles == 1) return -1; // no reliable supporting read. stop doing anything
}
int has_alt = (call->n_alleles==2 && call->unseen!=-1) ? 0 : 1;
/*
* Set the Phred likelihood array (call->PL). This array is 15 entries long
* for each sample because that is size of an upper or lower triangle of a
Expand Down Expand Up @@ -914,6 +969,9 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
for (j = 0; j < 16; ++j) call->anno[j] += calls[i].anno[j];
}

// No need to calculate MWU tests when there is no ALT allele, this should speed up things slightly
if ( !has_alt ) return 0;

calc_SegBias(calls, call);

// calc_chisq_bias("XPOS", call->bcf_hdr->id[BCF_DT_CTG][call->tid].key, call->pos, bca->ref_pos, bca->alt_pos, bca->npos);
Expand All @@ -922,7 +980,7 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int

if (bca->fmt_flag & B2B_INFO_ZSCORE) {
// U z-normalised as +/- number of standard deviations from mean.
if (call->ori_ref < 0) {
if (call->ori_ref < 0) { // indel
if (bca->fmt_flag & B2B_INFO_RPB)
call->mwu_pos = calc_mwu_biasZ(bca->iref_pos, bca->ialt_pos,
bca->npos, 0, 1);
Expand All @@ -945,6 +1003,15 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
call->mwu_sc = calc_mwu_biasZ(bca->ref_scl, bca->alt_scl,
100, 0,1);
}
call->mwu_nm[0] = calc_mwu_biasZ(bca->ref_nm, bca->alt_nm, B2B_N_NM,0,1);
if ( bca->fmt_flag & B2B_FMT_NMBZ )
{
for (i=0; i<n; i++)
{
float val = calc_mwu_biasZ(calls[i].ref_nm, calls[i].alt_nm, B2B_N_NM,0,1);
call->mwu_nm[i+1] = val!=HUGE_VAL ? val : 0;
}
}
} else {
// Old method; U as probability between 0 and 1
if ( bca->fmt_flag & B2B_INFO_RPB )
Expand Down Expand Up @@ -976,7 +1043,7 @@ int bcf_call_combine(int n, const bcf_callret1_t *calls, bcf_callaux_t *bca, int
int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag, const bcf_callaux_t *bca, const char *ref)
{
extern double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two);
int i, j, nals = 1;
int i, j, nals = 1, has_alt = 0;

bcf_hdr_t *hdr = bc->bcf_hdr;
rec->rid = bc->tid;
Expand Down Expand Up @@ -1006,6 +1073,7 @@ int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag,
for (j = 0; j < bca->indelreg; ++j) kputc(ref[bc->pos+1+j], &bc->tmp);
}
nals++;
has_alt = 1;
}
}
else // SNP
Expand All @@ -1016,7 +1084,11 @@ int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag,
if (bc->a[i] < 0) break;
kputc(',', &bc->tmp);
if ( bc->unseen==i ) kputs("<*>", &bc->tmp);
else kputc("ACGT"[bc->a[i]], &bc->tmp);
else
{
kputc("ACGT"[bc->a[i]], &bc->tmp);
has_alt = 1;
}
nals++;
}
}
Expand Down Expand Up @@ -1052,40 +1124,46 @@ int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag,
bcf_update_info_float(hdr, rec, "I16", tmpf, 16);
bcf_update_info_float(hdr, rec, "QS", bc->qsum, nals);

if ( bc->vdb != HUGE_VAL ) bcf_update_info_float(hdr, rec, "VDB", &bc->vdb, 1);
if ( bc->seg_bias != HUGE_VAL ) bcf_update_info_float(hdr, rec, "SGB", &bc->seg_bias, 1);

if (bca->fmt_flag & B2B_INFO_ZSCORE) {
if ( bc->mwu_pos != HUGE_VAL )
bcf_update_info_float(hdr, rec, "RPBZ", &bc->mwu_pos, 1);
if ( bc->mwu_mq != HUGE_VAL )
bcf_update_info_float(hdr, rec, "MQBZ", &bc->mwu_mq, 1);
if ( bc->mwu_mqs != HUGE_VAL )
bcf_update_info_float(hdr, rec, "MQSBZ", &bc->mwu_mqs, 1);
if ( bc->mwu_bq != HUGE_VAL )
bcf_update_info_float(hdr, rec, "BQBZ", &bc->mwu_bq, 1);
if ( bc->mwu_sc != HUGE_VAL )
bcf_update_info_float(hdr, rec, "SCBZ", &bc->mwu_sc, 1);
} else {
if ( bc->mwu_pos != HUGE_VAL )
bcf_update_info_float(hdr, rec, "RPB", &bc->mwu_pos, 1);
if ( bc->mwu_mq != HUGE_VAL )
bcf_update_info_float(hdr, rec, "MQB", &bc->mwu_mq, 1);
if ( bc->mwu_mqs != HUGE_VAL )
bcf_update_info_float(hdr, rec, "MQSB", &bc->mwu_mqs, 1);
if ( bc->mwu_bq != HUGE_VAL )
bcf_update_info_float(hdr, rec, "BQB", &bc->mwu_bq, 1);
}
if ( has_alt )
{
if ( bc->vdb != HUGE_VAL ) bcf_update_info_float(hdr, rec, "VDB", &bc->vdb, 1);
if ( bc->seg_bias != HUGE_VAL ) bcf_update_info_float(hdr, rec, "SGB", &bc->seg_bias, 1);

if (bca->fmt_flag & B2B_INFO_ZSCORE) {
if ( bc->mwu_pos != HUGE_VAL )
bcf_update_info_float(hdr, rec, "RPBZ", &bc->mwu_pos, 1);
if ( bc->mwu_mq != HUGE_VAL )
bcf_update_info_float(hdr, rec, "MQBZ", &bc->mwu_mq, 1);
if ( bc->mwu_mqs != HUGE_VAL )
bcf_update_info_float(hdr, rec, "MQSBZ", &bc->mwu_mqs, 1);
if ( bc->mwu_bq != HUGE_VAL )
bcf_update_info_float(hdr, rec, "BQBZ", &bc->mwu_bq, 1);
if ( bc->mwu_nm[0] != HUGE_VAL )
bcf_update_info_float(hdr, rec, "NMBZ", bc->mwu_nm, 1);
if ( bc->mwu_sc != HUGE_VAL )
bcf_update_info_float(hdr, rec, "SCBZ", &bc->mwu_sc, 1);
} else {
if ( bc->mwu_pos != HUGE_VAL )
bcf_update_info_float(hdr, rec, "RPB", &bc->mwu_pos, 1);
if ( bc->mwu_mq != HUGE_VAL )
bcf_update_info_float(hdr, rec, "MQB", &bc->mwu_mq, 1);
if ( bc->mwu_mqs != HUGE_VAL )
bcf_update_info_float(hdr, rec, "MQSB", &bc->mwu_mqs, 1);
if ( bc->mwu_bq != HUGE_VAL )
bcf_update_info_float(hdr, rec, "BQB", &bc->mwu_bq, 1);
}

if ( bc->strand_bias != HUGE_VAL )
bcf_update_info_float(hdr, rec, "FS", &bc->strand_bias, 1);
if ( bc->strand_bias != HUGE_VAL )
bcf_update_info_float(hdr, rec, "FS", &bc->strand_bias, 1);

#if CDF_MWU_TESTS
if ( bc->mwu_pos_cdf != HUGE_VAL ) bcf_update_info_float(hdr, rec, "RPB2", &bc->mwu_pos_cdf, 1);
if ( bc->mwu_mq_cdf != HUGE_VAL ) bcf_update_info_float(hdr, rec, "MQB2", &bc->mwu_mq_cdf, 1);
if ( bc->mwu_mqs_cdf != HUGE_VAL ) bcf_update_info_float(hdr, rec, "MQSB2", &bc->mwu_mqs_cdf, 1);
if ( bc->mwu_bq_cdf != HUGE_VAL ) bcf_update_info_float(hdr, rec, "BQB2", &bc->mwu_bq_cdf, 1);
if ( bc->mwu_pos_cdf != HUGE_VAL ) bcf_update_info_float(hdr, rec, "RPB2", &bc->mwu_pos_cdf, 1);
if ( bc->mwu_mq_cdf != HUGE_VAL ) bcf_update_info_float(hdr, rec, "MQB2", &bc->mwu_mq_cdf, 1);
if ( bc->mwu_mqs_cdf != HUGE_VAL ) bcf_update_info_float(hdr, rec, "MQSB2", &bc->mwu_mqs_cdf, 1);
if ( bc->mwu_bq_cdf != HUGE_VAL ) bcf_update_info_float(hdr, rec, "BQB2", &bc->mwu_bq_cdf, 1);
#endif
}

tmpf[0] = bc->ori_depth ? (float)bc->mq0/bc->ori_depth : 0;
bcf_update_info_float(hdr, rec, "MQ0F", tmpf, 1);

Expand Down Expand Up @@ -1144,5 +1222,11 @@ int bcf_call2bcf(bcf_call_t *bc, bcf1_t *rec, bcf_callret1_t *bcr, int fmt_flag,
if ( fmt_flag&B2B_FMT_QS )
bcf_update_format_int32(hdr, rec, "QS", bc->QS, rec->n_sample*rec->n_allele);

if ( has_alt )
{
if ( fmt_flag&B2B_FMT_NMBZ )
bcf_update_format_float(hdr, rec, "NMBZ", bc->mwu_nm+1, rec->n_sample);
}

return 0;
}
Loading

0 comments on commit e7f638b

Please sign in to comment.