Skip to content

Commit

Permalink
Release 1.17
Browse files Browse the repository at this point in the history
  • Loading branch information
daviesrob committed Feb 21, 2023
2 parents e7f638b + fac806b commit 116a87c
Show file tree
Hide file tree
Showing 144 changed files with 6,863 additions and 1,785 deletions.
4 changes: 2 additions & 2 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ rockylinux_task:

macosx_task:
name: macosx + clang
osx_instance:
image: catalina-base
macos_instance:
image: ghcr.io/cirruslabs/macos-ventura-base:latest

environment:
CC: clang
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ the INSTALL document), the use of this software is governed by the GPL license.

The MIT/Expat License

Copyright (C) 2012-2021 Genome Research Ltd.
Copyright (C) 2012-2023 Genome Research Ltd.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
6 changes: 4 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ OBJS = main.o vcfindex.o tabix.o \
vcfcall.o mcall.o vcmp.o gvcf.o reheader.o convert.o vcfconvert.o tsv2vcf.o \
vcfcnv.o vcfhead.o HMM.o consensus.o ploidy.o bin.o hclust.o version.o \
regidx.o smpl_ilist.o csq.o vcfbuf.o \
mpileup.o bam2bcf.o bam2bcf_indel.o bam_sample.o \
mpileup.o bam2bcf.o bam2bcf_indel.o bam2bcf_iaux.o read_consensus.o bam_sample.o \
vcfsort.o cols.o extsort.o dist.o abuf.o \
ccall.o em.o prob1.o kmin.o str_finder.o
PLUGIN_OBJS = vcfplugin.o
Expand Down Expand Up @@ -104,7 +104,7 @@ endif

include config.mk

PACKAGE_VERSION = 1.16
PACKAGE_VERSION = 1.17

# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git
# description of the working tree: either a release tag with the same value
Expand Down Expand Up @@ -279,6 +279,8 @@ consensus.o: consensus.c $(htslib_vcf_h) $(htslib_kstring_h) $(htslib_synced_bcf
mpileup.o: mpileup.c $(htslib_sam_h) $(htslib_faidx_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(htslib_hts_os_h) regidx.h $(bcftools_h) $(bam2bcf_h) $(bam_sample_h) $(gvcf_h)
bam2bcf.o: bam2bcf.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_kstring_h) $(htslib_kfunc_h) $(bam2bcf_h) mw.h
bam2bcf_indel.o: bam2bcf_indel.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) str_finder.h
bam2bcf_iaux.o: bam2bcf_iaux.c $(htslib_hts_h) $(htslib_sam_h) $(htslib_khash_str2int_h) $(bam2bcf_h) $(htslib_ksort_h) str_finder.h read_consensus.h cigar_state.h
read_consensus.o: read_consensus.c read_consensus.h cigar_state.h $(htslib_hts_h) $(htslib_sam_h)
bam_sample.o: bam_sample.c $(htslib_hts_h) $(htslib_kstring_h) $(htslib_khash_str2int_h) $(khash_str2str_h) $(bam_sample_h) $(bcftools_h)
version.o: version.h version.c
hclust.o: hclust.c $(htslib_hts_h) $(htslib_kstring_h) $(bcftools_h) hclust.h
Expand Down
167 changes: 165 additions & 2 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,6 +1,170 @@
## Release 1.16 (18th August 2022)
## Release 1.17 (21st February 2023)


Changes affecting the whole of bcftools, or multiple commands:

* The -i/-e filtering expressions

- Error checks were added to prevent incorrect use of vector arithmetics. For example,
when evaluating the sum of two vectors A and B, the resulting vector could contain
nonsense values when the input vectors were not of the same length. The fix introduces
the following logic:
- evaluate to C_i = A_i + B_i when length(A)==length(B) and set length(C)=length(A)
- evaluate to C_i = A_i + B_0 when length(B)=1 and set length(C)=length(A)
- evaluate to C_i = A_0 + B_i when length(A)=1 and set length(C)=length(B)
- throw an error when length(A)!=length(B) AND length(A)!=1 AND length(B)!=1

- Arrays in Number=R tags can be now subscripted by alleles found in FORMAT/GT. For example,

FORMAT/AD[GT] > 10 .. require support of more than 10 reads for each allele
FORMAT/AD[0:GT] > 10 .. same as above, but in the first sample
sSUM(FORMAT/AD[GT]) > 20 .. require total sample depth bigger than 20

* The commands `consensus -H` and `+split-vep -H`

- Drop unnecessary leading space in the first header column and newly print `#[1]columnName`
instead of the previous `# [1]columnName` (#1856)


Changes affecting specific commands:

* bcftools +allele-length

- Fix overflow for indels longer than 512bp and aggregate alleles equal or larger than
that in the same bin (#1837)

* bcftools annotate

- Support sample reordering of annotation file (#1785)

- Restore lost functionality of the --pair-logic option (#1808)

* bcftools call

- Fix a bug where too many alleles passed to `-C alleles` via `-T` caused memory
corruption (#1790)

- Fix a bug where indels constrained with `-C alleles -T` would sometimes be missed (#1706)

* bcftools consensus

- BREAKING CHANGE: the option `-I, --iupac-codes` newly outputs IUPAC codes based on FORMAT/GT
of all samples. The `-s, --samples` and `-S, --samples-file` options can be used to subset
samples. In order to ignore samples and consider only the REF and ALT columns (the original
behavior prior to 1.17), run with `-s -` (#1828)

* bcftools convert

- Make variantkey conversion work for sites without an ALT allele (#1806)

* bcftools csq

- Fix a bug where a MNV with multiple consequences (e.g. missense + stop_gained)
would report only the less severe one (#1810)

- GFF file parsing was made slightly more flexible, newly ids can be just 'XXX'
rather than, for example, 'gene:XXX'

- New gff2gff perl script to fix GFF formatting differences

* bcftools +fill-tags

- More of the available annotations are now added by the `-t all` option

* bcftools +fixref

- New INFO/FIXREF annotation

- New -m swap mode

* bcftools +mendelian

- The +mendelian plugin has been deprecated and replaced with +mendelian2. The
function of the plugin is the same but the command line options and the output
format have changed, and for this reason it was introduced as a new plugin.

* bcftools mpileup

- Most of the annotations generated by mpileup are now optional via the
`-a, --annotate` option, and several new (mostly experimental) annotations were added.

- New option `--indels-2.0` for an EXPERIMENTAL indel calling model. This model aims
to address some known deficiencies of the current indel calling algorithm, specifically,
it uses diploid reference consensus sequence. Note that in the current version it
has the potential to increase sensitivity but at the cost of decreased specificity.

- Make the FS annotation (Fisher exact test strand bias) functional and remove it
from the default annotations

* bcftools norm

- New --multi-overlaps option allows to set overlapping alleles either to the
ref allele (the current default) or to a missing allele (#1764 and #1802)

- Fixed a bug in `-m -` which did not split missing FORMAT values correctly and
could lead to empty FORMAT fields such as `::` instead of the correct `:.:` (#1818)

- The `--atomize` option previously would not split complex indels such as C>GGG.
Newly these will be split into two records C>G and C>CGG (#1832)

* bcftools query

- Fix a rare bug where the printing of SAMPLE field with `query` was incorrectly
suppressed when the `-e` option contained a sample expression while the formatting
query did not. See #1783 for details.

* bcftools +setGT

- Add new `--new-gt X` option (#1800)

- Add new `--target-gt r:FLOAT` option to randomly select a proportion of genotypes (#1850)

- Fix a bug where `-t ./x` mode was advertised as selecting both phased and unphased
half-missing genotypes, but was in fact selecting only unphased genotypes (#1844)

* bcftools +split-vep

- New options `-g, --gene-list` and `--gene-list-fields` which allow to prioritize
consequences from a list of genes, or restrict output to the listed genes

- New `-H, --print-header` option to print the header with `-f`

- Work around a bug in the LOFTEE VEP plugin used to annotate gnomAD VCFs. There the
LoF_info subfield contains commas which, in general, makes it impossible to parse the
VEP subfields. The +split-vep plugin can now work with such files, replacing the offending
commas with slash (/) characters. See also https://github.com/Ensembl/ensembl-vep/issues/1351

- Newly the `-c, --columns` option can be omitted when a subfield is used in `-i/-e` filtering
expression. Note that `-c` may still have to be given when it is not possible to infer the
type of the subfield. Note that this is an experimental feature.

* bcftools stats

- The per-sample stats (PSC) would not be computed when `-i/-e` filtering options and
the `-s -` option were given but the expression did not include sample columns (#1835)

* bcftools +tag2tag

- Revamp of the plugin to allow wider range of tag conversions, specifically all combinations
from FORMAT/GL,PL,GP to FORMAT/GL,PL,GP,GT

* bcftools +trio-dnm2

- New `-n, --strictly-novel` option to downplay alleles which violate Mendelian
inheritance but are not novel

- Allow to set the `--pn` and `--pns` options separately for SNVs and indels and make
the indel settings more strict by default

- Output missing FORMAT/VAF values in non-trio samples, rather than random nonsense values

* bcftools +variant-distance

- New option `-d, --direction` to choose the directionality: forward, reverse, nearest (the default)
or both (#1829)


## Release 1.16 (18th August 2022)

* New plugin `bcftools +variant-distance` to annotate records with distance to the
nearest variant (#1690)
Expand Down Expand Up @@ -44,7 +208,6 @@ Changes affecting specific commands:

- Custom genotypes (e.g. `-n c:1/1`) now correctly override ploidy


## Release 1.15.1 (7th April 2022)


Expand Down
21 changes: 17 additions & 4 deletions abuf.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* The MIT License
Copyright (c) 2021-2022 Genome Research Ltd.
Copyright (c) 2021-2023 Genome Research Ltd.
Author: Petr Danecek <[email protected]>
Expand Down Expand Up @@ -154,22 +154,33 @@ static void _atomize_allele(abuf_t *buf, bcf1_t *rec, int ial)
assert(atom);
if ( altb!='-' ) kputc(altb, &atom->alt);
if ( refb!='-' ) { kputc(refb, &atom->ref); atom->end++; }
continue;
}
else
buf->natoms++;
hts_expand0(atom_t,buf->natoms,buf->matoms,buf->atoms);
atom = &buf->atoms[buf->natoms-1];
atom->ref.l = 0;
atom->alt.l = 0;
kputc(refb, &atom->ref);
kputc(altb, &atom->alt);
atom->beg = atom->end = i;
atom->ial = ial;

if ( rlen!=alen && (i+1>=rlen || i+1>=alen) ) // the next base is an indel combined with SNV, e.g. C>GGG?
{
buf->natoms++;
hts_expand0(atom_t,buf->natoms,buf->matoms,buf->atoms);
atom = &buf->atoms[buf->natoms-1];
atom->ref.l = 0;
atom->alt.l = 0;
kputc(refb, &atom->ref);
kputc(altb, &atom->alt);
kputc(refb, &atom->alt);
atom->beg = atom->end = i;
atom->ial = ial;
}
continue;
}
if ( i+1>=rlen || i+1>=alen ) // is the next base a deletion?
if ( i+1>=rlen || i+1>=alen ) // is the next base an indel?
{
buf->natoms++;
hts_expand0(atom_t,buf->natoms,buf->matoms,buf->atoms);
Expand Down Expand Up @@ -742,6 +753,8 @@ void _abuf_split(abuf_t *buf, bcf1_t *rec)
_split_table_overlap(buf, j, atom);
}
}
// _split_table_print(buf);
// _split_table_print_atoms(buf);
assert( !buf->rbuf.n ); // all records should be flushed first in the SPLIT mode

// Create the output records, transferring all annotations:
Expand Down
Loading

0 comments on commit 116a87c

Please sign in to comment.