Skip to content

Commit

Permalink
vcf/header/record/value/map/info/definition: Add VCF 4.5 info definit…
Browse files Browse the repository at this point in the history
…ions
  • Loading branch information
zaeleus committed Aug 8, 2024
1 parent b51a448 commit 80db6e5
Show file tree
Hide file tree
Showing 4 changed files with 211 additions and 0 deletions.
6 changes: 6 additions & 0 deletions noodles-vcf/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## Unreleased

### Added

* vcf/header/record/value/map/info/definition: Add VCF 4.5 info definitions.

## 0.62.0 - 2024-08-04

### Added
Expand Down
2 changes: 2 additions & 0 deletions noodles-vcf/src/header/record/value/map/info/definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
mod v4_3;
mod v4_4;
mod v4_5;

use crate::header::{
record::value::map::info::{Number, Type},
Expand All @@ -13,6 +14,7 @@ pub(crate) fn definition(
key: &str,
) -> Option<(Number, Type, &'static str)> {
match (file_format.major(), file_format.minor()) {
(4, 5) => v4_5::definition(key),
(4, 4) => v4_4::definition(key),
(4, 3) => v4_3::definition(key),
_ => None,
Expand Down
201 changes: 201 additions & 0 deletions noodles-vcf/src/header/record/value/map/info/definition/v4_5.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
use crate::{
header::record::value::map::info::{Number, Type},
variant::record::info::field::key,
};

/// Looks up the VCF 4.5 definition associated with an INFO field key.
///
/// On a match, the returned tuple holds, in order, the field's `Number`, its `Type`, and its
/// description string. Keys that are not listed here yield `None`.
pub(super) fn definition(key: &str) -> Option<(Number, Type, &'static str)> {
    // Each arm evaluates to the bare definition tuple; it is wrapped in `Some` once at the end,
    // and the fallback arm returns early for keys that are not listed.
    let entry = match key {
        key::ANCESTRAL_ALLELE => (Number::Count(1), Type::String, "Ancestral allele"),
        key::ALLELE_COUNT => (
            Number::AlternateBases,
            Type::Integer,
            "Allele count in genotypes, for each ALT allele, in the same order as listed",
        ),
        key::TOTAL_READ_DEPTHS => (
            Number::ReferenceAlternateBases,
            Type::Integer,
            "Total read depth for each allele",
        ),
        key::FORWARD_STRAND_READ_DEPTHS => (
            Number::ReferenceAlternateBases,
            Type::Integer,
            "Read depth for each allele on the forward strand",
        ),
        key::REVERSE_STRAND_READ_DEPTHS => (
            Number::ReferenceAlternateBases,
            Type::Integer,
            "Read depth for each allele on the reverse strand",
        ),
        key::ALLELE_FREQUENCIES => (
            Number::AlternateBases,
            Type::Float,
            "Allele frequency for each ALT allele in the same order as listed (estimated from primary data, not called genotypes)",
        ),
        key::TOTAL_ALLELE_COUNT => (
            Number::Count(1),
            Type::Integer,
            "Total number of alleles in called genotypes",
        ),
        key::BASE_QUALITY => (Number::Count(1), Type::Float, "RMS base quality"),
        key::CIGAR => (
            Number::AlternateBases,
            Type::String,
            "Cigar string describing how to align an alternate allele to the reference allele",
        ),
        key::IS_IN_DB_SNP => (Number::Count(0), Type::Flag, "dbSNP membership"),
        key::TOTAL_DEPTH => (
            Number::Count(1),
            Type::Integer,
            "Combined depth across samples",
        ),
        key::IS_IN_HAP_MAP_2 => (Number::Count(0), Type::Flag, "HapMap2 membership"),
        key::IS_IN_HAP_MAP_3 => (Number::Count(0), Type::Flag, "HapMap3 membership"),
        key::MAPPING_QUALITY => (Number::Count(1), Type::Float, "RMS mapping quality"),
        key::ZERO_MAPPING_QUALITY_COUNT => {
            (Number::Count(1), Type::Integer, "Number of MAPQ == 0 reads")
        }
        key::SAMPLES_WITH_DATA_COUNT => (
            Number::Count(1),
            Type::Integer,
            "Number of samples with data",
        ),
        key::STRAND_BIAS => (Number::Count(4), Type::Integer, "Strand bias"),
        key::IS_SOMATIC_MUTATION => (
            Number::Count(0),
            Type::Flag,
            "Somatic mutation (for cancer genomics)",
        ),
        key::IS_VALIDATED => (
            Number::Count(0),
            Type::Flag,
            "Validated by follow-up experiment",
        ),
        key::IS_IN_1000_GENOMES => (Number::Count(0), Type::Flag, "1000 Genomes membership"),

        // Structural variant and related keys.
        key::IS_IMPRECISE => (
            Number::Count(0),
            Type::Flag,
            "Imprecise structural variation",
        ),
        key::IS_NOVEL => (
            Number::Count(0),
            Type::Flag,
            "Indicates a novel structural variation",
        ),
        key::END_POSITION => (
            Number::Count(1),
            Type::Integer,
            "Deprecated. Present for backwards compatibility with earlier versions of VCF.",
        ),
        key::SV_TYPE => (Number::Count(1), Type::String, "Type of structural variant"),
        key::SV_LENGTHS => (
            Number::AlternateBases,
            Type::Integer,
            "Length of structural variant",
        ),
        key::POSITION_CONFIDENCE_INTERVALS => (
            Number::Unknown,
            Type::Integer,
            "Confidence interval around POS for symbolic structural variants",
        ),
        key::END_CONFIDENCE_INTERVALS => (
            Number::Unknown,
            Type::Integer,
            "Confidence interval around the inferred END for symbolic structural variants",
        ),
        key::MICROHOMOLOGY_LENGTHS => (
            Number::AlternateBases,
            Type::Integer,
            "Length of base pair identical micro-homology at breakpoints",
        ),
        key::MICROHOMOLOGY_SEQUENCES => (
            Number::AlternateBases,
            Type::String,
            "Sequence of base pair identical micro-homology at breakpoints",
        ),
        key::BREAKPOINT_IDS => (
            Number::AlternateBases,
            Type::String,
            "ID of the assembled alternate allele in the assembly file",
        ),
        key::MOBILE_ELEMENT_INFO => (
            Number::Unknown,
            Type::String,
            "Mobile element info of the form NAME,START,END,POLARITY",
        ),
        key::MOBILE_ELEMENT_TRANSDUCTION_INFO => (
            Number::Unknown,
            Type::String,
            "Mobile element transduction info of the form CHR,START,END,POLARITY",
        ),
        key::DBV_ID => (
            Number::AlternateBases,
            Type::String,
            "ID of this element in Database of Genomic Variation",
        ),
        key::DB_VAR_ID => (
            Number::AlternateBases,
            Type::String,
            "ID of this element in DBVAR",
        ),
        key::DB_RIP_ID => (
            Number::AlternateBases,
            Type::String,
            "ID of this element in DBRIP",
        ),
        key::MATE_BREAKEND_IDS => (
            Number::AlternateBases,
            Type::String,
            "ID of mate breakend",
        ),
        key::PARTNER_BREAKEND_ID => (
            Number::AlternateBases,
            Type::String,
            "ID of partner breakend",
        ),
        key::BREAKEND_EVENT_ID => (
            Number::AlternateBases,
            Type::String,
            "ID of associated event",
        ),
        key::EVENT_TYPE => (
            Number::AlternateBases,
            Type::String,
            "Type of associated event",
        ),
        key::BREAKEND_CONFIDENCE_INTERVALS => (
            Number::Unknown,
            Type::Integer,
            "Confidence interval for the SVLEN field",
        ),
        key::BREAKEND_COPY_NUMBER => (
            Number::AlternateBases,
            Type::Float,
            "Copy number of CNV/breakpoint",
        ),
        key::COPY_NUMBER_CONFIDENCE_INTERVALS => (
            Number::Unknown,
            Type::Float,
            "Confidence interval around copy number",
        ),
        key::SV_CLAIM => (
            Number::AlternateBases,
            Type::String,
            "Claim made by the structural variant call. Valid values are D, J, DJ for abundance, adjacency and both respectively",
        ),
        key::TOTAL_REPEAT_SEQUENCE_COUNTS => (
            Number::AlternateBases,
            Type::Integer,
            "Total number of repeat sequences in this allele",
        ),
        key::REPEAT_UNIT_SEQUENCES => (
            Number::Unknown,
            Type::String,
            "Repeat unit sequence of the corresponding repeat sequence",
        ),
        key::REPEAT_UNIT_LENGTHS => (
            Number::Unknown,
            Type::Integer,
            "Repeat unit length of the corresponding repeat sequence",
        ),
        key::REPEAT_UNIT_COUNTS => (
            Number::Unknown,
            Type::Float,
            "Repeat unit count of corresponding repeat sequence",
        ),
        key::TOTAL_REPEAT_SEQUENCE_BASE_COUNTS => (
            Number::Unknown,
            Type::Integer,
            "Total number of bases in the corresponding repeat sequence",
        ),
        key::REPEAT_UNIT_COUNT_CONFIDENCE_INTERVALS => (
            Number::Unknown,
            Type::Float,
            "Confidence interval around RUC",
        ),
        key::TOTAL_REPEAT_SEQUENCE_BASE_COUNT_CONFIDENCE_INTERVALS => (
            Number::Unknown,
            Type::Integer,
            "Confidence interval around RB",
        ),
        key::REPEAT_UNIT_BASE_COUNTS => (
            Number::Unknown,
            Type::Integer,
            "Number of bases in each individual repeat unit",
        ),

        // No definition is listed for this key.
        _ => return None,
    };

    Some(entry)
}
2 changes: 2 additions & 0 deletions noodles-vcf/src/variant/record/info/field/key.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ pub const IS_IMPRECISE: &str = "IMPRECISE";
pub const IS_NOVEL: &str = "NOVEL";

/// End position of the variant described in this record (`END`).
///
/// Deprecated in VCF 4.5, where it is present only for backwards compatibility with earlier
/// versions of VCF.
pub const END_POSITION: &str = "END";

/// Type of structural variant (`SVTYPE`).
Expand Down

0 comments on commit 80db6e5

Please sign in to comment.