From 02135fcd124825ed4b97005b7e84c21cddfd0daa Mon Sep 17 00:00:00 2001 From: Michael Macias Date: Thu, 21 Mar 2024 14:18:13 -0500 Subject: [PATCH] bcf/record/samples: Remove Samples::try_into_vcf_record_samples --- noodles-bcf/src/async/io/reader/record.rs | 52 ++++++++++++++++++++--- noodles-bcf/src/io/reader/record.rs | 52 ++++++++++++++++++++--- noodles-bcf/src/record/samples.rs | 37 +--------------- 3 files changed, 93 insertions(+), 48 deletions(-) diff --git a/noodles-bcf/src/async/io/reader/record.rs b/noodles-bcf/src/async/io/reader/record.rs index 2fc92727f..1b96227ce 100644 --- a/noodles-bcf/src/async/io/reader/record.rs +++ b/noodles-bcf/src/async/io/reader/record.rs @@ -36,10 +36,20 @@ mod tests { variant::{ record::{ info::{self, field::Value as InfoFieldValue}, - samples, AlternateBases, Filters, Info, + samples::{self, Sample}, + AlternateBases, Filters, Info, Samples, }, record_buf::{ - samples::{sample::Value as GenotypeFieldValue, Keys}, + samples::{ + sample::{ + value::{ + genotype::{allele::Phasing, Allele}, + Genotype, + }, + Value as GenotypeFieldValue, + }, + Keys, + }, Samples as VcfGenotypes, }, }, @@ -116,7 +126,28 @@ mod tests { // genotypes - let actual = record.samples()?.try_into_vcf_record_samples(&header)?; + let samples = record.samples()?; + + let column_names: Vec<_> = samples + .column_names(&header) + .map(|result| result.map(String::from)) + .collect::<io::Result<_>>()?; + let keys = Keys::try_from(column_names) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + + let values = samples + .iter() + .map(|sample| { + sample + .iter(&header) + .map(|result| { + result.and_then(|(_, value)| value.map(|v| v.try_into()).transpose()) + }) + .collect() + }) + .collect::<io::Result<_>>()?; + + let actual = VcfGenotypes::new(keys, values); let expected = VcfGenotypes::new( Keys::try_from(vec![ @@ -128,21 +159,30 @@ mod tests { ])?, vec![ vec![ - Some(GenotypeFieldValue::from("0/0")), + Some(GenotypeFieldValue::Genotype(Genotype::try_from(vec![ 
Allele::new(Some(0), Phasing::Unphased), + Allele::new(Some(0), Phasing::Unphased), + ])?)), Some(GenotypeFieldValue::from(10)), Some(GenotypeFieldValue::from(32)), Some(GenotypeFieldValue::from(vec![Some(32), Some(0)])), Some(GenotypeFieldValue::from(vec![Some(0), Some(10), Some(100)])), ], vec![ - Some(GenotypeFieldValue::from("0/1")), + Some(GenotypeFieldValue::Genotype(Genotype::try_from(vec![ + Allele::new(Some(0), Phasing::Unphased), + Allele::new(Some(1), Phasing::Unphased), + ])?)), Some(GenotypeFieldValue::from(10)), Some(GenotypeFieldValue::from(48)), Some(GenotypeFieldValue::from(vec![Some(32), Some(16)])), Some(GenotypeFieldValue::from(vec![Some(10), Some(0), Some(100)])), ], vec![ - Some(GenotypeFieldValue::from("1/1")), + Some(GenotypeFieldValue::Genotype(Genotype::try_from(vec![ + Allele::new(Some(1), Phasing::Unphased), + Allele::new(Some(1), Phasing::Unphased), + ])?)), Some(GenotypeFieldValue::from(10)), Some(GenotypeFieldValue::from(64)), Some(GenotypeFieldValue::from(vec![Some(0), Some(64)])), diff --git a/noodles-bcf/src/io/reader/record.rs b/noodles-bcf/src/io/reader/record.rs index 3446f942a..ad7bd917f 100644 --- a/noodles-bcf/src/io/reader/record.rs +++ b/noodles-bcf/src/io/reader/record.rs @@ -108,11 +108,21 @@ pub(crate) mod tests { use noodles_vcf::{ self as vcf, variant::{ - record::{info, samples, AlternateBases, Filters, Info}, + record::{ + info, + samples::{self, Sample}, + AlternateBases, Filters, Info, Samples, + }, record_buf::{ info::field::Value as InfoFieldValue, samples::{ - sample::{value::Array, Value as GenotypeFieldValue}, + sample::{ + value::{ + genotype::{allele::Phasing, Allele}, + Array, Genotype, + }, + Value as GenotypeFieldValue, + }, Keys, }, Samples as VcfGenotypes, @@ -186,7 +196,28 @@ pub(crate) mod tests { // genotypes - let actual = record.samples()?.try_into_vcf_record_samples(&header)?; + let samples = record.samples()?; + + let column_names: Vec<_> = samples + .column_names(&header) + .map(|result| 
result.map(String::from)) + .collect::<io::Result<_>>()?; + let keys = Keys::try_from(column_names) + .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; + + let values = samples + .iter() + .map(|sample| { + sample + .iter(&header) + .map(|result| { + result.and_then(|(_, value)| value.map(|v| v.try_into()).transpose()) + }) + .collect() + }) + .collect::<io::Result<_>>()?; + + let actual = VcfGenotypes::new(keys, values); let expected = VcfGenotypes::new( Keys::try_from(vec![ @@ -198,7 +229,10 @@ pub(crate) mod tests { ])?, vec![ vec![ - Some(GenotypeFieldValue::String(String::from("0/0"))), + Some(GenotypeFieldValue::Genotype(Genotype::try_from(vec![ + Allele::new(Some(0), Phasing::Unphased), + Allele::new(Some(0), Phasing::Unphased), + ])?)), Some(GenotypeFieldValue::Integer(10)), Some(GenotypeFieldValue::Integer(32)), Some(GenotypeFieldValue::Array(Array::Integer(vec![ @@ -212,7 +246,10 @@ pub(crate) mod tests { ]))), ], vec![ - Some(GenotypeFieldValue::String(String::from("0/1"))), + Some(GenotypeFieldValue::Genotype(Genotype::try_from(vec![ + Allele::new(Some(0), Phasing::Unphased), + Allele::new(Some(1), Phasing::Unphased), + ])?)), Some(GenotypeFieldValue::Integer(10)), Some(GenotypeFieldValue::Integer(48)), Some(GenotypeFieldValue::Array(Array::Integer(vec![ @@ -226,7 +263,10 @@ pub(crate) mod tests { ]))), ], vec![ - Some(GenotypeFieldValue::String(String::from("1/1"))), + Some(GenotypeFieldValue::Genotype(Genotype::try_from(vec![ + Allele::new(Some(1), Phasing::Unphased), + Allele::new(Some(1), Phasing::Unphased), + ])?)), Some(GenotypeFieldValue::Integer(10)), Some(GenotypeFieldValue::Integer(64)), Some(GenotypeFieldValue::Array(Array::Integer(vec![ diff --git a/noodles-bcf/src/record/samples.rs b/noodles-bcf/src/record/samples.rs index b29d5d577..8429517f7 100644 --- a/noodles-bcf/src/record/samples.rs +++ b/noodles-bcf/src/record/samples.rs @@ -5,7 +5,7 @@ pub mod series; use std::{io, iter}; -use noodles_vcf::{self as vcf, variant::record::Samples as _}; +use 
noodles_vcf as vcf; use self::series::read_series; pub use self::{sample::Sample, series::Series}; @@ -27,41 +27,6 @@ impl<'r> Samples<'r> { } } - /// Converts BCF record samples to VCF record samples. - /// - /// # Examples - /// - /// ``` - /// # use std::io; - /// use noodles_bcf::record::Samples; - /// use noodles_vcf as vcf; - /// - /// let bcf_samples = Samples::default(); - /// - /// let header = vcf::Header::default(); - /// let vcf_samples = bcf_samples.try_into_vcf_record_samples(&header)?; - /// - /// assert!(vcf_samples.is_empty()); - /// # Ok::<_, io::Error>(()) - /// ``` - pub fn try_into_vcf_record_samples( - &self, - header: &vcf::Header, - ) -> io::Result<vcf::variant::record_buf::Samples> { - use crate::record::codec::decoder::read_samples; - - if self.is_empty() { - return Ok(vcf::variant::record_buf::Samples::default()); - } - - let mut reader = self.src; - - let genotypes = read_samples(&mut reader, header, self.len(), self.format_count()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?; - - Ok(genotypes) - } - /// Returns the number of fields per sample. /// /// # Examples