From ab5888c69bd407028d5a6efdebe191fc6b25c15d Mon Sep 17 00:00:00 2001
From: Michael Macias
Date: Tue, 26 Nov 2024 11:41:47 -0600
Subject: [PATCH] gff/io/reader/line: Skip blank lines

See _Generic Feature Format Version 3 (GFF3)_ (2020-08-18): "Blank lines
should be ignored by parsers..."
---
 noodles-gff/CHANGELOG.md                |  5 ++++
 noodles-gff/src/async/io/reader/line.rs | 37 +++++++++++++++++++++--
 noodles-gff/src/io/reader.rs            |  2 +-
 noodles-gff/src/io/reader/line.rs       | 39 +++++++++++++++++++++++--
 4 files changed, 78 insertions(+), 5 deletions(-)

diff --git a/noodles-gff/CHANGELOG.md b/noodles-gff/CHANGELOG.md
index 8f22fc9e4..f86d5034d 100644
--- a/noodles-gff/CHANGELOG.md
+++ b/noodles-gff/CHANGELOG.md
@@ -26,6 +26,11 @@
 
     This changes `Reader::records` to `Reader::record_bufs`.
 
+  * gff/io/reader/line: Skip blank lines.
+
+    See _Generic Feature Format Version 3 (GFF3)_ (2020-08-18): "Blank lines
+    should be ignored by parsers..."
+
   * gff/line: Hoist buffer to line.
 
     This moves the owned line buffer to `Line`. The structure is now a struct
diff --git a/noodles-gff/src/async/io/reader/line.rs b/noodles-gff/src/async/io/reader/line.rs
index 6d9119cff..d2b8b5ba8 100644
--- a/noodles-gff/src/async/io/reader/line.rs
+++ b/noodles-gff/src/async/io/reader/line.rs
@@ -6,7 +6,40 @@ pub(super) async fn read_line<R>(reader: &mut R, line: &mut Line) -> io::Result<
 where
     R: AsyncBufRead + Unpin,
 {
+    use crate::io::reader::line::is_blank;
+
     let buf = &mut line.0;
-    buf.clear();
-    super::read_line(reader, buf).await
+
+    loop {
+        buf.clear();
+
+        let n = super::read_line(reader, buf).await?;
+
+        if n == 0 || !is_blank(buf) {
+            return Ok(n);
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[tokio::test]
+    async fn test_read_line() -> io::Result<()> {
+        const DATA: &[u8] = b"\n#comment\n\t\n";
+
+        let mut line = Line::default();
+        let mut lines: Vec<String> = Vec::new();
+
+        let mut src = DATA;
+
+        while read_line(&mut src, &mut line).await? != 0 {
+            lines.push(line.as_ref().into());
+        }
+
+        assert_eq!(lines, [String::from("#comment")]);
+
+        Ok(())
+    }
 }
diff --git a/noodles-gff/src/io/reader.rs b/noodles-gff/src/io/reader.rs
index d0268c7a7..942863687 100644
--- a/noodles-gff/src/io/reader.rs
+++ b/noodles-gff/src/io/reader.rs
@@ -1,6 +1,6 @@
 //! GFF reader and iterators.
 
-mod line;
+pub(crate) mod line;
 mod line_bufs;
 mod record_bufs;
 
diff --git a/noodles-gff/src/io/reader/line.rs b/noodles-gff/src/io/reader/line.rs
index 8ed189bc5..b880b3a98 100644
--- a/noodles-gff/src/io/reader/line.rs
+++ b/noodles-gff/src/io/reader/line.rs
@@ -7,6 +7,41 @@ where
     R: BufRead,
 {
     let buf = &mut line.0;
-    buf.clear();
-    super::read_line(reader, buf)
+
+    loop {
+        buf.clear();
+
+        let n = super::read_line(reader, buf)?;
+
+        if n == 0 || !is_blank(buf) {
+            return Ok(n);
+        }
+    }
+}
+
+pub(crate) fn is_blank(s: &str) -> bool {
+    s.chars().all(char::is_whitespace)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_read_line() -> io::Result<()> {
+        const DATA: &[u8] = b"\n#comment\n\t\n";
+
+        let mut line = Line::default();
+        let mut lines: Vec<String> = Vec::new();
+
+        let mut src = DATA;
+
+        while read_line(&mut src, &mut line)? != 0 {
+            lines.push(line.as_ref().into());
+        }
+
+        assert_eq!(lines, [String::from("#comment")]);
+
+        Ok(())
+    }
 }