Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade nom dependency to 7.1.3 #41

Merged
merged 5 commits into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "warc"
version = "0.3.3"
version = "0.4.0"
description = "A Rust library for reading and writing WARC files."
readme = "README.md"
repository = "https://github.com/jedireza/warc"
Expand All @@ -12,7 +12,7 @@ edition = "2018"

[dependencies]
chrono = "0.4.11"
nom = "5.1.1"
nom = "7.1.3"
url = "2"
uuid = { version = "0.8.1", features = ["v4"] }

Expand Down
6 changes: 3 additions & 3 deletions examples/read_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ fn main() -> Result<(), std::io::Error> {
match record {
Err(err) => println!("ERROR: {}\r\n", err),
Ok(record) => {
println!("{}: {}", WarcHeader::RecordID.to_string(), record.warc_id(),);
println!("{}: {}", WarcHeader::Date.to_string(), record.date(),);
println!("");
println!("{}: {}", WarcHeader::RecordID, record.warc_id(),);
println!("{}: {}", WarcHeader::Date, record.date(),);
println!();
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion examples/read_filtered.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ fn main() -> std::io::Result<()> {

let filtered_file_names: Vec<_> = args.map(|s| s.to_string_lossy().to_string()).collect();
if filtered_file_names.is_empty() {
return Err(usage_err!("one or more filtered file names not supplied"))?;
Err(usage_err!("one or more filtered file names not supplied"))?;
}

let mut file = WarcReader::from_path_gzip(warc_name)?;
Expand Down
6 changes: 3 additions & 3 deletions examples/read_gzip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ fn main() -> Result<(), std::io::Error> {
match record {
Err(err) => println!("ERROR: {}\r\n", err),
Ok(record) => {
println!("{}: {}", WarcHeader::RecordID.to_string(), record.warc_id());
println!("{}: {}", WarcHeader::Date.to_string(), record.date());
println!("");
println!("{}: {}", WarcHeader::RecordID, record.warc_id());
println!("{}: {}", WarcHeader::Date, record.date());
println!();
}
}
}
Expand Down
6 changes: 3 additions & 3 deletions examples/read_raw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ fn main() -> Result<(), std::io::Error> {
Ok((headers, _)) => {
println!(
"{}: {}",
WarcHeader::RecordID.to_string(),
WarcHeader::RecordID,
String::from_utf8_lossy(headers.as_ref().get(&WarcHeader::RecordID).unwrap())
);
println!(
"{}: {}",
WarcHeader::Date.to_string(),
WarcHeader::Date,
String::from_utf8_lossy(headers.as_ref().get(&WarcHeader::Date).unwrap())
);
println!("");
println!();
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Error::ParseHeaders(_) => write!(f, "Error parsing headers."),
Error::MissingHeader(ref h) => write!(f, "Missing required header: {}", h.to_string()),
Error::MissingHeader(ref h) => write!(f, "Missing required header: {}", h),
Error::MalformedHeader(ref h, ref r) => {
write!(f, "Malformed header: {}: {}", h.to_string(), r)
write!(f, "Malformed header: {}: {}", h, r)
}
Error::ReadData(_) => write!(f, "Error reading data source."),
Error::ReadOverflow => write!(f, "Read further than expected."),
Expand Down
44 changes: 29 additions & 15 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ fn version(input: &[u8]) -> IResult<&[u8], &str> {

let version_str = match str::from_utf8(version) {
Err(_) => {
return Err(nom::Err::Error((input, ErrorKind::Verify)));
return Err(nom::Err::Error(nom::error::Error::new(
input,
ErrorKind::Verify,
)));
}
Ok(version) => version,
};
Expand All @@ -23,8 +26,7 @@ fn version(input: &[u8]) -> IResult<&[u8], &str> {
}

fn is_header_token_char(chr: u8) -> bool {
match chr {
0..=31
!matches!(chr, 0..=31
| 128..=255
| b'('
| b')'
Expand All @@ -43,9 +45,7 @@ fn is_header_token_char(chr: u8) -> bool {
| b'{'
| b'}'
| b' '
| b'\\' => false,
_ => true,
}
| b'\\')
}

fn header(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
Expand All @@ -63,6 +63,7 @@ fn header(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {

/// Parse a WARC header block.
// TODO: evaluate the use of `ErrorKind::Verify` here.
#[allow(clippy::type_complexity)]
pub fn headers(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, usize)> {
let (input, version) = version(input)?;
let (input, headers) = many1(header)(input)?;
Expand All @@ -73,22 +74,31 @@ pub fn headers(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, usize)
for header in headers {
let token_str = match str::from_utf8(header.0) {
Err(_) => {
return Err(nom::Err::Error((input, ErrorKind::Verify)));
return Err(nom::Err::Error(nom::error::Error::new(
input,
ErrorKind::Verify,
)));
}
Ok(token) => token,
};

if content_length == None && token_str.to_lowercase() == "content-length" {
if content_length.is_none() && token_str.to_lowercase() == "content-length" {
let value_str = match str::from_utf8(header.1) {
Err(_) => {
return Err(nom::Err::Error((input, ErrorKind::Verify)));
return Err(nom::Err::Error(nom::error::Error::new(
input,
ErrorKind::Verify,
)));
}
Ok(value) => value,
};

match value_str.parse::<usize>() {
Err(_) => {
return Err(nom::Err::Error((input, ErrorKind::Verify)));
return Err(nom::Err::Error(nom::error::Error::new(
input,
ErrorKind::Verify,
)));
}
Ok(len) => {
content_length = Some(len);
Expand All @@ -101,14 +111,15 @@ pub fn headers(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, usize)

// TODO: Technically if we didn't find a `content-length` header, the record is invalid. Should
// we be returning an error here instead?
if content_length == None {
if content_length.is_none() {
content_length = Some(0);
}

Ok((input, (version, warc_headers, content_length.unwrap())))
}

/// Parse an entire WARC record.
#[allow(clippy::type_complexity)]
pub fn record(input: &[u8]) -> IResult<&[u8], (&str, Vec<(&str, &[u8])>, &[u8])> {
let (input, (headers, _)) = tuple((headers, line_ending))(input)?;
let (input, (body, _, _)) = tuple((take(headers.2), line_ending, line_ending))(input)?;
Expand All @@ -125,13 +136,13 @@ mod tests {

#[test]
fn version_parsing() {
assert_eq!(version(&b"WARC/0.0\r\n"[..]), Ok((&b""[..], &"0.0"[..])));
assert_eq!(version(&b"WARC/0.0\r\n"[..]), Ok((&b""[..], "0.0")));

assert_eq!(version(&b"WARC/1.0\r\n"[..]), Ok((&b""[..], &"1.0"[..])));
assert_eq!(version(&b"WARC/1.0\r\n"[..]), Ok((&b""[..], "1.0")));

assert_eq!(
version(&b"WARC/2.0-alpha\r\n"[..]),
Ok((&b""[..], &"2.0-alpha"[..]))
Ok((&b""[..], "2.0-alpha"))
);
}

Expand Down Expand Up @@ -168,7 +179,10 @@ mod tests {

assert_eq!(
headers(&raw_invalid[..]),
Err(Err::Error((&b"\r\n"[..], ErrorKind::Verify)))
Err(Err::Error(nom::error::Error::new(
&b"\r\n"[..],
ErrorKind::Verify
)))
);

let raw = b"\
Expand Down
9 changes: 4 additions & 5 deletions src/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,8 @@ mod streaming_trait {
impl<'t, T: Read + 't> Read for StreamingBody<'t, T> {
fn read(&mut self, data: &mut [u8]) -> std::io::Result<usize> {
let max_read = std::cmp::min(data.len(), *self.1 as usize);
self.0.read(&mut data[..max_read as usize]).map(|n| {
self.0.read(&mut data[..max_read]).inspect(|&n| {
*self.1 -= n as u64;
n
})
}
}
Expand Down Expand Up @@ -156,7 +155,7 @@ impl std::fmt::Display for RawRecordHeader {
fn fmt(&self, w: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
writeln!(w, "WARC/{}", self.version)?;
for (key, value) in self.as_ref().iter() {
writeln!(w, "{}: {}", key.to_string(), String::from_utf8_lossy(value))?;
writeln!(w, "{}: {}", key, String::from_utf8_lossy(value))?;
}
writeln!(w)?;

Expand Down Expand Up @@ -263,7 +262,7 @@ impl<T: BodyKind> Record<T> {
/// The current implementation generates random values based on UUID version 4.
///
pub fn generate_record_id() -> String {
format!("<{}>", Uuid::new_v4().to_urn().to_string())
format!("<{}>", Uuid::new_v4().to_urn())
}

fn parse_content_length(len: &str) -> Result<u64, WarcError> {
Expand Down Expand Up @@ -1058,7 +1057,7 @@ mod raw_tests {

let output = headers.to_string();

let expected_lines = vec![
let expected_lines = [
"WARC/1.0",
"warc-type: dunno",
"warc-date: 2024-01-01T00:00:00Z",
Expand Down
8 changes: 5 additions & 3 deletions src/record_type.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#![allow(missing_docs)]

use std::fmt::Display;
#[derive(Clone, Debug, PartialEq)]
pub enum RecordType {
WarcInfo,
Expand All @@ -12,8 +14,8 @@ pub enum RecordType {
Unknown(String),
}

impl ToString for RecordType {
fn to_string(&self) -> String {
impl Display for RecordType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let stringified = match *self {
RecordType::WarcInfo => "warcinfo",
RecordType::Response => "response",
Expand All @@ -25,7 +27,7 @@ impl ToString for RecordType {
RecordType::Continuation => "continuation",
RecordType::Unknown(ref val) => val.as_ref(),
};
stringified.to_string()
f.write_str(stringified)
}
}

Expand Down
8 changes: 5 additions & 3 deletions src/truncated_type.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#![allow(missing_docs)]

use std::fmt::Display;
#[derive(Clone, Debug, PartialEq)]
pub enum TruncatedType {
Length,
Expand All @@ -8,16 +10,16 @@ pub enum TruncatedType {
Unknown(String),
}

impl ToString for TruncatedType {
fn to_string(&self) -> String {
impl Display for TruncatedType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let stringified = match *self {
TruncatedType::Length => "length",
TruncatedType::Time => "time",
TruncatedType::Disconnect => "disconnect",
TruncatedType::Unspecified => "unspecified",
TruncatedType::Unknown(ref val) => val.as_ref(),
};
stringified.to_string()
f.write_str(stringified)
}
}

Expand Down
Loading
Loading