diff --git a/README.md b/README.md index cf59eb9..20151e7 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,10 @@ frequency and a measure of magnitude. The `-f` flag can be used for fractional weights, which can be integers or fractional numbers of the form `mm.nn`. +Negative weights are allowed. In the output, each entry is sorted by the +absolute value of its aggregate weight. This means that both large positive and +large negative entries will show up near the top. + Sometimes you want to group together lines that have different weights but are otherwise the same. The `-e` flag can be used to erase weights after applying them, by replacing them with `NNN`. Consider the following input. diff --git a/src/main.rs b/src/main.rs index fb0bb11..2d60fe5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -79,29 +79,29 @@ fn do_main() -> io::Result<()> { let erased_label = if erase { ", erased" } else { "" }; match weights { - Unit => process(readers, "", |line| (line, 1u64)), + Unit => process(readers, "", |line| (line, 1i64)), Integral => { - let re = Regex::new(r"(\d+)(\D*)$").unwrap(); + let re = Regex::new(r"(([+-]?)\d+)(\D*)$").unwrap(); process( readers, &format!(" (weighted integral{})", erased_label), |line| { if let Some(captures) = re.captures(&line) { - let weight = u64::from_str(&captures[1]).unwrap(); + let weight = i64::from_str(&captures[1]).unwrap(); let line = if erase { - re.replace(&line, "NNN${2}").to_string() + re.replace(&line, "NNN${3}").to_string() } else { line }; (line, weight) } else { - (line, 1u64) + (line, 1i64) } }, ) } Fractional => { - let re = Regex::new(r"(\d+(\.\d+)?)(\D*)$").unwrap(); + let re = Regex::new(r"(([+-]?)\d+(\.\d+)?)(\D*)$").unwrap(); process( readers, &format!(" (weighted fractional{})", erased_label), @@ -109,7 +109,7 @@ fn do_main() -> io::Result<()> { if let Some(captures) = re.captures(&line) { let weight = f64::from_str(&captures[1]).unwrap(); let line = if erase { - re.replace(&line, "NNN${3}").to_string() + re.replace(&line, "NNN${4}").to_string() } else { line }; @@ -123,7 +123,7 @@ fn do_main() -> io::Result<()> { } } -// `N` is either `u64` or `f64`, and `f64` values are always of the form +// `N` is either `i64` or `f64`, and `f64` values are always of the form // `mm.nn` so NaNs can't occur and the `PartialOrd` is actually infallible. fn process( readers: Vec>, @@ -132,7 +132,7 @@ fn process( ) -> io::Result<()> where F: Fn(String) -> (String, N), - N: AddAssign + Display + From + IntoF64 + PartialOrd, + N: Total, { let mut counts: FxHashMap = FxHashMap::default(); let mut total = N::from(0u32); @@ -151,7 +151,7 @@ where // sort them in alphabetical order. let mut counts: Vec<_> = counts.iter().collect(); counts.sort_unstable_by(|(line1, n1), (line2, n2)| { - (n2, line1).partial_cmp(&(n1, line2)).unwrap() + (n2.abs(), line1).partial_cmp(&(n1.abs(), line2)).unwrap() }); writeln!(io::stdout(), "{:.1} counts{}", total, label)?; @@ -174,25 +174,35 @@ where Ok(()) } -/// `f64` doesn't impl `From` or `TryFrom`, so we do it ourselves. We -/// are unlikely to see `u64` values that are so big that they cannot be -/// represented as `f64`s, so we make this infallible. -pub trait IntoF64: Copy { +pub trait Total: AddAssign + Copy + Display + From + PartialOrd { + /// `f64` doesn't impl `From` or `TryFrom`, so we do it + /// ourselves. We are unlikely to see `i64` values that are so big that + /// they cannot be represented as `f64`s, so we make this infallible. fn into_f64(self) -> f64; + + fn abs(self) -> Self; } -impl IntoF64 for f64 { +impl Total for f64 { fn into_f64(self) -> f64 { self } + + fn abs(self) -> f64 { + self.abs() + } } -impl IntoF64 for u64 { +impl Total for i64 { fn into_f64(self) -> f64 { let f = self as f64; - if f as u64 != self { - panic!("u64 too big to convert to f64") + if f as i64 != self { + panic!("i64 too big to convert to f64") } f } + + fn abs(self) -> i64 { + self.abs() + } } diff --git a/tests/cli.rs b/tests/cli.rs index 363a77f..b39498d 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -175,7 +175,60 @@ def (0.1%) run_tests(input, tests) } -fn run_tests(input: &str, tests: Vec<(Vec<&str>, &str)>) -> Result<(), Box> { +#[test] +fn signed_integral() -> Result<(), Box> { + let input = "\ +foo +3 +foo 4 +foo 5 +bar + -9 +bar + -10 +baz 23 - 2 +baz 23 - +1 +"; + + let tests = vec![( + vec!["-i", "-e"], + "\ +-4 counts (weighted integral, erased) +( 1) -19 (475.0%,475.0%): bar + NNN +( 2) 12 (-300.0%,175.0%): foo NNN +( 3) 3 (-75.0%,100.0%): baz 23 - NNN +", + )]; + + run_tests(input, tests) +} + +#[test] +fn signed_fractional() -> Result<(), Box> { + let input = "\ +foo +3.3 +foo 4.4 +foo 5.5 +bar + -6.6 +bar + -7.0 +baz 23 - 2 +baz 23 - +1 +"; + + let tests = vec![( + vec!["-f", "-e"], + "\ +2.6 counts (weighted fractional, erased) +( 1) -13.6 (-523.1%,-523.1%): bar + NNN +( 2) 13.2 (507.7%,-15.4%): foo NNN +( 3) 3.0 (115.4%,100.0%): baz 23 - NNN +", + )]; + + run_tests(input, tests) +} + +fn run_tests( + input: &'static str, + tests: Vec<(Vec<&'static str>, &'static str)>, +) -> Result<(), Box> { for (options, expected_output) in tests { let mut file = NamedTempFile::new()?; write!(file, "{}", input)?;