Skip to content

Commit

Permalink
Allow negative weights.
Browse files Browse the repository at this point in the history
  • Loading branch information
nnethercote committed Feb 25, 2022
1 parent 8443819 commit 6bbf2aa
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 19 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ frequency and a measure of magnitude.
The `-f` flag can be used for fractional weights, which can be integers or
fractional numbers of the form `mm.nn`.

Negative weights are allowed. In the output, entries are sorted by the
absolute value of their aggregate weight, so both large positive and large
negative entries show up near the top.

Sometimes you want to group together lines that have different weights but are
otherwise the same. The `-e` flag can be used to erase weights after applying
them, by replacing them with `NNN`. Consider the following input.
Expand Down
46 changes: 28 additions & 18 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,37 +79,37 @@ fn do_main() -> io::Result<()> {

let erased_label = if erase { ", erased" } else { "" };
match weights {
Unit => process(readers, "", |line| (line, 1u64)),
Unit => process(readers, "", |line| (line, 1i64)),
Integral => {
let re = Regex::new(r"(\d+)(\D*)$").unwrap();
let re = Regex::new(r"(([+-]?)\d+)(\D*)$").unwrap();
process(
readers,
&format!(" (weighted integral{})", erased_label),
|line| {
if let Some(captures) = re.captures(&line) {
let weight = u64::from_str(&captures[1]).unwrap();
let weight = i64::from_str(&captures[1]).unwrap();
let line = if erase {
re.replace(&line, "NNN${2}").to_string()
re.replace(&line, "NNN${3}").to_string()
} else {
line
};
(line, weight)
} else {
(line, 1u64)
(line, 1i64)
}
},
)
}
Fractional => {
let re = Regex::new(r"(\d+(\.\d+)?)(\D*)$").unwrap();
let re = Regex::new(r"(([+-]?)\d+(\.\d+)?)(\D*)$").unwrap();
process(
readers,
&format!(" (weighted fractional{})", erased_label),
|line| {
if let Some(captures) = re.captures(&line) {
let weight = f64::from_str(&captures[1]).unwrap();
let line = if erase {
re.replace(&line, "NNN${3}").to_string()
re.replace(&line, "NNN${4}").to_string()
} else {
line
};
Expand All @@ -123,7 +123,7 @@ fn do_main() -> io::Result<()> {
}
}

// `N` is either `u64` or `f64`, and `f64` values are always of the form
// `N` is either `i64` or `f64`, and `f64` values are always of the form
// `mm.nn` so NaNs can't occur and the `PartialOrd` is actually infallible.
fn process<F, N>(
readers: Vec<Box<dyn BufRead>>,
Expand All @@ -132,7 +132,7 @@ fn process<F, N>(
) -> io::Result<()>
where
F: Fn(String) -> (String, N),
N: AddAssign + Display + From<u32> + IntoF64 + PartialOrd,
N: Total,
{
let mut counts: FxHashMap<String, N> = FxHashMap::default();
let mut total = N::from(0u32);
Expand All @@ -151,7 +151,7 @@ where
// sort them in alphabetical order.
let mut counts: Vec<_> = counts.iter().collect();
counts.sort_unstable_by(|(line1, n1), (line2, n2)| {
(n2, line1).partial_cmp(&(n1, line2)).unwrap()
(n2.abs(), line1).partial_cmp(&(n1.abs(), line2)).unwrap()
});

writeln!(io::stdout(), "{:.1} counts{}", total, label)?;
Expand All @@ -174,25 +174,35 @@ where
Ok(())
}

/// `f64` doesn't impl `From<u64>` or `TryFrom<u64>`, so we do it ourselves. We
/// are unlikely to see `u64` values that are so big that they cannot be
/// represented as `f64`s, so we make this infallible.
pub trait IntoF64: Copy {
pub trait Total: AddAssign + Copy + Display + From<u32> + PartialOrd {
/// `f64` doesn't impl `From<i64>` or `TryFrom<i64>`, so we do it
/// ourselves. We are unlikely to see `i64` values that are so big that
/// they cannot be represented as `f64`s, so we make this infallible.
fn into_f64(self) -> f64;

fn abs(self) -> Self;
}

impl IntoF64 for f64 {
impl Total for f64 {
fn into_f64(self) -> f64 {
self
}

fn abs(self) -> f64 {
self.abs()
}
}

impl IntoF64 for u64 {
impl Total for i64 {
fn into_f64(self) -> f64 {
let f = self as f64;
if f as u64 != self {
panic!("u64 too big to convert to f64")
if f as i64 != self {
panic!("i64 too big to convert to f64")
}
f
}

fn abs(self) -> i64 {
self.abs()
}
}
55 changes: 54 additions & 1 deletion tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,60 @@ def (0.1%)
run_tests(input, tests)
}

fn run_tests(input: &str, tests: Vec<(Vec<&str>, &str)>) -> Result<(), Box<dyn std::error::Error>> {
/// Integral mode (`-i`) with erasure (`-e`) on weights that carry an explicit
/// `+` or `-` sign.
///
/// The fixture sums to `foo` = 12, `bar` = -19 and `baz` = 3, for a total of
/// -4. The expected report lists the entries by the magnitude of those sums,
/// so the negative `bar` entry comes first.
#[test]
fn signed_integral() -> Result<(), Box<dyn std::error::Error>> {
    // A sign separated from the digits by a space (`+ -9`, `- 2`) stays in
    // the erased label; only the sign touching the digits is consumed.
    let expected_output = "\
-4 counts (weighted integral, erased)
( 1) -19 (475.0%,475.0%): bar + NNN
( 2) 12 (-300.0%,175.0%): foo NNN
( 3) 3 (-75.0%,100.0%): baz 23 - NNN
";
    let options = vec!["-i", "-e"];

    run_tests(
        "\
foo +3
foo 4
foo 5
bar + -9
bar + -10
baz 23 - 2
baz 23 - +1
",
        vec![(options, expected_output)],
    )
}

/// Fractional mode (`-f`) with erasure (`-e`) on weights that carry an
/// explicit `+` or `-` sign.
///
/// The fixture sums to `foo` = 13.2, `bar` = -13.6 and `baz` = 3.0, for a
/// total of 2.6. The expected report lists the entries by the magnitude of
/// those sums, so the negative `bar` entry comes first.
#[test]
fn signed_fractional() -> Result<(), Box<dyn std::error::Error>> {
    // The `baz` lines use plain integers (`2`, `+1`), which the expected
    // output shows being summed to 3.0 in fractional mode.
    let expected_output = "\
2.6 counts (weighted fractional, erased)
( 1) -13.6 (-523.1%,-523.1%): bar + NNN
( 2) 13.2 (507.7%,-15.4%): foo NNN
( 3) 3.0 (115.4%,100.0%): baz 23 - NNN
";
    let options = vec!["-f", "-e"];

    run_tests(
        "\
foo +3.3
foo 4.4
foo 5.5
bar + -6.6
bar + -7.0
baz 23 - 2
baz 23 - +1
",
        vec![(options, expected_output)],
    )
}

fn run_tests(
input: &'static str,
tests: Vec<(Vec<&'static str>, &'static str)>,
) -> Result<(), Box<dyn std::error::Error>> {
for (options, expected_output) in tests {
let mut file = NamedTempFile::new()?;
write!(file, "{}", input)?;
Expand Down

0 comments on commit 6bbf2aa

Please sign in to comment.