Skip to content

Commit

Permalink
Add dedup summary
Browse files Browse the repository at this point in the history
  • Loading branch information
hkctkuy committed Nov 3, 2023
1 parent ccf5c63 commit 2e86a4a
Show file tree
Hide file tree
Showing 3 changed files with 60 additions and 10 deletions.
26 changes: 21 additions & 5 deletions casr/src/bin/casr-cluster.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,15 @@ fn stacktrace(path: &Path) -> Result<Stacktrace> {
///
/// # Return value
///
/// Number of clusters
fn make_clusters(inpath: &Path, outpath: Option<&Path>, jobs: usize, dedup: bool) -> Result<usize> {
/// * Number of clusters
/// * Number of valid casreps before crashline deduplication
/// * Number of valid casreps after crashline deduplication
fn make_clusters(
inpath: &Path,
outpath: Option<&Path>,
jobs: usize,
dedup: bool,
) -> Result<(usize, usize, usize)> {
// if outpath is "None" we consider that outpath and inpath are the same
let outpath = outpath.unwrap_or(inpath);
let dir = fs::read_dir(inpath).with_context(|| format!("File: {}", inpath.display()))?;
Expand Down Expand Up @@ -127,9 +134,13 @@ fn make_clusters(inpath: &Path, outpath: Option<&Path>, jobs: usize, dedup: bool
fs::create_dir_all(format!("{}/cl{}", &outpath.display(), i))?;
}

// Init before and after dedup counters
let before_cnt = casreps.len();
let mut after_cnt = before_cnt;

// Get clusters with crashline deduplication
if dedup {
dedup_crashlines(&crashlines, &mut clusters);
after_cnt = dedup_crashlines(&crashlines, &mut clusters);
}

for i in 0..clusters.len() {
Expand All @@ -147,7 +158,7 @@ fn make_clusters(inpath: &Path, outpath: Option<&Path>, jobs: usize, dedup: bool
),
)?;
}
Ok(cluster_cnt)
Ok((cluster_cnt, before_cnt, after_cnt))
}

/// Remove duplicate casreps
Expand Down Expand Up @@ -445,13 +456,18 @@ fn main() -> Result<()> {
} else if matches.contains_id("clustering") {
let paths: Vec<&PathBuf> = matches.get_many::<PathBuf>("clustering").unwrap().collect();

let result = make_clusters(
let (result, before, after) = make_clusters(
paths[0],
paths.get(1).map(|x| x.as_path()),
jobs,
dedup_crashlines,
)?;
println!("Number of clusters: {result}");
// print crashline dedup summary
if before != after {
println!("Number of reports before crashline deduplication: {before}");
println!("Number of reports after crashline deduplication: {after}");
}
} else if matches.contains_id("deduplication") {
let paths: Vec<&PathBuf> = matches
.get_many::<PathBuf>("deduplication")
Expand Down
28 changes: 27 additions & 1 deletion casr/tests/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2413,7 +2413,33 @@ fn test_casr_cluster_c() {

assert_eq!(clusters_cnt, 9, "Clusters count mismatch.");

// Check crashline deduplication:
// Check crashline deduplication
let re =
Regex::new(r"Number of reports before crashline deduplication: (?P<before>\d+)").unwrap();
let before_cnt = re
.captures(&res)
.unwrap()
.name("before")
.map(|x| x.as_str())
.unwrap()
.parse::<u32>()
.unwrap();

assert_eq!(before_cnt, 11, "Before count mismatch.");

let re =
Regex::new(r"Number of reports after crashline deduplication: (?P<after>\d+)").unwrap();
let after_cnt = re
.captures(&res)
.unwrap()
.name("after")
.map(|x| x.as_str())
.unwrap()
.parse::<u32>()
.unwrap();

assert_eq!(after_cnt, 10, "After count mismatch.");

// 2.casrep and 20.casrep without crashlines => no dedup
// 3.casrep and 30.casrep with crashlines => dedup
// Thus, cluster with 2.casrep has 2 casreps and others have 1 casrep
Expand Down
16 changes: 12 additions & 4 deletions libcasr/src/stacktrace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -259,25 +259,33 @@ pub fn cluster_stacktraces(stacktraces: &[Stacktrace]) -> Result<Vec<usize>> {
///
/// * 'clusters' - A vector of the same length as `crashlines`.
/// Vec\[i\] is the flat cluster number to which original casrep i belongs.
pub fn dedup_crashlines(crashlines: &[String], clusters: &mut [usize]) {
///
/// # Return value
///
/// Number of casreps left after deduplication
pub fn dedup_crashlines(crashlines: &[String], clusters: &mut [usize]) -> usize {
// Count number of clusters
let cluster_num: usize = if !clusters.is_empty() {
*clusters.iter().max().unwrap()
} else {
return;
return 0;

Check warning on line 271 in libcasr/src/stacktrace.rs

View check run for this annotation

Codecov / codecov/patch

libcasr/src/stacktrace.rs#L271

Added line #L271 was not covered by tests
};
// Init dedup crashline list for each cluster
let mut unique_crashlines: Vec<HashSet<String>> = vec![HashSet::new(); cluster_num];

// Init unique crashline counter, i.e. the number of casreps left
let mut unique_cnt = 0;
// Dedup reports by crashline
for (i, crashline) in crashlines.iter().enumerate() {
// Leave report in the cluster if crashline is absent
if crashline.is_empty() || unique_crashlines[clusters[i] - 1].insert(crashline.to_string())
{
continue;
unique_cnt += 1;
} else {
clusters[i] = 0;
}
clusters[i] = 0;
}
unique_cnt
}

/// Stack trace filtering trait.
Expand Down

0 comments on commit 2e86a4a

Please sign in to comment.