From dd9f727ed92688035c95934446fb0c6e44781817 Mon Sep 17 00:00:00 2001 From: Ilya Yegorov Date: Mon, 6 Nov 2023 10:15:22 +0300 Subject: [PATCH] [casr-cluster] Add dedup by crashline for each cluster (#170) --- casr/src/bin/casr-cluster.rs | 81 ++++++- .../casrep/test_clustering_small/20.casrep | 214 +++++++++++++++++ .../casrep/test_clustering_small/3.casrep | 1 + .../casrep/test_clustering_small/30.casrep | 217 ++++++++++++++++++ casr/tests/tests.rs | 42 ++++ docs/usage.md | 7 + libcasr/src/report.rs | 2 +- libcasr/src/stacktrace.rs | 46 +++- 8 files changed, 594 insertions(+), 16 deletions(-) create mode 100644 casr/tests/casr_tests/casrep/test_clustering_small/20.casrep create mode 100644 casr/tests/casr_tests/casrep/test_clustering_small/30.casrep diff --git a/casr/src/bin/casr-cluster.rs b/casr/src/bin/casr-cluster.rs index 66bffa3c..5b367953 100644 --- a/casr/src/bin/casr-cluster.rs +++ b/casr/src/bin/casr-cluster.rs @@ -2,7 +2,7 @@ use casr::util; use libcasr::{init_ignored_frames, stacktrace::*}; use anyhow::{bail, Context, Result}; -use clap::{Arg, ArgAction}; +use clap::{builder::FalseyValueParser, Arg, ArgAction}; use rayon::iter::{IndexedParallelIterator, ParallelIterator}; use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator}; @@ -11,7 +11,7 @@ use std::fs; use std::path::{Path, PathBuf}; use std::sync::RwLock; -/// Extract stack trace from casr (casr-san/casr-gdb) report +/// Extract stack trace from casr report /// /// # Arguments /// @@ -37,10 +37,19 @@ fn stacktrace(path: &Path) -> Result { /// /// * `jobs` - number of jobs for clustering process /// +/// * `dedup` - deduplicate casrep by crashline for each cluster, if true +/// /// # Return value /// -/// Number of clusters -fn make_clusters(inpath: &Path, outpath: Option<&Path>, jobs: usize) -> Result { +/// * Number of clusters +/// * Number of valid casrep before crashiline deduplication +/// * Number of valid casrep after crashiline deduplication +fn make_clusters( + inpath: &Path, 
+ outpath: Option<&Path>, + jobs: usize, + dedup: bool, +) -> Result<(usize, usize, usize)> { // if outpath is "None" we consider that outpath and inpath are the same let outpath = outpath.unwrap_or(inpath); let dir = fs::read_dir(inpath).with_context(|| format!("File: {}", inpath.display()))?; @@ -70,21 +79,31 @@ fn make_clusters(inpath: &Path, outpath: Option<&Path>, jobs: usize) -> Result> = RwLock::new(Vec::new()); + // Crashlines from casreps + let crashlines: RwLock> = RwLock::new(Vec::new()); // Casreps with stacktraces, that we can parse let filtered_casreps: RwLock> = RwLock::new(Vec::new()); // Casreps with stacktraces, that we cannot parse let mut badreports: RwLock> = RwLock::new(Vec::new()); custom_pool.install(|| { (0..len).into_par_iter().for_each(|i| { - if let Ok(trace) = stacktrace(casreps[i].as_path()) { - traces.write().unwrap().push(trace); - filtered_casreps.write().unwrap().push(casreps[i].clone()); + if let Ok(report) = util::report_from_file(casreps[i].as_path()) { + if let Ok(trace) = report.filtered_stacktrace() { + traces.write().unwrap().push(trace); + filtered_casreps.write().unwrap().push(casreps[i].clone()); + if dedup { + crashlines.write().unwrap().push(report.crashline); + } + } else { + badreports.write().unwrap().push(casreps[i].clone()); + } } else { badreports.write().unwrap().push(casreps[i].clone()); } }) }); let stacktraces = traces.read().unwrap(); + let crashlines = crashlines.read().unwrap(); let casreps = filtered_casreps.read().unwrap(); let badreports = badreports.get_mut().unwrap(); @@ -106,14 +125,29 @@ fn make_clusters(inpath: &Path, outpath: Option<&Path>, jobs: usize) -> Result, jobs: usize) -> Result Result<()> { reports in this directory will not be deleted.", ), ) + .arg( + Arg::new("unique-crashline") + .long("unique-crashline") + .env("CASR_CLUSTER_UNIQUE_CRASHLINE") + .action(ArgAction::SetTrue) + .value_parser(FalseyValueParser::new()) + .help("Leave reports with unique crash lines in each cluster") + ) 
.arg( Arg::new("deduplication") .short('d') @@ -387,17 +429,24 @@ fn main() -> Result<()> { .value_parser(clap::value_parser!(u32).range(1..)) ) .get_matches(); + init_ignored_frames!("cpp", "rust", "python", "go", "java"); + // Get number of threads let jobs = if let Some(jobs) = matches.get_one::("jobs") { *jobs as usize } else { std::cmp::max(1, num_cpus::get() / 2) }; + // Get ignore path if let Some(path) = matches.get_one::("ignore") { util::add_custom_ignored_frames(path)?; } + + // Get env var + let dedup_crashlines = matches.get_flag("unique-crashline"); + if matches.contains_id("similarity") { let casreps: Vec<&PathBuf> = matches.get_many::("similarity").unwrap().collect(); println!( @@ -407,8 +456,18 @@ fn main() -> Result<()> { } else if matches.contains_id("clustering") { let paths: Vec<&PathBuf> = matches.get_many::("clustering").unwrap().collect(); - let result = make_clusters(paths[0], paths.get(1).map(|x| x.as_path()), jobs)?; + let (result, before, after) = make_clusters( + paths[0], + paths.get(1).map(|x| x.as_path()), + jobs, + dedup_crashlines, + )?; println!("Number of clusters: {result}"); + // Print crashline dedup summary + if before != after { + println!("Number of reports before crashline deduplication: {before}"); + println!("Number of reports after crashline deduplication: {after}"); + } } else if matches.contains_id("deduplication") { let paths: Vec<&PathBuf> = matches .get_many::("deduplication") diff --git a/casr/tests/casr_tests/casrep/test_clustering_small/20.casrep b/casr/tests/casr_tests/casrep/test_clustering_small/20.casrep new file mode 100644 index 00000000..7b620ee4 --- /dev/null +++ b/casr/tests/casr_tests/casrep/test_clustering_small/20.casrep @@ -0,0 +1,214 @@ +{ + "Date": "2021-07-14T19:53:11.220700+03:00", + "Uname": "Linux titanfall 5.8.0-59-generic #66~20.04.1-Ubuntu SMP Thu Jun 17 11:14:10 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux", + "OS": "Ubuntu", + "OSRelease": "20.04", + "Architecture": "amd64", + 
"ExecutablePath": "/usr/local/bin/thumbnail", + "ProcEnviron": [ + "SHELL=/bin/zsh", + "COLORTERM=truecolor", + "SUDO_GID=1000", + "LC_ADDRESS=ru_RU.UTF-8", + "LC_NAME=ru_RU.UTF-8", + "SUDO_COMMAND=./scrypt.sh", + "LC_MONETARY=ru_RU.UTF-8", + "SUDO_USER=avgor46", + "PWD=/home/avgor46/testdoc", + "LOGNAME=root", + "XAUTHORITY=/run/user/1000/gdm/Xauthority", + "HOME=/root", + "LC_PAPER=ru_RU.UTF-8", + "LANG=en_US.UTF-8", + "LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:", + "TERM=xterm-256color", + "LC_IDENTIFICATION=ru_RU.UTF-8", + 
"USER=root", + "DISPLAY=:0", + "SHLVL=1", + "LC_TELEPHONE=ru_RU.UTF-8", + "LC_MEASUREMENT=ru_RU.UTF-8", + "LC_TIME=ru_RU.UTF-8", + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin", + "SUDO_UID=1000", + "MAIL=/var/mail/root", + "LC_NUMERIC=ru_RU.UTF-8", + "_=/home/avgor46/sydr/build/caesar", + "LC_ALL=C", + "LINES=60", + "COLUMNS=204" + ], + "ProcCmdline": "thumbnail ./fuz2thumbnail/main/crashes/id:000015,sig:11,src:000072,time:1869165,op:havoc,rep:8 crash.tiff", + "ProcStatus": [ + "process 2943495", + "Name:\tthumbnail", + "Umask:\t0022", + "State:\tt (tracing stop)", + "Tgid:\t2943495", + "Ngid:\t0", + "Pid:\t2943495", + "PPid:\t2943493", + "TracerPid:\t2943493", + "Uid:\t0\t0\t0\t0", + "Gid:\t0\t0\t0\t0", + "FDSize:\t64", + "Groups:\t0 ", + "NStgid:\t2943495", + "NSpid:\t2943495", + "NSpgid:\t2943495", + "NSsid:\t2286199", + "VmPeak:\t 450428 kB", + "VmSize:\t 450428 kB", + "VmLck:\t 0 kB", + "VmPin:\t 0 kB", + "VmHWM:\t 3396 kB", + "VmRSS:\t 3396 kB", + "RssAnon:\t 1200 kB", + "RssFile:\t 2196 kB", + "RssShmem:\t 0 kB", + "VmData:\t 443604 kB", + "VmStk:\t 132 kB", + "VmExe:\t 8 kB", + "VmLib:\t 2916 kB", + "VmPTE:\t 52 kB", + "VmSwap:\t 0 kB", + "HugetlbPages:\t 0 kB", + "CoreDumping:\t0", + "THP_enabled:\t1", + "Threads:\t1", + "SigQ:\t0/127573", + "SigPnd:\t0000000000000000", + "ShdPnd:\t0000000000000000", + "SigBlk:\t0000000000000000", + "SigIgn:\t0000000000000000", + "SigCgt:\t0000000000000000", + "CapInh:\t0000000000000000", + "CapPrm:\t000000ffffffffff", + "CapEff:\t000000ffffffffff", + "CapBnd:\t000000ffffffffff", + "CapAmb:\t0000000000000000", + "NoNewPrivs:\t0", + "Seccomp:\t0", + "Speculation_Store_Bypass:\tthread vulnerable", + "Cpus_allowed:\tfff", + "Cpus_allowed_list:\t0-11", + 
"Mems_allowed:\t00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001", + "Mems_allowed_list:\t0", + "voluntary_ctxt_switches:\t5", + "nonvoluntary_ctxt_switches:\t0" + ], + "ProcMaps": [ + " 0x555555554000 0x555555556000 0x2000 0x0 /usr/local/bin/thumbnail", + " 0x555555556000 0x555555558000 0x2000 0x2000 /usr/local/bin/thumbnail", + " 0x555555558000 0x555555559000 0x1000 0x4000 /usr/local/bin/thumbnail", + " 0x555555559000 0x55555555a000 0x1000 0x4000 /usr/local/bin/thumbnail", + " 0x55555555a000 0x55555555b000 0x1000 0x5000 /usr/local/bin/thumbnail", + " 0x55555555b000 0x555555675000 0x11a000 0x0 [heap]", + " 0x7fffdc941000 0x7ffff7949000 0x1b008000 0x0 ", + " 0x7ffff7949000 0x7ffff794b000 0x2000 0x0 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff794b000 0x7ffff795c000 0x11000 0x2000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff795c000 0x7ffff7962000 0x6000 0x13000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff7962000 0x7ffff7963000 0x1000 0x19000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff7963000 0x7ffff7964000 0x1000 0x19000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff7964000 0x7ffff7965000 0x1000 0x1a000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff7965000 0x7ffff7969000 0x4000 0x0 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff7969000 0x7ffff79ad000 0x44000 0x4000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff79ad000 0x7ffff79e7000 0x3a000 0x48000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff79e7000 0x7ffff79e8000 0x1000 0x82000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff79e8000 0x7ffff79e9000 0x1000 0x82000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff79e9000 0x7ffff79ea000 0x1000 0x83000 
/usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff79ea000 0x7ffff79f5000 0xb000 0x0 /usr/lib/x86_64-linux-gnu/libjbig.so.0", + " 0x7ffff79f5000 0x7ffff7bf4000 0x1ff000 0xb000 /usr/lib/x86_64-linux-gnu/libjbig.so.0", + " 0x7ffff7bf4000 0x7ffff7bf5000 0x1000 0xa000 /usr/lib/x86_64-linux-gnu/libjbig.so.0", + " 0x7ffff7bf5000 0x7ffff7bf8000 0x3000 0xb000 /usr/lib/x86_64-linux-gnu/libjbig.so.0", + " 0x7ffff7bf8000 0x7ffff7c1d000 0x25000 0x0 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7c1d000 0x7ffff7d95000 0x178000 0x25000 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7d95000 0x7ffff7ddf000 0x4a000 0x19d000 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7ddf000 0x7ffff7de0000 0x1000 0x1e7000 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7de0000 0x7ffff7de3000 0x3000 0x1e7000 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7de3000 0x7ffff7de6000 0x3000 0x1ea000 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7de6000 0x7ffff7dea000 0x4000 0x0 ", + " 0x7ffff7dea000 0x7ffff7df9000 0xf000 0x0 /usr/lib/x86_64-linux-gnu/libm-2.31.so", + " 0x7ffff7df9000 0x7ffff7ea0000 0xa7000 0xf000 /usr/lib/x86_64-linux-gnu/libm-2.31.so", + " 0x7ffff7ea0000 0x7ffff7f37000 0x97000 0xb6000 /usr/lib/x86_64-linux-gnu/libm-2.31.so", + " 0x7ffff7f37000 0x7ffff7f38000 0x1000 0x14c000 /usr/lib/x86_64-linux-gnu/libm-2.31.so", + " 0x7ffff7f38000 0x7ffff7f39000 0x1000 0x14d000 /usr/lib/x86_64-linux-gnu/libm-2.31.so", + " 0x7ffff7f39000 0x7ffff7f41000 0x8000 0x0 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7f41000 0x7ffff7f76000 0x35000 0x8000 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7f76000 0x7ffff7f9f000 0x29000 0x3d000 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7f9f000 0x7ffff7fa0000 0x1000 0x66000 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7fa0000 0x7ffff7fa2000 0x2000 0x66000 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7fa2000 0x7ffff7fa3000 0x1000 0x68000 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7fa3000 0x7ffff7fa5000 0x2000 0x0 ", + " 
0x7ffff7fc5000 0x7ffff7fc9000 0x4000 0x0 /home/avgor46/testdoc/fuz2thumbnail/main/crashes/id:000015,sig:11,src:000072,time:1869165,op:havoc,rep:8", + " 0x7ffff7fc9000 0x7ffff7fcd000 0x4000 0x0 [vvar]", + " 0x7ffff7fcd000 0x7ffff7fcf000 0x2000 0x0 [vdso]", + " 0x7ffff7fcf000 0x7ffff7fd0000 0x1000 0x0 /usr/lib/x86_64-linux-gnu/ld-2.31.so", + " 0x7ffff7fd0000 0x7ffff7ff3000 0x23000 0x1000 /usr/lib/x86_64-linux-gnu/ld-2.31.so", + " 0x7ffff7ff3000 0x7ffff7ffb000 0x8000 0x24000 /usr/lib/x86_64-linux-gnu/ld-2.31.so", + " 0x7ffff7ffc000 0x7ffff7ffd000 0x1000 0x2c000 /usr/lib/x86_64-linux-gnu/ld-2.31.so", + " 0x7ffff7ffd000 0x7ffff7ffe000 0x1000 0x2d000 /usr/lib/x86_64-linux-gnu/ld-2.31.so", + " 0x7ffff7ffe000 0x7ffff7fff000 0x1000 0x0 ", + " 0x7ffffffde000 0x7ffffffff000 0x21000 0x0 [stack]", + " 0xffffffffff600000 0xffffffffff601000 0x1000 0x0 [vsyscall]" + ], + "CrashSeverity": { + "Type": "NOT_CRITICAL", + "ShortDescription": "AccessViolation", + "Description": "Access violation", + "Explanation": "The target crashed due to an access violation but there is not enough additional information available to determine crash severity." 
+ }, + "Stacktrace": [ + "#0 __memmove_avx_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:383", + "#1 0x00007ffff7f70bcb in TIFFReadRawStrip1 (tif=0x55555555bbf0, strip=0, buf=0x7fffdc941010, size=453000805, module=0x7ffff7f96a30 \"TIFFReadRawStrip\") at tif_read.c:208", + "#2 0x0000555555556e6e in cpStrips (out=0x55555555b2a0, in=0x55555555bbf0) at thumbnail.c:279", + "#3 cpIFD (out=, in=) at thumbnail.c:337", + "#4 main (argc=, argv=) at thumbnail.c:116" + ], + "Prstatus": { + "registers": { + "cs": 51, + "ds": 0, + "eflags": 66199, + "es": 0, + "fs": 0, + "gs": 0, + "r10": 34, + "r11": 582, + "r12": 453000805, + "r13": 140736894078992, + "r14": 140737353706032, + "r15": 140736894078992, + "r8": 140737353706032, + "r9": 93824993281112, + "rax": 140736894078992, + "rbp": 0, + "rbx": 93824992263152, + "rcx": 453000805, + "rdi": 140736894078992, + "rdx": 453000805, + "rip": 140737351542752, + "rsi": 140741195878594, + "rsp": 140737488347784, + "ss": 43 + } + }, + "Disassembly": [ + "=> 0x7ffff7d867e0 <__memmove_avx_unaligned_erms+368>:\tvmovdqu ymm4,YMMWORD PTR [rsi]", + " 0x7ffff7d867e4 <__memmove_avx_unaligned_erms+372>:\tvmovdqu ymm5,YMMWORD PTR [rsi+rdx*1-0x20]", + " 0x7ffff7d867ea <__memmove_avx_unaligned_erms+378>:\tvmovdqu ymm6,YMMWORD PTR [rsi+rdx*1-0x40]", + " 0x7ffff7d867f0 <__memmove_avx_unaligned_erms+384>:\tvmovdqu ymm7,YMMWORD PTR [rsi+rdx*1-0x60]", + " 0x7ffff7d867f6 <__memmove_avx_unaligned_erms+390>:\tvmovdqu ymm8,YMMWORD PTR [rsi+rdx*1-0x80]", + " 0x7ffff7d867fc <__memmove_avx_unaligned_erms+396>:\tmov r11,rdi", + " 0x7ffff7d867ff <__memmove_avx_unaligned_erms+399>:\tlea rcx,[rdi+rdx*1-0x20]", + " 0x7ffff7d86804 <__memmove_avx_unaligned_erms+404>:\tmov r8,rdi", + " 0x7ffff7d86807 <__memmove_avx_unaligned_erms+407>:\tand r8,0x1f", + " 0x7ffff7d8680b <__memmove_avx_unaligned_erms+411>:\tsub r8,0x20", + " 0x7ffff7d8680f <__memmove_avx_unaligned_erms+415>:\tsub rsi,r8", + " 0x7ffff7d86812 
<__memmove_avx_unaligned_erms+418>:\tsub rdi,r8", + " 0x7ffff7d86815 <__memmove_avx_unaligned_erms+421>:\tadd rdx,r8", + " 0x7ffff7d86818 <__memmove_avx_unaligned_erms+424>:\tcmp rdx,QWORD PTR [rip+0x62979] # 0x7ffff7de9198 <__x86_shared_non_temporal_threshold>", + " 0x7ffff7d8681f <__memmove_avx_unaligned_erms+431>:\tja 0x7ffff7d8692c <__memmove_avx_unaligned_erms+700>", + " 0x7ffff7d86825 <__memmove_avx_unaligned_erms+437>:\tvmovdqu ymm0,YMMWORD PTR [rsi]" + ] +} diff --git a/casr/tests/casr_tests/casrep/test_clustering_small/3.casrep b/casr/tests/casr_tests/casrep/test_clustering_small/3.casrep index 5f776d67..88e5dc86 100644 --- a/casr/tests/casr_tests/casrep/test_clustering_small/3.casrep +++ b/casr/tests/casr_tests/casrep/test_clustering_small/3.casrep @@ -167,6 +167,7 @@ "#5 t2p_write_pdf (t2p=0x5555555672a0, input=0x555555567ea0, output=0x555555568f10) at tiff2pdf.c:5133", "#6 0x00005555555568d4 in main (argc=, argv=) at tiff2pdf.c:763" ], + "CrashLine": "malloc.c:1466", "Prstatus": { "registers": { "cs": 51, diff --git a/casr/tests/casr_tests/casrep/test_clustering_small/30.casrep b/casr/tests/casr_tests/casrep/test_clustering_small/30.casrep new file mode 100644 index 00000000..88e5dc86 --- /dev/null +++ b/casr/tests/casr_tests/casrep/test_clustering_small/30.casrep @@ -0,0 +1,217 @@ +{ + "Date": "2021-07-14T19:56:06.484123+03:00", + "Uname": "Linux titanfall 5.8.0-59-generic #66~20.04.1-Ubuntu SMP Thu Jun 17 11:14:10 UTC 2021 x86_64 x86_64 x86_64 GNU/Linux", + "OS": "Ubuntu", + "OSRelease": "20.04", + "Architecture": "amd64", + "ExecutablePath": "/usr/local/bin/tiff2pdf", + "ProcEnviron": [ + "SHELL=/bin/zsh", + "COLORTERM=truecolor", + "SUDO_GID=1000", + "LC_ADDRESS=ru_RU.UTF-8", + "LC_NAME=ru_RU.UTF-8", + "SUDO_COMMAND=./scrypt.sh", + "LC_MONETARY=ru_RU.UTF-8", + "SUDO_USER=avgor46", + "PWD=/home/avgor46/testdoc", + "LOGNAME=root", + "XAUTHORITY=/run/user/1000/gdm/Xauthority", + "HOME=/root", + "LC_PAPER=ru_RU.UTF-8", + "LANG=en_US.UTF-8", + 
"LS_COLORS=rs=0:di=01;34:ln=01;36:mh=00:pi=40;33:so=01;35:do=01;35:bd=40;33;01:cd=40;33;01:or=40;31;01:mi=00:su=37;41:sg=30;43:ca=30;41:tw=30;42:ow=34;42:st=37;44:ex=01;32:*.tar=01;31:*.tgz=01;31:*.arc=01;31:*.arj=01;31:*.taz=01;31:*.lha=01;31:*.lz4=01;31:*.lzh=01;31:*.lzma=01;31:*.tlz=01;31:*.txz=01;31:*.tzo=01;31:*.t7z=01;31:*.zip=01;31:*.z=01;31:*.dz=01;31:*.gz=01;31:*.lrz=01;31:*.lz=01;31:*.lzo=01;31:*.xz=01;31:*.zst=01;31:*.tzst=01;31:*.bz2=01;31:*.bz=01;31:*.tbz=01;31:*.tbz2=01;31:*.tz=01;31:*.deb=01;31:*.rpm=01;31:*.jar=01;31:*.war=01;31:*.ear=01;31:*.sar=01;31:*.rar=01;31:*.alz=01;31:*.ace=01;31:*.zoo=01;31:*.cpio=01;31:*.7z=01;31:*.rz=01;31:*.cab=01;31:*.wim=01;31:*.swm=01;31:*.dwm=01;31:*.esd=01;31:*.jpg=01;35:*.jpeg=01;35:*.mjpg=01;35:*.mjpeg=01;35:*.gif=01;35:*.bmp=01;35:*.pbm=01;35:*.pgm=01;35:*.ppm=01;35:*.tga=01;35:*.xbm=01;35:*.xpm=01;35:*.tif=01;35:*.tiff=01;35:*.png=01;35:*.svg=01;35:*.svgz=01;35:*.mng=01;35:*.pcx=01;35:*.mov=01;35:*.mpg=01;35:*.mpeg=01;35:*.m2v=01;35:*.mkv=01;35:*.webm=01;35:*.ogm=01;35:*.mp4=01;35:*.m4v=01;35:*.mp4v=01;35:*.vob=01;35:*.qt=01;35:*.nuv=01;35:*.wmv=01;35:*.asf=01;35:*.rm=01;35:*.rmvb=01;35:*.flc=01;35:*.avi=01;35:*.fli=01;35:*.flv=01;35:*.gl=01;35:*.dl=01;35:*.xcf=01;35:*.xwd=01;35:*.yuv=01;35:*.cgm=01;35:*.emf=01;35:*.ogv=01;35:*.ogx=01;35:*.aac=00;36:*.au=00;36:*.flac=00;36:*.m4a=00;36:*.mid=00;36:*.midi=00;36:*.mka=00;36:*.mp3=00;36:*.mpc=00;36:*.ogg=00;36:*.ra=00;36:*.wav=00;36:*.oga=00;36:*.opus=00;36:*.spx=00;36:*.xspf=00;36:", + "TERM=xterm-256color", + "LC_IDENTIFICATION=ru_RU.UTF-8", + "USER=root", + "DISPLAY=:0", + "SHLVL=1", + "LC_TELEPHONE=ru_RU.UTF-8", + "LC_MEASUREMENT=ru_RU.UTF-8", + "LC_TIME=ru_RU.UTF-8", + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/bin", + "SUDO_UID=1000", + "MAIL=/var/mail/root", + "LC_NUMERIC=ru_RU.UTF-8", + "_=/home/avgor46/sydr/build/caesar", + "LC_ALL=C", + "LINES=60", + "COLUMNS=204" + ], + "ProcCmdline": "tiff2pdf 
./fuz3tiff2pdf/main/crashes/id:000002,sig:06,src:000000,time:1940,op:havoc,rep:2", + "ProcStatus": [ + "process 2943807", + "Name:\ttiff2pdf", + "Umask:\t0022", + "State:\tt (tracing stop)", + "Tgid:\t2943807", + "Ngid:\t0", + "Pid:\t2943807", + "PPid:\t2943805", + "TracerPid:\t2943805", + "Uid:\t0\t0\t0\t0", + "Gid:\t0\t0\t0\t0", + "FDSize:\t64", + "Groups:\t0 ", + "NStgid:\t2943807", + "NSpid:\t2943807", + "NSpgid:\t2943807", + "NSsid:\t2286199", + "VmPeak:\t 9144 kB", + "VmSize:\t 9144 kB", + "VmLck:\t 0 kB", + "VmPin:\t 0 kB", + "VmHWM:\t 2640 kB", + "VmRSS:\t 2640 kB", + "RssAnon:\t 208 kB", + "RssFile:\t 2432 kB", + "RssShmem:\t 0 kB", + "VmData:\t 2284 kB", + "VmStk:\t 132 kB", + "VmExe:\t 44 kB", + "VmLib:\t 2916 kB", + "VmPTE:\t 44 kB", + "VmSwap:\t 0 kB", + "HugetlbPages:\t 0 kB", + "CoreDumping:\t0", + "THP_enabled:\t1", + "Threads:\t1", + "SigQ:\t0/127573", + "SigPnd:\t0000000000000000", + "ShdPnd:\t0000000000000000", + "SigBlk:\t0000000000000000", + "SigIgn:\t0000000000000000", + "SigCgt:\t0000000000000000", + "CapInh:\t0000000000000000", + "CapPrm:\t000000ffffffffff", + "CapEff:\t000000ffffffffff", + "CapBnd:\t000000ffffffffff", + "CapAmb:\t0000000000000000", + "NoNewPrivs:\t0", + "Seccomp:\t0", + "Speculation_Store_Bypass:\tthread vulnerable", + "Cpus_allowed:\tfff", + "Cpus_allowed_list:\t0-11", + "Mems_allowed:\t00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000000,00000001", + "Mems_allowed_list:\t0", + "voluntary_ctxt_switches:\t5", + "nonvoluntary_ctxt_switches:\t0" + ], + "ProcMaps": [ + " 0x555555554000 0x555555556000 0x2000 0x0 /usr/local/bin/tiff2pdf", + " 0x555555556000 0x555555561000 0xb000 0x2000 /usr/local/bin/tiff2pdf", + " 0x555555561000 0x555555565000 0x4000 0xd000 /usr/local/bin/tiff2pdf", + " 0x555555565000 
0x555555566000 0x1000 0x10000 /usr/local/bin/tiff2pdf", + " 0x555555566000 0x555555567000 0x1000 0x11000 /usr/local/bin/tiff2pdf", + " 0x555555567000 0x55555578b000 0x224000 0x0 [heap]", + " 0x7ffff7945000 0x7ffff7949000 0x4000 0x0 ", + " 0x7ffff7949000 0x7ffff7958000 0xf000 0x0 /usr/lib/x86_64-linux-gnu/libm-2.31.so", + " 0x7ffff7958000 0x7ffff79ff000 0xa7000 0xf000 /usr/lib/x86_64-linux-gnu/libm-2.31.so", + " 0x7ffff79ff000 0x7ffff7a96000 0x97000 0xb6000 /usr/lib/x86_64-linux-gnu/libm-2.31.so", + " 0x7ffff7a96000 0x7ffff7a97000 0x1000 0x14c000 /usr/lib/x86_64-linux-gnu/libm-2.31.so", + " 0x7ffff7a97000 0x7ffff7a98000 0x1000 0x14d000 /usr/lib/x86_64-linux-gnu/libm-2.31.so", + " 0x7ffff7a98000 0x7ffff7a9a000 0x2000 0x0 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff7a9a000 0x7ffff7aab000 0x11000 0x2000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff7aab000 0x7ffff7ab1000 0x6000 0x13000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff7ab1000 0x7ffff7ab2000 0x1000 0x19000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff7ab2000 0x7ffff7ab3000 0x1000 0x19000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff7ab3000 0x7ffff7ab4000 0x1000 0x1a000 /usr/lib/x86_64-linux-gnu/libz.so.1.2.11", + " 0x7ffff7ab4000 0x7ffff7ab8000 0x4000 0x0 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff7ab8000 0x7ffff7afc000 0x44000 0x4000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff7afc000 0x7ffff7b36000 0x3a000 0x48000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff7b36000 0x7ffff7b37000 0x1000 0x82000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff7b37000 0x7ffff7b38000 0x1000 0x82000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff7b38000 0x7ffff7b39000 0x1000 0x83000 /usr/lib/x86_64-linux-gnu/libjpeg.so.8.2.2", + " 0x7ffff7b39000 0x7ffff7b44000 0xb000 0x0 /usr/lib/x86_64-linux-gnu/libjbig.so.0", + " 0x7ffff7b44000 0x7ffff7d43000 0x1ff000 0xb000 /usr/lib/x86_64-linux-gnu/libjbig.so.0", + " 0x7ffff7d43000 
0x7ffff7d44000 0x1000 0xa000 /usr/lib/x86_64-linux-gnu/libjbig.so.0", + " 0x7ffff7d44000 0x7ffff7d47000 0x3000 0xb000 /usr/lib/x86_64-linux-gnu/libjbig.so.0", + " 0x7ffff7d47000 0x7ffff7d6c000 0x25000 0x0 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7d6c000 0x7ffff7ee4000 0x178000 0x25000 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7ee4000 0x7ffff7f2e000 0x4a000 0x19d000 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7f2e000 0x7ffff7f2f000 0x1000 0x1e7000 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7f2f000 0x7ffff7f32000 0x3000 0x1e7000 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7f32000 0x7ffff7f35000 0x3000 0x1ea000 /usr/lib/x86_64-linux-gnu/libc-2.31.so", + " 0x7ffff7f35000 0x7ffff7f39000 0x4000 0x0 ", + " 0x7ffff7f39000 0x7ffff7f41000 0x8000 0x0 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7f41000 0x7ffff7f76000 0x35000 0x8000 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7f76000 0x7ffff7f9f000 0x29000 0x3d000 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7f9f000 0x7ffff7fa0000 0x1000 0x66000 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7fa0000 0x7ffff7fa2000 0x2000 0x66000 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7fa2000 0x7ffff7fa3000 0x1000 0x68000 /usr/local/lib/libtiff.so.3.9.6", + " 0x7ffff7fa3000 0x7ffff7fa5000 0x2000 0x0 ", + " 0x7ffff7fc9000 0x7ffff7fcd000 0x4000 0x0 [vvar]", + " 0x7ffff7fcd000 0x7ffff7fcf000 0x2000 0x0 [vdso]", + " 0x7ffff7fcf000 0x7ffff7fd0000 0x1000 0x0 /usr/lib/x86_64-linux-gnu/ld-2.31.so", + " 0x7ffff7fd0000 0x7ffff7ff3000 0x23000 0x1000 /usr/lib/x86_64-linux-gnu/ld-2.31.so", + " 0x7ffff7ff3000 0x7ffff7ffb000 0x8000 0x24000 /usr/lib/x86_64-linux-gnu/ld-2.31.so", + " 0x7ffff7ffb000 0x7ffff7ffc000 0x1000 0x0 /home/avgor46/testdoc/fuz3tiff2pdf/main/crashes/id:000002,sig:06,src:000000,time:1940,op:havoc,rep:2", + " 0x7ffff7ffc000 0x7ffff7ffd000 0x1000 0x2c000 /usr/lib/x86_64-linux-gnu/ld-2.31.so", + " 0x7ffff7ffd000 0x7ffff7ffe000 0x1000 0x2d000 /usr/lib/x86_64-linux-gnu/ld-2.31.so", + " 
0x7ffff7ffe000 0x7ffff7fff000 0x1000 0x0 ", + " 0x7ffffffde000 0x7ffffffff000 0x21000 0x0 [stack]", + " 0xffffffffff600000 0xffffffffff601000 0x1000 0x0 [vsyscall]" + ], + "CrashSeverity": { + "Type": "NOT_CRITICAL", + "ShortDescription": "AccessViolation", + "Description": "Access violation", + "Explanation": "The target crashed due to an access violation but there is not enough additional information available to determine crash severity." + }, + "Stacktrace": [ + "#0 unlink_chunk (p=p@entry=0x55555556b610, av=0x7ffff7f32b80 ) at malloc.c:1466", + "#1 0x00007ffff7de2773 in _int_malloc (av=av@entry=0x7ffff7f32b80 , bytes=bytes@entry=128) at malloc.c:4041", + "#2 0x00007ffff7de42d4 in __GI___libc_malloc (bytes=128) at malloc.c:3058", + "#3 0x000055555555ae1b in t2p_readwrite_pdf_image_tile (t2p=0x5555555672a0, input=0x555555567ea0, output=0x555555568f10, tile=0) at tiff2pdf.c:2767", + "#4 0x00005555555605f8 in t2p_write_pdf (output=0x555555568f10, input=0x555555567ea0, t2p=0x5555555672a0) at tiff2pdf.c:5252", + "#5 t2p_write_pdf (t2p=0x5555555672a0, input=0x555555567ea0, output=0x555555568f10) at tiff2pdf.c:5133", + "#6 0x00005555555568d4 in main (argc=, argv=) at tiff2pdf.c:763" + ], + "CrashLine": "malloc.c:1466", + "Prstatus": { + "registers": { + "cs": 51, + "ds": 0, + "eflags": 66054, + "es": 0, + "fs": 0, + "gs": 0, + "r10": 0, + "r11": 140737353296864, + "r12": 3968, + "r13": 144, + "r14": 4112, + "r15": 7, + "r8": 140737353296992, + "r9": 1, + "rax": 140737353298432, + "rbp": 128, + "rbx": 140737353296768, + "rcx": 4113, + "rdi": 93824992327184, + "rdx": 4894906143429623808, + "rip": 140737351907943, + "rsi": 8, + "rsp": 140737488347536, + "ss": 43 + } + }, + "Disassembly": [ + "=> 0x7ffff7ddfa67 :\tcmp rdi,QWORD PTR [rdx+0x28]", + " 0x7ffff7ddfa6b :\tjne 0x7ffff7ddfaeb ", + " 0x7ffff7ddfa6d :\tmov rcx,QWORD PTR [rdi+0x28]", + " 0x7ffff7ddfa71 :\tcmp rdi,QWORD PTR [rcx+0x20]", + " 0x7ffff7ddfa75 :\tjne 0x7ffff7ddfaeb ", + " 0x7ffff7ddfa77 :\tcmp QWORD PTR 
[rax+0x20],0x0", + " 0x7ffff7ddfa7c :\tje 0x7ffff7ddfa90 ", + " 0x7ffff7ddfa7e :\tmov QWORD PTR [rdx+0x28],rcx", + " 0x7ffff7ddfa82 :\tmov rax,QWORD PTR [rdi+0x28]", + " 0x7ffff7ddfa86 :\tmov QWORD PTR [rax+0x20],rdx", + " 0x7ffff7ddfa8a :\tadd rsp,0x8", + " 0x7ffff7ddfa8e :\tret ", + " 0x7ffff7ddfa8f :\tnop", + " 0x7ffff7ddfa90 :\tcmp rdi,rdx", + " 0x7ffff7ddfa93 :\tje 0x7ffff7ddfad0 ", + " 0x7ffff7ddfa95 :\tmovq xmm0,rdx" + ] +} diff --git a/casr/tests/tests.rs b/casr/tests/tests.rs index f7a79743..0e080722 100644 --- a/casr/tests/tests.rs +++ b/casr/tests/tests.rs @@ -2386,6 +2386,7 @@ fn test_casr_cluster_c() { let output = Command::new(*EXE_CASR_CLUSTER.read().unwrap()) .args(["-c", &paths[0], &paths[1]]) + .env("CASR_CLUSTER_UNIQUE_CRASHLINE", "1") .output() .expect("failed to start casr-cluster"); @@ -2412,6 +2413,47 @@ fn test_casr_cluster_c() { assert_eq!(clusters_cnt, 9, "Clusters count mismatch."); + // Check crashline deduplication + let re = + Regex::new(r"Number of reports before crashline deduplication: (?P\d+)").unwrap(); + let before_cnt = re + .captures(&res) + .unwrap() + .name("before") + .map(|x| x.as_str()) + .unwrap() + .parse::() + .unwrap(); + + assert_eq!(before_cnt, 11, "Before count mismatch."); + + let re = + Regex::new(r"Number of reports after crashline deduplication: (?P\d+)").unwrap(); + let after_cnt = re + .captures(&res) + .unwrap() + .name("after") + .map(|x| x.as_str()) + .unwrap() + .parse::() + .unwrap(); + + assert_eq!(after_cnt, 10, "After count mismatch."); + + // 2.casrep and 20.caserp without crashlines => no dedup + // 3.casrep and 30.caserp with crashlines => dedup + // Thus, cluster with 2.casrep has 2 casreps and others have 1 casrep + for i in 1..clusters_cnt + 1 { + let cluster_path = paths[1].to_owned() + "/cl" + &i.to_string(); + let size = std::fs::read_dir(cluster_path.clone()).unwrap().count(); + let num = if Path::new(&(cluster_path + "/2.casrep")).exists() { + 2 + } else { + 1 + }; + assert_eq!(size, num); + 
} + let _ = std::fs::remove_dir_all(&paths[1]); } diff --git a/docs/usage.md b/docs/usage.md index f7f110bb..2646d9c2 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -223,6 +223,9 @@ Tool for clustering CASR reports Cluster CASR reports. If two directories are set, clusters will be placed in the second directory. If one directory is provided, clusters will be placed there, but reports in this directory will not be deleted. + --unique-crashline + Leave reports with unique crash lines in each cluster [env: + CASR_CLUSTER_UNIQUE_CRASHLINE=] -d, --deduplicate Deduplicate CASR reports. If two directories are set, deduplicated reports are copied to the second directory. If one directory is provided, duplicated reports @@ -298,6 +301,10 @@ For the **--ignore ** option, file format should be as follows: Headers may be in different order, one of them may be missing. Frames that match these regular expressions will be not considered during analysis. +For `CASR_CLUSTER_UNIQUE_CRASHLINE` a `false` literal is `n`, `no`, `f`, +`false`, `off` or `0`. An absent environment variable will also be considered as +`false`. Anything else will be considered as `true`. + ## casr-cli App provides text-based user interface to view CASR reports, prints joint statistics for diff --git a/libcasr/src/report.rs b/libcasr/src/report.rs index 36600b49..5921a2e4 100644 --- a/libcasr/src/report.rs +++ b/libcasr/src/report.rs @@ -784,7 +784,7 @@ pub fn dedup_reports(casreps: &[CrashReport]) -> Result> { /// /// An vector of the same length as `[CrashReport]` /// Vec\[i\] is the flat cluster number to which original `CrashReport` i belongs. 
/// Perform crashline deduplication for each cluster:
/// reset `clusters[i]` to 0 if the crashline of report i duplicates the
/// crashline of an earlier report that belongs to the same cluster.
///
/// # Arguments
///
/// * `crashlines` - slice of crashlines as String; an empty string means the
///   report has no crashline
///
/// * `clusters` - a slice parallel to `crashlines`.
///   `clusters[i]` is the flat cluster number (numbered from 1) to which
///   original casrep i belongs. A value of 0 marks a casrep that has already
///   been removed from its cluster.
///
/// Only the first `min(crashlines.len(), clusters.len())` positions are
/// examined; positions without a counterpart in the other slice are ignored.
///
/// # Return value
///
/// Number of left casreps
pub fn dedup_crashlines(crashlines: &[String], clusters: &mut [usize]) -> usize {
    // One set keyed by (cluster number, crashline) replaces a per-cluster table:
    // it needs no maximum cluster number, no `cluster - 1` index arithmetic and
    // borrows the crashlines instead of cloning each of them.
    let mut seen: HashSet<(usize, &str)> = HashSet::new();

    // Unique crashline counter, i.e. number of left casreps
    let mut unique_cnt = 0;
    for (crashline, cluster) in crashlines.iter().zip(clusters.iter_mut()) {
        // Leave report in the cluster if crashline is absent
        if crashline.is_empty() {
            unique_cnt += 1;
            continue;
        }
        // Cluster 0 is the "removed" marker written below, so such a report is
        // neither counted nor deduplicated again. This keeps a repeated call on
        // already deduplicated data from panicking on `0 - 1`.
        if *cluster == 0 {
            continue;
        }
        if seen.insert((*cluster, crashline.as_str())) {
            // First report with this crashline inside its cluster
            unique_cnt += 1;
        } else {
            // Duplicate crashline inside the same cluster: drop the report
            *cluster = 0;
        }
    }
    unique_cnt
}