From c07dfdd220e9622d15dfcfa55d1e985208854511 Mon Sep 17 00:00:00 2001 From: kikeg Date: Fri, 31 Jan 2025 20:32:58 +0100 Subject: [PATCH] Add repot_bench to test with on-memory files. Interestingly, file read doesn't slow down things at all... --- core/benches/report_bench.rs | 84 ++++++++++----- core/benches/testing/mod.rs | 203 ++++++++++++++++++++++++++++------- 2 files changed, 219 insertions(+), 68 deletions(-) diff --git a/core/benches/report_bench.rs b/core/benches/report_bench.rs index 3305dd1..e29cadb 100644 --- a/core/benches/report_bench.rs +++ b/core/benches/report_bench.rs @@ -2,19 +2,46 @@ use std::path::Path; use bumpalo::Bump; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use log::LevelFilter; use okane_core::{ - load::{self, LoadError}, - report, syntax, + load::LoadError, + report::{self, query::PostingQuery}, + syntax, }; +use pretty_assertions::assert_eq; +use testing::{new_example, ExampleInput, FakeFileSink, FileSink, RealFileSink}; pub mod testing; fn load_benchmark(c: &mut Criterion) { - let input = testing::ExampleInput::new(Path::new("report_bench")).unwrap(); - c.bench_function("load-with-counter", |b| { + let input = new_example::(Path::new("report_bench")).unwrap(); + + basic_asserts(&input); + + c.bench_function("load-on-memory", |b| { b.iter(|| { let mut count = 0; - load::new_loader(input.rootpath().to_owned()) + input + .new_loader() + .load(|_, _, _: &syntax::tracked::LedgerEntry| { + count += 1; + Ok::<(), LoadError>(()) + }) + .unwrap(); + black_box(()); + black_box(count) + }) + }); + + let input = new_example::(Path::new("report_bench")).unwrap(); + + basic_asserts(&input); + + c.bench_function("load-on-file", |b| { + b.iter(|| { + let mut count = 0; + input + .new_loader() .load(|_, _, _: &syntax::tracked::LedgerEntry| { count += 1; Ok::<(), LoadError>(()) @@ -27,34 +54,27 @@ fn load_benchmark(c: &mut Criterion) { } fn report_process_benchmark(c: &mut Criterion) { + let input = new_example::(Path::new("report_bench")).unwrap(); let opts = report::ProcessOptions::default(); - let input = testing::ExampleInput::new(Path::new("report_bench")).unwrap(); + c.bench_function("process", |b| { b.iter(|| { let arena = Bump::new(); let mut ctx = report::ReportContext::new(&arena); - let ret = report::process( - &mut ctx, - load::new_loader(input.rootpath().to_owned()), - &opts, - ) - .expect("report::process must succeed"); + let ret = report::process(&mut ctx, input.new_loader(), &opts) + .expect("report::process must succeed"); black_box(ret); }) }); } fn query_postings(c: &mut Criterion) { - let input = testing::ExampleInput::new(Path::new("report_bench")).unwrap(); + let input = new_example::(Path::new("report_bench")).unwrap(); let arena = Bump::new(); let mut ctx = report::ReportContext::new(&arena); let opts = report::ProcessOptions::default(); - let ledger = report::process( - &mut ctx, - load::new_loader(input.rootpath().to_owned()), - &opts, - ) - .expect("report::process must succeed"); + let ledger = + report::process(&mut ctx, input.new_loader(), &opts).expect("report::process must succeed"); c.bench_function("query-posting-one-account", |b| { b.iter(|| { @@ -66,16 +86,12 @@ fn query_postings(c: &mut Criterion) { }); } fn query_balance(c: &mut Criterion) { - let input = testing::ExampleInput::new(Path::new("report_bench")).unwrap(); + let input = new_example::(Path::new("report_bench")).unwrap(); let arena = Bump::new(); let mut ctx = report::ReportContext::new(&arena); let opts = report::ProcessOptions::default(); - let ledger = report::process( - &mut ctx, - load::new_loader(input.rootpath().to_owned()), - &opts, - ) - .expect("report::process must succeed"); + let ledger = + report::process(&mut ctx, input.new_loader(), &opts).expect("report::process must succeed"); c.bench_function("query-balance-default", |b| { b.iter(|| { @@ -84,9 +100,23 @@ fn query_balance(c: &mut Criterion) { }); } +fn basic_asserts(input: &ExampleInput) { + let arena = Bump::new(); + let mut ctx = report::ReportContext::new(&arena); + let opts = report::ProcessOptions::default(); + let ledger = + report::process(&mut ctx, input.new_loader(), &opts).expect("report::process must succeed"); + let num_txns = ledger.transactions().count(); + + assert_eq!(testing::num_transactions(), num_txns as u64); +} + #[ctor::ctor] fn init() { - let _ = env_logger::builder().is_test(true).try_init(); + let _ = env_logger::builder() + .is_test(true) + .filter_level(LevelFilter::max()) + .try_init(); } criterion_group!( diff --git a/core/benches/testing/mod.rs b/core/benches/testing/mod.rs index 72557f6..d75b2ea 100644 --- a/core/benches/testing/mod.rs +++ b/core/benches/testing/mod.rs @@ -1,4 +1,5 @@ use std::{ + collections::HashMap, fs::{self, File}, io::{self, BufWriter, Write}, path::{Path, PathBuf}, @@ -6,15 +7,17 @@ use std::{ }; use chrono::NaiveDate; +use okane_core::load; /// Metadata containing the reference to the generated input. -pub struct ExampleInput { +pub struct ExampleInput { rootdir: PathBuf, rootfile: PathBuf, cleanup: bool, + sink: T, } -impl Drop for ExampleInput { +impl Drop for ExampleInput { fn drop(&mut self) { if self.cleanup { let _ignore = std::fs::remove_dir_all(&self.rootdir).inspect_err(|x| { @@ -47,13 +50,23 @@ const NUM_SUB_FILES: usize = 16; const NUM_THREADS: usize = 2; const NUM_TRANSACTIONS_PER_FILE: usize = 500; -impl ExampleInput { +pub fn num_transactions() -> u64 { + ((YEAR_END - YEAR_BEGIN) as u64) * (NUM_SUB_FILES as u64) * (NUM_TRANSACTIONS_PER_FILE as u64) +} + +pub fn new_example( + subdir: &Path, +) -> Result, ExampleInputError> { + ExampleInput::::new(subdir) +} + +impl ExampleInput { /// Creates an example used for benchmarks. /// Created example is left as-is, unless `OKANE_BENCH_CLEANUP` is set. /// If `OKANE_BENCH_CLEANUP` is set, /// * Always recreate the input. /// * Clean up the created input. - pub fn new(subdir: &Path) -> Result { + pub fn new(subdir: &Path) -> Result { let cleanup = !std::env::var(CLEANUP_KEY) .or_else(|x| { if let std::env::VarError::NotPresent = &x { @@ -65,28 +78,20 @@ impl ExampleInput { .is_empty(); let rootdir = Path::new(env!("CARGO_TARGET_TMPDIR")).join(subdir); let rootfile = rootdir.join("root.ledger"); - if !cleanup { - match fs::metadata(&rootfile) { - Err(error) => log::warn!( - "std::fs::metadata() failed on {}, retry creation: {}", - rootfile.to_string_lossy(), - error - ), - Ok(_) => { - return Ok(ExampleInput { - rootdir, - rootfile, - cleanup, - }) - } - } + if !cleanup && T::shortcut(&rootfile) { + return Ok(Self { + rootdir, + rootfile, + cleanup, + sink: T::new(), + }); } let before_input = std::time::Instant::now(); fs::remove_dir_all(&rootdir).or_else(|e| match e.kind() { io::ErrorKind::NotFound => Ok(()), _ => Err(e), })?; - fs::create_dir(&rootdir)?; + fs::create_dir_all(&rootdir)?; // Assuming you have 50 accounts & 100 years of records. let mut tasks = Vec::new(); let (tx, rx) = mpsc::channel(); @@ -100,26 +105,36 @@ impl ExampleInput { years.push(year); } let years = years.as_slice(); - for i2 in 0..NUM_SUB_FILES / NUM_THREADS { - let i = i1 * NUM_SUB_FILES / NUM_THREADS + i2; - let dirname = dir_name(i); - let subdirpath = rootdir.join(&dirname); - let ret = || -> Result<(), io::Error> { + let ret = || -> Result { + let mut sink = T::new(); + for i2 in 0..NUM_SUB_FILES / NUM_THREADS { + let i = i1 * NUM_SUB_FILES / NUM_THREADS + i2; + let dirname = dir_name(i); + let subdirpath = rootdir.join(&dirname); fs::create_dir(&subdirpath)?; for year in years { - prepare_leaf_file(&subdirpath.join(leaf_file(*year)), i, *year)?; + prepare_leaf_file( + &mut sink, + &subdirpath.join(leaf_file(*year)), + i, + *year, + )?; } - prepare_middle_file(&rootdir, &dirname, years) - }(); - thread_tx.send(ret).expect("send must not fail"); - } + prepare_middle_file(&mut sink, &rootdir, &dirname, years)?; + } + Ok(sink) + }(); + thread_tx.send(ret).expect("send must not fail"); })); } + let mut sink = T::new(); for _ in 0..tasks.len() { - rx.recv_timeout(std::time::Duration::from_secs(150)) + let another = rx + .recv_timeout(std::time::Duration::from_secs(150)) .expect("Can't wait 1 minute on the recv task")?; + sink.merge(another); } - prepare_root_file(&rootfile, 0..NUM_SUB_FILES)?; + prepare_root_file(&mut sink, &rootfile, 0..NUM_SUB_FILES)?; for jh in tasks.into_iter() { jh.join().expect("thread join must not fail"); } @@ -130,35 +145,136 @@ impl ExampleInput { rootdir, rootfile, cleanup, + sink, }) } pub fn rootpath(&self) -> &Path { &self.rootfile } + + pub fn new_loader(&self) -> load::Loader { + load::Loader::new(self.rootfile.clone(), self.sink.clone_as_filesystem()) + } } -fn prepare_root_file(rootfile: &Path, dirs: std::ops::Range) -> Result<(), std::io::Error> { - let mut w = BufWriter::new(File::create(rootfile)?); +pub trait FileSink: Send + 'static { + type FileSystem: load::FileSystem; + + fn new() -> Self; + + fn clone_as_filesystem(&self) -> Self::FileSystem; + + fn shortcut(rootfile: &Path) -> bool; + + fn merge(&mut self, other: Self); + + /// Gives the writer for the given path. + fn writer<'a>( + &'a mut self, + path: &Path, + ) -> Result, std::io::Error>; +} + +pub struct RealFileSink; + +impl FileSink for RealFileSink { + type FileSystem = load::ProdFileSystem; + + fn new() -> Self { + RealFileSink + } + + fn clone_as_filesystem(&self) -> Self::FileSystem { + load::ProdFileSystem + } + + fn shortcut(rootfile: &Path) -> bool { + match fs::metadata(&rootfile) { + Err(error) => { + log::warn!( + "std::fs::metadata() failed on {}, retry creation: {}", + rootfile.to_string_lossy(), + error + ); + + false + } + Ok(_) => true, + } + } + + fn merge(&mut self, _other: Self) {} + + fn writer<'a>( + &'a mut self, + path: &Path, + ) -> Result, std::io::Error> { + Ok(Box::new(BufWriter::new(File::create(path)?))) + } +} + +pub struct FakeFileSink { + files: HashMap>, +} + +impl FileSink for FakeFileSink { + type FileSystem = load::FakeFileSystem; + + fn new() -> Self { + Self { + files: HashMap::default(), + } + } + + fn clone_as_filesystem(&self) -> Self::FileSystem { + self.files.clone().into() + } + + fn shortcut(_rootfile: &Path) -> bool { + false + } + + fn merge(&mut self, other: Self) { + for (k, v) in other.files { + self.files.insert(k, v); + } + } + + fn writer<'a>( + &'a mut self, + path: &Path, + ) -> Result, std::io::Error> { + Ok(Box::new(self.files.entry(path.to_owned()).or_default())) + } +} + +fn prepare_root_file( + sink: &mut T, + rootfile: &Path, + dirs: std::ops::Range, +) -> Result<(), std::io::Error> { + let mut w = sink.writer(rootfile)?; for dir in dirs { writeln!(w, "include {}.ledger", dir_name(dir))?; } - w.into_inner()?.sync_all() + Ok(()) } -fn prepare_middle_file( +fn prepare_middle_file( + sink: &mut T, rootdir: &Path, dirname: &str, years: &[Year], ) -> Result<(), std::io::Error> { let mut target = PathBuf::from(rootdir); target.push(format!("{}.ledger", dirname)); - let mut w = BufWriter::new(File::create(&target)?); + let mut w = sink.writer(&target)?; for year in years { let leaf = leaf_file(*year); writeln!(w, "include {dirname}/{leaf}")?; } - w.into_inner()?.sync_all() + Ok(()) } fn payee(dir: usize, year: Year, i: usize) -> String { @@ -168,8 +284,13 @@ fn payee(dir: usize, year: Year, i: usize) -> String { ) } -fn prepare_leaf_file(target: &Path, dir: usize, year: Year) -> Result<(), std::io::Error> { - let mut w = BufWriter::new(File::create(target)?); +fn prepare_leaf_file( + sink: &mut T, + target: &Path, + dir: usize, + year: Year, +) -> Result<(), std::io::Error> { + let mut w = sink.writer(&target)?; for i in 0..NUM_TRANSACTIONS_PER_FILE { let ordinal = (i * 365 / NUM_TRANSACTIONS_PER_FILE + 1) as u32; let date = NaiveDate::from_yo_opt(year.0, ordinal) @@ -192,7 +313,7 @@ fn prepare_leaf_file(target: &Path, dir: usize, year: Year) -> Result<(), std::i "{date} {payee}\n Assets:Account{dir:02} {amount}\n {other_account} {other_amount}\n\n", )?; } - w.into_inner()?.sync_all() + Ok(()) } fn dir_name(i: usize) -> String {