From 9f44977270042a9585dce7624d7ea42ae0b2c8f5 Mon Sep 17 00:00:00 2001 From: Chen Chen Date: Fri, 12 Jul 2024 12:43:56 -0500 Subject: [PATCH] Implement simple caching for `-fstype` (#419) * Implement simple caching for -fstype * Using lstat instead of stat in -fstype --- src/find/matchers/fs.rs | 67 ++++++++++++++++++++++++++++++++++++----- src/find/mod.rs | 4 ++- tests/find_cmd_tests.rs | 17 ++++++++++- 3 files changed, 79 insertions(+), 9 deletions(-) diff --git a/src/find/matchers/fs.rs b/src/find/matchers/fs.rs index 331179c0..40090f77 100644 --- a/src/find/matchers/fs.rs +++ b/src/find/matchers/fs.rs @@ -3,6 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. +use std::cell::RefCell; use std::path::Path; use std::{ error::Error, @@ -11,10 +12,16 @@ use std::{ use super::Matcher; +/// The latest mapping from dev_id to fs_type, used for saving mount info reads +pub struct Cache { + dev_id: String, + fs_type: String, +} + /// Get the filesystem type of a file. /// 1. get the metadata of the file /// 2. get the device ID of the metadata -/// 3. search the filesystem list +/// 3. search the cache, then the filesystem list /// /// Returns an empty string when no file system list matches. /// @@ -24,14 +31,26 @@ use super::Matcher; /// /// This is only supported on Unix. #[cfg(unix)] -pub fn get_file_system_type(path: &Path) -> Result> { +pub fn get_file_system_type( + path: &Path, + cache: &RefCell>, +) -> Result> { use std::os::unix::fs::MetadataExt; - let metadata = match path.metadata() { + // use symlink_metadata (lstat under the hood) instead of metadata (stat) to make sure that it + // does not return an error when there is a (broken) symlink; this is aligned with GNU find. + let metadata = match path.symlink_metadata() { Ok(metadata) => metadata, Err(err) => Err(err)?, }; let dev_id = metadata.dev().to_string(); + + if let Some(cache) = cache.borrow().as_ref() { + if cache.dev_id == dev_id { + return Ok(cache.fs_type.clone()); + } + } + let fs_list = match uucore::fsext::read_fs_list() { Ok(fs_list) => fs_list, Err(err) => Err(err)?, @@ -41,6 +60,12 @@ pub fn get_file_system_type(path: &Path) -> Result> { .find(|fs| fs.dev_id == dev_id) .map_or_else(String::new, |fs| fs.fs_type); + // cache the latest query if not a match before + cache.replace(Some(Cache { + dev_id, + fs_type: result.clone(), + })); + Ok(result) } @@ -50,11 +75,15 @@ pub fn get_file_system_type(path: &Path) -> Result> { /// This is only supported on Unix. pub struct FileSystemMatcher { fs_text: String, + cache: RefCell>, } impl FileSystemMatcher { pub fn new(fs_text: String) -> Self { - Self { fs_text } + Self { + fs_text, + cache: RefCell::new(None), + } } } @@ -66,7 +95,7 @@ impl Matcher for FileSystemMatcher { } #[cfg(unix)] { - match get_file_system_type(file_info.path()) { + match get_file_system_type(file_info.path(), &self.cache) { Ok(result) => result == self.fs_text, Err(_) => { writeln!( @@ -89,9 +118,14 @@ mod tests { #[cfg(unix)] fn test_fs_matcher() { use crate::find::{ - matchers::{fs::get_file_system_type, tests::get_dir_entry_for, Matcher}, + matchers::{ + fs::{get_file_system_type, Cache}, + tests::get_dir_entry_for, + Matcher, + }, tests::FakeDependencies, }; + use std::cell::RefCell; use std::fs::File; use tempfile::Builder; @@ -105,7 +139,26 @@ mod tests { let _ = File::create(foo_path).expect("create temp file"); let file_info = get_dir_entry_for(&temp_dir.path().to_string_lossy(), "foo"); - let target_fs_type = get_file_system_type(file_info.path()).unwrap(); + // create an empty cache for initial fs type lookup + let empty_cache = RefCell::new(None); + let target_fs_type = get_file_system_type(file_info.path(), &empty_cache).unwrap(); + + // should work with unmatched cache, and the cache should be set to the last query result + let unmatched_cache = RefCell::new(Some(Cache { + dev_id: "foo".to_string(), + fs_type: "bar".to_string(), + })); + let target_fs_type_unmatched_cache = + get_file_system_type(file_info.path(), &unmatched_cache).unwrap(); + assert_eq!( + target_fs_type, target_fs_type_unmatched_cache, + "get_file_system_type should return correct result with unmatched cache" + ); + assert_eq!( + unmatched_cache.borrow().as_ref().unwrap().fs_type, + target_fs_type, + "get_file_system_type should set the cache to the last query result" + ); // should match fs type let matcher = super::FileSystemMatcher::new(target_fs_type.clone()); diff --git a/src/find/mod.rs b/src/find/mod.rs index f521035b..49832016 100644 --- a/src/find/mod.rs +++ b/src/find/mod.rs @@ -1206,10 +1206,12 @@ mod tests { fn test_fs_matcher() { use crate::find::tests::FakeDependencies; use matchers::fs::get_file_system_type; + use std::cell::RefCell; use std::path::Path; let path = Path::new("./test_data/simple/subdir"); - let target_fs_type = get_file_system_type(path).unwrap(); + let empty_cache = RefCell::new(None); + let target_fs_type = get_file_system_type(path, &empty_cache).unwrap(); // should match fs type let deps = FakeDependencies::new(); diff --git a/tests/find_cmd_tests.rs b/tests/find_cmd_tests.rs index 025123ee..b0b2eda0 100644 --- a/tests/find_cmd_tests.rs +++ b/tests/find_cmd_tests.rs @@ -785,10 +785,12 @@ fn find_age_range() { #[serial(working_dir)] fn find_fs() { use findutils::find::matchers::fs::get_file_system_type; + use std::cell::RefCell; use std::path::Path; let path = Path::new("./test_data/simple/subdir"); - let target_fs_type = get_file_system_type(path).unwrap(); + let empty_cache = RefCell::new(None); + let target_fs_type = get_file_system_type(path, &empty_cache).unwrap(); // match fs type Command::cargo_bin("find") @@ -828,6 +830,19 @@ fn find_fs() { .success() .stdout(predicate::str::is_empty()) .stderr(predicate::str::is_empty()); + + let path = Path::new("./test_data/links"); + let empty_cache = RefCell::new(None); + let target_fs_type = get_file_system_type(path, &empty_cache).unwrap(); + + // working with broken links + Command::cargo_bin("find") + .expect("found binary") + .args(["./test_data/links", "-fstype", &target_fs_type]) + .assert() + .success() + .stdout(predicate::str::contains("./test_data/links/link-missing")) + .stderr(predicate::str::is_empty()); } #[test]