Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support Dust tree by age #413

Merged
merged 1 commit into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions completions/_dust
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ _dust() {
'-y+[just like -mtime, but based on file change time]: : ' \
'--ctime=[just like -mtime, but based on file change time]: : ' \
'--files0-from=[run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input]: :_files' \
'-m+[Directory '\''size'\'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time]: :(a c m)' \
'--filetime=[Directory '\''size'\'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time]: :(a c m)' \
'-p[Subdirectories will not have their path shortened]' \
'--full-paths[Subdirectories will not have their path shortened]' \
'-L[dereference sym links - Treat sym links as directories and go into them]' \
Expand Down
2 changes: 2 additions & 0 deletions completions/_dust.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ Register-ArgumentCompleter -Native -CommandName 'dust' -ScriptBlock {
[CompletionResult]::new('-y', 'y', [CompletionResultType]::ParameterName, 'just like -mtime, but based on file change time')
[CompletionResult]::new('--ctime', 'ctime', [CompletionResultType]::ParameterName, 'just like -mtime, but based on file change time')
[CompletionResult]::new('--files0-from', 'files0-from', [CompletionResultType]::ParameterName, 'run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input')
[CompletionResult]::new('-m', 'm', [CompletionResultType]::ParameterName, 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time')
[CompletionResult]::new('--filetime', 'filetime', [CompletionResultType]::ParameterName, 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time')
[CompletionResult]::new('-p', 'p', [CompletionResultType]::ParameterName, 'Subdirectories will not have their path shortened')
[CompletionResult]::new('--full-paths', 'full-paths', [CompletionResultType]::ParameterName, 'Subdirectories will not have their path shortened')
[CompletionResult]::new('-L', 'L ', [CompletionResultType]::ParameterName, 'dereference sym links - Treat sym links as directories and go into them')
Expand Down
10 changes: 9 additions & 1 deletion completions/dust.bash
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ _dust() {

case "${cmd}" in
dust)
opts="-d -T -n -p -X -I -L -x -s -r -c -C -b -B -z -R -f -i -v -e -t -w -P -D -F -o -S -j -M -A -y -h -V --depth --threads --number-of-lines --full-paths --ignore-directory --ignore-all-in-file --dereference-links --limit-filesystem --apparent-size --reverse --no-colors --force-colors --no-percent-bars --bars-on-right --min-size --screen-reader --skip-total --filecount --ignore_hidden --invert-filter --filter --file_types --terminal_width --no-progress --print-errors --only-dir --only-file --output-format --stack-size --output-json --mtime --atime --ctime --files0-from --help --version [PATH]..."
opts="-d -T -n -p -X -I -L -x -s -r -c -C -b -B -z -R -f -i -v -e -t -w -P -D -F -o -S -j -M -A -y -m -h -V --depth --threads --number-of-lines --full-paths --ignore-directory --ignore-all-in-file --dereference-links --limit-filesystem --apparent-size --reverse --no-colors --force-colors --no-percent-bars --bars-on-right --min-size --screen-reader --skip-total --filecount --ignore_hidden --invert-filter --filter --file_types --terminal_width --no-progress --print-errors --only-dir --only-file --output-format --stack-size --output-json --mtime --atime --ctime --files0-from --filetime --help --version [PATH]..."
if [[ ${cur} == -* || ${COMP_CWORD} -eq 1 ]] ; then
COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
return 0
Expand Down Expand Up @@ -163,6 +163,14 @@ _dust() {
COMPREPLY=($(compgen -f "${cur}"))
return 0
;;
--filetime)
COMPREPLY=($(compgen -W "a c m" -- "${cur}"))
return 0
;;
-m)
COMPREPLY=($(compgen -W "a c m" -- "${cur}"))
return 0
;;
*)
COMPREPLY=()
;;
Expand Down
2 changes: 2 additions & 0 deletions completions/dust.elv
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ set edit:completion:arg-completer[dust] = {|@words|
cand -y 'just like -mtime, but based on file change time'
cand --ctime 'just like -mtime, but based on file change time'
cand --files0-from 'run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input'
cand -m 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time'
cand --filetime 'Directory ''size'' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time'
cand -p 'Subdirectories will not have their path shortened'
cand --full-paths 'Subdirectories will not have their path shortened'
cand -L 'dereference sym links - Treat sym links as directories and go into them'
Expand Down
1 change: 1 addition & 0 deletions completions/dust.fish
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ complete -c dust -s M -l mtime -d '+/-n matches files modified more/less than n
complete -c dust -s A -l atime -d 'just like -mtime, but based on file access time' -r
complete -c dust -s y -l ctime -d 'just like -mtime, but based on file change time' -r
complete -c dust -l files0-from -d 'run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input' -r -F
complete -c dust -s m -l filetime -d 'Directory \'size\' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time' -r -f -a "{a '',c '',m ''}"
complete -c dust -s p -l full-paths -d 'Subdirectories will not have their path shortened'
complete -c dust -s L -l dereference-links -d 'dereference sym links - Treat sym links as directories and go into them'
complete -c dust -s x -l limit-filesystem -d 'Only count the files and directories on the same filesystem as the supplied directory'
Expand Down
9 changes: 8 additions & 1 deletion man-page/dust.1
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
.SH NAME
Dust \- Like du but more intuitive
.SH SYNOPSIS
\fBdust\fR [\fB\-d\fR|\fB\-\-depth\fR] [\fB\-T\fR|\fB\-\-threads\fR] [\fB\-n\fR|\fB\-\-number\-of\-lines\fR] [\fB\-p\fR|\fB\-\-full\-paths\fR] [\fB\-X\fR|\fB\-\-ignore\-directory\fR] [\fB\-I\fR|\fB\-\-ignore\-all\-in\-file\fR] [\fB\-L\fR|\fB\-\-dereference\-links\fR] [\fB\-x\fR|\fB\-\-limit\-filesystem\fR] [\fB\-s\fR|\fB\-\-apparent\-size\fR] [\fB\-r\fR|\fB\-\-reverse\fR] [\fB\-c\fR|\fB\-\-no\-colors\fR] [\fB\-C\fR|\fB\-\-force\-colors\fR] [\fB\-b\fR|\fB\-\-no\-percent\-bars\fR] [\fB\-B\fR|\fB\-\-bars\-on\-right\fR] [\fB\-z\fR|\fB\-\-min\-size\fR] [\fB\-R\fR|\fB\-\-screen\-reader\fR] [\fB\-\-skip\-total\fR] [\fB\-f\fR|\fB\-\-filecount\fR] [\fB\-i\fR|\fB\-\-ignore_hidden\fR] [\fB\-v\fR|\fB\-\-invert\-filter\fR] [\fB\-e\fR|\fB\-\-filter\fR] [\fB\-t\fR|\fB\-\-file_types\fR] [\fB\-w\fR|\fB\-\-terminal_width\fR] [\fB\-P\fR|\fB\-\-no\-progress\fR] [\fB\-\-print\-errors\fR] [\fB\-D\fR|\fB\-\-only\-dir\fR] [\fB\-F\fR|\fB\-\-only\-file\fR] [\fB\-o\fR|\fB\-\-output\-format\fR] [\fB\-S\fR|\fB\-\-stack\-size\fR] [\fB\-j\fR|\fB\-\-output\-json\fR] [\fB\-M\fR|\fB\-\-mtime\fR] [\fB\-A\fR|\fB\-\-atime\fR] [\fB\-y\fR|\fB\-\-ctime\fR] [\fB\-\-files0\-from\fR] [\fB\-h\fR|\fB\-\-help\fR] [\fB\-V\fR|\fB\-\-version\fR] [\fIPATH\fR]
\fBdust\fR [\fB\-d\fR|\fB\-\-depth\fR] [\fB\-T\fR|\fB\-\-threads\fR] [\fB\-n\fR|\fB\-\-number\-of\-lines\fR] [\fB\-p\fR|\fB\-\-full\-paths\fR] [\fB\-X\fR|\fB\-\-ignore\-directory\fR] [\fB\-I\fR|\fB\-\-ignore\-all\-in\-file\fR] [\fB\-L\fR|\fB\-\-dereference\-links\fR] [\fB\-x\fR|\fB\-\-limit\-filesystem\fR] [\fB\-s\fR|\fB\-\-apparent\-size\fR] [\fB\-r\fR|\fB\-\-reverse\fR] [\fB\-c\fR|\fB\-\-no\-colors\fR] [\fB\-C\fR|\fB\-\-force\-colors\fR] [\fB\-b\fR|\fB\-\-no\-percent\-bars\fR] [\fB\-B\fR|\fB\-\-bars\-on\-right\fR] [\fB\-z\fR|\fB\-\-min\-size\fR] [\fB\-R\fR|\fB\-\-screen\-reader\fR] [\fB\-\-skip\-total\fR] [\fB\-f\fR|\fB\-\-filecount\fR] [\fB\-i\fR|\fB\-\-ignore_hidden\fR] [\fB\-v\fR|\fB\-\-invert\-filter\fR] [\fB\-e\fR|\fB\-\-filter\fR] [\fB\-t\fR|\fB\-\-file_types\fR] [\fB\-w\fR|\fB\-\-terminal_width\fR] [\fB\-P\fR|\fB\-\-no\-progress\fR] [\fB\-\-print\-errors\fR] [\fB\-D\fR|\fB\-\-only\-dir\fR] [\fB\-F\fR|\fB\-\-only\-file\fR] [\fB\-o\fR|\fB\-\-output\-format\fR] [\fB\-S\fR|\fB\-\-stack\-size\fR] [\fB\-j\fR|\fB\-\-output\-json\fR] [\fB\-M\fR|\fB\-\-mtime\fR] [\fB\-A\fR|\fB\-\-atime\fR] [\fB\-y\fR|\fB\-\-ctime\fR] [\fB\-\-files0\-from\fR] [\fB\-m\fR|\fB\-\-filetime\fR] [\fB\-h\fR|\fB\-\-help\fR] [\fB\-V\fR|\fB\-\-version\fR] [\fIPATH\fR]
.SH DESCRIPTION
Like du but more intuitive
.SH OPTIONS
Expand Down Expand Up @@ -115,6 +115,13 @@ just like \-mtime, but based on file change time
\fB\-\-files0\-from\fR
run dust on NUL\-terminated file names specified in file; if argument is \-, then read names from standard input
.TP
\fB\-m\fR, \fB\-\-filetime\fR
Directory \*(Aqsize\*(Aq is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time
.br

.br
[\fIpossible values: \fRa, c, m]
.TP
\fB\-h\fR, \fB\-\-help\fR
Print help
.TP
Expand Down
12 changes: 12 additions & 0 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -294,4 +294,16 @@ pub fn build_cli() -> Command {
.num_args(1)
.help("run dust on NUL-terminated file names specified in file; if argument is -, then read names from standard input"),
)
.arg(
Arg::new("filetime")
.short('m')
.long("filetime")
.num_args(1)
.value_parser([
PossibleValue::new("a").alias("accessed"),
PossibleValue::new("c").alias("changed"),
PossibleValue::new("m").alias("modified"),
])
.help("Directory 'size' is max filetime of child files instead of disk size. while a/c/m for last accessed/changed/modified time"),
)
}
70 changes: 69 additions & 1 deletion src/config.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::node::FileTime;
use chrono::{Local, TimeZone};
use clap::ArgMatches;
use config_file::FromConfigFile;
Expand Down Expand Up @@ -84,6 +85,20 @@ impl Config {
})
.to_lowercase()
}

pub fn get_filetime(&self, options: &ArgMatches) -> Option<FileTime> {
let out_fmt = options.get_one::<String>("filetime");
match out_fmt {
None => None,
Some(x) => match x.as_str() {
"m" | "modified" => Some(FileTime::Modified),
"a" | "accessed" => Some(FileTime::Accessed),
"c" | "changed" => Some(FileTime::Changed),
_ => unreachable!(),
},
}
}

pub fn get_skip_total(&self, options: &ArgMatches) -> bool {
Some(true) == self.skip_total || options.get_flag("skip_total")
}
Expand Down Expand Up @@ -159,7 +174,7 @@ impl Config {
)
}

pub fn get_created_time_operator(&self, options: &ArgMatches) -> Option<(Operater, i64)> {
pub fn get_changed_time_operator(&self, options: &ArgMatches) -> Option<(Operater, i64)> {
get_filter_time_operator(
options.get_one::<String>("ctime"),
get_current_date_epoch_seconds(),
Expand Down Expand Up @@ -259,6 +274,7 @@ mod tests {
#[allow(unused_imports)]
use super::*;
use chrono::{Datelike, Timelike};
use clap::builder::PossibleValue;
use clap::{value_parser, Arg, ArgMatches, Command};

#[test]
Expand Down Expand Up @@ -338,4 +354,56 @@ mod tests {
)
.get_matches_from(args)
}

#[test]
fn test_get_filetime() {
// No config and no flag.
let c = Config::default();
let args = get_filetime_args(vec!["dust"]);
assert_eq!(c.get_filetime(&args), None);

// Config is not defined and flag is defined as access time
let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "a"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Accessed));

let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "accessed"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Accessed));

// Config is not defined and flag is defined as modified time
let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "m"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Modified));

let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "modified"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Modified));

// Config is not defined and flag is defined as changed time
let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "c"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Changed));

let c = Config::default();
let args = get_filetime_args(vec!["dust", "--filetime", "changed"]);
assert_eq!(c.get_filetime(&args), Some(FileTime::Changed));
}

fn get_filetime_args(args: Vec<&str>) -> ArgMatches {
Command::new("Dust")
.arg(
Arg::new("filetime")
.short('m')
.long("filetime")
.num_args(1)
.value_parser([
PossibleValue::new("a").alias("accessed"),
PossibleValue::new("c").alias("changed"),
PossibleValue::new("m").alias("modified"),
])
.help("Directory 'size' is max filetime of child files instead of disk size. while a/c/m for accessed/changed/modified time"),
)
.get_matches_from(args)
}
}
70 changes: 57 additions & 13 deletions src/dir_walker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use std::collections::HashSet;
use crate::node::build_node;
use std::fs::DirEntry;

use crate::node::FileTime;
use crate::platform::get_metadata;

#[derive(Debug)]
Expand All @@ -40,6 +41,7 @@ pub struct WalkData<'a> {
pub filter_changed_time: Option<(Operater, i64)>,
pub use_apparent_size: bool,
pub by_filecount: bool,
pub by_filetime: &'a Option<FileTime>,
pub ignore_hidden: bool,
pub follow_links: bool,
pub progress_data: Arc<PAtomicInfo>,
Expand All @@ -57,19 +59,15 @@ pub fn walk_it(dirs: HashSet<PathBuf>, walk_data: &WalkData) -> Vec<Node> {

prog_data.state.store(Operation::PREPARING, ORDERING);

clean_inodes(node, &mut inodes, walk_data.use_apparent_size)
clean_inodes(node, &mut inodes, walk_data)
})
.collect();
top_level_nodes
}

// Remove files which have the same inode, we don't want to double count them.
fn clean_inodes(
x: Node,
inodes: &mut HashSet<(u64, u64)>,
use_apparent_size: bool,
) -> Option<Node> {
if !use_apparent_size {
fn clean_inodes(x: Node, inodes: &mut HashSet<(u64, u64)>, walk_data: &WalkData) -> Option<Node> {
if !walk_data.use_apparent_size {
if let Some(id) = x.inode_device {
if !inodes.insert(id) {
return None;
Expand All @@ -82,12 +80,25 @@ fn clean_inodes(
tmp.sort_by(sort_by_inode);
let new_children: Vec<_> = tmp
.into_iter()
.filter_map(|c| clean_inodes(c, inodes, use_apparent_size))
.filter_map(|c| clean_inodes(c, inodes, walk_data))
.collect();

let actual_size = if walk_data.by_filetime.is_some() {
// If by_filetime is Some, directory 'size' is the maximum filetime among child files instead of disk size
new_children
.iter()
.map(|c| c.size)
.chain(std::iter::once(x.size))
.max()
.unwrap_or(0)
} else {
// If by_filetime is None, directory 'size' is the sum of disk sizes or file counts of child files
x.size + new_children.iter().map(|c| c.size).sum::<u64>()
};

Some(Node {
name: x.name,
size: x.size + new_children.iter().map(|c| c.size).sum::<u64>(),
size: actual_size,
children: new_children,
inode_device: x.inode_device,
depth: x.depth,
Expand Down Expand Up @@ -270,28 +281,61 @@ mod tests {
}
}

#[cfg(test)]
fn create_walker<'a>(use_apparent_size: bool) -> WalkData<'a> {
use crate::PIndicator;
let indicator = PIndicator::build_me();
WalkData {
ignore_directories: HashSet::new(),
filter_regex: &[],
invert_filter_regex: &[],
allowed_filesystems: HashSet::new(),
filter_modified_time: Some((Operater::GreaterThan, 0)),
filter_accessed_time: Some((Operater::GreaterThan, 0)),
filter_changed_time: Some((Operater::GreaterThan, 0)),
use_apparent_size,
by_filecount: false,
by_filetime: &None,
ignore_hidden: false,
follow_links: false,
progress_data: indicator.data.clone(),
errors: Arc::new(Mutex::new(RuntimeErrors::default())),
}
}

#[test]
#[allow(clippy::redundant_clone)]
fn test_should_ignore_file() {
let mut inodes = HashSet::new();
let n = create_node();
let walkdata = create_walker(false);

// First time we insert the node
assert_eq!(clean_inodes(n.clone(), &mut inodes, false), Some(n.clone()));
assert_eq!(
clean_inodes(n.clone(), &mut inodes, &walkdata),
Some(n.clone())
);

// Second time is a duplicate - we ignore it
assert_eq!(clean_inodes(n.clone(), &mut inodes, false), None);
assert_eq!(clean_inodes(n.clone(), &mut inodes, &walkdata), None);
}

#[test]
#[allow(clippy::redundant_clone)]
fn test_should_not_ignore_files_if_using_apparent_size() {
let mut inodes = HashSet::new();
let n = create_node();
let walkdata = create_walker(true);

// If using apparent size we include Nodes, even if duplicate inodes
assert_eq!(clean_inodes(n.clone(), &mut inodes, true), Some(n.clone()));
assert_eq!(clean_inodes(n.clone(), &mut inodes, true), Some(n.clone()));
assert_eq!(
clean_inodes(n.clone(), &mut inodes, &walkdata),
Some(n.clone())
);
assert_eq!(
clean_inodes(n.clone(), &mut inodes, &walkdata),
Some(n.clone())
);
}

#[test]
Expand Down
Loading
Loading