Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support hard links for similar images and videos with -L #1201

Merged
merged 1 commit into from
Feb 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions czkawka_cli/src/commands.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,8 @@ pub struct SimilarImagesArgs {
#[clap(flatten)]
pub delete_method: DMethod,
#[clap(flatten)]
pub allow_hard_links: AllowHardLinks,
#[clap(flatten)]
pub dry_run: DryRun,
#[clap(
short = 'g',
Expand Down Expand Up @@ -355,6 +357,8 @@ pub struct SimilarVideosArgs {
#[clap(flatten)]
pub delete_method: DMethod,
#[clap(flatten)]
pub allow_hard_links: AllowHardLinks,
#[clap(flatten)]
pub dry_run: DryRun,
#[clap(
short,
Expand Down
4 changes: 4 additions & 0 deletions czkawka_cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ fn similar_images(similar_images: SimilarImagesArgs, stop_receiver: &Receiver<()
hash_size,
delete_method,
dry_run,
allow_hard_links,
} = similar_images;

let mut item = SimilarImages::new();
Expand All @@ -198,6 +199,7 @@ fn similar_images(similar_images: SimilarImagesArgs, stop_receiver: &Receiver<()
item.set_delete_method(delete_method.delete_method);
item.set_dry_run(dry_run.dry_run);
item.set_similarity(return_similarity_from_similarity_preset(&similarity_preset, hash_size));
item.set_ignore_hard_links(!allow_hard_links.allow_hard_links);

item.find_similar_images(Some(stop_receiver), Some(progress_sender));

Expand Down Expand Up @@ -272,6 +274,7 @@ fn similar_videos(similar_videos: SimilarVideosArgs, stop_receiver: &Receiver<()
maximal_file_size,
delete_method,
dry_run,
allow_hard_links,
} = similar_videos;

let mut item = SimilarVideos::new();
Expand All @@ -282,6 +285,7 @@ fn similar_videos(similar_videos: SimilarVideosArgs, stop_receiver: &Receiver<()
item.set_tolerance(tolerance);
item.set_delete_method(delete_method.delete_method);
item.set_dry_run(dry_run.dry_run);
item.set_ignore_hard_links(!allow_hard_links.allow_hard_links);

item.find_similar_videos(Some(stop_receiver), Some(progress_sender));

Expand Down
210 changes: 206 additions & 4 deletions czkawka_core/src/common_dir_traversal.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::collections::BTreeMap;
use std::fs;
use std::fs::{DirEntry, FileType, Metadata, ReadDir};
use std::fs::{DirEntry, FileType, Metadata};
#[cfg(target_family = "unix")]
use std::os::unix::fs::MetadataExt;
use std::path::{Path, PathBuf};
use std::sync::atomic::Ordering;
use std::time::UNIX_EPOCH;
Expand Down Expand Up @@ -92,7 +94,7 @@ pub enum Collect {
Files,
}

#[derive(Eq, PartialEq, Copy, Clone)]
#[derive(Eq, PartialEq, Copy, Clone, Debug)]
enum EntryType {
File,
Dir,
Expand Down Expand Up @@ -546,9 +548,17 @@ fn process_symlink_in_symlink_mode(
fe_result.push(fe);
}

pub fn common_read_dir(current_folder: &Path, warnings: &mut Vec<String>) -> Option<ReadDir> {
pub fn common_read_dir(current_folder: &Path, warnings: &mut Vec<String>) -> Option<Vec<Result<DirEntry, std::io::Error>>> {
match fs::read_dir(current_folder) {
Ok(t) => Some(t),
Ok(t) => {
// Make directory traversal order stable
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure whether this will have a big performance impact on larger folders, because comparing a large number of paths is quite time-consuming (paths are compared component by component).

let mut r: Vec<_> = t.collect();
r.sort_by_key(|d| match d {
Ok(f) => f.path(),
_ => PathBuf::new(),
});
Some(r)
}
Err(e) => {
warnings.push(flc!(
"core_cannot_open_dir",
Expand Down Expand Up @@ -634,3 +644,195 @@ pub fn get_modified_time(metadata: &Metadata, warnings: &mut Vec<String>, curren
}
}
}

/// Windows variant of the inode grouping key: no lookup is performed and the
/// result is always `None`, so every entry ends up under the same `None` key.
///
/// `take_1_per_inode` leaves `None`-keyed groups untouched, which means no
/// hard-link collapsing happens on this target family.
#[cfg(target_family = "windows")]
pub fn inode(_fe: &FileEntry) -> Option<u64> {
None
}

/// Unix variant of the inode grouping key.
///
/// Reads the metadata for `fe.path` via `fs::metadata` (which follows
/// symlinks) and returns the inode number it reports. Returns `None` when
/// the metadata cannot be read, e.g. because the path no longer exists.
// NOTE(review): inode numbers are only unique within one filesystem; entries on
// different devices could share a key here — consider whether `dev()` should be
// part of the key.
#[cfg(target_family = "unix")]
pub fn inode(fe: &FileEntry) -> Option<u64> {
    fs::metadata(&fe.path).ok().map(|meta| meta.ino())
}

/// Collapses one `(inode key, entries)` group to at most one entry.
///
/// * When the key is `Some(_)`, only the first entry of the group is kept;
///   the remaining entries share that inode key and are dropped.
/// * When the key is `None` (no inode could be determined), the group is
///   returned unchanged.
///
/// Shaped to be passed directly to `flat_map` over the grouped traversal
/// result.
pub fn take_1_per_inode((k, mut v): (Option<u64>, Vec<FileEntry>)) -> Vec<FileEntry> {
    if k.is_some() {
        // `truncate` is a no-op on an empty vector, whereas `drain(1..)`
        // panics when the range start exceeds the length.
        v.truncate(1);
    }
    v
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::common_tool::*;
    use once_cell::sync::Lazy;
    use std::fs;
    use std::fs::File;
    use std::io;
    use std::io::prelude::*;
    use std::time::{Duration, SystemTime};
    use tempfile::TempDir;

    // Lets a bare `CommonToolData` be handed to `DirTraversalBuilder::common_data`
    // without pulling in a full tool implementation.
    impl CommonData for CommonToolData {
        fn get_cd(&self) -> &CommonToolData {
            self
        }
        fn get_cd_mut(&mut self) -> &mut CommonToolData {
            self
        }
    }

    /// Fixed modification time (100 s after the Unix epoch) stamped on every fixture file.
    static NOW: Lazy<SystemTime> = Lazy::new(|| SystemTime::UNIX_EPOCH + Duration::new(100, 0));
    /// One-byte payload written to every fixture file, so each has `size == 1`.
    const CONTENT: &[u8; 1] = b"a";

    /// Populates `dir` with three files and returns their paths as `(src, hard, other)`:
    /// `a` (regular file), `b` (hard link to `a`) and `c` (independent file with
    /// the same content). All are stamped with `NOW` as their modification time.
    fn create_files(dir: &TempDir) -> io::Result<(PathBuf, PathBuf, PathBuf)> {
        let root = dir.path();
        let (src, hard, other) = (root.join("a"), root.join("b"), root.join("c"));

        let mut file = File::create(&src)?;
        file.write_all(CONTENT)?;
        fs::hard_link(&src, &hard)?;
        file.set_modified(*NOW)?;

        let mut file = File::create(&other)?;
        file.write_all(CONTENT)?;
        file.set_modified(*NOW)?;
        Ok((src, hard, other))
    }

    /// Builds tool data that scans only `dir` and accepts files of any size.
    fn common_data_for(dir: &TempDir) -> CommonToolData {
        let mut common_data = CommonToolData::new(ToolType::SimilarImages);
        common_data.directories.set_included_directory(vec![dir.path().to_owned()]);
        common_data.set_minimal_file_size(0);
        common_data
    }

    /// Builds the `FileEntry` list the traversal is expected to report for `paths`,
    /// using the fixture size (1 byte) and modification time (`NOW`).
    fn expected_entries(paths: impl IntoIterator<Item = PathBuf>) -> Vec<FileEntry> {
        let secs = NOW.duration_since(SystemTime::UNIX_EPOCH).unwrap().as_secs();
        paths
            .into_iter()
            .map(|path| FileEntry {
                path,
                size: 1,
                modified_date: secs,
            })
            .collect()
    }

    /// Without inode grouping every file, including the hard link, is reported,
    /// and in the order the traversal produced them.
    #[test]
    fn test_traversal() -> io::Result<()> {
        let dir = tempfile::Builder::new().tempdir()?;
        let (src, hard, other) = create_files(&dir)?;
        let common_data = common_data_for(&dir);

        match DirTraversalBuilder::new().group_by(|_fe| ()).common_data(&common_data).build().run() {
            DirTraversalResult::SuccessFiles { grouped_file_entries, .. } => {
                let actual: Vec<_> = grouped_file_entries.into_values().flatten().collect();
                assert_eq!(expected_entries([src, hard, other]), actual);
            }
            _ => {
                panic!("Expect SuccessFiles.");
            }
        };
        Ok(())
    }

    /// On Unix, grouping by inode and keeping one entry per inode drops the hard link.
    #[cfg(target_family = "unix")]
    #[test]
    fn test_traversal_group_by_inode() -> io::Result<()> {
        let dir = tempfile::Builder::new().tempdir()?;
        let (src, _, other) = create_files(&dir)?;
        let common_data = common_data_for(&dir);

        match DirTraversalBuilder::new().group_by(inode).common_data(&common_data).build().run() {
            DirTraversalResult::SuccessFiles { grouped_file_entries, .. } => {
                let mut actual: Vec<_> = grouped_file_entries.into_iter().flat_map(take_1_per_inode).collect();
                // Groups are keyed by inode number, and whether `a` or `c` gets the
                // lower number is up to the filesystem; normalise the order so the
                // comparison does not depend on inode allocation.
                actual.sort_by(|lhs, rhs| lhs.path.cmp(&rhs.path));
                assert_eq!(expected_entries([src, other]), actual);
            }
            _ => {
                panic!("Expect SuccessFiles.");
            }
        };
        Ok(())
    }

    /// On Windows `inode` always yields `None`, so nothing is collapsed and all
    /// three files are reported.
    #[cfg(target_family = "windows")]
    #[test]
    fn test_traversal_group_by_inode() -> io::Result<()> {
        let dir = tempfile::Builder::new().tempdir()?;
        let (src, hard, other) = create_files(&dir)?;
        let common_data = common_data_for(&dir);

        match DirTraversalBuilder::new().group_by(inode).common_data(&common_data).build().run() {
            DirTraversalResult::SuccessFiles { grouped_file_entries, .. } => {
                let actual: Vec<_> = grouped_file_entries.into_iter().flat_map(take_1_per_inode).collect();
                assert_eq!(expected_entries([src, hard, other]), actual);
            }
            _ => {
                panic!("Expect SuccessFiles.");
            }
        };
        Ok(())
    }
}
14 changes: 10 additions & 4 deletions czkawka_core/src/similar_images.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::common::{
HEIC_EXTENSIONS, IMAGE_RS_SIMILAR_IMAGES_EXTENSIONS, RAW_IMAGE_EXTENSIONS,
};
use crate::common_cache::{get_similar_images_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_dir_traversal::{inode, take_1_per_inode, CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
use crate::flc;
Expand Down Expand Up @@ -122,6 +122,7 @@ pub struct SimilarImages {
hash_alg: HashAlg,
image_filter: FilterType,
exclude_images_with_same_size: bool,
ignore_hard_links: bool,
}

#[derive(Default)]
Expand All @@ -145,6 +146,7 @@ impl SimilarImages {
hash_alg: HashAlg::Gradient,
image_filter: FilterType::Lanczos3,
exclude_images_with_same_size: false,
ignore_hard_links: false,
}
}

Expand Down Expand Up @@ -188,7 +190,7 @@ impl SimilarImages {
let heic_extensions = HEIC_EXTENSIONS.iter().collect::<HashSet<_>>();

let result = DirTraversalBuilder::new()
.group_by(|_fe| ())
.group_by(inode)
.stop_receiver(stop_receiver)
.progress_sender(progress_sender)
.common_data(&self.common_data)
Expand All @@ -199,8 +201,8 @@ impl SimilarImages {
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.images_to_check = grouped_file_entries
.into_values()
.flatten()
.into_iter()
.flat_map(if self.ignore_hard_links { |(_, fes)| fes } else { take_1_per_inode })
.map(|fe| {
let fe_str = fe.path.to_string_lossy().to_string();
let extension_lowercase = fe.path.extension().unwrap_or_default().to_string_lossy().to_lowercase();
Expand Down Expand Up @@ -1090,6 +1092,10 @@ impl SimilarImages {
/// Stores `similarity` in this finder's `similarity` field, replacing the
/// previous value.
pub fn set_similarity(&mut self, similarity: u32) {
self.similarity = similarity;
}

/// Sets the `ignore_hard_links` flag consulted when traversal results are
/// flattened into `images_to_check`.
///
/// As the flag is used in this file, `true` keeps every entry of each inode
/// group, while `false` keeps only one entry per inode (via
/// `take_1_per_inode`).
// NOTE(review): that reads inverted relative to the flag's name — confirm the
// intended semantics against the CLI's `allow_hard_links` option.
pub fn set_ignore_hard_links(&mut self, ignore_hard_links: bool) {
self.ignore_hard_links = ignore_hard_links;
}
}

#[cfg(test)]
Expand Down
14 changes: 10 additions & 4 deletions czkawka_core/src/similar_videos.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use vid_dup_finder_lib::{NormalizedTolerance, VideoHash};

use crate::common::{check_if_stop_received, delete_files_custom, prepare_thread_handler_common, send_info_and_wait_for_ending_all_threads, VIDEO_FILES_EXTENSIONS};
use crate::common_cache::{get_similar_videos_cache_file, load_cache_from_file_generalized_by_path, save_cache_to_file_generalized};
use crate::common_dir_traversal::{CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_dir_traversal::{inode, take_1_per_inode, CheckingMethod, DirTraversalBuilder, DirTraversalResult, FileEntry, ProgressData, ToolType};
use crate::common_tool::{CommonData, CommonToolData, DeleteMethod};
use crate::common_traits::{DebugPrint, PrintResults, ResultEntry};
use crate::flc;
Expand Down Expand Up @@ -83,6 +83,7 @@ pub struct SimilarVideos {
videos_to_check: BTreeMap<String, VideosEntry>,
tolerance: i32,
exclude_videos_with_same_size: bool,
ignore_hard_links: bool,
}

impl CommonData for SimilarVideos {
Expand Down Expand Up @@ -111,6 +112,7 @@ impl SimilarVideos {
tolerance: 10,
exclude_videos_with_same_size: false,
similar_referenced_vectors: vec![],
ignore_hard_links: false,
}
}

Expand Down Expand Up @@ -149,7 +151,7 @@ impl SimilarVideos {
}

let result = DirTraversalBuilder::new()
.group_by(|_fe| ())
.group_by(inode)
.stop_receiver(stop_receiver)
.progress_sender(progress_sender)
.common_data(&self.common_data)
Expand All @@ -160,8 +162,8 @@ impl SimilarVideos {
match result {
DirTraversalResult::SuccessFiles { grouped_file_entries, warnings } => {
self.videos_to_check = grouped_file_entries
.into_values()
.flatten()
.into_iter()
.flat_map(if self.ignore_hard_links { |(_, fes)| fes } else { take_1_per_inode })
.map(|fe| (fe.path.to_string_lossy().to_string(), fe.into_videos_entry()))
.collect();
self.common_data.text_messages.warnings.extend(warnings);
Expand Down Expand Up @@ -454,4 +456,8 @@ impl SimilarVideos {
/// Returns the current value of `common_data.use_reference_folders`.
pub fn get_use_reference(&self) -> bool {
self.common_data.use_reference_folders
}

/// Sets the `ignore_hard_links` flag consulted when traversal results are
/// flattened into `videos_to_check`.
///
/// As the flag is used in this file, `true` keeps every entry of each inode
/// group, while `false` keeps only one entry per inode (via
/// `take_1_per_inode`).
// NOTE(review): that reads inverted relative to the flag's name — confirm the
// intended semantics against the CLI's `allow_hard_links` option.
pub fn set_ignore_hard_links(&mut self, ignore_hard_links: bool) {
self.ignore_hard_links = ignore_hard_links;
}
}
Loading