diff --git a/CHANGELOG.md b/CHANGELOG.md index e4b013bb4..50a127dc5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Features - Add `--ignore-parent` option to override `--no-ignore-parent`, see #1958 (@tmchow) +- Add `--sort` arg to CLI to sort by path/size/dates, see #1875 and #1982 (@DeflateAwning) ## Bugfixes - Handle invalid working directories gracefully when using `--full-path`, see #1900 (@Xavrir). diff --git a/Cargo.lock b/Cargo.lock index fb070409e..336140872 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -131,6 +131,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if", + "cpufeatures", + "rand_core", +] + [[package]] name = "clap" version = "4.6.0" @@ -187,6 +198,15 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + [[package]] name = "crossbeam-channel" version = "0.5.15" @@ -316,6 +336,7 @@ dependencies = [ "nix 0.31.2", "normpath", "nu-ansi-term", + "rand", "regex", "regex-syntax", "tempfile", @@ -355,6 +376,7 @@ dependencies = [ "cfg-if", "libc", "r-efi", + "rand_core", "wasip2", "wasip3", ] @@ -656,6 +678,23 @@ version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" +[[package]] +name = "rand" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" +dependencies = [ + "chacha20", + "getrandom", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" + [[package]] name = "redox_syscall" version = "0.7.3" diff --git a/Cargo.toml b/Cargo.toml index c85026188..418875965 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -75,6 +75,7 @@ diff = "0.1" tempfile = "3.27" filetime = "0.2" test-case = "3.3" +rand = "0.10.1" [profile.dev] debug = "line-tables-only" diff --git a/src/cli.rs b/src/cli.rs index 9c54d7c2c..79cf35ea8 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -11,6 +11,7 @@ use clap::{ use clap_complete::Shell; use normpath::PathExt; +use crate::config::SortKey; use crate::error::print_error; use crate::exec::CommandSet; use crate::filesystem; @@ -546,9 +547,26 @@ pub struct Opts { /// /// Amount of time in milliseconds to buffer, before streaming the search /// results to the console. - #[arg(long, hide = true, value_parser = parse_millis)] + #[arg(long, hide = true, value_parser = parse_millis, conflicts_with("sort"))] pub max_buffer_time: Option, + /// Sort search results by the given key before printing or executing commands via `--exec`/`--exec-batch`. + /// + /// This option results in slower execution as parallel execution is effectively disabled. + /// + /// Warning: This option significantly increases memory usage. + /// All results are buffered in memory before outputting. + #[arg( + long, + value_name = "key", + value_enum, + hide_short_help = true, + help = "Sort results by: path, size, created, or modified", + long_help, + conflicts_with_all(&["list_details", "max_buffer_time"]) + )] + pub sort: Option, + ///Limit the number of search results to 'count' and quit immediately. #[arg( long, diff --git a/src/config.rs b/src/config.rs index a027812a4..e1eea3092 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,5 +1,6 @@ use std::{path::PathBuf, sync::Arc, time::Duration}; +use clap::ValueEnum; use lscolors::LsColors; use regex::bytes::RegexSet; @@ -70,6 +71,9 @@ pub struct Config { /// `max_buffer_time`. pub max_buffer_time: Option, + /// Maximum size of the output buffer before flushing results to the console. + pub max_buffer_size: usize, + /// `None` if the output should not be colorized. Otherwise, a `LsColors` instance that defines /// how to style different filetypes. pub ls_colors: Option, @@ -133,6 +137,9 @@ pub struct Config { /// Names that should stop traversal down their parent. (e.g. https://bford.info/cachedir/). pub ignore_contain: Vec, + + /// The key to sort results by + pub sort_key: Option, } impl Config { @@ -141,3 +148,15 @@ impl Config { self.command.is_none() } } + +#[derive(Copy, Clone, PartialEq, Eq, Debug, ValueEnum)] +pub enum SortKey { + /// Sort by path + Path, + /// Sort by file size + Size, + /// Sort by creation time + Created, + /// Sort by modification time + Modified, +} diff --git a/src/filesystem.rs b/src/filesystem.rs index 4a04f9d52..29499f1a6 100644 --- a/src/filesystem.rs +++ b/src/filesystem.rs @@ -59,6 +59,15 @@ pub fn is_empty(entry: &dir_entry::DirEntry) -> bool { } } +pub fn file_size(entry: &dir_entry::DirEntry) -> Option { + let file_type = entry.file_type()?; + if file_type.is_dir() { + None + } else { + entry.metadata().map(|m| m.len()) + } +} + #[cfg(any(unix, target_os = "redox"))] pub fn is_block_device(ft: fs::FileType) -> bool { ft.is_block_device() diff --git a/src/main.rs b/src/main.rs index 8b7bd936c..2d40d5cc6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -17,6 +17,7 @@ use std::env; use std::io::IsTerminal; use std::path::Path; use std::sync::Arc; +use std::time::Duration; use anyhow::{Context, Result, anyhow, bail}; use clap::{CommandFactory, Parser}; @@ -33,6 +34,7 @@ use crate::filetypes::FileTypes; use crate::filter::OwnerFilter; use crate::filter::TimeFilter; use crate::regex_helper::{pattern_has_uppercase_char, pattern_matches_strings_with_leading_dot}; +use crate::walk::DEFAULT_MAX_BUFFER_LENGTH; // We use jemalloc for performance reasons, see https://github.com/sharkdp/fd/pull/481 // FIXME: re-enable jemalloc on macOS, see comment in Cargo.toml file for more infos @@ -273,7 +275,16 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result Some(Duration::from_secs(3600 * 24 * 365 * 10)), // 10 years - arbitrarily-large. + None => opts.max_buffer_time, + }, + max_buffer_size: match opts.sort { + // If sorting is enabled, allow a practically infinite buffer size. + Some(_) => usize::MAX, + None => DEFAULT_MAX_BUFFER_LENGTH, + }, ls_colors, hyperlink, interactive_terminal, @@ -336,6 +347,7 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result ReceiverBuffer<'a, W> { stdout, mode: ReceiverMode::Buffering, deadline, - buffer: Vec::with_capacity(MAX_BUFFER_LENGTH), + buffer: Vec::with_capacity(config.max_buffer_size.min(10_000)), num_results: 0, } } @@ -208,7 +209,7 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { match self.mode { ReceiverMode::Buffering => { self.buffer.push(dir_entry); - if self.buffer.len() > MAX_BUFFER_LENGTH { + if self.buffer.len() > self.config.max_buffer_size { self.stream()?; } } @@ -241,6 +242,7 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { self.stream()?; } Err(RecvTimeoutError::Disconnected) => { + // Note: This branch is called when all the results are walked/exhausted. return self.stop(); } } @@ -280,9 +282,22 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { /// Stop looping. fn stop(&mut self) -> Result<(), ExitCode> { - if self.mode == ReceiverMode::Buffering { - self.buffer.sort(); - self.stream()?; + match (self.mode, self.config.sort_key) { + (ReceiverMode::Buffering, None) => { + self.buffer.sort(); + self.stream()?; + } + (ReceiverMode::Buffering, Some(sort_key)) => { + sort_dir_entry_results(&mut self.buffer, sort_key); + self.stream()?; + } + (ReceiverMode::Streaming, None) => {} + (ReceiverMode::Streaming, Some(_)) => { + // We force Buffering mode in Config construction if --sort is set by setting the timeout to almost infinity. + unreachable!( + "--sort cannot work in Streaming mode. Buffering mode is forced in Config construction." + ); + } } if self.config.quiet { @@ -407,12 +422,23 @@ impl WorkerState { /// threads (for --exec). fn receive(&self, rx: Receiver) -> ExitCode { let config = &self.config; - // This will be set to `Some` if the `--exec` argument was supplied. if let Some(ref cmd) = config.command { - if cmd.in_batch_mode() { + if let Some(sort_key) = config.sort_key { + // With --sort, we must collect all results before dispatching, + // and run sequentially so the order is preserved. + let mut results: Vec = rx.into_iter().flatten().collect(); + sort_worker_results(&mut results, sort_key); + if cmd.in_batch_mode() { + exec::batch(results, cmd, config) + } else { + exec::job(results, cmd, config) + } + } else if cmd.in_batch_mode() { + // Batch mode without sorting. exec::batch(rx.into_iter().flatten(), cmd, config) } else { + // No sort. Not Batch mode. Dispatch jobs across a thread pool as results stream in. thread::scope(|scope| { // Each spawned job will store its thread handle in here. let threads = config.threads; @@ -663,6 +689,44 @@ impl WorkerState { } } +type SortKeyValueFn = Box Option>; + +fn dir_entry_key_fn(sort_key: SortKey) -> SortKeyValueFn { + match sort_key { + SortKey::Path => Box::new(|e| Some(SortKeyValue::Path(e.path().to_path_buf()))), + SortKey::Size => Box::new(|e| filesystem::file_size(e).map(SortKeyValue::Size)), + SortKey::Created => { + Box::new(|e| e.metadata().map(|m| SortKeyValue::Time(m.created().ok()))) + } + SortKey::Modified => { + Box::new(|e| e.metadata().map(|m| SortKeyValue::Time(m.modified().ok()))) + } + } +} + +fn sort_dir_entry_results(results: &mut [DirEntry], sort_key: SortKey) { + let key_fn = dir_entry_key_fn(sort_key); + results.sort_by_cached_key(|e| key_fn(e)); +} + +fn sort_worker_results(results: &mut [WorkerResult], sort_key: SortKey) { + let key_fn = dir_entry_key_fn(sort_key); + results.sort_by_cached_key(|r| match r { + WorkerResult::Entry(e) => key_fn(e), + WorkerResult::Error(_) => None, + }); +} + +/// Comparable key values, one variant per SortKey. +/// None sorts last via Option's natural ordering (None > Some). +/// Only like enum variants will be compared (e.g., Path < Size will never be compared). +#[derive(Eq, PartialEq, Ord, PartialOrd)] +enum SortKeyValue { + Path(PathBuf), + Size(u64), + Time(Option), +} + fn search_str_for_entry<'a>( entry_path: &'a std::path::Path, full_path_base: Option<&std::path::Path>, diff --git a/tests/testenv/mod.rs b/tests/testenv/mod.rs index 541fa46ec..0194ec765 100644 --- a/tests/testenv/mod.rs +++ b/tests/testenv/mod.rs @@ -18,9 +18,15 @@ pub struct TestEnv { /// Path to the *fd* executable. fd_exe: PathBuf, - /// Normalize each line by sorting the whitespace-separated words + /// Normalize each line by sorting the whitespace-separated words. normalize_line: bool, + /// When `true`, the order of lines in the result is allowed to be arbitrary + /// (i.e., a sort is performed before comparison). + /// + /// When `false`, the order of lines in the result is checked (e.g., when testing the `--sort` CLI option). + allow_random_result_order: bool, + /// Temporary directory for storing test config (global ignore file) config_dir: Option, } @@ -62,6 +68,46 @@ fn create_working_directory( Ok(temp_dir) } +/// Create the working directory and the test files. +fn create_test_directory_with_sized_files( + directories: &[&'static str], + files: &[(&'static str, usize)], +) -> Result { + let temp_dir = tempfile::Builder::new().prefix("fd-tests").tempdir()?; + + { + let root = temp_dir.path(); + + // Pretend that this is a Git repository in order for `.gitignore` files to be respected + fs::create_dir_all(root.join(".git"))?; + + for directory in directories { + fs::create_dir_all(root.join(directory))?; + } + + for (file, size) in files { + let fp = fs::File::create(root.join(file))?; + + // Create a file with the specified size. + fp.set_len(*size as u64)?; + } + + #[cfg(unix)] + unix::fs::symlink(root.join("one/two"), root.join("symlink"))?; + + // Note: creating symlinks on Windows requires the `SeCreateSymbolicLinkPrivilege` which + // is by default only granted for administrators. + #[cfg(windows)] + windows::fs::symlink_dir(root.join("one/two"), root.join("symlink"))?; + + fs::File::create(root.join(".fdignore"))?.write_all(b"fdignored.foo")?; + + fs::File::create(root.join(".gitignore"))?.write_all(b"gitignored.foo")?; + } + + Ok(temp_dir) +} + fn create_config_directory_with_global_ignore(ignore_file_content: &str) -> io::Result { let config_dir = tempfile::Builder::new().prefix("fd-config").tempdir()?; let fd_dir = config_dir.path().join("fd"); @@ -112,7 +158,13 @@ fn format_output_error(args: &[&str], expected: &str, actual: &str) -> String { } /// Normalize the output for comparison. -fn normalize_output(s: &str, trim_start: bool, normalize_line: bool) -> String { +/// +/// Args: +/// - `s`: The output string to normalize. +/// - `trim_start`: Whether to trim whitespace from the start of each line. +/// - `normalize_line`: Whether to sort the whitespace-separated words in each line. +/// - `sort_lines`: Whether to sort the lines alphabetically. +fn normalize_output(s: &str, trim_start: bool, normalize_line: bool, sort_lines: bool) -> String { // Split into lines and normalize separators. let mut lines = s .replace('\0', "NULL\n") @@ -129,7 +181,9 @@ fn normalize_output(s: &str, trim_start: bool, normalize_line: bool) -> String { }) .collect::>(); - lines.sort(); + if sort_lines { + lines.sort(); + } lines.join("\n") } @@ -145,6 +199,7 @@ fn trim_lines(s: &str) -> String { } impl TestEnv { + /// Create a test environment with a temporary folder, empty files, directories, and symlinks. pub fn new(directories: &[&'static str], files: &[&'static str]) -> TestEnv { let temp_dir = create_working_directory(directories, files).expect("working directory"); let fd_exe = find_fd_exe(); @@ -153,15 +208,51 @@ impl TestEnv { temp_dir, fd_exe, normalize_line: false, + allow_random_result_order: true, config_dir: None, } } + pub fn new_with_sized_files( + directories: &[&'static str], + files: &[(&'static str, usize)], + ) -> TestEnv { + let temp_dir = + create_test_directory_with_sized_files(directories, files).expect("working directory"); + let fd_exe = find_fd_exe(); + + TestEnv { + temp_dir, + fd_exe, + normalize_line: false, + allow_random_result_order: true, + config_dir: None, + } + } + + /// Sets whether output lines should be normalized before comparison. + /// + /// Normalization sorts whitespace-separated elements in each line to ensure consistent comparison. pub fn normalize_line(self, normalize: bool) -> TestEnv { TestEnv { temp_dir: self.temp_dir, fd_exe: self.fd_exe, normalize_line: normalize, + allow_random_result_order: self.allow_random_result_order, + config_dir: self.config_dir, + } + } + + /// Sets whether test results are allowed to appear in any order. + /// + /// When `true`, assertions will pass regardless of result ordering, + /// useful for tests where output order is non-deterministic. + pub fn allow_random_result_order(self, allow_random_result_order: bool) -> TestEnv { + TestEnv { + temp_dir: self.temp_dir, + fd_exe: self.fd_exe, + normalize_line: self.normalize_line, + allow_random_result_order, config_dir: self.config_dir, } } @@ -241,10 +332,13 @@ impl TestEnv { &String::from_utf8_lossy(&output.stdout), false, self.normalize_line, + self.allow_random_result_order, ) } /// Assert that calling *fd* with the specified arguments produces the expected output. + /// + /// Does not compare ordering. pub fn assert_output(&self, args: &[&str], expected: &str) { self.assert_output_subdirectory(".", args, expected) } @@ -259,6 +353,8 @@ impl TestEnv { /// Assert that calling *fd* in the specified path under the root working directory, /// and with the specified arguments produces the expected output. + /// + /// Performs normalization to pub fn assert_output_subdirectory>( &self, path: P, @@ -266,7 +362,12 @@ impl TestEnv { expected: &str, ) { // Normalize both expected and actual output. - let expected = normalize_output(expected, true, self.normalize_line); + let expected = normalize_output( + expected, + true, + self.normalize_line, + self.allow_random_result_order, + ); let actual = self.assert_success_and_get_normalized_output(path, args); // Compare actual output to expected output. diff --git a/tests/tests.rs b/tests/tests.rs index c125d3a59..01989a2cd 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -28,6 +28,18 @@ static DEFAULT_FILES: &[&str] = &[ "e1 e2", ]; +static DEFAULT_FILES_WITH_SIZES: &[(&str, usize)] = &[ + ("a.foo", 437), + ("one/b.foo", 823), + ("one/two/c.foo", 91), + ("one/two/C.Foo2", 614), + ("one/two/three/d.foo", 259), + ("fdignored.foo", 748), + ("gitignored.foo", 183), + (".hidden.foo", 502), + ("e1 e2", 376), +]; + #[allow(clippy::let_and_return)] fn get_absolute_root_path(env: &TestEnv) -> String { let path = env @@ -2023,6 +2035,102 @@ fn test_exec_batch_with_limit() { ); } +fn shuffle_files(files: &[&'static str], seed: u64) -> Vec<&'static str> { + use rand::SeedableRng as _; + use rand::seq::SliceRandom as _; + let mut files = files.to_vec(); + files.shuffle(&mut rand::rngs::StdRng::seed_from_u64(seed)); + + files +} + +#[test] +fn test_sort_by_path() { + let te = TestEnv::new(DEFAULT_DIRS, &shuffle_files(DEFAULT_FILES, 42)) + .allow_random_result_order(false); + + // --sort=path should produce deterministic alphabetical output + te.assert_output( + &["--sort=path", "foo"], + "a.foo + one/b.foo + one/two/C.Foo2 + one/two/c.foo + one/two/three/d.foo + one/two/three/directory_foo/", + ); +} + +#[test] +fn test_sort_by_size() { + let te = TestEnv::new_with_sized_files(DEFAULT_DIRS, DEFAULT_FILES_WITH_SIZES) + .allow_random_result_order(false); + + // --exec with --sort should produce output in sorted order. + te.assert_output( + &["foo", "--sort=size"], + "one/two/three/directory_foo/ + one/two/c.foo + one/two/three/d.foo + a.foo + one/two/C.Foo2 + one/b.foo", + ); +} + +/// Shell script execution with --sort (--exec) +#[cfg(not(windows))] +#[test] +fn test_sort_by_path_with_exec() { + let te = TestEnv::new(DEFAULT_DIRS, &shuffle_files(DEFAULT_FILES, 42)) + .allow_random_result_order(false); + + // --exec with --sort should produce output in sorted order. + te.assert_output( + &["foo", "--sort=path", "--exec", "echo", "Item: {}"], + "Item: ./a.foo + Item: ./one/b.foo + Item: ./one/two/C.Foo2 + Item: ./one/two/c.foo + Item: ./one/two/three/d.foo + Item: ./one/two/three/directory_foo", + ); +} + +/// Shell script execution with --sort (--exec) +#[cfg(not(windows))] +#[test] +fn test_sort_by_size_with_exec() { + let te = TestEnv::new_with_sized_files(DEFAULT_DIRS, DEFAULT_FILES_WITH_SIZES) + .allow_random_result_order(false); + + // --exec with --sort should produce output in sorted order. + te.assert_output( + &["foo", "--sort=size", "--exec", "echo", "Item: {}"], + "Item: ./one/two/three/directory_foo + Item: ./one/two/c.foo + Item: ./one/two/three/d.foo + Item: ./a.foo + Item: ./one/two/C.Foo2 + Item: ./one/b.foo", + ); +} + +/// Shell script execution with --sort (--exec-batch) +#[cfg(not(windows))] +#[test] +fn test_sort_by_size_with_exec_batch() { + let te = TestEnv::new_with_sized_files(DEFAULT_DIRS, DEFAULT_FILES_WITH_SIZES) + .normalize_line(false) // Assert order exactly as input. + .allow_random_result_order(false); + + // --exec-batch with --sort should maintain the sorted order in its arguments. + te.assert_output( + &["foo", "--sort=size", "--exec-batch", "echo"], + "./one/two/three/directory_foo ./one/two/c.foo ./one/two/three/d.foo ./a.foo ./one/two/C.Foo2 ./one/b.foo", + ); +} + /// Shell script execution (--exec) with a custom --path-separator #[test] fn test_exec_with_separator() {