Skip to content

Commit c0fd33a

Browse files
committed
parse: Handle multiple flags at once like -LEXO2
The POSIX Utility Syntax Guidelines specify that flag groups like -HL should be handled like -H -L. GNU find doesn't support grouping flags in this way, but BSD find does.

To avoid conflicts with non-flag primaries, for now we require at least one flag in a group to be a capital letter. That is, we support things like -Lds but not -ds. We also do not support -fPATH (without a space) as it would conflict with -follow, -fprint, etc. It is impossible to be compatible with both GNU and BSD find here:

    user@gnu$ find -follow
    link
    link/file
    ...

    user@bsd$ find -follow
    find: ollow: No such file or directory

Link: https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap12.html
1 parent 2473389 commit c0fd33a

12 files changed

Lines changed: 181 additions & 26 deletions

File tree

src/parse.c

Lines changed: 138 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -115,21 +115,28 @@ struct bfs_parser {
115115
};
116116

117117
/**
118-
* Possible token types.
118+
* Token types and flags.
119119
*/
120-
enum token_type {
120+
enum token_info {
121121
/** A flag. */
122-
T_FLAG,
122+
T_FLAG = 1,
123123
/** A root path. */
124-
T_PATH,
124+
T_PATH = 2,
125125
/** An option. */
126-
T_OPTION,
126+
T_OPTION = 3,
127127
/** A test. */
128-
T_TEST,
128+
T_TEST = 4,
129129
/** An action. */
130-
T_ACTION,
130+
T_ACTION = 5,
131131
/** An operator. */
132-
T_OPERATOR,
132+
T_OPERATOR = 6,
133+
/** Mask for token types. */
134+
T_TYPE = (1 << 3) - 1,
135+
136+
/** A token can match a prefix of an argument, like -On, -newerXY, etc. */
137+
T_PREFIX = 1 << 3,
138+
/** A flag that takes an argument. */
139+
T_NEEDS_ARG = 1 << 4,
133140
};
134141

135142
/**
@@ -414,7 +421,9 @@ static struct bfs_expr *parse_expr(struct bfs_parser *parser);
414421
/**
415422
* Advance by a single token.
416423
*/
417-
static char **parser_advance(struct bfs_parser *parser, enum token_type type, size_t argc) {
424+
static char **parser_advance(struct bfs_parser *parser, enum token_info type, size_t argc) {
425+
bfs_assert(type == (type & T_TYPE));
426+
418427
if (type != T_FLAG && type != T_PATH) {
419428
parser->expr_started = true;
420429
}
@@ -657,15 +666,33 @@ static struct bfs_expr *parse_nullary_flag(struct bfs_parser *parser) {
657666
*/
658667
static struct bfs_expr *parse_unary_flag(struct bfs_parser *parser) {
659668
const char *arg = parser->argv[0];
669+
char flag = arg[strlen(arg) - 1];
670+
660671
const char *value = parser->argv[1];
661672
if (!value) {
662-
parse_error(parser, "${cyn}%s${rs} needs a value.\n", arg);
673+
parse_error(parser, "${cyn}-%c${rs} needs a value.\n", flag);
663674
return NULL;
664675
}
665676

666677
return parse_flag(parser, 2);
667678
}
668679

680+
/**
681+
* Parse a prefix flag like -O3, -j8, etc.
682+
*/
683+
static struct bfs_expr *parse_prefix_flag(struct bfs_parser *parser, char flag, const char **value) {
684+
const char *arg = parser->argv[0];
685+
686+
const char *suffix = strchr(arg, flag) + 1;
687+
if (!*suffix) {
688+
parse_error(parser, "${cyn}-%c${rs} needs a value.\n", flag);
689+
return NULL;
690+
}
691+
692+
*value = suffix;
693+
return parse_nullary_flag(parser);
694+
}
695+
669696
/**
670697
* Parse a single option.
671698
*/
@@ -868,21 +895,22 @@ static struct bfs_expr *parse_debug(struct bfs_parser *parser, int arg1, int arg
868895
* Parse -On.
869896
*/
870897
static struct bfs_expr *parse_optlevel(struct bfs_parser *parser, int arg1, int arg2) {
871-
struct bfs_expr *expr = parse_nullary_flag(parser);
898+
const char *arg;
899+
struct bfs_expr *expr = parse_prefix_flag(parser, 'O', &arg);
872900
if (!expr) {
873901
return NULL;
874902
}
875903

876904
int *optlevel = &parser->ctx->optlevel;
877905

878-
if (strcmp(expr->argv[0], "-Ofast") == 0) {
906+
if (strcmp(arg, "fast") == 0) {
879907
*optlevel = 4;
880-
} else if (!parse_int(parser, expr->argv, expr->argv[0] + 2, optlevel, IF_INT | IF_UNSIGNED)) {
908+
} else if (!parse_int(parser, expr->argv, arg, optlevel, IF_INT | IF_UNSIGNED)) {
881909
return NULL;
882910
}
883911

884912
if (*optlevel > 4) {
885-
parse_expr_warning(parser, expr, "${cyn}-O${bld}%s${rs} is the same as ${cyn}-O${bld}4${rs}.\n\n", expr->argv[0] + 2);
913+
parse_expr_warning(parser, expr, "${cyn}-O${bld}%s${rs} is the same as ${cyn}-O${bld}4${rs}.\n\n", arg);
886914
}
887915

888916
return expr;
@@ -1613,13 +1641,14 @@ static struct bfs_expr *parse_inum(struct bfs_parser *parser, int arg1, int arg2
16131641
* Parse -j<n>.
16141642
*/
16151643
static struct bfs_expr *parse_jobs(struct bfs_parser *parser, int arg1, int arg2) {
1616-
struct bfs_expr *expr = parse_nullary_flag(parser);
1644+
const char *arg;
1645+
struct bfs_expr *expr = parse_prefix_flag(parser, 'j', &arg);
16171646
if (!expr) {
16181647
return NULL;
16191648
}
16201649

16211650
unsigned int n;
1622-
if (!parse_int(parser, expr->argv, expr->argv[0] + 2, &n, IF_INT | IF_UNSIGNED)) {
1651+
if (!parse_int(parser, expr->argv, arg, &n, IF_INT | IF_UNSIGNED)) {
16231652
return NULL;
16241653
}
16251654

@@ -2954,18 +2983,18 @@ static struct bfs_expr *parse_version(struct bfs_parser *parser, int arg1, int a
29542983
return NULL;
29552984
}
29562985

2986+
/** Parser callback function type. */
29572987
typedef struct bfs_expr *parse_fn(struct bfs_parser *parser, int arg1, int arg2);
29582988

29592989
/**
29602990
* An entry in the parse table for primary expressions.
29612991
*/
29622992
struct table_entry {
29632993
char *arg;
2964-
enum token_type type;
2994+
enum token_info info;
29652995
parse_fn *parse;
29662996
int arg1;
29672997
int arg2;
2968-
bool prefix;
29692998
};
29702999

29713000
/**
@@ -2979,13 +3008,13 @@ static const struct table_entry parse_table[] = {
29793008
{"-Bnewer", T_TEST, parse_newer, BFS_STAT_BTIME},
29803009
{"-Bsince", T_TEST, parse_since, BFS_STAT_BTIME},
29813010
{"-Btime", T_TEST, parse_time, BFS_STAT_BTIME},
2982-
{"-D", T_FLAG, parse_debug},
3011+
{"-D", T_FLAG | T_NEEDS_ARG, parse_debug},
29833012
{"-E", T_FLAG, parse_regex_extended},
29843013
{"-H", T_FLAG, parse_follow, BFTW_FOLLOW_ROOTS, false},
29853014
{"-L", T_FLAG, parse_follow, BFTW_FOLLOW_ALL, false},
2986-
{"-O", T_FLAG, parse_optlevel, 0, 0, true},
3015+
{"-O", T_FLAG | T_PREFIX, parse_optlevel},
29873016
{"-P", T_FLAG, parse_follow, 0, false},
2988-
{"-S", T_FLAG, parse_search_strategy},
3017+
{"-S", T_FLAG | T_NEEDS_ARG, parse_search_strategy},
29893018
{"-X", T_FLAG, parse_xargs_safe},
29903019
{"-a", T_OPERATOR},
29913020
{"-acl", T_TEST, parse_acl},
@@ -3011,7 +3040,7 @@ static const struct table_entry parse_table[] = {
30113040
{"-execdir", T_ACTION, parse_exec, BFS_EXEC_CHDIR},
30123041
{"-executable", T_TEST, parse_access, X_OK},
30133042
{"-exit", T_ACTION, parse_exit},
3014-
{"-f", T_FLAG, parse_f},
3043+
{"-f", T_FLAG | T_NEEDS_ARG, parse_f},
30153044
{"-false", T_TEST, parse_const, false},
30163045
{"-files0-from", T_OPTION, parse_files0_from},
30173046
{"-flags", T_TEST, parse_flags},
@@ -3032,7 +3061,7 @@ static const struct table_entry parse_table[] = {
30323061
{"-ipath", T_TEST, parse_path, true},
30333062
{"-iregex", T_TEST, parse_regex, BFS_REGEX_ICASE},
30343063
{"-iwholename", T_TEST, parse_path, true},
3035-
{"-j", T_FLAG, parse_jobs, 0, 0, true},
3064+
{"-j", T_FLAG | T_PREFIX, parse_jobs},
30363065
{"-limit", T_ACTION, parse_limit},
30373066
{"-links", T_TEST, parse_links},
30383067
{"-lname", T_TEST, parse_lname, false},
@@ -3046,7 +3075,7 @@ static const struct table_entry parse_table[] = {
30463075
{"-mtime", T_TEST, parse_time, BFS_STAT_MTIME},
30473076
{"-name", T_TEST, parse_name, false},
30483077
{"-newer", T_TEST, parse_newer, BFS_STAT_MTIME},
3049-
{"-newer", T_TEST, parse_newerxy, 0, 0, true},
3078+
{"-newer", T_TEST | T_PREFIX, parse_newerxy},
30503079
{"-nocolor", T_OPTION, parse_color, false},
30513080
{"-nogroup", T_TEST, parse_nogroup},
30523081
{"-nohidden", T_TEST, parse_nohidden},
@@ -3099,7 +3128,7 @@ static const struct table_entry parse_table[] = {
30993128
static const struct table_entry *table_lookup(const char *arg) {
31003129
for (const struct table_entry *entry = parse_table; entry->arg; ++entry) {
31013130
bool match;
3102-
if (entry->prefix) {
3131+
if (entry->info & T_PREFIX) {
31033132
match = strncmp(arg, entry->arg, strlen(entry->arg)) == 0;
31043133
} else {
31053134
match = strcmp(arg, entry->arg) == 0;
@@ -3112,6 +3141,85 @@ static const struct table_entry *table_lookup(const char *arg) {
31123141
return NULL;
31133142
}
31143143

3144+
/** Look up a single-character flag in the parse table. */
3145+
static const struct table_entry *flag_lookup(char flag) {
3146+
for (const struct table_entry *entry = parse_table; entry->arg; ++entry) {
3147+
enum token_info type = entry->info & T_TYPE;
3148+
if (type == T_FLAG && entry->arg[1] == flag && !entry->arg[2]) {
3149+
return entry;
3150+
}
3151+
}
3152+
3153+
return NULL;
3154+
}
3155+
3156+
/** Check for a multi-flag argument like -LEXO2. */
3157+
static bool is_flag_group(const char *arg) {
3158+
// We enforce that at least one flag in a flag group must be a capital
3159+
// letter, to avoid ambiguity with primary expressions
3160+
bool has_upper = false;
3161+
3162+
// Flags that take an argument must appear last
3163+
bool needs_arg = false;
3164+
3165+
for (size_t i = 1; arg[i]; ++i) {
3166+
char c = arg[i];
3167+
if (c >= 'A' && c <= 'Z') {
3168+
has_upper = true;
3169+
}
3170+
3171+
if (needs_arg) {
3172+
return false;
3173+
}
3174+
3175+
const struct table_entry *entry = flag_lookup(c);
3176+
if (!entry || !entry->parse) {
3177+
return false;
3178+
}
3179+
3180+
if (entry->info & T_PREFIX) {
3181+
// The rest is the flag's argument
3182+
break;
3183+
}
3184+
3185+
if (entry->info & T_NEEDS_ARG) {
3186+
needs_arg = true;
3187+
}
3188+
}
3189+
3190+
return has_upper;
3191+
}
3192+
3193+
/** Parse a multi-flag argument. */
3194+
static struct bfs_expr *parse_flag_group(struct bfs_parser *parser) {
3195+
struct bfs_expr *expr = NULL;
3196+
3197+
char **start = parser->argv;
3198+
char **end = start;
3199+
const char *arg = start[0];
3200+
3201+
for (size_t i = 1; arg[i]; ++i) {
3202+
parser->argv = start;
3203+
3204+
const struct table_entry *entry = flag_lookup(arg[i]);
3205+
expr = entry->parse(parser, entry->arg1, entry->arg2);
3206+
3207+
if (parser->argv > end) {
3208+
end = parser->argv;
3209+
}
3210+
3211+
if (!expr || entry->info & T_PREFIX) {
3212+
break;
3213+
}
3214+
}
3215+
3216+
if (expr) {
3217+
bfs_assert(parser->argv == end, "Didn't eat enough tokens");
3218+
}
3219+
3220+
return expr;
3221+
}
3222+
31153223
/** Search for a fuzzy match in the parse table. */
31163224
static const struct table_entry *table_lookup_fuzzy(const char *arg) {
31173225
const struct table_entry *best = NULL;
@@ -3150,11 +3258,15 @@ static struct bfs_expr *parse_primary(struct bfs_parser *parser) {
31503258
}
31513259
}
31523260

3261+
if (is_flag_group(arg)) {
3262+
return parse_flag_group(parser);
3263+
}
3264+
31533265
match = table_lookup_fuzzy(arg);
31543266

31553267
CFILE *cerr = parser->ctx->cerr;
31563268
parse_error(parser, "Unknown argument; did you mean ");
3157-
switch (match->type) {
3269+
switch (match->info & T_TYPE) {
31583270
case T_FLAG:
31593271
cfprintf(cerr, "${cyn}%s${rs}?", match->arg);
31603272
break;

tests/bfs/LD_stat.out

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
links
2+
links/broken
3+
links/deeply
4+
links/deeply/nested
5+
links/deeply/nested/broken
6+
links/deeply/nested/dir
7+
links/deeply/nested/file
8+
links/deeply/nested/link
9+
links/file
10+
links/hardlink
11+
links/notdir
12+
links/skip
13+
links/skip/broken
14+
links/skip/dir
15+
links/skip/file
16+
links/skip/link
17+
links/symlink

tests/bfs/LD_stat.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
bfs_diff -LD stat links

tests/bfs/O_3.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
! invoke_bfs -O 3 basic

tests/bsd/Hf.out

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
links/deeply/nested/dir

tests/bsd/Hf.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
bfs_diff -Hf links/deeply/nested/dir

tests/common/HLP.out

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
links/deeply/nested/dir

tests/common/HLP.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
bfs_diff -HLP links/deeply/nested/dir

tests/posix/HL.out

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
links
2+
links/broken
3+
links/deeply
4+
links/deeply/nested
5+
links/deeply/nested/broken
6+
links/deeply/nested/dir
7+
links/deeply/nested/file
8+
links/deeply/nested/link
9+
links/file
10+
links/hardlink
11+
links/notdir
12+
links/skip
13+
links/skip/broken
14+
links/skip/dir
15+
links/skip/file
16+
links/skip/link
17+
links/symlink

tests/posix/HL.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
bfs_diff -HL links

0 commit comments

Comments
 (0)