Skip to content

Commit 2cbc964

Browse files
committed
cache performance boost for recursion + cleanup + xmalloc
Status: patch applies cleanly apart concat_path_file_fast() manually added From 69312a6928c188ac8be3c714db3a53724b85dd09 Mon Sep 17 00:00:00 2001 From: Jody Bruchon <jody@jodybruchon.com> Date: Wed, 10 Apr 2024 18:08:00 -0400 Subject: [PATCH v2] Huge performance boost for recursion (cp, du, find, ls, rm, mv) This patch uses pre-calculated name lengths to massively speed up various recursive operations. Three new *_fast variant functions are added along with get_d_namlen copied from libjodycode. Passing lengths allows use of memcpy() instead of strcpy()/strcat() and replacement of a particularly hot xasprintf(). Cachegrind shows CPU instructions on Linux x86_64 drop by 24% to 67% with similar reductions in data reads and writes. Anything in BusyBox that uses a while(readdir()) loop or that calls concat_*path_file() or last_char_is() might benefit from adopting this optimization framework. Bloat-O-Meter: function old new delta concat_path_file_fast - 194 +194 get_d_namlen - 36 +36 concat_subpath_file_fast - 31 +31 last_char_is_fast - 26 +26 complete_cmd_dir_file 992 1002 +10 copy_file 1831 1834 +3 remove_file 708 707 -1 recursive_action1 420 419 -1 du 468 467 -1 scan_and_display_dirs_recur 675 672 -3 concat_subpath_file 39 - -39 ------------------------------------------------------------------------------ (add/remove: 5/1 grow/shrink: 2/4 up/down: 300/-45) Total: 255 bytes Cachegrind tests (-original, +improved): -------------------------------------------------------------------------------- Ir I1mr ILmr Dr D1mr DLmr Dw D1mw DLmw -------------------------------------------------------------------------------- cg_diff_cp:-1,811,369 (100.0%) 1,544 (100.0%) 1,514 (100.0%) 379,597 (100.0%) 3,151 (100.0%) 2,183 (100.0%) 249,874 (100.0%) 1,218 (100.0%) 1,160 (100.0%) PROGRAM TOTALS cg_diff_cp:+1,310,239 (100.0%) 1,550 (100.0%) 1,519 (100.0%) 290,298 (100.0%) 3,152 (100.0%) 2,183 (100.0%) 184,883 (100.0%) 1,218 (100.0%) 1,160 (100.0%) PROGRAM TOTALS cg_diff_du:-11,080,026 (100.0%) 1,692 (100.0%) 1,627 (100.0%) 2,345,969 (100.0%) 5,603 (100.0%) 2,524 (100.0%) 1,537,107 (100.0%) 1,838 (100.0%) 1,342 (100.0%) PROGRAM TOTALS cg_diff_du:+4,522,979 (100.0%) 1,635 (100.0%) 1,592 (100.0%) 1,189,256 (100.0%) 4,911 (100.0%) 2,513 (100.0%) 784,551 (100.0%) 1,636 (100.0%) 1,287 (100.0%) PROGRAM TOTALS cg_diff_find:-10,719,682 (100.0%) 1,638 (100.0%) 1,592 (100.0%) 2,360,985 (100.0%) 4,149 (100.0%) 2,634 (100.0%) 1,493,014 (100.0%) 1,096 (100.0%) 836 (100.0%) PROGRAM TOTALS cg_diff_find:+4,212,414 (100.0%) 1,527 (100.0%) 1,498 (100.0%) 1,215,858 (100.0%) 3,748 (100.0%) 2,629 (100.0%) 734,040 (100.0%) 850 (100.0%) 732 (100.0%) PROGRAM TOTALS cg_diff_ls:-17,363,363 (100.0%) 1,984 (100.0%) 1,731 (100.0%) 3,751,223 (100.0%) 33,435 (100.0%) 2,439 (100.0%) 2,805,925 (100.0%) 9,422 (100.0%) 2,713 (100.0%) PROGRAM TOTALS cg_diff_ls:+11,166,139 (100.0%) 1,774 (100.0%) 1,683 (100.0%) 2,666,248 (100.0%) 31,111 (100.0%) 2,671 (100.0%) 2,100,224 (100.0%) 9,007 (100.0%) 2,474 (100.0%) PROGRAM TOTALS cg_diff_rm:-6,176,069 (100.0%) 1,585 (100.0%) 1,537 (100.0%) 1,298,524 (100.0%) 3,536 (100.0%) 2,351 (100.0%) 830,656 (100.0%) 905 (100.0%) 802 (100.0%) PROGRAM TOTALS cg_diff_rm:+2,039,241 (100.0%) 1,459 (100.0%) 1,429 (100.0%) 573,877 (100.0%) 3,361 (100.0%) 2,438 (100.0%) 379,660 (100.0%) 724 (100.0%) 663 (100.0%) PROGRAM TOTALS svlogd: rmoldest(): use get_d_namlen() last_char_is_fast: more robust parameter check concat_path_file_fast: copy null byte instead of adding one later The file name will always end in a null byte, so copy it, saving 18 bytes of code. function old new delta concat_path_file_fast 193 175 -18 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-18) Total: -18 bytes Signed-off-by: Jody Bruchon <jody@jodybruchon.com> Signed-off-by: Roberto A. Foglietta <roberto.foglietta@gmail.com> This is the commit message #2: cache performance boost for recursion, cleanup Comments removal related to functions changed, good for patch inspection by the patch's author but not for production. Hence janitoring the patch. Signed-off-by: Roberto A. Foglietta <roberto.foglietta@gmail.com> This is the commit message #3: cache performance boost for recursion, xmalloc Signed-off-by: Roberto A. Foglietta <roberto.foglietta@gmail.com>
1 parent 26734d1 commit 2cbc964

13 files changed

Lines changed: 89 additions & 7 deletions

File tree

coreutils/du.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,7 @@ static unsigned long long du(const char *filename)
200200
}
201201

202202
while ((entry = readdir(dir))) {
203-
newfile = concat_subpath_file(filename, entry->d_name);
203+
newfile = concat_subpath_file_fast(filename, entry);
204204
if (newfile == NULL)
205205
continue;
206206
++G.du_depth;

coreutils/ls.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1025,7 +1025,7 @@ static struct dnode **scan_one_dir(const char *path, unsigned *nfiles_p)
10251025
continue; /* if only -A, skip . and .. but show other dotfiles */
10261026
}
10271027
}
1028-
fullname = concat_path_file(path, entry->d_name);
1028+
fullname = concat_path_file_fast(path, entry);
10291029
cur = my_stat(fullname, bb_basename(fullname), 0);
10301030
if (!cur) {
10311031
free(fullname);

include/libbb.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,7 @@ char *bb_get_last_path_component_nostrip(const char *path) FAST_FUNC;
584584
const char *bb_basename(const char *name) FAST_FUNC;
585585
/* NB: can violate const-ness (similarly to strchr) */
586586
char *last_char_is(const char *s, int c) FAST_FUNC;
587+
char *last_char_is_fast(const char *s, int c, int len) FAST_FUNC;
587588
const char* endofname(const char *name) FAST_FUNC;
588589
char *is_prefixed_with(const char *string, const char *key) FAST_FUNC;
589590
char *is_suffixed_with(const char *string, const char *key) FAST_FUNC;
@@ -1832,9 +1833,12 @@ void config_close(parser_t *parser) FAST_FUNC;
18321833
* If path is NULL, it is assumed to be "/".
18331834
* filename should not be NULL. */
18341835
char *concat_path_file(const char *path, const char *filename) FAST_FUNC;
1836+
char *concat_path_file_fast(const char *path, const struct dirent *dirp) FAST_FUNC;
18351837
/* Returns NULL on . and .. */
18361838
char *concat_subpath_file(const char *path, const char *filename) FAST_FUNC;
1839+
char *concat_subpath_file_fast(const char *path, const struct dirent *dirp) FAST_FUNC;
18371840

1841+
size_t get_d_namlen(const struct dirent * const dirent) FAST_FUNC;
18381842

18391843
int bb_make_directory(char *path, long mode, int flags) FAST_FUNC;
18401844

libbb/Kbuild.src

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ lib-y += find_pid_by_name.o
4040
lib-y += find_root_device.o
4141
lib-y += full_write.o
4242
lib-y += get_console.o
43+
lib-y += get_d_namlen.o
4344
lib-y += get_last_path_component.o
4445
lib-y += get_line_from_file.o
4546
lib-y += getpty.o

libbb/concat_path_file.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,35 @@ char* FAST_FUNC concat_path_file(const char *path, const char *filename)
5959
#endif
6060
}
6161

62+
char* FAST_FUNC concat_path_file_fast(const char *path, const struct dirent *dirp)
63+
{
64+
const char *filename = dirp->d_name;
65+
char *buf;
66+
int lc_slash = 0;
67+
int name_offset, end_offset;
68+
int pathlen, namelen;
69+
70+
if (!path) {
71+
path = "";
72+
pathlen = 0;
73+
} else {
74+
pathlen = strlen(path);
75+
if (last_char_is_fast(path, '/', pathlen) == NULL) lc_slash = 1;
76+
}
77+
namelen = get_d_namlen(dirp);
78+
while (*filename == '/') {
79+
filename++;
80+
namelen--;
81+
}
82+
name_offset = pathlen + lc_slash;
83+
end_offset = name_offset + namelen;
84+
buf = xmalloc(end_offset + 1);
85+
memcpy(buf, path, pathlen);
86+
if (lc_slash == 1) *(buf + pathlen) = '/';
87+
memcpy((buf + name_offset), filename, namelen + 1);
88+
return buf;
89+
}
90+
6291
/* If second component comes from struct dirent,
6392
* it's possible to eliminate one strlen() by using name length
6493
* provided by kernel in struct dirent. See below.

libbb/concat_subpath_file.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,11 @@ char* FAST_FUNC concat_subpath_file(const char *path, const char *f)
2020
return NULL;
2121
return concat_path_file(path, f);
2222
}
23+
24+
25+
char* FAST_FUNC concat_subpath_file_fast(const char *path, const struct dirent *dirp)
26+
{
27+
if (dirp->d_name && DOT_OR_DOTDOT(dirp->d_name))
28+
return NULL;
29+
return concat_path_file_fast(path, dirp);
30+
}

libbb/copy_file.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ int FAST_FUNC copy_file(const char *source, const char *dest, int flags)
197197
while ((d = readdir(dp)) != NULL) {
198198
char *new_source, *new_dest;
199199

200-
new_source = concat_subpath_file(source, d->d_name);
200+
new_source = concat_subpath_file_fast(source, d);
201201
if (new_source == NULL)
202202
continue;
203203
new_dest = concat_path_file(dest, d->d_name);

libbb/get_d_namlen.c

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/* vi: set sw=4 ts=4: */
2+
/*
3+
* Fast acquisition of d_namlen
4+
*
5+
* Copyright (C) 2024 by Jody Bruchon <jody@jodybruchon.com>
6+
* Copied from libjodycode for use with BusyBox
7+
*
8+
* Licensed under GPLv2 or later, see file LICENSE in this source tree.
9+
*/
10+
11+
#include "libbb.h"
12+
13+
/* Extract d_namlen from struct dirent */
14+
size_t get_d_namlen(const struct dirent * const dirent)
15+
{
16+
#ifdef _DIRENT_HAVE_D_NAMLEN
17+
return dirent->d_namlen;
18+
#elif defined _DIRENT_HAVE_D_RECLEN
19+
const size_t base = (sizeof(struct dirent) - sizeof(((struct dirent *)0)->d_name)) - offsetof(struct dirent, d_name) - 1;
20+
size_t skip;
21+
22+
skip = dirent->d_reclen - (sizeof(struct dirent) - sizeof(((struct dirent *)0)->d_name));
23+
if (skip > 0) skip -= base;
24+
return skip + strlen(dirent->d_name + skip);
25+
#else
26+
return strlen(dirent->d_name);
27+
#endif
28+
return 0;
29+
}

libbb/last_char_is.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,13 @@ char* FAST_FUNC last_char_is(const char *s, int c)
1717
s++;
1818
return (*s == (char)c) ? (char *) s : NULL;
1919
}
20+
21+
22+
/* Same as last_char_is() but uses a known length to skip the loop */
23+
char* FAST_FUNC last_char_is_fast(const char *s, int c, int len)
24+
{
25+
if (len < 1)
26+
return NULL;
27+
s += len - 1;
28+
return (*s == (char)c) ? (char *) s : NULL;
29+
}

libbb/lineedit.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -921,7 +921,7 @@ static NOINLINE unsigned complete_cmd_dir_file(const char *command, int type)
921921
if (strncmp(basecmd, name_found, baselen) != 0)
922922
continue; /* no */
923923

924-
found = concat_path_file(lpath, name_found);
924+
found = concat_path_file_fast(lpath, next);
925925
/* NB: stat() first so that we see is it a directory;
926926
* but if that fails, use lstat() so that
927927
* we still match dangling links */

0 commit comments

Comments
 (0)