Skip to content

Instantly share code, notes, and snippets.

@larryv
Created April 25, 2019 03:27
Show Gist options
  • Save larryv/26c4e8e0ada3e25f072f6e1346c934ed to your computer and use it in GitHub Desktop.
Save larryv/26c4e8e0ada3e25f072f6e1346c934ed to your computer and use it in GitHub Desktop.
A script I used once to attempt to benchmark different methods of emulating `rm -fR` with find(1).
#!/bin/sh
# Scratch directory in which the benchmark builds (and then purges) its
# test hierarchies. It is wiped and recreated at the start of every run.
DIR=/tmp/traversal-test-dir
# Upper bound on the number of entries (subdirectories plus files)
# generated for any single directory of the test hierarchy.
MAX_DIR_ENTRIES=32
readonly DIR MAX_DIR_ENTRIES
# padded_countup N [PREFIX]
#
# Print the sequence 0 1 2 ... N-2 N-1 to standard output, one element
# per line. Each number is zero-padded to the same number of digits as
# N-1. If PREFIX is given, it is prepended verbatim to every number.
# Nothing is printed when N is 0.
#
# N and PREFIX are handed to awk(1) as operands (ARGV) rather than with
# -v, because -v assignments undergo backslash-escape processing and
# would mangle a PREFIX containing backslashes. PREFIX is printed through
# a "%s" conversion so that any "%" it contains is not interpreted as
# part of the printf format. The program consists of a BEGIN action
# only, so awk never tries to open the operands as input files.
padded_countup() {
  awk '
    BEGIN {
      count = ARGV[1] + 0
      prefix = ARGV[2]
      # Width of the largest element, N-1, decides the padding.
      fmt = "%s%0" length(count - 1) "d\n"
      for (i = 0; i < count; ++i) {
        printf fmt, prefix, i
      }
    }
  ' "$1" "$2"
}
# populate PATH DIR FILE
#
# Given a path to a directory PATH, a string DIR, and a string FILE:
#   1. Read a line of two non-negative integers M and N from standard
#      input.
#   2. Create directories PATH/DIR[0], PATH/DIR[1], ..., PATH/DIR[M-1].
#   3. Create empty files PATH/FILE[0], PATH/FILE[1], ..., PATH/FILE[N-1].
# The sequence numbers are zero-padded to the same number of digits as
# M-1 and N-1.
#
# A count of zero skips the corresponding step entirely. Without that
# guard, xargs(1) implementations that run the utility even on empty
# input would invoke mkdir/touch with no operands, producing an error
# message and a non-zero status.
#
# Returns 1 if no line could be read, 2 if either count is not a
# non-negative decimal integer, and otherwise the status of the
# file-creation step (0 when N is 0). As before, a failure of the
# directory-creation step does not affect the return status.
#
# Limitation: the names are passed through xargs, which splits its input
# on blanks and interprets quotes and backslashes, so PATH, DIR and FILE
# must not contain such characters.
#
# The variables "dirs" and "files" are set in the caller's shell, since
# POSIX sh has no function-local variables.
populate() {
  # read fails at end of input but still delivers a final line that
  # lacks a trailing newline; only give up when nothing was read.
  read -r dirs files || [ -n "$dirs" ] || return 1

  case $dirs in
  '' | *[!0-9]*)
    printf 'populate: invalid directory count: %s\n' "$dirs" >&2
    return 2
    ;;
  esac
  case $files in
  '' | *[!0-9]*)
    printf 'populate: invalid file count: %s\n' "$files" >&2
    return 2
    ;;
  esac

  if [ "$dirs" -gt 0 ]; then
    padded_countup "$dirs" "$1"/"$2" | xargs mkdir
  fi
  if [ "$files" -gt 0 ]; then
    padded_countup "$files" "$1"/"$3" | xargs touch
  fi
}
# purge1 DIR
#
# Given the path to a directory, delete its contents with one call to
# find(1), then print timing statistics to standard error. The directory
# itself is not deleted.
#
# How the single find invocation works:
#   * -depth visits every directory after its contents, so a directory
#     is already empty by the time it is handed to rmdir(1).
#   * "! -path DIR" excludes the starting directory itself.
#     NOTE(review): -path treats its operand as a pattern, so a DIR
#     containing pattern metacharacters may not be excluded -- confirm
#     before using with arbitrary paths.
#   * Directories are removed with "rmdir {} +" (many per rmdir process);
#     every other entry is removed with "rm -f {} \;" (one rm process per
#     entry).
#
# The find command runs inside "sh -c" with DIR passed as that shell's
# $1, and the whole inner shell is what "time -p" measures.
purge1() {
time -p sh -c '
find "$1" -depth ! -path "$1" \
\( -type d -exec rmdir {} + -o -exec rm -f {} \; \)
' _ "$1"
}
# purge2 DIR
#
# Given the path to a directory, delete its contents with two calls to
# find(1), then print timing statistics to standard error. The directory
# itself is not deleted.
#
# How the two find invocations work:
#   1. The first removes every non-directory below DIR with
#      "rm -f {} +" (many entries per rm process).
#   2. Only if the first find succeeds (&&), the second walks what is
#      left depth-first and hands every remaining entry to "rmdir {} +".
#      It does not filter by type.
# Both use "! -path DIR" to exclude the starting directory itself.
# NOTE(review): -path treats its operand as a pattern, so a DIR
# containing pattern metacharacters may not be excluded -- confirm before
# using with arbitrary paths.
#
# Both find commands run inside one "sh -c" with DIR passed as that
# shell's $1, and the whole inner shell is what "time -p" measures.
purge2() {
time -p sh -c '
find "$1" ! -type d ! -path "$1" -exec rm -f {} + \
&& find "$1" -depth ! -path "$1" -exec rmdir {} +
' _ "$1"
}
# Run the benchmark:
#   1. Recreate the scratch directory $DIR and build a randomly shaped,
#      four-level hierarchy in $DIR/foo.
#   2. Copy that hierarchy to $DIR/bar.
#   3. Purge one copy with purge1 and the other with purge2. Which
#      method gets which copy, and which method runs first, is picked at
#      random.
#   4. Print purge1's captured report, a blank line, then purge2's
#      captured report to standard output.
#
# Reads the globals DIR and MAX_DIR_ENTRIES. Returns non-zero if the
# scratch directory cannot be reset, the hierarchy cannot be copied, or
# the random ordering cannot be determined.
main() {
  # Reset testing directory and prevent Spotlight from indexing it.
  # ${DIR:?} aborts instead of running rm on an empty path.
  rm -fR "${DIR:?}" || return
  mkdir -p "$DIR"/foo || return
  touch "$DIR"/.metadata_never_index

  # Create four-level directory hierarchy. Use awk(1) to generate the
  # stream of random-ish numbers consumed by populate(): each line holds
  # a directory count and a file count whose sum lies in
  # 0..MAX_DIR_ENTRIES. The generator never stops on its own; it ends
  # once the consuming group below exits and the pipe closes.
  readonly d0="$DIR"/foo
  awk -v max="$MAX_DIR_ENTRIES" '
    BEGIN {
      srand();
      while (1) {
        total = int((max + 1) * rand());
        dirs = int((total + 1) * rand());
        files = total - dirs;
        printf "%d %d\n", dirs, files;
      }
    }
  ' | {
    populate "$d0" dir file
    for d1 in "$d0"/dir*; do
      # An unmatched glob is left as the literal pattern; skip it. The
      # guard is kept separate from populate so that a non-zero status
      # from populate does not prevent descending into subdirectories
      # that were created.
      [ -d "$d1" ] || continue
      populate "$d1" dir file
      for d2 in "$d1"/dir*; do
        [ -d "$d2" ] || continue
        populate "$d2" dir file
        for d3 in "$d2"/dir*; do
          [ -d "$d3" ] || continue
          populate "$d3" dir file
        done
      done
    done
  }

  # Duplicate the directory hierarchy.
  cp -pR "$DIR"/foo "$DIR"/bar || return

  # Since awk(1)'s srand() uses the system time as its default seed,
  # pause to ensure the system time advances. Attempt to mitigate
  # caching effects by randomly choosing the ordering of purge method
  # and target directory. The choice (0..3) is printed by awk rather than
  # returned as its exit status, so that an awk failure cannot be
  # mistaken for a valid choice.
  sleep 1
  order=$(awk 'BEGIN { srand(); print int(4 * rand()); }') || return
  case $order in
  0)
    f1=$(purge1 "$DIR"/foo 2>&1)
    f2=$(purge2 "$DIR"/bar 2>&1)
    ;;
  1)
    f1=$(purge1 "$DIR"/bar 2>&1)
    f2=$(purge2 "$DIR"/foo 2>&1)
    ;;
  2)
    f2=$(purge2 "$DIR"/foo 2>&1)
    f1=$(purge1 "$DIR"/bar 2>&1)
    ;;
  3)
    f2=$(purge2 "$DIR"/bar 2>&1)
    f1=$(purge1 "$DIR"/foo 2>&1)
    ;;
  *)
    printf 'main: unexpected ordering value: %s\n' "$order" >&2
    return 1
    ;;
  esac

  # f1 always holds purge1's report and f2 purge2's, whichever ran first.
  printf '%s\n\n%s\n' "$f1" "$f2"
}
# Entry point: run the benchmark. The command-line arguments are
# forwarded, although main does not currently read them.
main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment