larryv/benchmark-dir-emptying

## benchmark-dir-emptying
#!/bin/sh


# Scratch directory that main() wipes and rebuilds on every run.
DIR=/tmp/traversal-test-dir
# Upper bound on the number of entries (subdirectories plus files) that
# the random generator in main() assigns to a single directory.
MAX_DIR_ENTRIES=32
readonly DIR MAX_DIR_ENTRIES


# Given a non-negative integer N, print the sequence 0 1 2 ... N-2 N-1 to
# standard output, one element per line. Each element is zero-padded to
# the same number of digits as N-1. If a second argument is provided, it
# is prefixed to each number in the sequence. Nothing is printed when N
# is 0.
#
# NOTE: the prefix is handed to awk(1) with -v, so backslash escape
# sequences in it are still interpreted by awk.

padded_countup() {
    awk -v count="$1" -v prefix="$2" '
        BEGIN {
            # Build the conversion once. The prefix is passed as a "%s"
            # argument instead of being spliced into the format, so a
            # "%" in it is printed literally rather than being read as
            # a conversion specification.
            fmt = "%s%0" length(count - 1) "d\n";
            for (i = 0; i < count; ++i) {
                printf fmt, prefix, i;
            }
        }
    '
}


# Given a path to a directory PATH, a string DIR, and a string FILE:
#   1.  Read a line of two non-negative integers M and N from standard
#       input.
#   2.  Create directories PATH/DIR[0], PATH/DIR[1], ..., PATH/DIR[M-1].
#   3.  Create empty files PATH/FILE[0], PATH/FILE[1], ..., PATH/FILE[N-1].
# The sequence numbers are zero-padded to the same number of digits as
# M-1 and N-1. A count of 0 creates nothing and is not an error.
#
# Returns nonzero if no line could be read or if creating the files
# failed. As before, a failure to create the directories does not stop
# the files from being created.

populate() {
    read -r dirs files || return

    # Skip xargs(1) entirely for a zero count. Some implementations
    # (e.g. GNU) run the utility once even when their input is empty,
    # which would call mkdir(1) or touch(1) without operands, print an
    # error, and make this function fail for a perfectly valid line.
    if [ "${dirs:-0}" -gt 0 ] 2>/dev/null; then
        padded_countup "$dirs" "$1"/"$2" | xargs mkdir
    fi
    if [ "${files:-0}" -gt 0 ] 2>/dev/null; then
        padded_countup "$files" "$1"/"$3" | xargs touch
    fi
}


# Given the path to a directory, delete its contents with one call to
# find(1), then print timing statistics to standard error. The directory
# itself is not deleted.
#
# The traversal starts at PATH/. and skips the entry named ".", which
# excludes the starting point without using the path as a -path
# pattern. A path containing pattern characters such as "[" or "*" is
# therefore handled literally instead of failing to match itself and
# being removed along with its contents. Because of the trailing "/.",
# a symbolic link to a directory is resolved to that directory.

purge1() {
    # Directories are batched into rmdir(1) calls, while every other
    # entry gets its own rm(1) call. -depth visits children before their
    # parents, so each directory is already empty when it is removed.
    time -p sh -c '
        find "$1"/. -depth ! -name . \
            \( -type d -exec rmdir {} + -o -exec rm -f {} \; \)
    ' _ "$1"
}


# Given the path to a directory, delete its contents with two calls to
# find(1), then print timing statistics to standard error. The directory
# itself is not deleted.
#
# Both traversals start at PATH/. and skip the entry named ".", which
# excludes the starting point without using the path as a -path
# pattern. A path containing pattern characters such as "[" or "*" is
# therefore handled literally instead of failing to match itself and
# being removed along with its contents. Because of the trailing "/.",
# a symbolic link to a directory is resolved to that directory.

purge2() {
    # The first pass removes everything that is not a directory in
    # batched rm(1) calls. Only if it succeeds does the second pass
    # remove the remaining entries, children first, in batched rmdir(1)
    # calls.
    time -p sh -c '
        find "$1"/. ! -type d ! -name . -exec rm -f {} + \
            && find "$1"/. -depth ! -name . -exec rmdir {} +
    ' _ "$1"
}


# Build a randomly shaped, four-level directory hierarchy in "$DIR"/foo,
# duplicate it as "$DIR"/bar, then empty one copy with purge1() and the
# other with purge2() in a randomly chosen order. The timing statistics
# of purge1() and then of purge2() are printed to standard output.
# Returns nonzero if the testing directory cannot be reset, the
# hierarchy cannot be duplicated, or the ordering cannot be chosen.

main() {
    # Reset testing directory and prevent Spotlight from indexing it.
    rm -fR "$DIR" && mkdir -p "$DIR"/foo || return
    touch "$DIR"/.metadata_never_index

    # Create four-level directory hierarchy. Use awk(1) to generate the
    # stream of random-ish numbers consumed by populate(). The generator
    # never stops on its own; it is expected to terminate once the
    # consuming group exits and the pipe is closed.
    readonly d0="$DIR"/foo
    awk -v max="$MAX_DIR_ENTRIES" '
        BEGIN {
            srand();
            while (1) {
                # Pick the number of entries in 0..max, then split it
                # at random into directories and files.
                total = int((max + 1) * rand());
                dirs = int((total + 1) * rand());
                files = total - dirs;
                printf "%d %d\n", dirs, files;
            }
        }
    ' | {
        populate "$d0" dir file
        # An unmatched glob is left as a literal pattern, which fails
        # the -d test, so each "|| continue" covers both that case and
        # a failed populate().
        for d1 in "$d0"/dir*; do
            [ -d "$d1" ] && populate "$d1" dir file || continue
            for d2 in "$d1"/dir*; do
                [ -d "$d2" ] && populate "$d2" dir file || continue
                for d3 in "$d2"/dir*; do
                    [ -d "$d3" ] && populate "$d3" dir file || continue
                done
            done
        done
    }

    # Duplicate the directory hierarchy. Without the copy there is
    # nothing to compare against, so give up if it fails.
    cp -pR "$DIR"/foo "$DIR"/bar || return

    # Since awk(1)'s srand() uses the system time as its default seed,
    # pause to ensure the system time advances. Attempt to mitigate
    # caching effects by randomly choosing the ordering of purge method
    # and target directory. The choice is read from awk's output rather
    # than its exit status, so that a failing awk cannot be mistaken for
    # a valid choice.
    sleep 1
    order=$(awk 'BEGIN { srand(); print int(4 * rand()); }') || return
    case $order in
        0)
            f1=$(purge1 "$DIR"/foo 2>&1)
            f2=$(purge2 "$DIR"/bar 2>&1)
            ;;
        1)
            f1=$(purge1 "$DIR"/bar 2>&1)
            f2=$(purge2 "$DIR"/foo 2>&1)
            ;;
        2)
            f2=$(purge2 "$DIR"/foo 2>&1)
            f1=$(purge1 "$DIR"/bar 2>&1)
            ;;
        3)
            f2=$(purge2 "$DIR"/bar 2>&1)
            f1=$(purge1 "$DIR"/foo 2>&1)
            ;;
        *)
            printf 'main: unexpected purge ordering: %s\n' "$order" >&2
            return 1
            ;;
    esac

    printf '%s\n\n%s\n' "$f1" "$f2"
}

# Run the benchmark, forwarding any command-line arguments to main().
main "$@"
	#!/bin/sh


	# Scratch directory that main() wipes and rebuilds on every run.
	DIR=/tmp/traversal-test-dir
	# Upper bound on the number of entries (subdirectories plus files) that
	# the random generator in main() assigns to a single directory.
	MAX_DIR_ENTRIES=32
	readonly DIR MAX_DIR_ENTRIES


	# Given a non-negative integer N, print the sequence 0 1 2 ... N-2 N-1 to
	# standard output, one element per line. Each element is zero-padded to
	# the same number of digits as N-1. If a second argument is provided, it
	# is prefixed to each number in the sequence. Nothing is printed when N
	# is 0.
	#
	# NOTE: the prefix is handed to awk(1) with -v, so backslash escape
	# sequences in it are still interpreted by awk.

	padded_countup() {
	    awk -v count="$1" -v prefix="$2" '
	        BEGIN {
	            # Build the conversion once. The prefix is passed as a "%s"
	            # argument instead of being spliced into the format, so a
	            # "%" in it is printed literally rather than being read as
	            # a conversion specification.
	            fmt = "%s%0" length(count - 1) "d\n";
	            for (i = 0; i < count; ++i) {
	                printf fmt, prefix, i;
	            }
	        }
	    '
	}


	# Given a path to a directory PATH, a string DIR, and a string FILE:
	#   1.  Read a line of two non-negative integers M and N from standard
	#       input.
	#   2.  Create directories PATH/DIR[0], PATH/DIR[1], ..., PATH/DIR[M-1].
	#   3.  Create empty files PATH/FILE[0], PATH/FILE[1], ..., PATH/FILE[N-1].
	# The sequence numbers are zero-padded to the same number of digits as
	# M-1 and N-1. A count of 0 creates nothing and is not an error.
	#
	# Returns nonzero if no line could be read or if creating the files
	# failed. A failure to create the directories does not stop the files
	# from being created.

	populate() {
	    read -r dirs files || return

	    # Skip xargs(1) entirely for a zero count. Some implementations
	    # (e.g. GNU) run the utility once even when their input is empty,
	    # which would call mkdir(1) or touch(1) without operands, print an
	    # error, and make this function fail for a perfectly valid line.
	    if [ "${dirs:-0}" -gt 0 ] 2>/dev/null; then
	        padded_countup "$dirs" "$1"/"$2" | xargs mkdir
	    fi
	    if [ "${files:-0}" -gt 0 ] 2>/dev/null; then
	        padded_countup "$files" "$1"/"$3" | xargs touch
	    fi
	}


	# Given the path to a directory, delete its contents with one call to
	# find(1), then print timing statistics to standard error. The directory
	# itself is not deleted.
	#
	# The traversal starts at PATH/. and skips the entry named ".", which
	# excludes the starting point without using the path as a -path
	# pattern. A path containing pattern characters such as "[" or "*" is
	# therefore handled literally instead of failing to match itself and
	# being removed along with its contents. Because of the trailing "/.",
	# a symbolic link to a directory is resolved to that directory.

	purge1() {
	    # Directories are batched into rmdir(1) calls, while every other
	    # entry gets its own rm(1) call. -depth visits children before their
	    # parents, so each directory is already empty when it is removed.
	    time -p sh -c '
	        find "$1"/. -depth ! -name . \
	            \( -type d -exec rmdir {} + -o -exec rm -f {} \; \)
	    ' _ "$1"
	}


	# Given the path to a directory, delete its contents with two calls to
	# find(1), then print timing statistics to standard error. The directory
	# itself is not deleted.
	#
	# Both traversals start at PATH/. and skip the entry named ".", which
	# excludes the starting point without using the path as a -path
	# pattern. A path containing pattern characters such as "[" or "*" is
	# therefore handled literally instead of failing to match itself and
	# being removed along with its contents. Because of the trailing "/.",
	# a symbolic link to a directory is resolved to that directory.

	purge2() {
	    # The first pass removes everything that is not a directory in
	    # batched rm(1) calls. Only if it succeeds does the second pass
	    # remove the remaining entries, children first, in batched rmdir(1)
	    # calls.
	    time -p sh -c '
	        find "$1"/. ! -type d ! -name . -exec rm -f {} + \
	            && find "$1"/. -depth ! -name . -exec rmdir {} +
	    ' _ "$1"
	}


	# Build a randomly shaped, four-level directory hierarchy in "$DIR"/foo,
	# duplicate it as "$DIR"/bar, then empty one copy with purge1() and the
	# other with purge2() in a randomly chosen order. The timing statistics
	# of purge1() and then of purge2() are printed to standard output.
	# Returns nonzero if the testing directory cannot be reset, the
	# hierarchy cannot be duplicated, or the ordering cannot be chosen.

	main() {
	    # Reset testing directory and prevent Spotlight from indexing it.
	    rm -fR "$DIR" && mkdir -p "$DIR"/foo || return
	    touch "$DIR"/.metadata_never_index

	    # Create four-level directory hierarchy. Use awk(1) to generate the
	    # stream of random-ish numbers consumed by populate(). The generator
	    # never stops on its own; it is expected to terminate once the
	    # consuming group exits and the pipe is closed.
	    readonly d0="$DIR"/foo
	    awk -v max="$MAX_DIR_ENTRIES" '
	        BEGIN {
	            srand();
	            while (1) {
	                # Pick the number of entries in 0..max, then split it
	                # at random into directories and files.
	                total = int((max + 1) * rand());
	                dirs = int((total + 1) * rand());
	                files = total - dirs;
	                printf "%d %d\n", dirs, files;
	            }
	        }
	    ' | {
	        populate "$d0" dir file
	        # An unmatched glob is left as a literal pattern, which fails
	        # the -d test, so each "|| continue" covers both that case and
	        # a failed populate().
	        for d1 in "$d0"/dir*; do
	            [ -d "$d1" ] && populate "$d1" dir file || continue
	            for d2 in "$d1"/dir*; do
	                [ -d "$d2" ] && populate "$d2" dir file || continue
	                for d3 in "$d2"/dir*; do
	                    [ -d "$d3" ] && populate "$d3" dir file || continue
	                done
	            done
	        done
	    }

	    # Duplicate the directory hierarchy. Without the copy there is
	    # nothing to compare against, so give up if it fails.
	    cp -pR "$DIR"/foo "$DIR"/bar || return

	    # Since awk(1)'s srand() uses the system time as its default seed,
	    # pause to ensure the system time advances. Attempt to mitigate
	    # caching effects by randomly choosing the ordering of purge method
	    # and target directory. The choice is read from awk's output rather
	    # than its exit status, so that a failing awk cannot be mistaken for
	    # a valid choice.
	    sleep 1
	    order=$(awk 'BEGIN { srand(); print int(4 * rand()); }') || return
	    case $order in
	        0)
	            f1=$(purge1 "$DIR"/foo 2>&1)
	            f2=$(purge2 "$DIR"/bar 2>&1)
	            ;;
	        1)
	            f1=$(purge1 "$DIR"/bar 2>&1)
	            f2=$(purge2 "$DIR"/foo 2>&1)
	            ;;
	        2)
	            f2=$(purge2 "$DIR"/foo 2>&1)
	            f1=$(purge1 "$DIR"/bar 2>&1)
	            ;;
	        3)
	            f2=$(purge2 "$DIR"/bar 2>&1)
	            f1=$(purge1 "$DIR"/foo 2>&1)
	            ;;
	        *)
	            printf 'main: unexpected purge ordering: %s\n' "$order" >&2
	            return 1
	            ;;
	    esac

	    printf '%s\n\n%s\n' "$f1" "$f2"
	}

	# Run the benchmark, forwarding any command-line arguments to main().
	main "$@"