0xB10C/README.txt

## README.txt
# PR 20827 IBD benchmarks

Bitcoin Core PR #20827 reduces the number of UTXO cache flushes to disk by increasing
the number of pruned blk/rev files per prune operation. Each prune operation causes a
dbcache flush. Fewer prune operations cause fewer flushes. Flushes are expensive with
slower disks.

I benchmarked this PR on an otherwise idle, properly cooled, and dedicated server. The
server has two HDDs in a RAID0 and an i7 4c/8t CPU. It runs a fully synced Bitcoin
Core peer that I connect to from the benchmarking node to sync from (to rule out
differences due to flaky P2P network peers).

My first benchmark set synced the blocks from height 500k to 600k. I've opted to limit
the benchmark to 100k blocks as each 100k block run took only about 3h compared to
about 11h for a full IBD. I ran full IBDs after seeing improvements between block 500k
and 600k. The first set can be skipped as full IBDs are a more complete and important
measurement. The second set of benchmarks runs full IBDs (up to 710k).

For more information about the first benchmarking set see [1], and see [2] for its results.
My results for the full IBD benchmarks are at [3].

[1]: https://github.com/bitcoin/bitcoin/pull/20827#issuecomment-1009016428
[2]: https://github.com/bitcoin/bitcoin/pull/20827#issuecomment-1017603590
[3]: https://github.com/bitcoin/bitcoin/pull/20827#issuecomment-1023108273

I've attached the scripts for the two benchmark sets I've run.

1. bench-500k-to-600k.sh (requires datadir at height 500k)
2. bench-full.sh (recommended, but will take longer)

These produce a debug.log for each run. These can then be parsed and analysed with the
Python Jupyter notebooks.

1. bench-500k-to-600k.ipynb
2. bench-full.ipynb

## bench-500k-to-600k.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              bench-500k-to-600k.ipynb
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## bench-500k-to-600k.sh
# Runs two Bitcoin Core binaries with the same configurations multiple times for IBD benchmarks.
# NOTE: This version requires a pre-synced Bitcoin Core datadir at height 500_000.

# The names of the two Bitcoin Core binaries to compare.
# It's assumed that these are available in PATH. Otherwise, relative or absolute paths should work too.
# MB = MergeBase = Commit just before the first commit of the PR (better name might be BASE).
# PR = PullRequest = HEAD commit of the PR (better name might be HEAD)
BITCOIND_PR=bitcoind-pr20827
BITCOIND_MB=bitcoind-mb20827

# Height at which the Bitcoin Core benchmark is being stopped (with -stopatheight).
STOP_HEIGHT=600000

# Extra configuration passed to bitcoind:
# - Local peer to sync from.
# - Port and RPCPort that don't collide with other used ports on the system.
# - connect=0 to not connect to P2P network peers
# - printtoconsole=0 as we are already logging to a debug.log file
# - coindb logging for dbcache flushes
# - prune logging for debug information about prune operations
EXTRA_CONFIG="-addnode=127.0.0.1:8333 -port=7900 -rpcport=7901 -connect=0 -printtoconsole=0 -debug=coindb -debug=prune"

# Path where the datadirs will be created and deleted for each run.
# Must end with a slash: the datadir name is appended directly to it.
DATADIR_PATH="/home/b10c/luke-pr-bench/"

# Path where the pre-synced datadir resides. This is copied
# and used as starting point for each run.
STATEDIR="/home/b10c/luke-pr-bench/state"

# Path where debug.logs are stored.
# Must end with a slash: the log file name is appended directly to it.
# NOTE(review): bench-full.sh uses the same directory and the same log file
# names (debug_<label>_<cset>_<run>.log) - confirm that the two benchmark sets
# are never meant to keep their logs side by side.
LOGDIR="/home/b10c/luke-pr-bench/full/"

# Sleep time between runs to allow the machine to idle a while before the next run.
SLEEP_TIME_SECONDS=300

# How often these configurations should be run. More runs mean probably more accurate results.
# However, limit the number of runs for faster results.
RUNS_PER_CONFIGSET=3

# CLI configurations to test. The array index is used as the configuration
# set number in the console output and in the datadir and log file names.
CONFIGSET[0]="-dbcache=300 -prune=550"
CONFIGSET[1]="-dbcache=4000 -prune=550"

CONFIGSET[2]="-dbcache=300 -prune=1100"
CONFIGSET[3]="-dbcache=4000 -prune=1100"

CONFIGSET[4]="-dbcache=300 -prune=2200"
CONFIGSET[5]="-dbcache=4000 -prune=2200"

CONFIGSET[6]="-dbcache=300 -prune=4400"
CONFIGSET[7]="-dbcache=4000 -prune=4400"

echo "Starting Bitcoin Core IBD benchmark"
echo "bitcoind PR path: ${BITCOIND_PR}"
echo "bitcoind MB path: ${BITCOIND_MB}"
echo "datadir path: ${DATADIR_PATH}"
echo "logdir path: ${LOGDIR}"
echo ""
# Quoted so that a log directory containing spaces is created as one directory.
mkdir -p -- "${LOGDIR}"

# Runs a single benchmark: copies the pre-synced state into a fresh datadir,
# runs the given bitcoind binary against it, then deletes the datadir and
# sleeps to let the machine idle before the next run.
# Globals read: DATADIR_PATH, STATEDIR, LOGDIR, STOP_HEIGHT, EXTRA_CONFIG,
#               SLEEP_TIME_SECONDS and configset (set by the calling loop).
# Returns: 1 if the initial state could not be copied (the binary is then not
#          run); otherwise the exit status of the final sleep.
run_bench() { # $1=run $2=cset $3=binary $4=label (MB or PR)
    local datadir logfile RUNCONFIG
    local -a config_args extra_args run_args

    echo "Running $4 config $2 run $1"
    datadir="${DATADIR_PATH}datadir_$4_$2_$1"
    logfile="${LOGDIR}debug_$4_$2_$1.log"

    # A datadir left over from an interrupted run would make `cp -r` place the
    # state *inside* it instead of creating it, so start from a clean slate.
    if [[ -e "$datadir" ]]; then
        echo "Removing stale datadir $datadir"
        rm -rf -- "${datadir:?}"
    fi

    echo "Copying inital state from $STATEDIR to datadir $datadir"
    if ! cp -r -- "$STATEDIR" "$datadir"; then
        echo "Failed to copy $STATEDIR to $datadir, skipping this run" >&2
        rm -rf -- "${datadir:?}"
        return 1
    fi

    # configset and EXTRA_CONFIG hold several space-separated options each;
    # split them into words explicitly so paths can stay quoted as single words.
    read -r -a config_args <<<"$configset"
    read -r -a extra_args <<<"$EXTRA_CONFIG"
    run_args=(
        "-datadir=$datadir"
        "-stopatheight=$STOP_HEIGHT"
        "-debuglogfile=$logfile"
        "${config_args[@]}"
        "${extra_args[@]}"
    )
    RUNCONFIG="${run_args[*]}"
    echo "$3 $RUNCONFIG"
    "$3" "${run_args[@]}" \
        || echo "Warning: $3 exited with status $? ($4 config $2 run $1)" >&2

    # Short pause before the datadir is removed.
    sleep 5
    echo "Run $1 with configuration set $2 finished"
    echo "Deleting datadir $datadir"
    # ${datadir:?} aborts instead of running `rm -rf` on an empty path.
    rm -rf -- "${datadir:?}"
    echo "Sleeping for $SLEEP_TIME_SECONDS seconds to cool down.."
    sleep "$SLEEP_TIME_SECONDS"
}

# Runs every configuration set RUNS_PER_CONFIGSET times with both binaries.
# configset is read by run_bench as a global.
for cset in "${!CONFIGSET[@]}"
do
    echo "Starting with configuration set $cset"
    configset=${CONFIGSET[$cset]}
    echo "Configuration set: $configset"
    for ((run = 1; run <= RUNS_PER_CONFIGSET; run++))
    do
        # Run benchmarks in random order to counter eventual
        # disk caching or similar.
        # bash's built-in $RANDOM is used for the coin flip: `date +%s%3N`
        # relies on a GNU date extension and silently degrades to a fixed
        # order where it is not supported.
        if (( RANDOM % 2 ))
        then
            echo "Running PR first and then MB"
            run_bench "$run" "$cset" "$BITCOIND_PR" "PR"
            run_bench "$run" "$cset" "$BITCOIND_MB" "MB"
        else
            echo "Running MB first and then PR"
            run_bench "$run" "$cset" "$BITCOIND_MB" "MB"
            run_bench "$run" "$cset" "$BITCOIND_PR" "PR"
        fi
        echo "---"
    done
    echo ""
done

## bench-full.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              bench-full.ipynb
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## bench-full.sh
# Runs two Bitcoin Core binaries with the same configurations multiple times for IBD benchmarks.

# The names of the two Bitcoin Core binaries to compare.
# It's assumed that these are available in PATH. Otherwise, relative or absolute paths should work too.
# MB = MergeBase = Commit just before the first commit of the PR (better name might be BASE).
# PR = PullRequest = HEAD commit of the PR (better name might be HEAD)
BITCOIND_PR=bitcoind-pr20827
BITCOIND_MB=bitcoind-mb20827

# Height at which the Bitcoin Core benchmark is being stopped (with -stopatheight).
STOP_HEIGHT=710000

# Extra configuration passed to bitcoind:
# - Local peer to sync from.
# - Port and RPCPort that don't collide with other used ports on the system.
# - connect=0 to not connect to P2P network peers
# - printtoconsole=0 as we are already logging to a debug.log file
# - coindb logging for dbcache flushes
# - prune logging for debug information about prune operations
EXTRA_CONFIG="-addnode=127.0.0.1:8333 -port=7900 -rpcport=7901 -connect=0 -printtoconsole=0 -debug=coindb -debug=prune"

# Path where the datadirs will be created and deleted for each run.
# Must end with a slash: the datadir name is appended directly to it.
DATADIR_PATH="/home/b10c/luke-pr-bench/"

# Path where debug.logs are stored.
# Must end with a slash: the log file name is appended directly to it.
LOGDIR="/home/b10c/luke-pr-bench/full/"

# Sleep time between runs to allow the machine to idle a while before the next run.
SLEEP_TIME_SECONDS=300

# How often these configurations should be run. More runs mean probably more accurate results.
# However, limit the number of runs for faster results.
RUNS_PER_CONFIGSET=2

# CLI configurations to test. The array index is used as the configuration
# set number in the console output and in the datadir and log file names.
CONFIGSET[0]="-dbcache=300 -prune=550"
CONFIGSET[1]="-dbcache=4000 -prune=4400"
CONFIGSET[2]="-dbcache=4000 -prune=8800"

echo "Starting Bitcoin Core IBD benchmark"
echo "bitcoind PR path: ${BITCOIND_PR}"
echo "bitcoind MB path: ${BITCOIND_MB}"
echo "datadir path: ${DATADIR_PATH}"
echo "logdir path: ${LOGDIR}"
echo ""
# Quoted so that a log directory containing spaces is created as one directory.
mkdir -p -- "${LOGDIR}"

# Runs a single full-IBD benchmark: creates an empty datadir, runs the given
# bitcoind binary against it, then deletes the datadir and sleeps to let the
# machine idle before the next run.
# Globals read: DATADIR_PATH, LOGDIR, STOP_HEIGHT, EXTRA_CONFIG,
#               SLEEP_TIME_SECONDS and configset (set by the calling loop).
# Returns: 1 if the datadir could not be created (the binary is then not run);
#          otherwise the exit status of the final sleep.
run_bench() { # $1=run $2=cset $3=binary $4=label (MB or PR)
    local datadir logfile RUNCONFIG
    local -a config_args extra_args run_args

    echo "Running $4 config $2 run $1"
    datadir="${DATADIR_PATH}datadir_$4_$2_$1"
    logfile="${LOGDIR}debug_$4_$2_$1.log"

    # A datadir left over from an interrupted run would not be empty, so the
    # run would not start from scratch. Remove it first.
    if [[ -e "$datadir" ]]; then
        echo "Removing stale datadir $datadir"
        rm -rf -- "${datadir:?}"
    fi

    echo "Creating datadir $datadir"
    if ! mkdir -p -- "$datadir"; then
        echo "Failed to create datadir $datadir, skipping this run" >&2
        return 1
    fi

    # configset and EXTRA_CONFIG hold several space-separated options each;
    # split them into words explicitly so paths can stay quoted as single words.
    read -r -a config_args <<<"$configset"
    read -r -a extra_args <<<"$EXTRA_CONFIG"
    run_args=(
        "-datadir=$datadir"
        "-stopatheight=$STOP_HEIGHT"
        "-debuglogfile=$logfile"
        "${config_args[@]}"
        "${extra_args[@]}"
    )
    RUNCONFIG="${run_args[*]}"
    echo "$3 $RUNCONFIG"
    "$3" "${run_args[@]}" \
        || echo "Warning: $3 exited with status $? ($4 config $2 run $1)" >&2

    # Short pause before the datadir is removed.
    sleep 5
    echo "Run $1 with configuration set $2 finished"
    echo "Deleting datadir $datadir"
    # ${datadir:?} aborts instead of running `rm -rf` on an empty path.
    rm -rf -- "${datadir:?}"
    echo "Sleeping for $SLEEP_TIME_SECONDS seconds to cool down.."
    sleep "$SLEEP_TIME_SECONDS"
}

# Runs every configuration set RUNS_PER_CONFIGSET times with both binaries.
# configset is read by run_bench as a global.
for cset in "${!CONFIGSET[@]}"
do
    echo "Starting with configuration set $cset"
    configset=${CONFIGSET[$cset]}
    echo "Configuration set: $configset"
    for ((run = 1; run <= RUNS_PER_CONFIGSET; run++))
    do
        # Run benchmarks in random order to counter eventual
        # disk caching or similar.
        # bash's built-in $RANDOM is used for the coin flip: `date +%s%3N`
        # relies on a GNU date extension and silently degrades to a fixed
        # order where it is not supported.
        if (( RANDOM % 2 ))
        then
            echo "Running PR first and then MB"
            run_bench "$run" "$cset" "$BITCOIND_PR" "PR"
            run_bench "$run" "$cset" "$BITCOIND_MB" "MB"
        else
            echo "Running MB first and then PR"
            run_bench "$run" "$cset" "$BITCOIND_MB" "MB"
            run_bench "$run" "$cset" "$BITCOIND_PR" "PR"
        fi
        echo "---"
    done
    echo ""
done
	# PR 20827 IBD benchmarks

	Bitcoin Core PR #20827 reduces the number of UTXO cache flushes to disk by increasing
	the number of pruned blk/rev files per prune operation. Each prune operation causes a
	dbcache flush. Fewer prune operations cause fewer flushes. Flushes are expensive with
	slower disks.

	I benchmarked this PR on an otherwise idle, properly cooled, and dedicated server. The
	server has two HDDs in a RAID0 and an i7 4c/8t CPU. It runs a fully synced Bitcoin
	Core peer that I connect to from the benchmarking node to sync from (to rule out
	differences due to flaky P2P network peers).

	My first benchmark set synced the blocks from height 500k to 600k. I've opted to limit
	the benchmark to 100k blocks as each 100k block run took only about 3h compared to
	about 11h for a full IBD. I ran full IBDs after seeing improvements between block 500k
	and 600k. The first set can be skipped as full IBDs are a more complete and important
	measurement. The second set of benchmarks runs full IBDs (up to 710k).

	For more information about the first benchmarking set see [1], and see [2] for its results.
	My results for the full IBD benchmarks are at [3].

	[1]: https://github.com/bitcoin/bitcoin/pull/20827#issuecomment-1009016428
	[2]: https://github.com/bitcoin/bitcoin/pull/20827#issuecomment-1017603590
	[3]: https://github.com/bitcoin/bitcoin/pull/20827#issuecomment-1023108273

	I've attached the scripts for the two benchmark sets I've run.

	1. bench-500k-to-600k.sh (requires datadir at height 500k)
	2. bench-full.sh (recommended, but will take longer)

	These produce a debug.log for each run. These can then be parsed and analysed with the
	Python Jupyter notebooks.

	1. bench-500k-to-600k.ipynb
	2. bench-full.ipynb
	# Runs two Bitcoin Core binaries with the same configurations multiple times for IBD benchmarks.
	# NOTE: This version requires a pre-synced Bitcoin Core datadir at height 500_000.

	# The names of the two Bitcoin Core binaries to compare.
	# It's assumed that these are available in PATH. Otherwise, relative or absolute paths should work too.
	# MB = MergeBase = Commit just before the first commit of the PR (better name might be BASE).
	# PR = PullRequest = HEAD commit of the PR (better name might be HEAD)
	BITCOIND_PR=bitcoind-pr20827
	BITCOIND_MB=bitcoind-mb20827

	# Height at which the Bitcoin Core benchmark is being stopped (with -stopatheight).
	STOP_HEIGHT=600000

	# Extra configuration passed to bitcoind:
	# - Local peer to sync from.
	# - Port and RPCPort that don't collide with other used ports on the system.
	# - connect=0 to not connect to P2P network peers
	# - printtoconsole=0 as we are already logging to a debug.log file
	# - coindb logging for dbcache flushes
	# - prune logging for debug information about prune operations
	EXTRA_CONFIG="-addnode=127.0.0.1:8333 -port=7900 -rpcport=7901 -connect=0 -printtoconsole=0 -debug=coindb -debug=prune"

	# Path where the datadirs will be created and deleted for each run.
	# Must end with a slash: the datadir name is appended directly to it.
	DATADIR_PATH="/home/b10c/luke-pr-bench/"

	# Path where the pre-synced datadir resides. This is copied
	# and used as starting point for each run.
	STATEDIR="/home/b10c/luke-pr-bench/state"

	# Path where debug.logs are stored.
	# Must end with a slash: the log file name is appended directly to it.
	LOGDIR="/home/b10c/luke-pr-bench/full/"

	# Sleep time between runs to allow the machine to idle a while before the next run.
	SLEEP_TIME_SECONDS=300

	# How often these configurations should be run. More runs mean probably more accurate results.
	# However, limit the number of runs for faster results.
	RUNS_PER_CONFIGSET=3

	# CLI configurations to test. The array index is used as the configuration
	# set number in the console output and in the datadir and log file names.
	CONFIGSET[0]="-dbcache=300 -prune=550"
	CONFIGSET[1]="-dbcache=4000 -prune=550"

	CONFIGSET[2]="-dbcache=300 -prune=1100"
	CONFIGSET[3]="-dbcache=4000 -prune=1100"

	CONFIGSET[4]="-dbcache=300 -prune=2200"
	CONFIGSET[5]="-dbcache=4000 -prune=2200"

	CONFIGSET[6]="-dbcache=300 -prune=4400"
	CONFIGSET[7]="-dbcache=4000 -prune=4400"

	echo "Starting Bitcoin Core IBD benchmark"
	echo "bitcoind PR path: ${BITCOIND_PR}"
	echo "bitcoind MB path: ${BITCOIND_MB}"
	echo "datadir path: ${DATADIR_PATH}"
	echo "logdir path: ${LOGDIR}"
	echo ""
	# Quoted so that a log directory containing spaces is created as one directory.
	mkdir -p -- "${LOGDIR}"

	# Runs a single benchmark: copies the pre-synced state into a fresh datadir,
	# runs the given bitcoind binary against it, then deletes the datadir and
	# sleeps to let the machine idle before the next run.
	# Globals read: DATADIR_PATH, STATEDIR, LOGDIR, STOP_HEIGHT, EXTRA_CONFIG,
	#               SLEEP_TIME_SECONDS and configset (set by the calling loop).
	# Returns: 1 if the initial state could not be copied (the binary is then not
	#          run); otherwise the exit status of the final sleep.
	run_bench() { # $1=run $2=cset $3=binary $4=label (MB or PR)
	    local datadir logfile RUNCONFIG
	    local -a config_args extra_args run_args

	    echo "Running $4 config $2 run $1"
	    datadir="${DATADIR_PATH}datadir_$4_$2_$1"
	    logfile="${LOGDIR}debug_$4_$2_$1.log"

	    # A datadir left over from an interrupted run would make `cp -r` place the
	    # state *inside* it instead of creating it, so start from a clean slate.
	    if [[ -e "$datadir" ]]; then
	        echo "Removing stale datadir $datadir"
	        rm -rf -- "${datadir:?}"
	    fi

	    echo "Copying inital state from $STATEDIR to datadir $datadir"
	    if ! cp -r -- "$STATEDIR" "$datadir"; then
	        echo "Failed to copy $STATEDIR to $datadir, skipping this run" >&2
	        rm -rf -- "${datadir:?}"
	        return 1
	    fi

	    # configset and EXTRA_CONFIG hold several space-separated options each;
	    # split them into words explicitly so paths can stay quoted as single words.
	    read -r -a config_args <<<"$configset"
	    read -r -a extra_args <<<"$EXTRA_CONFIG"
	    run_args=(
	        "-datadir=$datadir"
	        "-stopatheight=$STOP_HEIGHT"
	        "-debuglogfile=$logfile"
	        "${config_args[@]}"
	        "${extra_args[@]}"
	    )
	    RUNCONFIG="${run_args[*]}"
	    echo "$3 $RUNCONFIG"
	    "$3" "${run_args[@]}" \
	        || echo "Warning: $3 exited with status $? ($4 config $2 run $1)" >&2

	    # Short pause before the datadir is removed.
	    sleep 5
	    echo "Run $1 with configuration set $2 finished"
	    echo "Deleting datadir $datadir"
	    # ${datadir:?} aborts instead of running `rm -rf` on an empty path.
	    rm -rf -- "${datadir:?}"
	    echo "Sleeping for $SLEEP_TIME_SECONDS seconds to cool down.."
	    sleep "$SLEEP_TIME_SECONDS"
	}

	# Runs every configuration set RUNS_PER_CONFIGSET times with both binaries.
	# configset is read by run_bench as a global.
	for cset in "${!CONFIGSET[@]}"
	do
	    echo "Starting with configuration set $cset"
	    configset=${CONFIGSET[$cset]}
	    echo "Configuration set: $configset"
	    for ((run = 1; run <= RUNS_PER_CONFIGSET; run++))
	    do
	        # Run benchmarks in random order to counter eventual
	        # disk caching or similar.
	        # bash's built-in $RANDOM is used for the coin flip: `date +%s%3N`
	        # relies on a GNU date extension and silently degrades to a fixed
	        # order where it is not supported.
	        if (( RANDOM % 2 ))
	        then
	            echo "Running PR first and then MB"
	            run_bench "$run" "$cset" "$BITCOIND_PR" "PR"
	            run_bench "$run" "$cset" "$BITCOIND_MB" "MB"
	        else
	            echo "Running MB first and then PR"
	            run_bench "$run" "$cset" "$BITCOIND_MB" "MB"
	            run_bench "$run" "$cset" "$BITCOIND_PR" "PR"
	        fi
	        echo "---"
	    done
	    echo ""
	done