Skip to content

Instantly share code, notes, and snippets.

@sebastianwebber
Last active March 17, 2023 14:33
Show Gist options
  • Save sebastianwebber/57b279daddcd44f4c0fa65cbea9538ee to your computer and use it in GitHub Desktop.
Save sebastianwebber/57b279daddcd44f4c0fa65cbea9538ee to your computer and use it in GitHub Desktop.
Run benchmarks on postgres with pgbench
#!/bin/bash -ex
export PGBIN=/usr/pgsql-9.3/bin
export PGUSER=postgres
export PGDATABASE=bench
export DATADIR=/dados/pgbench
export CLUSTER_LOG=/tmp/benchmark.log
export TOTAL_CPUS=$(grep 'cpu cores' /proc/cpuinfo | uniq | awk '{print $NF}')
export MAX_EXECUTIONS=5
export MINIMUM_LOAD_TO_CONTINUE=1.5
### dependcies: bc
### inspired by:
## https://www.enterprisedb.com/blog/pgbench-performance-benchmark-postgresql-12-and-edb-advanced-server-12
function setup {
if [ ! -f "${DATADIR}/postgresql.conf" ]; then
sudo -u ${PGUSER} \
${PGBIN}/initdb \
-A trust \
-E utf-8 \
-D ${DATADIR}
fi
start-db
local F_SCALE_FACTOR=4000
sudo -u ${PGUSER} \
${PGBIN}/pgbench -i \
-j ${TOTAL_CPUS} \
-s ${F_SCALE_FACTOR} \
-U ${PGUSER} \
${PGDATABASE}
stop-db
}
function extract-value {
echo "${1}" | grep "${2}" | awk -F "${3:-:}" '{print $NF}' | xargs echo
}
function print-summary {
echo "RUN,CLIENTS,THREADS,DURATION,TOTAL_TRANSACTIONS,TPS_WITH_CONN,TPS_WITHOUT_CONN,LOAD1,USED_RAM,USED_SWAP"
}
function process-summary {
local OUT=${1}
CLIENTS=$(extract-value "${OUT}" "number of clients:")
THREADS=$(extract-value "${OUT}" "number of threads:")
DURATION=$(extract-value "${OUT}" "duration:")
TOTAL_TRANSACTIONS=$(extract-value "${OUT}" "number of transactions actually processed:")
TPS_WITH_CONN=$(extract-value "${OUT}" "including connections establishing" "=" | awk '{ print $1 }')
TPS_WITHOUT_CONN=$(extract-value "${OUT}" "excluding connections establishing" "=" | awk '{ print $1 }')
echo "${CURRENT_RUN},${CLIENTS},${THREADS},${DURATION},${TOTAL_TRANSACTIONS},${TPS_WITH_CONN},${TPS_WITHOUT_CONN},$(get-load1),$(get-memory-and-swap)"
}
function clean-cache {
## os cache
sync; echo 3 > /proc/sys/vm/drop_caches
## swap
swapoff -a && swapon -a
}
function get-memory-and-swap {
local data=$(free --bytes -w)
local used_ram=$(echo "${data}" | grep Mem: | awk '{print $2 - $4}')
local used_swap=$(echo "${data}" | grep Swap: | awk '{print $2 - $4}')
echo "${used_ram},${used_swap}"
}
function get-load1 {
uptime | awk -F ', ' '{print $(NF-2)}' | awk '{print $NF}' | sed 's/,/./'
}
function wait-for-high-load {
log "wait server load to slown down"
while true; do
{
local LOAD1=$(get-load1)
if [ 1 -eq $(echo "${LOAD1} < ${MINIMUM_LOAD_TO_CONTINUE}" | bc) ]; then
break
fi
sleep 5
}
done
}
function execute-benchmark {
local MAX_USERS=${1}
local MAX_TIME_SECONDS=${2:-600}
local EXTRA_ARGS=${3}
process-summary "$(sudo -u ${PGUSER} \
${PGBIN}/pgbench \
-c ${MAX_USERS} \
-j ${MAX_USERS} \
-n \
-C \
-T ${MAX_TIME_SECONDS} \
-r \
${EXTRA_ARGS} \
-U ${PGUSER} \
${PGDATABASE} 2>&1)"
}
function log {
echo ">> ${*}"
}
function stop-db {
sudo -u ${PGUSER} \
${PGBIN}/pg_ctl stop \
-m immediate \
-D ${DATADIR} \
-l ${CLUSTER_LOG} 2>&1 > /dev/null || true
}
function start-db {
sudo -u ${PGUSER} \
${PGBIN}/pg_ctl start \
-D ${DATADIR} \
-l ${CLUSTER_LOG} \
2>&1 > /dev/null
while true; do
{
${PGBIN}/pg_isready 2>&1 > /dev/null \
&& break
} || sleep 1
done
}
function cache-tables {
SCRIPT_FILE=$(mktemp)
echo '\o /dev/null' > ${SCRIPT_FILE}
# echo "set work_mem = '40GB'; " >> ${SCRIPT_FILE}
# echo "set maintenance_work_mem = '40GB'; " >> ${SCRIPT_FILE}
# echo "set effective_cache_size = '200GB'; " >> ${SCRIPT_FILE}
psql -U ${PGUSER} \
-d ${PGDATABASE} \
-At \
-c "select format('SELECT * FROM %s;', relname) from pg_stat_user_tables;" \
>> ${SCRIPT_FILE} 2>&1
echo '\o' >> ${SCRIPT_FILE}
psql -U ${PGUSER} -d ${PGDATABASE} -f ${SCRIPT_FILE}
rm -rfv ${SCRIPT_FILE}
}
function run-vacuum {
SCRIPT_FILE=$(mktemp)
echo "set maintenance_work_mem = '20GB'; " > ${SCRIPT_FILE}
echo "VACUUM (analyze, freeze);" >> ${SCRIPT_FILE}
psql -U ${PGUSER} -d ${PGDATABASE} -f ${SCRIPT_FILE}
rm -rfv ${SCRIPT_FILE}
}
function restart-db {
log Stopping database...
stop-db
log Cleaning OS caches...
clean-cache
log Starting DB...
start-db
log vacuuming
run-vacuum
# log "populating shared_buffers cache (reading all tables)..."
# cache-tables
}
function prepare-benchmark {
wait-for-high-load
restart-db
log "populating shared_buffers cache (running benchmark for 60s)..."
execute-benchmark 1 60 "-S" 2>&1 > /dev/null
}
function run-benchmarks {
print-summary > ${REPORT_FILE}
local USER_TARGET=(1 10 50 100 200)
# local USER_TARGET=(1 5)
for current_run in $(seq 1 ${MAX_EXECUTIONS}); do
export CURRENT_RUN="#${current_run}"
for total_users in ${USER_TARGET[@]}; do
echo Running benchmark with ${total_users} users:
prepare-benchmark
execute-benchmark "${total_users}" 2>&1 >> ${REPORT_FILE}
done
done
echo
log "Report saved on ${REPORT_FILE} :"
echo
cat ${REPORT_FILE}
stop-db
}
# IDEAS:
# - support other tools or script instead of pgbench
# - support custom files for pgbench
# - use pg_stat_statements and print slow queries
# - disable autovacuum before start the db? (flag like --no-autovacuum)
# - use pg-prewarm to load the tables on the shared_buffers
export REPORT_FILE=$(mktemp -p $(pwd) -t "benchmark-report-XXXXXXXXXX.csv")
[[ ! -z "$1" ]] && "$@"
#from: /etc/tuned/postgresql/tuned.conf
[main]
include= throughput-performance
[vm]
transparent_hugepages=never
[sysctl]
vm.overcommit_memory = 2
vm.swappiness = 1
kernel.sched_autogroup_enabled = 0
kernel.sched_migration_cost_ns = 50000000
vm.dirty_ratio = 90
vm.dirty_background_ratio = 10
vm.overcommit_ratio = 90
vm.zone_reclaim_mode = 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment