Skip to content

Instantly share code, notes, and snippets.

@kou
Last active January 24, 2024 14:04
Show Gist options
  • Save kou/be02e02e5072c91969469dbf137b5de5 to your computer and use it in GitHub Desktop.
Save kou/be02e02e5072c91969469dbf137b5de5 to your computer and use it in GitHub Desktop.
COPY TO benchmark
#!/bin/bash
set -eu
set -o pipefail
base_dir=$(dirname "$0")
prepare_sql()
{
size=$1
db_name=$2
cat <<SQL
DROP DATABASE IF EXISTS ${db_name};
CREATE DATABASE ${db_name};
\\c ${db_name}
CREATE TABLE data (int32 integer);
SELECT setseed(0.29);
INSERT INTO data
SELECT random() * 10000
FROM generate_series(1, ${size});
SQL
}
measure()
{
local format=$1
local n_tries=6
for i in $(seq ${n_tries}); do
LANG=C \
PAGER=cat \
psql \
--no-psqlrc \
--command "\\timing" \
--command "COPY data TO '/dev/null' WITH (FORMAT ${format})" \
--command "COPY data TO '/dev/null' WITH (FORMAT ${format})" \
--command "COPY data TO '/dev/null' WITH (FORMAT ${format})" | \
grep --text -o '^Time: [0-9.]* ms' | \
grep -o '[0-9.]*'
done | sort --numeric-sort | head -n 9 | tail -n 1
}
result_suffix=""
if [ $# -gt 0 ]; then
result_suffix="-$1"
fi
result="${base_dir}/result${result_suffix}.csv"
echo "Format,N records,Elapsed time (ms)" | \
tee "${result}"
sizes=()
sizes+=(100000)
sizes+=(1000000)
sizes+=(10000000)
for size in "${sizes[@]}"; do
export PGDATABASE="copy_${size}"
echo "${size}: preparing"
prepare_sql "${size}" "${PGDATABASE}" | \
psql -d postgres
echo "${size}: text"
elapsed_time=$(measure text)
echo "text,${size},${elapsed_time}" | \
tee -a "${result}"
echo "${size}: csv"
elapsed_time=$(measure csv)
echo "csv,${size},${elapsed_time}" | \
tee -a "${result}"
echo "${size}: binary"
elapsed_time=$(measure binary)
echo "binary,${size},${elapsed_time}" | \
tee -a "${result}"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment