Skip to content

Instantly share code, notes, and snippets.

@stephanGarland
Last active August 1, 2023 18:38
Show Gist options
  • Save stephanGarland/fe0788cf2332d6e241ff3ac6d9440caf to your computer and use it in GitHub Desktop.
Benchmarks loading some CSVs into Postgres (and optionally MySQL), specifically testing UUIDs vs SERIAL
#!/usr/bin/env zsh
# Drops and re-creates every benchmark table for the requested database.
# Arguments: $1 - database type: "postgres" or "mysql"
# Exits:     1 on an unrecognized database type
function create() {
  local db=$1
  printf "\n%s\n" "Dropping/re-creating tables"
  if [[ "$db" == "postgres" ]]; then
    psql -d postgres -c "DROP TABLE IF EXISTS uuid_pk; CREATE TABLE uuid_pk (id UUID PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));"
    psql -d postgres -c "DROP TABLE IF EXISTS uuid_chr_pk; CREATE TABLE uuid_chr_pk (id VARCHAR(36) PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));"
    psql -d postgres -c "DROP TABLE IF EXISTS identity_pk; CREATE TABLE identity_pk (id INT GENERATED ALWAYS AS IDENTITY (CACHE 1000) PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));"
    psql -d postgres -c "DROP TABLE IF EXISTS serial_pk; CREATE TABLE serial_pk (id SERIAL PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));"
    psql -d postgres -c "DROP TABLE IF EXISTS identity_pk_big; CREATE TABLE identity_pk_big (id BIGINT GENERATED ALWAYS AS IDENTITY (CACHE 1000) PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));"
    psql -d postgres -c "DROP TABLE IF EXISTS serial_pk_big; CREATE TABLE serial_pk_big (id BIGSERIAL PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));"
  elif [[ "$db" == "mysql" ]]; then
    # BUGFIX: id was missing PRIMARY KEY here, unlike every other table in this
    # PK-type benchmark (cf. the Postgres uuid_pk above) -- without it the
    # UUID-vs-auto-increment comparison on MySQL was not measuring PK inserts.
    mysql -D test -Be "DROP TABLE IF EXISTS uuid_pk; CREATE TABLE uuid_pk (id BINARY(16) PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));"
    mysql -D test -Be "DROP TABLE IF EXISTS uuid_chr_pk; CREATE TABLE uuid_chr_pk (id CHAR(36) PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));"
    mysql -D test -Be "DROP TABLE IF EXISTS serial_pk; CREATE TABLE serial_pk (id INT AUTO_INCREMENT PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));"
  else
    # Diagnostics belong on stderr.
    printf "\n%s\n" "ERROR: Invalid DB type ${db} requested" >&2
    exit 1
  fi
}
# Performs the initial 1,000,000-row load into every table for the given DB,
# appending each load's elapsed time to the matching global timing array.
# Globals (written): pg_uuid_arr, pg_uuid_chr_arr, pg_identity_arr,
#                    pg_serial_arr, pg_identity_big_arr, pg_serial_big_arr,
#                    mysql_uuid_arr, mysql_uuid_chr_arr, mysql_serial_arr
# Arguments: $1 - database type: "postgres" or "mysql"
# Exits:     1 on an unrecognized database type
function load() {
  local db=$1
  # BUGFIX: was only declared local inside the postgres branch, so the mysql
  # branch leaked a global named 'time'.
  local time
  if [[ "$db" == "postgres" ]]; then
    printf "\n%s\n" "Postgres: loading 1,000,000 rows into table uuid_pk, using a v4 UUID with data type UUID as PK"
    time=$(psql -d postgres -c '\timing' -c "COPY uuid_pk FROM '/mnt/ramdisk/uuid_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}')
    pg_uuid_arr+=("$time")
    printf "\n%s\n" "Postgres: loading 1,000,000 rows into table uuid_chr_pk, using a v4 UUID with data type VARCHAR(36) as PK"
    time=$(psql -d postgres -c '\timing' -c "COPY uuid_chr_pk FROM '/mnt/ramdisk/uuid_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}')
    pg_uuid_chr_arr+=("$time")
    printf "\n%s\n" "Postgres: loading 1,000,000 rows into table identity_pk, using a CACHED IDENTITY with datatype INT as PK"
    time=$(psql -d postgres -c '\timing' -c "COPY identity_pk (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}')
    pg_identity_arr+=("$time")
    printf "\n%s\n" "Postgres: loading 1,000,000 rows into table serial_pk, using a SERIAL with datatype INT as PK"
    time=$(psql -d postgres -c '\timing' -c "COPY serial_pk (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}')
    pg_serial_arr+=("$time")
    printf "\n%s\n" "Postgres: loading 1,000,000 rows into table identity_pk_big, using a CACHED IDENTITY with datatype BIGINT as PK"
    time=$(psql -d postgres -c '\timing' -c "COPY identity_pk_big (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}')
    pg_identity_big_arr+=("$time")
    printf "\n%s\n" "Postgres: loading 1,000,000 rows into table serial_pk_big, using a BIGSERIAL with datatype BIGINT as PK"
    time=$(psql -d postgres -c '\timing' -c "COPY serial_pk_big (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}')
    pg_serial_big_arr+=("$time")
  elif [[ "$db" == "mysql" ]]; then
    # BUGFIX: the mysql results were captured WITHOUT stripping the column
    # header row, so the timing arrays received header text as well as the
    # number (append() already filtered with awk 'NR>1'); filter here too so
    # calculate_stats() only ever sees numeric values.
    printf "\n%s\n" "MySQL: loading 1,000,000 rows into table uuid_pk, using a v4 UUID with data type UUID as PK"
    time=$(mysql -D test -Be "SET profiling=1; LOAD DATA INFILE '/mnt/ramdisk/uuid_pk_0.csv' INTO TABLE uuid_pk FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY \"'\" IGNORE 1 LINES (@uuid_str, first_name, last_name) SET id = UUID_TO_BIN(@uuid_str); SELECT SUM(duration) FROM information_schema.profiling WHERE query_id=1;" | awk 'NR>1 {print}')
    mysql_uuid_arr+=("$time")
    printf "\n%s\n" "MySQL: loading 1,000,000 rows into table uuid_chr_pk, using a v4 UUID with data type CHAR(36) as PK"
    time=$(mysql -D test -Be "SET profiling=1; LOAD DATA INFILE '/mnt/ramdisk/uuid_pk_0.csv' INTO TABLE uuid_chr_pk FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY \"'\" IGNORE 1 LINES (id, first_name, last_name); SELECT SUM(duration) FROM information_schema.profiling WHERE query_id=1;" | awk 'NR>1 {print}')
    mysql_uuid_chr_arr+=("$time")
    printf "\n%s\n" "MySQL: loading 1,000,000 rows into table serial_pk, using an auto-incrementing column with data type INT as PK"
    time=$(mysql -D test -Be "SET profiling=1; LOAD DATA INFILE '/mnt/ramdisk/serial_pk_0.csv' INTO TABLE serial_pk FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY \"'\" IGNORE 1 LINES (first_name, last_name); SELECT SUM(duration) FROM information_schema.profiling WHERE query_id=1;" | awk 'NR>1 {print}')
    mysql_serial_arr+=("$time")
  else
    printf "\n%s\n" "ERROR: Invalid DB type ${db} requested" >&2
    exit 1
  fi
}
# Appends 1,000,000 more rows (from CSV file index $2) into every table for
# the given database, recording each elapsed time in the matching global
# timing array (pg_*_arr / mysql_*_arr).
# Arguments: $1 - database type: "postgres" or "mysql"
#            $2 - CSV file index (e.g. 3 -> uuid_pk_3.csv / serial_pk_3.csv)
# Exits:     1 on an unrecognized database type
function append() {
  local db=$1
  local idx=$2
  local time
  # One timed Postgres COPY. $1 = COPY target (table plus optional column
  # list), $2 = source CSV path. Emits the elapsed milliseconds on stdout.
  _timed_copy() {
    psql -d postgres -c '\timing' -c "COPY $1 FROM '$2' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}'
  }
  # One timed MySQL LOAD DATA INFILE. $1 = source CSV path, $2 = table,
  # $3 = trailing column/SET clause. Emits the duration (header stripped).
  _timed_load() {
    mysql -D test -Be "SET profiling=1; LOAD DATA INFILE '$1' INTO TABLE $2 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY \"'\" IGNORE 1 LINES $3; SELECT SUM(duration) FROM information_schema.profiling WHERE query_id=1;" | awk 'NR>1 {print}'
  }
  if [[ "$db" == "postgres" ]]; then
    printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table uuid_pk ($idx)"
    time=$(_timed_copy "uuid_pk" "/mnt/ramdisk/uuid_pk_${idx}.csv")
    pg_uuid_arr+=("$time")
    printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table uuid_chr_pk ($idx)"
    time=$(_timed_copy "uuid_chr_pk" "/mnt/ramdisk/uuid_pk_${idx}.csv")
    pg_uuid_chr_arr+=("$time")
    printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table identity_pk ($idx)"
    time=$(_timed_copy "identity_pk (first_name, last_name)" "/mnt/ramdisk/serial_pk_${idx}.csv")
    pg_identity_arr+=("$time")
    printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table serial_pk ($idx)"
    time=$(_timed_copy "serial_pk (first_name, last_name)" "/mnt/ramdisk/serial_pk_${idx}.csv")
    pg_serial_arr+=("$time")
    printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table identity_pk_big ($idx)"
    time=$(_timed_copy "identity_pk_big (first_name, last_name)" "/mnt/ramdisk/serial_pk_${idx}.csv")
    pg_identity_big_arr+=("$time")
    printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table serial_pk_big ($idx)"
    time=$(_timed_copy "serial_pk_big (first_name, last_name)" "/mnt/ramdisk/serial_pk_${idx}.csv")
    pg_serial_big_arr+=("$time")
  elif [[ "$db" == "mysql" ]]; then
    printf "\n%s\n" "MySQL: Appending 1,000,000 rows into table uuid_pk ($idx)"
    time=$(_timed_load "/mnt/ramdisk/uuid_pk_${idx}.csv" "uuid_pk" "(@uuid_str, first_name, last_name) SET id = UUID_TO_BIN(@uuid_str)")
    mysql_uuid_arr+=("$time")
    printf "\n%s\n" "MySQL: Appending 1,000,000 rows into table uuid_chr_pk ($idx)"
    time=$(_timed_load "/mnt/ramdisk/uuid_pk_${idx}.csv" "uuid_chr_pk" "(id, first_name, last_name)")
    mysql_uuid_chr_arr+=("$time")
    printf "\n%s\n" "MySQL: Appending 1,000,000 rows into table serial_pk ($idx)"
    time=$(_timed_load "/mnt/ramdisk/serial_pk_${idx}.csv" "serial_pk" "(first_name, last_name)")
    mysql_serial_arr+=("$time")
  else
    printf "\n%s\n" "ERROR: Invalid DB type ${db} requested"
    exit 1
  fi
}
# Computes and prints Min/Max/Avg/StdDev over a global timing array.
# Arguments: $1 - the NAME of a global array (passed by name, not by value)
# Outputs:   a stats summary block to stdout
# NOTE(review): ${(P)...} is zsh-only indirect expansion -- this function will
# not work under bash (which would need "${!arr_name}") despite looking portable.
function calculate_stats() {
# Build "name[@]" then dereference it with zsh's (P) flag to get all elements.
local arr_name=$1[@]
local arr=("${(P)arr_name}")
local sum=0
local max=0
# Sentinel assumed larger than any real run time -- TODO confirm units
# (psql emits ms; the mysql path emits seconds), so the arrays mix scales.
local min=999999999
local sq_sum=0
# bc does the arithmetic because the timings are decimals and shell
# arithmetic is integer-only; -l loads the math library for comparisons.
for i in "${arr[@]}"; do
sum=$(echo "$sum + $i" | bc)
if (( $(echo "$i > $max" | bc -l) )); then
max=$i
fi
if (( $(echo "$i < $min" | bc -l) )); then
min=$i
fi
done
local avg=$(echo "scale=3; $sum / ${#arr[@]}" | bc)
# Second pass: sum of squared deviations from the mean.
for i in "${arr[@]}"; do
sq_sum=$(echo "$sq_sum + (($i - $avg)^2)" | bc)
done
# Population standard deviation (divides by N, not N-1).
local stddev=$(echo "scale=3; sqrt($sq_sum / ${#arr[@]})" | bc)
printf "\n%s\n" "Stats for $1:"
printf "Runs: "; printf "%s " "${arr[@]}"; printf "\n"
printf "Min: %.3f\n" $min
printf "Max: %.3f\n" $max
printf "Avg: %.3f\n" $avg
printf "StdDev: %.3f\n" $stddev
}
# --- Main --------------------------------------------------------------------
# Global timing arrays, one per table: create()/load()/append() write them,
# calculate_stats() reads them by name.
mysql_uuid_arr=()
mysql_uuid_chr_arr=()
mysql_serial_arr=()
pg_uuid_arr=()
pg_uuid_chr_arr=()
pg_identity_arr=()
pg_serial_arr=()
pg_identity_big_arr=()
pg_serial_big_arr=()
# BUGFIX: the old check was an unquoted `[ -z $1 ]` that only rejected an
# empty argument -- a non-numeric arg (e.g. "abc") fell through to the loop.
# Require an actual positive integer.
if [[ ! "$1" =~ ^[0-9]+$ ]] || (( $1 < 1 )); then
  printf "\n%s\n" "ERROR: Script requires an integer as the 1st arg for number of appends" >&2
  exit 1
fi
create postgres
# create mysql
load postgres
#load mysql
# C-style loop instead of {1..$1}: works identically in zsh and bash, and
# doesn't rely on brace-expansion of a variable (a zsh-only behavior).
for (( i = 1; i <= $1; i++ )); do
  append postgres "$i"
  #append mysql $i
done
# BUGFIX: use $1 directly rather than the leftover loop variable $i, whose
# post-loop value depends on the loop construct used.
printf "\n%s\n" "Loaded $(( (1 + $1) * 1000000 )) rows into each table"
printf "\n%s\n" "Statistics for various appends"
calculate_stats pg_uuid_arr
calculate_stats pg_uuid_chr_arr
calculate_stats pg_identity_arr
calculate_stats pg_serial_arr
calculate_stats pg_identity_big_arr
calculate_stats pg_serial_big_arr
#calculate_stats mysql_uuid_arr
#calculate_stats mysql_uuid_chr_arr
#calculate_stats mysql_serial_arr
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment