-
-
Save stephanGarland/fe0788cf2332d6e241ff3ac6d9440caf to your computer and use it in GitHub Desktop.
Benchmarks loading some CSVs into Postgres (and optionally MySQL), specifically testing UUIDs vs SERIAL
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env zsh | |
function create() { | |
local db=$1 | |
printf "\n%s\n" "Dropping/re-creating tables" | |
if [[ "$db" == "postgres" ]]; then | |
psql -d postgres -c "DROP TABLE IF EXISTS uuid_pk; CREATE TABLE uuid_pk (id UUID PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));" | |
psql -d postgres -c "DROP TABLE IF EXISTS uuid_chr_pk; CREATE TABLE uuid_chr_pk (id VARCHAR(36) PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));" | |
psql -d postgres -c "DROP TABLE IF EXISTS identity_pk; CREATE TABLE identity_pk (id INT GENERATED ALWAYS AS IDENTITY (CACHE 1000) PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));" | |
psql -d postgres -c "DROP TABLE IF EXISTS serial_pk; CREATE TABLE serial_pk (id SERIAL PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));" | |
psql -d postgres -c "DROP TABLE IF EXISTS identity_pk_big; CREATE TABLE identity_pk_big (id BIGINT GENERATED ALWAYS AS IDENTITY (CACHE 1000) PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));" | |
psql -d postgres -c "DROP TABLE IF EXISTS serial_pk_big; CREATE TABLE serial_pk_big (id BIGSERIAL PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));" | |
elif [[ "$db" == "mysql" ]]; then | |
mysql -D test -Be "DROP TABLE IF EXISTS uuid_pk; CREATE TABLE uuid_pk (id BINARY(16), first_name VARCHAR(255), last_name VARCHAR(255));" | |
mysql -D test -Be "DROP TABLE IF EXISTS uuid_chr_pk; CREATE TABLE uuid_chr_pk (id CHAR(36) PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));" | |
mysql -D test -Be "DROP TABLE IF EXISTS serial_pk; CREATE TABLE serial_pk (id INT AUTO_INCREMENT PRIMARY KEY, first_name VARCHAR(255), last_name VARCHAR(255));" | |
else | |
printf "\n%s\n" "ERROR: Invalid DB type ${db} requested" | |
exit 1 | |
fi | |
} | |
function load() { | |
local db=$1 | |
if [[ "$db" == "postgres" ]]; then | |
local time | |
printf "\n%s\n" "Postgres: loading 1,000,000 rows into table uuid_pk, using a v4 UUID with data type UUID as PK" | |
time=$(psql -d postgres -c '\timing' -c "COPY uuid_pk FROM '/mnt/ramdisk/uuid_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_uuid_arr+=($time) | |
printf "\n%s\n" "Postgres: loading 1,000,000 rows into table uuid_chr_pk, using a v4 UUID with data type VARCHAR(36) as PK" | |
time=$(psql -d postgres -c '\timing' -c "COPY uuid_chr_pk FROM '/mnt/ramdisk/uuid_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_uuid_chr_arr+=($time) | |
printf "\n%s\n" "Postgres: loading 1,000,000 rows into table identity_pk, using a CACHED IDENTITY with datatype INT as PK" | |
time=$(psql -d postgres -c '\timing' -c "COPY identity_pk (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_identity_arr+=($time) | |
printf "\n%s\n" "Postgres: loading 1,000,000 rows into table serial_pk, using a SERIAL with datatype INT as PK" | |
time=$(psql -d postgres -c '\timing' -c "COPY serial_pk (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_serial_arr+=($time) | |
printf "\n%s\n" "Postgres: loading 1,000,000 rows into table identity_pk_big, using a CACHED IDENTITY with datatype BIGINT as PK" | |
time=$(psql -d postgres -c '\timing' -c "COPY identity_pk_big (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_identity_big_arr+=($time) | |
printf "\n%s\n" "Postgres: loading 1,000,000 rows into table serial_pk_big, using a BIGSERIAL with datatype BIGINT as PK" | |
time=$(psql -d postgres -c '\timing' -c "COPY serial_pk_big (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_0.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_serial_big_arr+=($time) | |
elif [[ "$db" == "mysql" ]]; then | |
printf "\n%s\n" "MySQL: loading 1,000,000 rows into table uuid_pk, using a v4 UUID with data type UUID as PK" | |
time=$(mysql -D test -Be "SET profiling=1; LOAD DATA INFILE '/mnt/ramdisk/uuid_pk_0.csv' INTO TABLE uuid_pk FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY \"'\" IGNORE 1 LINES (@uuid_str, first_name, last_name) SET id = UUID_TO_BIN(@uuid_str); SELECT SUM(duration) AS 'time (sec)' FROM information_schema.profiling WHERE query_id=1;") | |
mysql_uuid_arr+=($time) | |
printf "\n%s\n" "MySQL: loading 1,000,000 rows into table uuid_chr_pk, using a v4 UUID with data type CHAR(36) as PK" | |
time=$(mysql -D test -Be "SET profiling=1; LOAD DATA INFILE '/mnt/ramdisk/uuid_pk_0.csv' INTO TABLE uuid_chr_pk FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY \"'\" IGNORE 1 LINES (id, first_name, last_name); SELECT SUM(duration) AS 'time (sec)' FROM information_schema.profiling WHERE query_id=1;") | |
mysql_uuid_chr_arr+=($time) | |
printf "\n%s\n" "MySQL: loading 1,000,000 rows into table serial_pk, using an auto-incrementing column with data type INT as PK" | |
time=$(mysql -D test -Be "SET profiling=1; LOAD DATA INFILE '/mnt/ramdisk/serial_pk_0.csv' INTO TABLE serial_pk FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY \"'\" IGNORE 1 LINES (first_name, last_name); SELECT SUM(duration) AS 'time (sec)' FROM information_schema.profiling WHERE query_id=1;") | |
mysql_serial_arr+=($time) | |
else | |
printf "\n%s\n" "ERROR: Invalid DB type ${db} requested" | |
exit 1 | |
fi | |
} | |
function append() { | |
local db=$1 | |
local idx=$2 | |
local time | |
if [[ "$db" == "postgres" ]]; then | |
printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table uuid_pk ($idx)" | |
time=$(psql -d postgres -c '\timing' -c "COPY uuid_pk FROM '/mnt/ramdisk/uuid_pk_${idx}.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_uuid_arr+=($time) | |
printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table uuid_chr_pk ($idx)" | |
time=$(psql -d postgres -c '\timing' -c "COPY uuid_chr_pk FROM '/mnt/ramdisk/uuid_pk_${idx}.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_uuid_chr_arr+=($time) | |
printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table identity_pk ($idx)" | |
time=$(psql -d postgres -c '\timing' -c "COPY identity_pk (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_${idx}.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_identity_arr+=($time) | |
printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table serial_pk ($idx)" | |
time=$(psql -d postgres -c '\timing' -c "COPY serial_pk (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_${idx}.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_serial_arr+=($time) | |
printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table identity_pk_big ($idx)" | |
time=$(psql -d postgres -c '\timing' -c "COPY identity_pk_big (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_${idx}.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_identity_big_arr+=($time) | |
printf "\n%s\n" "Postgres: Appending 1,000,000 rows into table serial_pk_big ($idx)" | |
time=$(psql -d postgres -c '\timing' -c "COPY serial_pk_big (first_name, last_name) FROM '/mnt/ramdisk/serial_pk_${idx}.csv' WITH (FORMAT CSV, HEADER, QUOTE '''');" | awk '/Time:/ {print $2}') | |
pg_serial_big_arr+=($time) | |
elif [[ "$db" == "mysql" ]]; then | |
printf "\n%s\n" "MySQL: Appending 1,000,000 rows into table uuid_pk ($idx)" | |
time=$(mysql -D test -Be "SET profiling=1; LOAD DATA INFILE '/mnt/ramdisk/uuid_pk_${idx}.csv' INTO TABLE uuid_pk FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY \"'\" IGNORE 1 LINES (@uuid_str, first_name, last_name) SET id = UUID_TO_BIN(@uuid_str); SELECT SUM(duration) FROM information_schema.profiling WHERE query_id=1;" | awk 'NR>1 {print}') | |
mysql_uuid_arr+=($time) | |
printf "\n%s\n" "MySQL: Appending 1,000,000 rows into table uuid_chr_pk ($idx)" | |
time=$(mysql -D test -Be "SET profiling=1; LOAD DATA INFILE '/mnt/ramdisk/uuid_pk_${idx}.csv' INTO TABLE uuid_chr_pk FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY \"'\" IGNORE 1 LINES (id, first_name, last_name); SELECT SUM(duration) FROM information_schema.profiling WHERE query_id=1;" | awk 'NR>1 {print}') | |
mysql_uuid_chr_arr+=($time) | |
printf "\n%s\n" "MySQL: Appending 1,000,000 rows into table serial_pk ($idx)" | |
time=$(mysql -D test -Be "SET profiling=1; LOAD DATA INFILE '/mnt/ramdisk/serial_pk_${idx}.csv' INTO TABLE serial_pk FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY \"'\" IGNORE 1 LINES (first_name, last_name); SELECT SUM(duration) FROM information_schema.profiling WHERE query_id=1;" | awk 'NR>1 {print}') | |
mysql_serial_arr+=($time) | |
else | |
printf "\n%s\n" "ERROR: Invalid DB type ${db} requested" | |
exit 1 | |
fi | |
} | |
function calculate_stats() { | |
local arr_name=$1[@] | |
local arr=("${(P)arr_name}") | |
local sum=0 | |
local max=0 | |
local min=999999999 | |
local sq_sum=0 | |
for i in "${arr[@]}"; do | |
sum=$(echo "$sum + $i" | bc) | |
if (( $(echo "$i > $max" | bc -l) )); then | |
max=$i | |
fi | |
if (( $(echo "$i < $min" | bc -l) )); then | |
min=$i | |
fi | |
done | |
local avg=$(echo "scale=3; $sum / ${#arr[@]}" | bc) | |
for i in "${arr[@]}"; do | |
sq_sum=$(echo "$sq_sum + (($i - $avg)^2)" | bc) | |
done | |
local stddev=$(echo "scale=3; sqrt($sq_sum / ${#arr[@]})" | bc) | |
printf "\n%s\n" "Stats for $1:" | |
printf "Runs: "; printf "%s " "${arr[@]}"; printf "\n" | |
printf "Min: %.3f\n" $min | |
printf "Max: %.3f\n" $max | |
printf "Avg: %.3f\n" $avg | |
printf "StdDev: %.3f\n" $stddev | |
} | |
mysql_uuid_arr=() | |
mysql_uuid_chr_arr=() | |
mysql_serial_arr=() | |
pg_uuid_arr=() | |
pg_uuid_chr_arr=() | |
pg_identity_arr=() | |
pg_serial_arr=() | |
pg_identity_big_arr=() | |
pg_serial_big_arr=() | |
if [ -z $1 ]; then | |
printf "\n%s\n" "ERROR: Script requires an integer as the 1st arg for number of appends" | |
exit 1 | |
fi | |
create postgres | |
# create mysql | |
load postgres | |
#load mysql | |
for i in {1..$1}; do | |
append postgres $i | |
#append mysql $i | |
done | |
printf "\n%s\n" "Loaded $(( (1 + $i) * 1000000 )) rows into each table" | |
printf "\n%s\n" "Statistics for various appends" | |
calculate_stats pg_uuid_arr | |
calculate_stats pg_uuid_chr_arr | |
calculate_stats pg_identity_arr | |
calculate_stats pg_serial_arr | |
calculate_stats pg_identity_big_arr | |
calculate_stats pg_serial_big_arr | |
#calculate_stats mysql_uuid_arr | |
#calculate_stats mysql_uuid_chr_arr | |
#calculate_stats mysql_serial_arr |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment