Created
October 30, 2020 12:40
-
-
Save kmoppel/7067dc8ffa5e248ef431af5c1ca560db to your computer and use it in GitHub Desktop.
Insert only data modelling perf test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DROP TABLE IF EXISTS standard_flow, alternative_flow, alternative_flow_state CASCADE; | |
--TRUNCATE standard_flow, alternative_flow, alternative_flow_state; | |
/* STANDARD UPDATES */ | |
CREATE TABLE IF NOT EXISTS standard_flow ( | |
session_id int8 NOT NULL GENERATED BY DEFAULT AS IDENTITY, | |
state text NOT NULL DEFAULT 'APPSTATE_1', | |
created_on timestamptz NOT NULL DEFAULT now(), | |
last_state_change timestamptz, | |
data1 int8, | |
data2 int8, | |
data3 int8, | |
data4 int8, | |
data5 int8, | |
data6 text, | |
data7 text, | |
data8 text, | |
data9 text, | |
data10 text | |
); | |
/* INSERT ONLY */ | |
CREATE TABLE IF NOT EXISTS alternative_flow ( | |
session_id int8 NOT NULL GENERATED BY DEFAULT AS IDENTITY, | |
-- state text NOT NULL DEFAULT 'APPSTATE_1', /* states moved to "alternative_flow_state" | |
created_on timestamptz NOT NULL DEFAULT now(), | |
-- last_state_change timestamptz, /* not needed anymore, covered by alternative_flow_state.created_on ! */ | |
data1 int8, | |
data2 int8, | |
data3 int8, | |
data4 int8, | |
data5 int8, | |
data6 text, | |
data7 text, | |
data8 text, | |
data9 text, | |
data10 text | |
); | |
CREATE TABLE IF NOT EXISTS alternative_flow_state ( | |
session_id int8 NOT NULL, | |
created_on timestamptz NOT NULL DEFAULT now(), | |
state text NOT NULL | |
); | |
/* generating 100 million rows to get out of RAM bounds */ | |
INSERT INTO standard_flow (state, created_on, last_state_change, data1, data2, data3, data4, data5, data6, data7, data8, data9, data10) | |
SELECT | |
'APPSTATE_5', | |
clock_timestamp() - '500ms'::interval, | |
clock_timestamp(), | |
0, | |
0, | |
0, | |
0, | |
0, | |
'SOMEDATA', | |
'SOMEDATA', | |
'SOMEDATA', | |
'SOMEDATA', | |
'SOMEDATA' | |
FROM | |
generate_series(1, 1e8) i; | |
-- let's also insert back some old data to "visualize" that there is | |
-- much more transient data with the insert-only model still | |
INSERT INTO alternative_flow (session_id, created_on, data1, data2, data3, data4, data5, data6, data7, data8, data9, data10) | |
SELECT | |
session_id, created_on, data1, data2, data3, data4, data5, data6, data7, data8, data9, data10 | |
FROM | |
standard_flow; | |
INSERT INTO alternative_flow_state (session_id, state, created_on) | |
SELECT | |
session_id, | |
'APPSTATE_' || i, | |
created_on + (i * '100ms'::interval) | |
FROM | |
standard_flow, | |
generate_series(1, 3) i; | |
VACUUM ANALYZE standard_flow, alternative_flow, alternative_flow_state; | |
ALTER TABLE standard_flow ADD CONSTRAINT standard_flow_pk PRIMARY KEY (session_id); | |
CREATE EXTENSION IF NOT EXISTS btree_gin; | |
CREATE INDEX ON standard_flow USING gin (state); | |
CREATE INDEX ON standard_flow (created_on); | |
CREATE INDEX ON standard_flow (last_state_change); | |
ALTER TABLE alternative_flow ADD CONSTRAINT alternative_flow_pk PRIMARY KEY (session_id); | |
CREATE INDEX ON alternative_flow (created_on); | |
CREATE INDEX ON alternative_flow_state (session_id); | |
CREATE INDEX ON alternative_flow_state USING gin (state); | |
CREATE INDEX ON alternative_flow_state (created_on); | |
-- not to get duplicate ID errors | |
SELECT setval('alternative_flow_session_id_seq', (select max(session_id) from alternative_flow)); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
set -e | |
# Assuming by default that testclusters are on localhost | |
# Specified DB will also be used for storing pg_stat_statement snapshots from all clusters for easier analyzing | |
export PGHOST=/var/run/postgresql | |
export PGUSER=postgres | |
export PGDATABASE=postgres | |
PGBENCH=/usr/lib/postgresql/12/bin/pgbench | |
PSQL=/usr/lib/postgresql/12/bin/psql | |
CONN_STR= # specify remote connections etc | |
DURATION=14400 # seconds | |
CLIENTS=32 # parallel "user" sessions | |
JOBS=2 # management threads | |
DROP_CACHES_BETWEEN_TESTS=1 # clear the Linux buffer cache before starting any tests. NB! Assumes root if set to 1 | |
INIT_SCHEMA=1 | |
SCHEMA_INIT_FILE=./init-schema.sql | |
TEST_FILES="tx-standard.sql tx-alternative.sql" | |
echo "Testing connection..." | |
$PSQL $CONN_STR -qXc "select 1" &>/dev/null | |
if [ "$?" -ne 0 ] ; then | |
echo "could not connect to PG, check connection params. exiting..." | |
exit 1 | |
fi | |
echo "OK" | |
echo "ensuring pg_stat_statements..." | |
$PSQL $CONN_STR -qXc "create extension if not exists pg_stat_statements" | |
echo "OK" | |
if [ $INIT_SCHEMA -gt 0 ]; then | |
echo "Initializing the test schema from $SCHEMA_INIT_FILE ..." | |
$PSQL $CONN_STR -f "$SCHEMA_INIT_FILE" | |
echo "OK" | |
fi | |
echo "" | |
echo "Table sizes before test" | |
$PSQL $CONN_STR -c "\dt+ *flow*" | |
echo "" | |
echo "Resetting pg_stat_statements..." | |
$PSQL $CONN_STR -c "select pg_stat_statements_reset()" | |
echo "OK" | |
for test_file in $TEST_FILES; do | |
if [ $DROP_CACHES_BETWEEN_TESTS -gt 0 ]; then | |
echo "Dropping OS file cache with:" | |
echo "echo 1 > /proc/sys/vm/drop_caches" | |
echo 1 > /proc/sys/vm/drop_caches | |
echo "OK" | |
fi | |
echo "Checkpointing..." | |
$PSQL $CONN_STR -c "Checkpoint" | |
echo "OK" | |
echo "Starting testing for file $test_file" | |
echo "pgbench -n -r -c $CLIENTS -j $JOBS -T $DURATION -f $test_file" | |
pgbench -n -r -c $CLIENTS -j $JOBS -T $DURATION -f $test_file | |
done | |
echo "" | |
echo "Finished running scripts" | |
echo "" | |
echo "" | |
echo "Table sizes after test" | |
$PSQL $CONN_STR -c "\dt+ *flow*" | |
echo "" | |
echo "pg_stat_statements data for standard UPDATE approach:" | |
$PSQL $CONN_STR -P pager=off -c "select mean_time, stddev_time, total_time::int8, calls, round(100 * shared_blks_hit::numeric / (shared_blks_hit + shared_blks_read), 1) as cache_hit_pct, blk_read_time, blk_write_time, query from pg_stat_statements where query ~ 'UPDATE standard_flow' order by calls desc limit 1;" | |
echo "" | |
echo "pg_stat_statements data for insert-only INSERTS:" | |
$PSQL $CONN_STR -P pager=off -c "select mean_time, stddev_time, total_time::int8, calls, round(100 * shared_blks_hit::numeric / (shared_blks_hit + shared_blks_read), 1) as cache_hit_pct, blk_read_time, blk_write_time, query from pg_stat_statements where query ~ 'INSERT INTO alternative_flow' and not query ~ 'RETURNING' order by calls desc limit 1;" | |
echo "" | |
echo "Done" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT (random() * max(session_id))::int8 as session_id FROM alternative_flow \gset | |
INSERT INTO alternative_flow_state (session_id, state) | |
VALUES (:session_id, 'APPSTATE_1'); | |
SELECT pg_sleep(0.1 * random()); | |
INSERT INTO alternative_flow_state (session_id, state) | |
VALUES (:session_id, 'APPSTATE_2'); | |
SELECT pg_sleep(0.1 * random()); | |
INSERT INTO alternative_flow_state (session_id, state) | |
VALUES (:session_id, 'APPSTATE_3'); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT (random() * max(session_id))::int8 as session_id FROM standard_flow \gset | |
UPDATE standard_flow | |
SET state = 'APPSTATE_1', | |
last_state_change = clock_timestamp() | |
WHERE session_id = :session_id; | |
SELECT pg_sleep(0.1 * random()); | |
UPDATE standard_flow | |
SET state = 'APPSTATE_2', | |
last_state_change = clock_timestamp() | |
WHERE session_id = :session_id; | |
SELECT pg_sleep(0.1 * random()); | |
UPDATE standard_flow | |
SET state = 'APPSTATE_3', | |
last_state_change = clock_timestamp() | |
WHERE session_id = :session_id; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment