@kmoppel
Created October 30, 2020 12:40
Insert-only data modelling perf test
DROP TABLE IF EXISTS standard_flow, alternative_flow, alternative_flow_state CASCADE;
--TRUNCATE standard_flow, alternative_flow, alternative_flow_state;
/* STANDARD UPDATES */
CREATE TABLE IF NOT EXISTS standard_flow (
  session_id int8 NOT NULL GENERATED BY DEFAULT AS IDENTITY,
  state text NOT NULL DEFAULT 'APPSTATE_1',
  created_on timestamptz NOT NULL DEFAULT now(),
  last_state_change timestamptz,
  data1 int8,
  data2 int8,
  data3 int8,
  data4 int8,
  data5 int8,
  data6 text,
  data7 text,
  data8 text,
  data9 text,
  data10 text
);
/* INSERT ONLY */
CREATE TABLE IF NOT EXISTS alternative_flow (
  session_id int8 NOT NULL GENERATED BY DEFAULT AS IDENTITY,
  -- state text NOT NULL DEFAULT 'APPSTATE_1', -- states moved to "alternative_flow_state"
  created_on timestamptz NOT NULL DEFAULT now(),
  -- last_state_change timestamptz, -- not needed any more, covered by alternative_flow_state.created_on!
  data1 int8,
  data2 int8,
  data3 int8,
  data4 int8,
  data5 int8,
  data6 text,
  data7 text,
  data8 text,
  data9 text,
  data10 text
);
CREATE TABLE IF NOT EXISTS alternative_flow_state (
  session_id int8 NOT NULL,
  created_on timestamptz NOT NULL DEFAULT now(),
  state text NOT NULL
);
/* Generate 100 million rows so that the dataset clearly exceeds RAM */
INSERT INTO standard_flow (state, created_on, last_state_change, data1, data2, data3, data4, data5, data6, data7, data8, data9, data10)
SELECT
  'APPSTATE_5',
  clock_timestamp() - '500ms'::interval,
  clock_timestamp(),
  0, 0, 0, 0, 0,
  'SOMEDATA', 'SOMEDATA', 'SOMEDATA', 'SOMEDATA', 'SOMEDATA'
FROM
  generate_series(1, 1e8) i;
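-- Optional sanity check: confirm that the generated table clearly exceeds
-- the machine's RAM so the test is actually disk-bound (illustrative helper,
-- not part of the test flow itself)
SELECT pg_size_pretty(pg_total_relation_size('standard_flow')) AS standard_flow_size;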
-- also copy the existing sessions over, to "visualize" that the insert-only
-- model still accumulates considerably more transient data
INSERT INTO alternative_flow (session_id, created_on, data1, data2, data3, data4, data5, data6, data7, data8, data9, data10)
SELECT
  session_id, created_on, data1, data2, data3, data4, data5, data6, data7, data8, data9, data10
FROM
  standard_flow;
INSERT INTO alternative_flow_state (session_id, state, created_on)
SELECT
  session_id,
  'APPSTATE_' || i,
  created_on + (i * '100ms'::interval)
FROM
  standard_flow,
  generate_series(1, 3) i;
VACUUM ANALYZE standard_flow, alternative_flow, alternative_flow_state;
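-- Quick look at the resulting data volumes; reltuples is accurate right after
-- the VACUUM ANALYZE above, and alternative_flow_state should show roughly 3x
-- the rows of the other two tables (illustrative check)
SELECT relname, reltuples::int8 AS approx_rows,
       pg_size_pretty(pg_total_relation_size(oid)) AS total_size
FROM pg_class
WHERE relname IN ('standard_flow', 'alternative_flow', 'alternative_flow_state');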
ALTER TABLE standard_flow ADD CONSTRAINT standard_flow_pk PRIMARY KEY (session_id);
CREATE EXTENSION IF NOT EXISTS btree_gin; -- enables GIN indexes on scalar types like text
CREATE INDEX ON standard_flow USING gin (state); -- GIN stays compact for a low-cardinality column
CREATE INDEX ON standard_flow (created_on);
CREATE INDEX ON standard_flow (last_state_change);
ALTER TABLE alternative_flow ADD CONSTRAINT alternative_flow_pk PRIMARY KEY (session_id);
CREATE INDEX ON alternative_flow (created_on);
CREATE INDEX ON alternative_flow_state (session_id);
CREATE INDEX ON alternative_flow_state USING gin (state);
CREATE INDEX ON alternative_flow_state (created_on);
-- bump the identity sequence past the pre-loaded session IDs so that new
-- sessions don't hit duplicate ID errors
SELECT setval('alternative_flow_session_id_seq', (select max(session_id) from alternative_flow));
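-- With the insert-only model the current state of a session is simply its
-- newest state row; a typical lookup sketch, served by the session_id index
-- (the session ID 42 is an arbitrary example value)
SELECT state, created_on
FROM alternative_flow_state
WHERE session_id = 42
ORDER BY created_on DESC
LIMIT 1;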
#!/bin/bash
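# Typical invocation (the script name is just an example; root is needed when
# DROP_CACHES_BETWEEN_TESTS=1 below, as writing to /proc/sys/vm/drop_caches requires it):
#   sudo ./run_test.sh 2>&1 | tee test_run.log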
set -e
# Assumes by default that the test cluster is on localhost
# The specified DB will also be used for the pg_stat_statements readouts at the end, for easier analysis
export PGHOST=/var/run/postgresql
export PGUSER=postgres
export PGDATABASE=postgres
PGBENCH=/usr/lib/postgresql/12/bin/pgbench
PSQL=/usr/lib/postgresql/12/bin/psql
CONN_STR= # specify remote connections etc
DURATION=14400 # seconds
CLIENTS=32 # parallel "user" sessions
JOBS=2 # management threads
DROP_CACHES_BETWEEN_TESTS=1 # clear the Linux buffer cache before starting any tests. NB! Assumes root if set to 1
INIT_SCHEMA=1
SCHEMA_INIT_FILE=./init-schema.sql
TEST_FILES="tx-standard.sql tx-alternative.sql"
echo "Testing connection..."
# NB! with "set -e" a bare command followed by a $? check would abort the
# script before the check runs, so test the command directly
if ! $PSQL $CONN_STR -qXc "select 1" &>/dev/null ; then
  echo "Could not connect to PG, check the connection params. Exiting ..."
  exit 1
fi
echo "OK"
echo "ensuring pg_stat_statements..."
$PSQL $CONN_STR -qXc "create extension if not exists pg_stat_statements"
echo "OK"
if [ "$INIT_SCHEMA" -gt 0 ]; then
  echo "Initializing the test schema from $SCHEMA_INIT_FILE ..."
  $PSQL $CONN_STR -f "$SCHEMA_INIT_FILE"
  echo "OK"
fi
echo ""
echo "Table sizes before test"
$PSQL $CONN_STR -c "\dt+ *flow*"
echo ""
echo "Resetting pg_stat_statements..."
$PSQL $CONN_STR -c "select pg_stat_statements_reset()"
echo "OK"
for test_file in $TEST_FILES; do
  if [ "$DROP_CACHES_BETWEEN_TESTS" -gt 0 ]; then
    echo "Dropping the OS page cache with:"
    echo "sync; echo 1 > /proc/sys/vm/drop_caches"
    sync # flush dirty pages first so they can actually be dropped
    echo 1 > /proc/sys/vm/drop_caches
    echo "OK"
  fi
  echo "Checkpointing..."
  $PSQL $CONN_STR -c "CHECKPOINT"
  echo "OK"
  echo "Starting the test run for file $test_file"
  echo "$PGBENCH $CONN_STR -n -r -c $CLIENTS -j $JOBS -T $DURATION -f $test_file"
  $PGBENCH $CONN_STR -n -r -c $CLIENTS -j $JOBS -T $DURATION -f $test_file
done
echo ""
echo "Finished running scripts"
echo ""
echo ""
echo "Table sizes after test"
$PSQL $CONN_STR -c "\dt+ *flow*"
echo ""
echo "pg_stat_statements data for standard UPDATE approach:"
$PSQL $CONN_STR -P pager=off -c "
  select mean_time, stddev_time, total_time::int8, calls,
         round(100 * shared_blks_hit::numeric / (shared_blks_hit + shared_blks_read), 1) as cache_hit_pct,
         blk_read_time, blk_write_time, query
  from pg_stat_statements
  where query ~ 'UPDATE standard_flow'
  order by calls desc limit 1;"
echo ""
echo "pg_stat_statements data for insert-only INSERTS:"
$PSQL $CONN_STR -P pager=off -c "
  select mean_time, stddev_time, total_time::int8, calls,
         round(100 * shared_blks_hit::numeric / (shared_blks_hit + shared_blks_read), 1) as cache_hit_pct,
         blk_read_time, blk_write_time, query
  from pg_stat_statements
  where query ~ 'INSERT INTO alternative_flow' and not query ~ 'RETURNING'
  order by calls desc limit 1;"
echo ""
echo "Done"
-- tx-alternative.sql: one "user session" advancing through 3 states in the insert-only model
SELECT (random() * max(session_id))::int8 AS session_id FROM alternative_flow \gset
INSERT INTO alternative_flow_state (session_id, state)
VALUES (:session_id, 'APPSTATE_1');
SELECT pg_sleep(0.1 * random());
INSERT INTO alternative_flow_state (session_id, state)
VALUES (:session_id, 'APPSTATE_2');
SELECT pg_sleep(0.1 * random());
INSERT INTO alternative_flow_state (session_id, state)
VALUES (:session_id, 'APPSTATE_3');
-- tx-standard.sql: the same state flow done with in-place UPDATEs
SELECT (random() * max(session_id))::int8 AS session_id FROM standard_flow \gset
UPDATE standard_flow
SET state = 'APPSTATE_1',
    last_state_change = clock_timestamp()
WHERE session_id = :session_id;
SELECT pg_sleep(0.1 * random());
UPDATE standard_flow
SET state = 'APPSTATE_2',
    last_state_change = clock_timestamp()
WHERE session_id = :session_id;
SELECT pg_sleep(0.1 * random());
UPDATE standard_flow
SET state = 'APPSTATE_3',
    last_state_change = clock_timestamp()
WHERE session_id = :session_id;
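-- NB! The max() lookup at the start of each transaction adds some overhead to
-- every iteration; with a known fixed row count the pgbench built-in random()
-- would be cheaper (sketch, assuming the 100M sessions from init-schema.sql):
-- \set session_id random(1, 100000000)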