Skip to content

Instantly share code, notes, and snippets.

@tzaffi
Last active August 9, 2023 18:28
Show Gist options
  • Save tzaffi/3c3d25b7e30e5b53e6d5d5772736faf6 to your computer and use it in GitHub Desktop.
Save tzaffi/3c3d25b7e30e5b53e6d5d5772736faf6 to your computer and use it in GitHub Desktop.
Performant stats for Indexer's `txn` table
-- 1. Approximate the indexer table row counts:
SELECT
psut.relname AS table_name,
pc.reltuples::BIGINT AS estimated_count,
pg_total_relation_size(psut.schemaname || '.' || psut.relname) AS estimated_size,
pg_size_pretty(pg_total_relation_size(psut.schemaname || '.' || psut.relname)) AS size_human
FROM
pg_stat_user_tables psut
INNER JOIN
pg_class pc
ON
pc.oid = (psut.schemaname || '.' || psut.relname)::regclass
WHERE
psut.schemaname = 'public';
/*
{
"table_name": "txn",
"estimated_count": "1107932160",
"estimated_size": "1537212612608",
"size_human": "1432 GB"
},
*/
-- so estimated_count = 1,107,932,160
-- 2. Get the range of "secret" CTID's for `txn`
WITH t1 AS (
SELECT ctid, round, intra FROM txn ORDER BY round, intra LIMIT 1
)
SELECT * FROM t1
UNION
SELECT ctid, round, intra FROM txn WHERE round = 31026402 AND intra = 0
ORDER BY round, intra;
/*
[
{
"ctid": "(0,1)",
"round": "62440",
"intra": 0
},
{
"ctid": "(151214485,8)",
"round": "31026402",
"intra": 0
}
]
*/
-- so the CTID for round 31,026,402 is (151214485,8)
-- 3. Hack for efficiently querying Indexer's `txn` table
-- to produces a reasonable distribution of
-- #txns as a function of round
-- you need to fill in the following "magic constants" as they appear
-- A) the CTID of step #2 in row 3
-- B) the row estimated_count of step #1 in row
WITH ctids AS (
SELECT
n::decimal / 100 AS portion, ('(' || round((n::decimal / 100) * 151214485)::bigint || ',1)')::tid AS ctid
FROM generate_series(0, 100) AS t(n)
),
txns AS (
SELECT t.round, t.intra, CEIL(c.portion * 1107932160) AS txns, c.ctid
FROM ctids AS c
INNER JOIN txn AS t ON t.ctid = c.ctid
)
SELECT * from txns;
-- est AS (
-- SELECT round, portion * 151214485 AS txns
-- FROM ctids
-- )
-- SELECT * FROM est;
-- SELECT * FROM ctids;
-- another useful query for getting the latest round
SELECT MAX(round) from "block_header";
-- 31,043,471
WITH trailing_times AS (
SELECT
round,
realtime,
LAG(realtime) OVER (ORDER BY round) as prev_realtime
FROM block_header
WHERE round >= (SELECT MAX(round) FROM block_header) - 1000000
),
durations AS (
SELECT round,
realtime as coarse_round_time,
COALESCE(EXTRACT(EPOCH FROM (realtime - prev_realtime)), 3) AS coarse_round_duration
FROM trailing_times
),
stats AS (
SELECT
AVG(coarse_round_duration) AS mean_duration,
STDDEV(coarse_round_duration) AS stdev_duration,
MIN(coarse_round_duration) AS min_duration,
MAX(coarse_round_duration) AS max_duration
FROM durations
),
histogram AS (
SELECT
CASE
WHEN coarse_round_duration >= 0 AND coarse_round_duration < 5 THEN '0-4'
WHEN coarse_round_duration >= 5 AND coarse_round_duration < 10 THEN '5-9'
WHEN coarse_round_duration >= 10 AND coarse_round_duration < 15 THEN '10-14'
WHEN coarse_round_duration >= 15 AND coarse_round_duration < 20 THEN '15-19'
WHEN coarse_round_duration >= 20 AND coarse_round_duration < 25 THEN '20-24'
ELSE 'Other'
END AS bucket,
COUNT(*) AS freq
FROM durations
GROUP BY 1
)
SELECT
mean_duration,
stdev_duration,
min_duration,
max_duration,
COALESCE(MAX(CASE WHEN bucket = '0-4' THEN freq END), NULL) AS "0-4",
COALESCE(MAX(CASE WHEN bucket = '5-9' THEN freq END), NULL) AS "5-9",
COALESCE(MAX(CASE WHEN bucket = '10-14' THEN freq END), NULL) AS "10-14",
COALESCE(MAX(CASE WHEN bucket = '15-19' THEN freq END), NULL) AS "15-19",
COALESCE(MAX(CASE WHEN bucket = '20-24' THEN freq END), NULL) AS "20-24"
FROM stats
CROSS JOIN histogram
GROUP BY mean_duration, stdev_duration, min_duration, max_duration;
/* RESULT SET on 09Aug2023
mean_duration stdev_duration min_duration max_duration 0-4 5-9 10-14 15-19 20-24
3.400925599 0.507927656 0 22 999803 148 0 0 50
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment