Josh Berkus (jberkus)
jberkus / gist:3296689
Created August 8, 2012 17:08
Crash jump
breakpad=# select date_trunc('day', date_processed), count(*) from reports where date_processed between '2012-08-01' and '2012-08-08' group by 1 order by 1;
date_trunc | count
------------------------+--------
2012-08-01 00:00:00+00 | 481349
2012-08-02 00:00:00+00 | 483259
2012-08-03 00:00:00+00 | 480551
2012-08-04 00:00:00+00 | 423422
2012-08-05 00:00:00+00 | 429353
2012-08-06 00:00:00+00 | 508709
2012-08-07 00:00:00+00 | 909945
jberkus / xlog_numeric_location.sql
Last active February 20, 2016 20:30
Determine Most Caught-up Standby
-- For documentation on these functions, please see blog post at:
-- http://www.databasesoup.com/2012/10/determining-furthest-ahead-replica.html
-- determines current xlog location as a monotonically increasing numeric.
CREATE OR REPLACE FUNCTION xlog_location_numeric(vcloc text default NULL)
RETURNS numeric
LANGUAGE plpgsql
as $f$
DECLARE cloc text;
floc text[];
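Only the declarations are shown above, so here is a minimal sketch of the underlying conversion idea rather than the gist's code: an xlog location such as '16/B374D848' is two 32-bit hex halves, and high * 2^32 + low gives a number that only grows as WAL advances. (On PostgreSQL 9.4 and later the built-in pg_lsn type compares correctly on its own.)
-- Hedged sketch, not the gist's function: turn a text xlog location into a
-- single monotonically increasing numeric.  The sample value is arbitrary.
SELECT ('x' || lpad(split_part(loc, '/', 1), 16, '0'))::bit(64)::bigint::numeric * 4294967296
     + ('x' || lpad(split_part(loc, '/', 2), 16, '0'))::bit(64)::bigint AS numeric_location
FROM (SELECT '16/B374D848'::text AS loc) AS sample;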
jberkus / gist:9815260
Created March 27, 2014 18:54
Flexible Freeze outline
Specification:
BASIC VERSION
Config File:
Series of time windows for "low traffic" points.
e.g. "Sunday 1am-4am"
Postgres connection info for 1 database
vacuum settings
min_freeze threshold (default 1,000,000)
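A minimal sketch of how the min_freeze threshold could be applied, assuming it is an age-in-transactions cutoff (the outline does not spell this out):
-- Hedged sketch, not part of the gist: tables whose frozen-XID age exceeds
-- the threshold, oldest first; 1,000,000 mirrors the default above.
SELECT c.oid::regclass AS table_name, age(c.relfrozenxid) AS xid_age
FROM pg_class c
WHERE c.relkind = 'r'
  AND age(c.relfrozenxid) > 1000000
ORDER BY age(c.relfrozenxid) DESC;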
WITH btree_index_atts AS (
SELECT nspname, relname, reltuples, relpages, indrelid, relam,
regexp_split_to_table(indkey::text, ' ')::smallint AS attnum,
indexrelid as index_oid
FROM pg_index
JOIN pg_class ON pg_class.oid=pg_index.indexrelid
JOIN pg_namespace ON pg_namespace.oid = pg_class.relnamespace
JOIN pg_am ON pg_class.relam = pg_am.oid
WHERE pg_am.amname = 'btree'
),
1) alter postgresql.conf
checkpoint_segments = 100
checkpoint_completion_target = 0.9
appropriate shared_buffers
2) initialize pgbench with pgbench -s 1000 -i bench
do one run on the SSD, and if that looks good, one on the HDD
3) run pgbench test with:
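A run along the lines of pgbench -c 16 -j 4 -T 1800 -r bench would fit this setup (-c concurrent clients, -j worker threads, -T run time in seconds, -r per-statement latency reporting); the client count and duration here are illustrative guesses rather than the outline's actual values.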
jberkus / gist:6b1bcaf7724dfc2a54f3
Last active January 7, 2024 21:26
Finding Unused Indexes
WITH table_scans as (
SELECT relid,
tables.idx_scan + tables.seq_scan as all_scans,
( tables.n_tup_ins + tables.n_tup_upd + tables.n_tup_del ) as writes,
pg_relation_size(relid) as table_size
FROM pg_stat_user_tables as tables
),
all_writes as (
SELECT sum(writes) as total_writes
FROM table_scans
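The full query is longer than this preview; as a minimal sketch of the same idea, pg_stat_user_indexes already exposes per-index scan counts, so indexes that have never been scanned can be listed directly:
-- Hedged sketch, not the gist's query: indexes with zero scans since the
-- statistics were last reset, largest first.  Primary keys and unique
-- constraints can show zero scans and still be needed, so review before dropping.
SELECT schemaname, relname AS table_name, indexrelname AS index_name,
       pg_size_pretty(pg_relation_size(indexrelid)) AS index_size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;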
jberkus / flexible_freeze.py
Last active August 29, 2015 14:01
Flexible Freeze Script, Version 0.1
'''THIS SCRIPT HAS BEEN REPLACED WITH THE PROJECT AT https://github.com/jberkus/flexible-freeze'''
'''Flexible Freeze script for PostgreSQL databases
Version 0.3
(c) 2014 PostgreSQL Experts Inc.
Licensed under The PostgreSQL License
This script is designed for doing VACUUM FREEZE or VACUUM ANALYZE runs
on your database during known slow traffic periods. If doing both
vacuum freezes and vacuum analyzes, do the freezes first.
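The ordering advice is easy to picture as the two per-table statements such a run would issue; "accounts" is only a placeholder table name:
-- Hedged illustration, not the script's code: freeze pass first,
-- analyze pass second, per the advice above.
VACUUM FREEZE accounts;
VACUUM ANALYZE accounts;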
jberkus / gist:e4cadd6b8877c3bc59c8
Created September 19, 2014 00:36
Duplicate Index Query #1: Exact Duplicates
-- check for exact matches
WITH index_cols_ord as (
SELECT attrelid, attnum, attname
FROM pg_attribute
JOIN pg_index ON indexrelid = attrelid
WHERE indkey[0] > 0
ORDER BY attrelid, attnum
),
index_col_list AS (
SELECT attrelid,
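The preview cuts off early; a compact sketch of the same exact-match check groups indexes on one table by their column list:
-- Hedged sketch, not the gist's query: indexes on the same table with the
-- same column list.  It ignores operator classes, expression indexes, and
-- partial-index predicates, so treat matches as candidates to inspect.
SELECT indrelid::regclass AS table_name,
       array_agg(indexrelid::regclass) AS duplicate_indexes
FROM pg_index
GROUP BY indrelid, indkey::text
HAVING count(*) > 1;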
jberkus / gist:6bbffae5ce10fb399d29
Last active January 13, 2018 06:55
Duplicate Index Query #2: Partial matches
-- check for containment
-- i.e. index A contains index B
-- and both share the same first column
-- but they are NOT identical
WITH index_cols_ord as (
SELECT attrelid, attnum, attname
FROM pg_attribute
JOIN pg_index ON indexrelid = attrelid
WHERE indkey[0] > 0
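As a rough sketch of the containment idea described above (again, not the gist's query): an index whose column list is a strict prefix of another index on the same table is usually redundant.
-- Hedged sketch: index pairs on the same table where the second index's
-- columns are a strict prefix of the first's.  Expression indexes and
-- partial-index predicates are ignored here.
SELECT a.indexrelid::regclass AS containing_index,
       b.indexrelid::regclass AS contained_index
FROM pg_index a
JOIN pg_index b
  ON a.indrelid = b.indrelid
 AND a.indexrelid <> b.indexrelid
WHERE a.indkey::text LIKE b.indkey::text || ' %';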
jberkus / gist:6c1d8e3991d6de4e869a
Created October 8, 2014 21:11
Interim Table Bloat Query
SELECT * FROM (
SELECT
schemaname, tablename,
ROUND(CASE WHEN otta=0 THEN 0.0 ELSE ((sml.relpages/otta::numeric) * 100 - 100) END) AS tbloat,
CASE WHEN relpages < otta THEN 0 ELSE round(bs*(sml.relpages-otta)::numeric / (1024^2)::numeric , 2 ) END AS wastedmb,
ROUND(AVG(CASE WHEN iotta >= ipages THEN 0
WHEN iotta = 0 THEN 0
ELSE (ipages/iotta::numeric) * 100 - 100 END), -1) AS idxbloat,
SUM(CASE WHEN ipages < iotta THEN 0 ELSE round( bs*(ipages-iotta)::numeric / (1024^2)::numeric, 2 ) END) AS wastedidxmb
FROM (