SQL-y things
/* ***************************************************
Using HAVING to filter on aggregate function results
means not having to select from and filter a subquery.
I wasn't sure whether repeating the aggregate call in
the HAVING clause would re-run it, so I built a large
data set and timed queries with and without the HAVING.

TL;DR:
`SELECT agg(..) FROM .. GROUP BY .. HAVING agg(..)`
does not repeat the aggregate function call,
so this is a pretty awesome trick!
*************************************************** */
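/*
For reference, this is the subquery form the HAVING trick avoids
(a sketch, reusing the data set and the 40000 threshold from the
timed queries below): aggregate in an inner query, then select
from it and filter on the aggregated column.
*/
with t as (select generate_series(1, 500) as i)
select i1, i2_sum
from (
    select t1.i as i1, sum(t2.i) as i2_sum
    from t t1
    join t as t2 on t2.i <= t1.i
    group by t1.i
) sums
where i2_sum > 40000
;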
/*
Generate a series in a CTE and join it to itself twice
to create a large, uneven data set:
i1 | i2 | i3
----+----+----
1 | 1 | 1
2 | 1 | 1
2 | 2 | 1
2 | 2 | 2
3 | 1 | 1
3 | 2 | 1
3 | 2 | 2
3 | 3 | 1
3 | 3 | 2
3 | 3 | 3
4 | 1 | 1
...
10 | 10 | 8
10 | 10 | 9
10 | 10 | 10
(220 rows)
*/
with t as (select generate_series(1, 10) as i)
select t1.i as i1, t2.i as i2, t3.i as i3
from t t1
join t as t2 on t2.i <= t1.i
join t as t3 on t3.i <= t2.i
order by t1.i, t2.i, t3.i
;
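/*
Sanity check (a quick sketch, not one of the timed runs): with
i3 <= i2 <= i1 over a series of 10, the row count should be the
10th tetrahedral number, C(12, 3) = 220, matching the output above.
*/
with t as (select generate_series(1, 10) as i)
select count(*)
from t t1
join t as t2 on t2.i <= t1.i
join t as t3 on t3.i <= t2.i
;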
-- Up the size and do a GROUP BY w/o aggregate functions
-- to use as the "base" measurement
explain analyze
with t as (select generate_series(1, 500) as i)
select t1.i as i1
from t t1
join t as t2 on t2.i <= t1.i
join t as t3 on t3.i <= t2.i
group by t1.i
order by t1.i
;
/*
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=6604170.49..6604170.99 rows=200 width=4) (actual time=30567.533..30567.574 rows=500 loops=1)
Sort Key: t1.i
Sort Method: quicksort Memory: 48kB
CTE t
-> ProjectSet (cost=0.00..5.02 rows=1000 width=4) (actual time=0.015..0.348 rows=500 loops=1)
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.007..0.007 rows=1 loops=1)
-> HashAggregate (cost=6604155.83..6604157.83 rows=200 width=4) (actual time=30567.197..30567.320 rows=500 loops=1)
Group Key: t1.i
-> Nested Loop (cost=0.00..6326378.33 rows=111111000 width=4) (actual time=0.065..23867.591 rows=20958500 loops=1)
Join Filter: (t3.i <= t2.i)
Rows Removed by Join Filter: 41666500
-> CTE Scan on t t3 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.029..0.567 rows=500 loops=1)
-> Materialize (cost=0.00..25499.67 rows=333333 width=8) (actual time=0.006..23.979 rows=125250 loops=500)
-> Nested Loop (cost=0.00..22530.00 rows=333333 width=8) (actual time=0.022..110.981 rows=125250 loops=1)
Join Filter: (t2.i <= t1.i)
Rows Removed by Join Filter: 124750
-> CTE Scan on t t1 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.007..0.157 rows=500 loops=1)
-> CTE Scan on t t2 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.000..0.100 rows=500 loops=500)
Planning Time: 0.285 ms
Execution Time: 30568.055 ms
*/
-- Add `sum` aggregations
explain analyze
with t as (select generate_series(1, 500) as i)
select t1.i as i1, sum(t2.i) as i2_sum, sum(t3.i) as i3_sum
from t t1
join t as t2 on t2.i <= t1.i
join t as t3 on t3.i <= t2.i
group by t1.i
order by t1.i
;
/*
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=7159725.49..7159725.99 rows=200 width=20) (actual time=31786.370..31786.411 rows=500 loops=1)
Sort Key: t1.i
Sort Method: quicksort Memory: 64kB
CTE t
-> ProjectSet (cost=0.00..5.02 rows=1000 width=4) (actual time=0.004..0.209 rows=500 loops=1)
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.001..0.002 rows=1 loops=1)
-> HashAggregate (cost=7159710.83..7159712.83 rows=200 width=20) (actual time=31785.967..31786.132 rows=500 loops=1)
Group Key: t1.i
-> Nested Loop (cost=0.00..6326378.33 rows=111111000 width=12) (actual time=0.019..23993.921 rows=20958500 loops=1)
Join Filter: (t3.i <= t2.i)
Rows Removed by Join Filter: 41666500
-> CTE Scan on t t3 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.008..0.526 rows=500 loops=1)
-> Materialize (cost=0.00..25499.67 rows=333333 width=8) (actual time=0.006..23.715 rows=125250 loops=500)
-> Nested Loop (cost=0.00..22530.00 rows=333333 width=8) (actual time=0.006..100.399 rows=125250 loops=1)
Join Filter: (t2.i <= t1.i)
Rows Removed by Join Filter: 124750
-> CTE Scan on t t1 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.001..0.115 rows=500 loops=1)
-> CTE Scan on t t2 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.000..0.090 rows=500 loops=500)
Planning Time: 0.169 ms
Execution Time: 31786.840 ms
*/
-- Add a `HAVING` clause filtering on the sums
explain analyze
with t as (select generate_series(1, 500) as i)
select t1.i as i1, sum(t2.i) as i2_sum, sum(t3.i) as i3_sum
from t t1
join t as t2 on t2.i <= t1.i
join t as t3 on t3.i <= t2.i
group by t1.i
having sum(t2.i) > 40000 and sum(t3.i) > 30000
order by t1.i
;
/*
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=7715274.34..7715274.40 rows=22 width=20) (actual time=32677.428..32677.465 rows=445 loops=1)
Sort Key: t1.i
Sort Method: quicksort Memory: 59kB
CTE t
-> ProjectSet (cost=0.00..5.02 rows=1000 width=4) (actual time=0.004..0.174 rows=500 loops=1)
-> Result (cost=0.00..0.01 rows=1 width=0) (actual time=0.002..0.002 rows=1 loops=1)
-> HashAggregate (cost=7715265.83..7715268.83 rows=22 width=20) (actual time=32677.017..32677.203 rows=445 loops=1)
Group Key: t1.i
Filter: ((sum(t2.i) > 40000) AND (sum(t3.i) > 30000))
Rows Removed by Filter: 55
-> Nested Loop (cost=0.00..6326378.33 rows=111111000 width=12) (actual time=0.018..24711.548 rows=20958500 loops=1)
Join Filter: (t3.i <= t2.i)
Rows Removed by Join Filter: 41666500
-> CTE Scan on t t3 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.008..0.592 rows=500 loops=1)
-> Materialize (cost=0.00..25499.67 rows=333333 width=8) (actual time=0.006..24.599 rows=125250 loops=500)
-> Nested Loop (cost=0.00..22530.00 rows=333333 width=8) (actual time=0.005..98.609 rows=125250 loops=1)
Join Filter: (t2.i <= t1.i)
Rows Removed by Join Filter: 124750
-> CTE Scan on t t1 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.001..0.106 rows=500 loops=1)
-> CTE Scan on t t2 (cost=0.00..20.00 rows=1000 width=4) (actual time=0.000..0.087 rows=500 loops=500)
Planning Time: 0.220 ms
Execution Time: 32679.939 ms
*/
/*
CONCLUSION:
Including a `HAVING` clause did not appear to re-run any aggregations
(as one would hope): the HAVING shows up in the plan only as a `Filter`
on the existing `HashAggregate` node, and the timing differences between
runs appeared in the join/`Materialize` phases, prior to any filtering.
*/
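/*
A further check (a sketch, not part of the timed runs above): wrap the
aggregate's input in a volatile logging function and repeat the aggregate
in both SELECT and HAVING. If each input value is logged once rather than
twice, the aggregate was computed a single time.
*/
create or replace function noisy(x int) returns int
language plpgsql volatile as $$
begin
    raise notice 'evaluating %', x;
    return x;
end
$$;

select i % 2 as grp, sum(noisy(i)) as total
from generate_series(1, 4) as i
group by i % 2
having sum(noisy(i)) > 0
;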