luhn/README.md

## README.md

      
    Raw
  

              README.md
            
          
    This reproduction was performed on Ubuntu 14.04 with a fresh install of PostgreSQL 9.3.14, installed directly from the official Postgres apt repo.  The issue only reproduces if the query planner chooses the attached plan (the one using HashAggregate; see query-plan.txt).
Create and populate a new database:
psql -U postgres -c "create database test;"
psql -U postgres test < populate.sql
Open a connection to the database and execute query.sql.  The memory usage of the Postgres worker process will rise by approximately 50MB and will not be released until the connection is closed.

  
## populate.sql
-- Venues. The key is a plain INTEGER with no sequence behind it, so callers
-- supply the id themselves.
CREATE TABLE venue (
	id INTEGER,
	CONSTRAINT venue_pkey PRIMARY KEY (id)
);

-- Guests. Every guest row must reference an existing venue.
CREATE TABLE guest (
	id       SERIAL,
	venue_id INTEGER NOT NULL,
	CONSTRAINT guest_pkey PRIMARY KEY (id),
	CONSTRAINT guest_venue_id_fkey FOREIGN KEY (venue_id) REFERENCES venue (id)
);

-- Reservations. Each row references both the guest it belongs to and a venue.
CREATE TABLE reservation (
	id       SERIAL,
	guest_id INTEGER NOT NULL,
	venue_id INTEGER NOT NULL,
	CONSTRAINT reservation_pkey PRIMARY KEY (id),
	CONSTRAINT reservation_guest_id_fkey FOREIGN KEY (guest_id) REFERENCES guest (id),
	CONSTRAINT reservation_venue_id_fkey FOREIGN KEY (venue_id) REFERENCES venue (id)
);

-- Fill the schema with fixture data:
--   10 venues (ids 1..10), 1000 guests per venue, 10 reservations per guest,
--   i.e. 10 venue rows, 10,000 guest rows and 100,000 reservation rows.
-- Takes no arguments and returns nothing; its only effect is the inserted rows.
CREATE FUNCTION populate() RETURNS VOID AS $$
DECLARE
	-- Receives the SERIAL id generated for the guest row just inserted.
	-- Deliberately not called "guest_id" so it cannot be mistaken for the
	-- reservation.guest_id column inside the INSERT below.
	new_guest_id INTEGER;
BEGIN
	-- An integer FOR loop defines its own loop variable, visible only inside
	-- the loop, so the counters need no entries in the DECLARE section.
	FOR venue_no IN 1..10 LOOP
		INSERT INTO venue(id) VALUES (venue_no);

		FOR guest_no IN 1..1000 LOOP
			INSERT INTO guest(venue_id) VALUES (venue_no) RETURNING id INTO new_guest_id;

			-- Each reservation points at the guest created above and at that guest's venue.
			FOR reservation_no IN 1..10 LOOP
				INSERT INTO reservation(guest_id, venue_id) VALUES (new_guest_id, venue_no);
			END LOOP;
		END LOOP;
	END LOOP;
END;
$$ LANGUAGE plpgsql;

-- Load the fixture data right away.
SELECT populate();

## query-plan.txt
                                                         QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------
 HashAggregate  (cost=2364.30..2490.68 rows=10110 width=8) (actual time=29.270..33.212 rows=10000 loops=1)
   ->  Hash Join  (cost=270.00..2313.75 rows=10110 width=8) (actual time=4.263..18.959 rows=10000 loops=1)
         Hash Cond: (reservation.guest_id = guest.id)
         ->  Seq Scan on reservation  (cost=0.00..1791.00 rows=10110 width=8) (actual time=0.007..10.111 rows=10000 loops=1)
               Filter: (venue_id = 1)
               Rows Removed by Filter: 90000
         ->  Hash  (cost=145.00..145.00 rows=10000 width=4) (actual time=4.250..4.250 rows=10000 loops=1)
               Buckets: 1024  Batches: 1  Memory Usage: 352kB
               ->  Seq Scan on guest  (cost=0.00..145.00 rows=10000 width=4) (actual time=0.004..2.063 rows=10000 loops=1)
 Total runtime: 35.944 ms
(10 rows)

## query.sql
-- For every reservation at venue 1, gather the ids of the guest rows it joins
-- to into an array; the result has one row per reservation id.
-- Tables are left unaliased so EXPLAIN output keeps the plain table names.
SELECT
	reservation.id,
	array_agg(guest.id) AS guest_ids
FROM reservation
INNER JOIN guest
	ON reservation.guest_id = guest.id
WHERE reservation.venue_id = 1
GROUP BY reservation.id;

## setup.sh
# Provision an Ubuntu "trusty" machine with PostgreSQL 9.3 from the PGDG apt
# repository, create the "test" database, load the fixture data into it and
# raise work_mem to 128MB.

# Bring the base system up to date.
sudo apt-get update && sudo apt-get upgrade -y

# Register the PGDG apt repository and trust its signing key.
sudo bash -c 'echo "deb http://apt.postgresql.org/pub/repos/apt/ trusty-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | \
  sudo apt-key add -
sudo apt-get update

# Install the 9.3 server and its -dbg companion package.
sudo apt-get install -y postgresql-9.3 postgresql-9.3-dbg

# Create the database, then stream populate.sql straight into psql.
# --fail makes curl exit with an error on an HTTP failure instead of piping
# the server's error page into psql.
sudo -u postgres psql -c "create database test;"
curl --fail --silent --show-error https://gist.githubusercontent.com/luhn/2b35a9b31255e3a6a2e6a06d1213dfc9/raw/d7a4fb433c3bb8f60f0e12e502adef39e57d4369/populate.sql | sudo -u postgres psql test

# Adjust work_mem to 128MB without opening an editor. When postgresql.conf
# sets a parameter more than once the last entry is the one used, so an
# appended line overrides the stock setting. Reload so the server re-reads
# the file.
echo "work_mem = 128MB" | sudo tee -a /etc/postgresql/9.3/main/postgresql.conf > /dev/null
sudo service postgresql reload
	-- Venues. The key is a plain INTEGER with no sequence behind it, so callers
	-- supply the id themselves.
	CREATE TABLE venue (
		id INTEGER,
		CONSTRAINT venue_pkey PRIMARY KEY (id)
	);

	-- Guests. Every guest row must reference an existing venue.
	CREATE TABLE guest (
		id       SERIAL,
		venue_id INTEGER NOT NULL,
		CONSTRAINT guest_pkey PRIMARY KEY (id),
		CONSTRAINT guest_venue_id_fkey FOREIGN KEY (venue_id) REFERENCES venue (id)
	);

	-- Reservations. Each row references both the guest it belongs to and a venue.
	CREATE TABLE reservation (
		id       SERIAL,
		guest_id INTEGER NOT NULL,
		venue_id INTEGER NOT NULL,
		CONSTRAINT reservation_pkey PRIMARY KEY (id),
		CONSTRAINT reservation_guest_id_fkey FOREIGN KEY (guest_id) REFERENCES guest (id),
		CONSTRAINT reservation_venue_id_fkey FOREIGN KEY (venue_id) REFERENCES venue (id)
	);

	-- Fill the schema with fixture data:
	--   10 venues (ids 1..10), 1000 guests per venue, 10 reservations per guest,
	--   i.e. 10 venue rows, 10,000 guest rows and 100,000 reservation rows.
	-- Takes no arguments and returns nothing; its only effect is the inserted rows.
	CREATE FUNCTION populate() RETURNS VOID AS $$
	DECLARE
		-- Receives the SERIAL id generated for the guest row just inserted.
		-- Deliberately not called "guest_id" so it cannot be mistaken for the
		-- reservation.guest_id column inside the INSERT below.
		new_guest_id INTEGER;
	BEGIN
		-- An integer FOR loop defines its own loop variable, visible only inside
		-- the loop, so the counters need no entries in the DECLARE section.
		FOR venue_no IN 1..10 LOOP
			INSERT INTO venue(id) VALUES (venue_no);

			FOR guest_no IN 1..1000 LOOP
				INSERT INTO guest(venue_id) VALUES (venue_no) RETURNING id INTO new_guest_id;

				-- Each reservation points at the guest created above and at that guest's venue.
				FOR reservation_no IN 1..10 LOOP
					INSERT INTO reservation(guest_id, venue_id) VALUES (new_guest_id, venue_no);
				END LOOP;
			END LOOP;
		END LOOP;
	END;
	$$ LANGUAGE plpgsql;

	-- Load the fixture data right away.
	SELECT populate();
	QUERY PLAN
	-----------------------------------------------------------------------------------------------------------------------------
	HashAggregate (cost=2364.30..2490.68 rows=10110 width=8) (actual time=29.270..33.212 rows=10000 loops=1)
	-> Hash Join (cost=270.00..2313.75 rows=10110 width=8) (actual time=4.263..18.959 rows=10000 loops=1)
	Hash Cond: (reservation.guest_id = guest.id)
	-> Seq Scan on reservation (cost=0.00..1791.00 rows=10110 width=8) (actual time=0.007..10.111 rows=10000 loops=1)
	Filter: (venue_id = 1)
	Rows Removed by Filter: 90000
	-> Hash (cost=145.00..145.00 rows=10000 width=4) (actual time=4.250..4.250 rows=10000 loops=1)
	Buckets: 1024 Batches: 1 Memory Usage: 352kB
	-> Seq Scan on guest (cost=0.00..145.00 rows=10000 width=4) (actual time=0.004..2.063 rows=10000 loops=1)
	Total runtime: 35.944 ms
	(10 rows)
	-- For every reservation at venue 1, gather the ids of the guest rows it joins
	-- to into an array; the result has one row per reservation id.
	-- Tables are left unaliased so EXPLAIN output keeps the plain table names.
	SELECT
		reservation.id,
		array_agg(guest.id) AS guest_ids
	FROM reservation
	INNER JOIN guest
		ON reservation.guest_id = guest.id
	WHERE reservation.venue_id = 1
	GROUP BY reservation.id;
	# Provision an Ubuntu "trusty" machine with PostgreSQL 9.3 from the PGDG apt
	# repository, create the "test" database, load the fixture data into it and
	# raise work_mem to 128MB.

	# Bring the base system up to date.
	sudo apt-get update && sudo apt-get upgrade -y

	# Register the PGDG apt repository and trust its signing key.
	# The pipe must be a bare "|": a backslash-escaped one would be handed to
	# wget as a literal argument instead of connecting the two commands.
	sudo bash -c 'echo "deb http://apt.postgresql.org/pub/repos/apt/ trusty-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
	wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | \
	sudo apt-key add -
	sudo apt-get update

	# Install the 9.3 server and its -dbg companion package.
	sudo apt-get install -y postgresql-9.3 postgresql-9.3-dbg

	# Create the database, then stream populate.sql straight into psql.
	# --fail makes curl exit with an error on an HTTP failure instead of piping
	# the server's error page into psql.
	sudo -u postgres psql -c "create database test;"
	curl --fail --silent --show-error https://gist.githubusercontent.com/luhn/2b35a9b31255e3a6a2e6a06d1213dfc9/raw/d7a4fb433c3bb8f60f0e12e502adef39e57d4369/populate.sql | sudo -u postgres psql test

	# Adjust work_mem to 128MB without opening an editor. When postgresql.conf
	# sets a parameter more than once the last entry is the one used, so an
	# appended line overrides the stock setting. Reload so the server re-reads
	# the file.
	echo "work_mem = 128MB" | sudo tee -a /etc/postgresql/9.3/main/postgresql.conf > /dev/null
	sudo service postgresql reload