sovetnik/books.sql

## books.sql

-- Create authors
CREATE TABLE authors(id serial PRIMARY KEY, name char(255));

-- Single author of many books
INSERT INTO authors(name)
VALUES ('William Gibson');

-- Collective of authors of one book
INSERT INTO authors(name)
VALUES ('Erich Gamma'), ('Richard Helm'), ('Ralph Johnson'), ('John Vlissides ');

-- Main author and collaborator
INSERT INTO authors(name)
VALUES ('Daniel Kahneman'), ('Amos Twersky');


-- Create books
CREATE TABLE books(id serial PRIMARY KEY, title char(255));

-- William Gibson books
INSERT INTO books(title)
VALUES('Neuromancer'), ('Count Zero'), ('Mona Lisa Overdrive');

-- GoF book
INSERT INTO books(title)
VALUES('Design Patterns: Elements of Reusable Object-Oriented Software');

-- Only Kahneman
INSERT INTO books(title)
VALUES('Thinking, Fast and Slow');

-- Kahneman and Tversky as coauthor
INSERT INTO books(title)
VALUES('Choices, Values and Frames');


-- Add authors and books relations
CREATE TABLE book_authors(
  author_id int REFERENCES authors(id),
  book_id int REFERENCES books(id),
  main_author boolean
);

-- with constraints
-- book can be written only once
CREATE UNIQUE INDEX authorship_idx
ON book_authors(author_id, book_id);

-- book can have only one main author
CREATE UNIQUE INDEX main_author_idx
ON book_authors(book_id, main_author)
WHERE main_author = TRUE;

--and data
INSERT INTO book_authors(author_id, book_id, main_author)
VALUES (1,1, FALSE), (1,2, FALSE), (1,3, FALSE),
(2,4, FALSE), (3,4, FALSE), (4,4, FALSE), (5,4, FALSE),
(6,5, FALSE), (6,6, TRUE), (7,6, FALSE);


-- Query to get all authors not seen in collaboration
EXPLAIN WITH
books_aggregate(book_id, authors_count) as(
    SELECT book_id, count(author_id) as authors_count
    FROM book_authors
    GROUP BY book_id
    ORDER BY book_id
    ),
collective_books(id) as (
    SELECT book_id
    FROM books_aggregate
    WHERE authors_count > 1
    ),
collective_authors(id) as (
    select ba.author_id
    FROM book_authors ba
    JOIN collective_books cb
    ON ba.book_id = cb.id
    )
SELECT *
FROM authors a
WHERE  a.id
NOT IN (SELECT id FROM collective_authors);

/* +----------------------------------------------------------------------------------------+ */
/* | QUERY PLAN                                                                             | */
/* |----------------------------------------------------------------------------------------| */
/* | Seq Scan on authors a  (cost=126.40..137.27 rows=35 width=1028)                        | */
/* |   Filter: (NOT (hashed SubPlan 4))                                                     | */
/* |   CTE books_aggregate                                                                  | */
/* |     ->  Sort  (cost=53.54..54.04 rows=200 width=12)                                    | */
/* |           Sort Key: book_authors.book_id                                               | */
/* |           ->  HashAggregate  (cost=43.90..45.90 rows=200 width=12)                     | */
/* |                 Group Key: book_authors.book_id                                        | */
/* |                 ->  Seq Scan on book_authors  (cost=0.00..32.60 rows=2260 width=8)     | */
/* |   CTE collective_books                                                                 | */
/* |     ->  CTE Scan on books_aggregate  (cost=0.00..4.50 rows=67 width=4)                 | */
/* |           Filter: (authors_count > 1)                                                  | */
/* |   CTE collective_authors                                                               | */
/* |     ->  Hash Join  (cost=2.18..50.82 rows=757 width=4)                                 | */
/* |           Hash Cond: (ba.book_id = cb.id)                                              | */
/* |           ->  Seq Scan on book_authors ba  (cost=0.00..32.60 rows=2260 width=8)        | */
/* |           ->  Hash  (cost=1.34..1.34 rows=67 width=4)                                  | */
/* |                 ->  CTE Scan on collective_books cb  (cost=0.00..1.34 rows=67 width=4) | */
/* |   SubPlan 4                                                                            | */
/* |     ->  CTE Scan on collective_authors  (cost=0.00..15.14 rows=757 width=4)            | */
/* +----------------------------------------------------------------------------------------+ */

CREATE INDEX book_idx ON book_authors(book_id);

-- Same query after ad index
/* +---------------------------------------------------------------------------------------+ */
/* | QUERY PLAN                                                                            | */
/* |---------------------------------------------------------------------------------------| */
/* | Seq Scan on authors a  (cost=3.00..13.87 rows=35 width=1028)                          | */
/* |   Filter: (NOT (hashed SubPlan 4))                                                    | */
/* |   CTE books_aggregate                                                                 | */
/* |     ->  GroupAggregate  (cost=1.27..1.44 rows=10 width=12)                            | */
/* |           Group Key: book_authors.book_id                                             | */
/* |           ->  Sort  (cost=1.27..1.29 rows=10 width=8)                                 | */
/* |                 Sort Key: book_authors.book_id                                        | */
/* |                 ->  Seq Scan on book_authors  (cost=0.00..1.10 rows=10 width=8)       | */
/* |   CTE collective_books                                                                | */
/* |     ->  CTE Scan on books_aggregate  (cost=0.00..0.22 rows=3 width=4)                 | */
/* |           Filter: (authors_count > 1)                                                 | */
/* |   CTE collective_authors                                                              | */
/* |     ->  Hash Join  (cost=0.10..1.26 rows=3 width=4)                                   | */
/* |           Hash Cond: (ba.book_id = cb.id)                                             | */
/* |           ->  Seq Scan on book_authors ba  (cost=0.00..1.10 rows=10 width=8)          | */
/* |           ->  Hash  (cost=0.06..0.06 rows=3 width=4)                                  | */
/* |                 ->  CTE Scan on collective_books cb  (cost=0.00..0.06 rows=3 width=4) | */
/* |   SubPlan 4                                                                           | */
/* |     ->  CTE Scan on collective_authors  (cost=0.00..0.06 rows=3 width=4)              | */
/* +---------------------------------------------------------------------------------------+ */

/* -- Export to csv */
/* COPY( */
/*   WITH */
/*   books_aggregate(book_id, authors_count) as( */
/*     SELECT book_id, count(author_id) as authors_count */
/*     FROM book_authors */
/*     GROUP BY book_id */
/*     ORDER BY book_id */
/*   ), */
/*   collective_books(id) as ( */
/*     SELECT book_id */
/*     FROM books_aggregate */
/*     WHERE authors_count > 1 */
/*   ), */
/*   collective_authors(id) as ( */
/*     select ba.author_id */
/*     FROM book_authors ba */
/*     JOIN collective_books cb */
/*     ON ba.book_id = cb.id */
/*   ) */
/*   SELECT * */
/*   FROM authors a */
/*   WHERE  a.id */
/*   NOT IN (SELECT id FROM collective_authors) */
/* ) TO '/tmp/mustread.csv' WITH CSV HEADER; */

/* --Cleanup */
/* DROP TABLE book_authors; */
/* DROP TABLE authors; */
/* DROP TABLE books; */
/* DROP INDEX main_author_idx; */
/* DROP INDEX authorship_idx; */
/* DROP INDEX book_idx; */

	-- Create authors
	CREATE TABLE authors(id serial PRIMARY KEY, name char(255));

	-- Single author of many books
	INSERT INTO authors(name)
	VALUES ('William Gibson');

	-- Collective of authors of one book
	INSERT INTO authors(name)
	VALUES ('Erich Gamma'), ('Richard Helm'), ('Ralph Johnson'), ('John Vlissides ');

	-- Main author and collaborator
	INSERT INTO authors(name)
	VALUES ('Daniel Kahneman'), ('Amos Twersky');


	-- Create books
	CREATE TABLE books(id serial PRIMARY KEY, title char(255));

	-- William Gibson books
	INSERT INTO books(title)
	VALUES('Neuromancer'), ('Count Zero'), ('Mona Lisa Overdrive');

	-- GoF book
	INSERT INTO books(title)
	VALUES('Design Patterns: Elements of Reusable Object-Oriented Software');

	-- Only Kahneman
	INSERT INTO books(title)
	VALUES('Thinking, Fast and Slow');

	-- Kahneman and Tversky as coauthor
	INSERT INTO books(title)
	VALUES('Choices, Values and Frames');


	-- Add authors and books relations
	CREATE TABLE book_authors(
	author_id int REFERENCES authors(id),
	book_id int REFERENCES books(id),
	main_author boolean
	);

	-- with constraints
	-- book can be written only once
	CREATE UNIQUE INDEX authorship_idx
	ON book_authors(author_id, book_id);

	-- book can have only one main author
	CREATE UNIQUE INDEX main_author_idx
	ON book_authors(book_id, main_author)
	WHERE main_author = TRUE;

	--and data
	INSERT INTO book_authors(author_id, book_id, main_author)
	VALUES (1,1, FALSE), (1,2, FALSE), (1,3, FALSE),
	(2,4, FALSE), (3,4, FALSE), (4,4, FALSE), (5,4, FALSE),
	(6,5, FALSE), (6,6, TRUE), (7,6, FALSE);


	-- Query to get all authors not seen in collaboration
	EXPLAIN WITH
	books_aggregate(book_id, authors_count) as(
	SELECT book_id, count(author_id) as authors_count
	FROM book_authors
	GROUP BY book_id
	ORDER BY book_id
	),
	collective_books(id) as (
	SELECT book_id
	FROM books_aggregate
	WHERE authors_count > 1
	),
	collective_authors(id) as (
	select ba.author_id
	FROM book_authors ba
	JOIN collective_books cb
	ON ba.book_id = cb.id
	)
	SELECT *
	FROM authors a
	WHERE a.id
	NOT IN (SELECT id FROM collective_authors);

	/* +----------------------------------------------------------------------------------------+ */
	/* \| QUERY PLAN \| */
	/* \|----------------------------------------------------------------------------------------\| */
	/* \| Seq Scan on authors a (cost=126.40..137.27 rows=35 width=1028) \| */
	/* \| Filter: (NOT (hashed SubPlan 4)) \| */
	/* \| CTE books_aggregate \| */
	/* \| -> Sort (cost=53.54..54.04 rows=200 width=12) \| */
	/* \| Sort Key: book_authors.book_id \| */
	/* \| -> HashAggregate (cost=43.90..45.90 rows=200 width=12) \| */
	/* \| Group Key: book_authors.book_id \| */
	/* \| -> Seq Scan on book_authors (cost=0.00..32.60 rows=2260 width=8) \| */
	/* \| CTE collective_books \| */
	/* \| -> CTE Scan on books_aggregate (cost=0.00..4.50 rows=67 width=4) \| */
	/* \| Filter: (authors_count > 1) \| */
	/* \| CTE collective_authors \| */
	/* \| -> Hash Join (cost=2.18..50.82 rows=757 width=4) \| */
	/* \| Hash Cond: (ba.book_id = cb.id) \| */
	/* \| -> Seq Scan on book_authors ba (cost=0.00..32.60 rows=2260 width=8) \| */
	/* \| -> Hash (cost=1.34..1.34 rows=67 width=4) \| */
	/* \| -> CTE Scan on collective_books cb (cost=0.00..1.34 rows=67 width=4) \| */
	/* \| SubPlan 4 \| */
	/* \| -> CTE Scan on collective_authors (cost=0.00..15.14 rows=757 width=4) \| */
	/* +----------------------------------------------------------------------------------------+ */

	CREATE INDEX book_idx ON book_authors(book_id);

	-- Same query after ad index
	/* +---------------------------------------------------------------------------------------+ */
	/* \| QUERY PLAN \| */
	/* \|---------------------------------------------------------------------------------------\| */
	/* \| Seq Scan on authors a (cost=3.00..13.87 rows=35 width=1028) \| */
	/* \| Filter: (NOT (hashed SubPlan 4)) \| */
	/* \| CTE books_aggregate \| */
	/* \| -> GroupAggregate (cost=1.27..1.44 rows=10 width=12) \| */
	/* \| Group Key: book_authors.book_id \| */
	/* \| -> Sort (cost=1.27..1.29 rows=10 width=8) \| */
	/* \| Sort Key: book_authors.book_id \| */
	/* \| -> Seq Scan on book_authors (cost=0.00..1.10 rows=10 width=8) \| */
	/* \| CTE collective_books \| */
	/* \| -> CTE Scan on books_aggregate (cost=0.00..0.22 rows=3 width=4) \| */
	/* \| Filter: (authors_count > 1) \| */
	/* \| CTE collective_authors \| */
	/* \| -> Hash Join (cost=0.10..1.26 rows=3 width=4) \| */
	/* \| Hash Cond: (ba.book_id = cb.id) \| */
	/* \| -> Seq Scan on book_authors ba (cost=0.00..1.10 rows=10 width=8) \| */
	/* \| -> Hash (cost=0.06..0.06 rows=3 width=4) \| */
	/* \| -> CTE Scan on collective_books cb (cost=0.00..0.06 rows=3 width=4) \| */
	/* \| SubPlan 4 \| */
	/* \| -> CTE Scan on collective_authors (cost=0.00..0.06 rows=3 width=4) \| */
	/* +---------------------------------------------------------------------------------------+ */

	/* -- Export to csv */
	/* COPY( */
	/* WITH */
	/* books_aggregate(book_id, authors_count) as( */
	/* SELECT book_id, count(author_id) as authors_count */
	/* FROM book_authors */
	/* GROUP BY book_id */
	/* ORDER BY book_id */
	/* ), */
	/* collective_books(id) as ( */
	/* SELECT book_id */
	/* FROM books_aggregate */
	/* WHERE authors_count > 1 */
	/* ), */
	/* collective_authors(id) as ( */
	/* select ba.author_id */
	/* FROM book_authors ba */
	/* JOIN collective_books cb */
	/* ON ba.book_id = cb.id */
	/* ) */
	/* SELECT * */
	/* FROM authors a */
	/* WHERE a.id */
	/* NOT IN (SELECT id FROM collective_authors) */
	/* ) TO '/tmp/mustread.csv' WITH CSV HEADER; */

	/* --Cleanup */
	/* DROP TABLE book_authors; */
	/* DROP TABLE authors; */
	/* DROP TABLE books; */
	/* DROP INDEX main_author_idx; */
	/* DROP INDEX authorship_idx; */
	/* DROP INDEX book_idx; */