brunocalza/create_index_include.sql

## create_index_include.sql
-- Only available on PostgreSQL 11

/* We'll create a table and load a million data and compare a two indexes, onde with included data and the other without it */

/* The table will for demonstration */
CREATE TABLE "user" (
    id BIGSERIAL NOT NULL,
    name VARCHAR(32) NOT NULL,
    email VARCHAR(32) NOT NULL,
    aboutme VARCHAR(256),
    created_at TIMESTAMP NOT NULL default NOW(),
    updated_at TIMESTAMP
);

/*
 * Helper function that generates a random string
 * Source: https://stackoverflow.com/questions/3970795/how-do-you-create-a-random-string-thats-suitable-for-a-session-id-in-postgresql
 */

CREATE or REPLACE FUNCTION random_string(length integer) returns text as
$$
declare
  letters text[] := '{a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z}';
  result text := '';
  i integer := 0;
begin
  for i in 1..length loop
    result := result || letters[1+random()*(array_length(letters, 1)-1)];
  end loop;
  return result;
end;
$$ language plpgsql;

/* Loading a 1000000 tuples using the function random_string */
DO
$do$
BEGIN
    FOR i IN 1..1000000 LOOP
      INSERT INTO "user" (name, email, aboutme) values (random_string(floor(random()*32)::int), random_string(floor(random()*20)::int) || '@email.com', random_string(floor(random()*256)::int));
    END LOOP;
END
$do$;

-- Grabing a random email for test
WITH random_id AS (
    SELECT floor(random()*1000000)::int AS id
), random_email AS (
     SELECT email FROM "user" WHERE id in (SELECT id FROM random_id)
) SELECT * FROM random_email;

EXPLAIN ANALYSE SELECT name, aboutme, updated_at FROM "user" WHERE email = 'chqtyerthmtidyxu@email.com';

--  Parallel Seq Scan on "user"  (cost=0.00..31758.33 rows=2 width=153) (actual time=42.829..54.338 rows=0 loops=3)

CREATE INDEX email_index ON "user" (email);
EXPLAIN ANALYSE SELECT name, aboutme, updated_at FROM "user" WHERE email = 'chqtyerthmtidyxu@email.com';

-- Index Scan using email_index on "user"  (cost=0.42..28.53 rows=6 width=153) (actual time=0.064..0.065 rows=1 loops=1)

DROP INDEX email_index;
CREATE INDEX email_index_with_data ON "user" (email) INCLUDE (name, aboutme, updated_at);
EXPLAIN ANALYSE SELECT name, aboutme, updated_at FROM "user" WHERE email = 'chqtyerthmtidyxu@email.com';

--> Index Only Scan using email_index_with_data on "user"  (cost=0.55..28.65 rows=6 width=153) (actual time=0.053..0.055 rows=1 loops=1)


-- https://wiki.postgresql.org/wiki/Index_Maintenance
 schemaname | tablename |       indexname       | num_rows | table_size | index_size | unique | number_of_scans | tuples_read | tuples_fetched
------------+-----------+-----------------------+----------+------------+------------+--------+-----------------+-------------+----------------
 public     | user      | email_index_with_data |    1e+06 | 207 MB     | 209 MB     | N      |               0 |           0 |              0
 public     | user      | email_index           |    1e+06 | 207 MB     | 39 MB      | N      |               1 |        1995 |           1995
	-- Only available on PostgreSQL 11

	/* We'll create a table and load a million data and compare a two indexes, onde with included data and the other without it */

	/* The table will for demonstration */
	CREATE TABLE "user" (
	id BIGSERIAL NOT NULL,
	name VARCHAR(32) NOT NULL,
	email VARCHAR(32) NOT NULL,
	aboutme VARCHAR(256),
	created_at TIMESTAMP NOT NULL default NOW(),
	updated_at TIMESTAMP
	);

	/*
	* Helper function that generates a random string
	* Source: https://stackoverflow.com/questions/3970795/how-do-you-create-a-random-string-thats-suitable-for-a-session-id-in-postgresql
	*/

	CREATE or REPLACE FUNCTION random_string(length integer) returns text as
	$$
	declare
	letters text[] := '{a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z}';
	result text := '';
	i integer := 0;
	begin
	for i in 1..length loop
	result := result \|\| letters[1+random()*(array_length(letters, 1)-1)];
	end loop;
	return result;
	end;
	$$ language plpgsql;

	/* Loading a 1000000 tuples using the function random_string */
	DO
	$do$
	BEGIN
	FOR i IN 1..1000000 LOOP
	INSERT INTO "user" (name, email, aboutme) values (random_string(floor(random()32)::int), random_string(floor(random()20)::int) \|\| '@email.com', random_string(floor(random()*256)::int));
	END LOOP;
	END
	$do$;

	-- Grabing a random email for test
	WITH random_id AS (
	SELECT floor(random()*1000000)::int AS id
	), random_email AS (
	SELECT email FROM "user" WHERE id in (SELECT id FROM random_id)
	) SELECT * FROM random_email;

	EXPLAIN ANALYSE SELECT name, aboutme, updated_at FROM "user" WHERE email = 'chqtyerthmtidyxu@email.com';

	-- Parallel Seq Scan on "user" (cost=0.00..31758.33 rows=2 width=153) (actual time=42.829..54.338 rows=0 loops=3)

	CREATE INDEX email_index ON "user" (email);
	EXPLAIN ANALYSE SELECT name, aboutme, updated_at FROM "user" WHERE email = 'chqtyerthmtidyxu@email.com';

	-- Index Scan using email_index on "user" (cost=0.42..28.53 rows=6 width=153) (actual time=0.064..0.065 rows=1 loops=1)

	DROP INDEX email_index;
	CREATE INDEX email_index_with_data ON "user" (email) INCLUDE (name, aboutme, updated_at);
	EXPLAIN ANALYSE SELECT name, aboutme, updated_at FROM "user" WHERE email = 'chqtyerthmtidyxu@email.com';

	--> Index Only Scan using email_index_with_data on "user" (cost=0.55..28.65 rows=6 width=153) (actual time=0.053..0.055 rows=1 loops=1)


	-- https://wiki.postgresql.org/wiki/Index_Maintenance
	schemaname \| tablename \| indexname \| num_rows \| table_size \| index_size \| unique \| number_of_scans \| tuples_read \| tuples_fetched
	------------+-----------+-----------------------+----------+------------+------------+--------+-----------------+-------------+----------------
	public \| user \| email_index_with_data \| 1e+06 \| 207 MB \| 209 MB \| N \| 0 \| 0 \| 0
	public \| user \| email_index \| 1e+06 \| 207 MB \| 39 MB \| N \| 1 \| 1995 \| 1995