paulghaddad/Aggregations

## Aggregations
1. Count the number of facilities

SELECT COUNT(*)
FROM cd.facilities

2. Count the number of expensive facilities

SELECT COUNT(*)
FROM cd.facilities
WHERE guestcost > 10

3. Count the number of recommendations each member makes.

SELECT recommendedby, COUNT(*)
FROM cd.members
WHERE recommendedby IS NOT NULL
GROUP BY recommendedby
ORDER BY recommendedby

4. List the total slots booked per facility

SELECT facid, SUM(slots) AS total_slots
FROM cd.bookings
GROUP BY facid
ORDER BY facid

5. List the total slots booked per facility in a given month

SELECT facid, SUM(slots) AS "Total Slots"
FROM cd.bookings
WHERE starttime BETWEEN '2012-09-01' AND '2012-10-01'
GROUP BY facid
ORDER BY "Total Slots"

6. List the total slots booked per facility per month

SELECT facid, date_part('month', starttime) AS month, SUM(slots) AS "Total Slots"
FROM cd.bookings
WHERE starttime BETWEEN '2012-01-01' AND '2012-12-31'
GROUP BY facid, month
ORDER BY facid, month

7. Find the count of members who have made at least one booking

-- we don't necessarily need to also select from the members table
-- but it ensures all members are connected to a booking
SELECT COUNT(DISTINCT m.memid)
FROM cd.members m, cd.bookings b
WHERE m.memid = b.memid

8. List facilities with more than 1000 slots booked

SELECT facid, SUM(slots) AS total_slots
FROM cd.bookings
GROUP BY facid
HAVING SUM(slots) > 1000
ORDER BY facid

9. Find the total revenue of each facility

SELECT f.name, SUM(
    CASE
    WHEN memid = 0
    THEN guestcost * slots
    ELSE membercost * slots
    END
) AS revenue
FROM cd.facilities f
INNER JOIN cd.bookings b
USING (facid)
GROUP BY f.name
ORDER BY revenue

10. Find facilities with a total revenue less than 1000

-- Approach 1: Use HAVING
SELECT name, SUM(
    CASE
    WHEN memid = 0 THEN slots * guestcost
    ELSE slots * membercost
    END
) AS revenue
FROM cd.facilities f
INNER JOIN cd.bookings b
USING (facid)
GROUP BY facid
HAVING SUM(
    CASE
    WHEN memid = 0 THEN slots * guestcost
    ELSE slots * membercost
    END
) < 1000
ORDER BY revenue

-- Approach 2: Using a subquery and then apply a WHERE clause outside the subquery
SELECT name, revenue FROM (
    SELEct name,
    SUM(
        CASE
        WHEN memid = 0 THEN slots * guestcost
        ELSE slots * membercost
        END
    ) AS revenue
    FROM cd.facilities f
    INNER JOIN cd.bookings b
    USING (facid)
    GROUP BY facid
) as revenue_subquery
WHERE revenue < 1000
ORDER BY revenue

11. Output the facility id that has the highest number of slots booked

-- Select facilities whose total slots match the highest from the collection of facilities
WITH slots_booked_per_facility AS (
    SELECT facid, SUM(slots) as total_slots
    FROM cd.bookings
    GROUP BY facid
)
SELECT *
FROM slots_booked_per_facility
WHERE total_slots = (
  SELECT MAX(total_slots) FROM slots_booked_per_facility
)

12. List the total slots booked per facility per month, part 2

SELECT facid, date_part('month', starttime) as month,
SUM(slots) as total_slots
FROM cd.bookings
WHERE date_part('year', starttime) = 2012
-- This will generate aggregate stats for unique combinations
-- of facid and month, including when the month column iS NULL (resulting
-- in a sum of all months for a given facility, and also when the facid and
-- month columns are NULL, resulting in total for all months across all facilities
GROUP BY ROLLUP (facid, month)
ORDER BY facid, month

13. List the total hours booked per named facility

SELECT f.facid, name, to_char(SUM(slots) * 0.5, '99,999.99') as total_hours
FROM cd.bookings b, cd.facilities f
WHERE b.facid = f.facid
GROUP BY f.facid, name
ORDER BY facid

14. List each member's first booking after September 1st 2012

SELECT surname, firstname, memid, MIN(starttime) as starttime
FROM cd.bookings b
INNER JOIN cd.members m
USING (memid)
WHERE starttime >= '2012-09-01'
GROUP BY memid, surname, firstname
ORDER BY memid

15. Produce a list of member names, with each row containing the total member count

SELECT COUNT(*) OVER(), firstname, surname
FROM cd.members
ORDER BY joindate

16. Produce a numbered list of members

SELECT ROW_NUMBER() OVER (
    ORDER BY joindate
),
firstname, surname
FROM cd.members

17. Output the facility id that has the highest number of slots booked, again

-- RANK() will output identical numbers with gaps in the event of ties
-- So in this example, it is preferable to DENSE_RANK()
WITH ranking_of_facility_bookings AS (
    SELECT RANK() OVER (
        ORDER BY SUM(slots) DESC
    ), facid, SUM(slots) AS total
    FROM cd.facilities f
    INNER JOIN cd.bookings b
    USING (facid)
    GROUP BY facid
)

SELECT facid, total
FROM ranking_of_facility_bookings
WHERE rank = 1

18. Rank members by (rounded) hours used

SELECT firstname, surname, round(SUM(slots * 0.5), -1) As hours,
RANK() OVER (
    ORDER BY round(SUM(slots * 0.5), -1) DESC
) AS rank
FROM cd.members m
INNER JOIN cd.bookings b
USING (memid)
GROUP BY memid
ORDER BY rank, surname, firstname

19. Find the top three revenue generating facilities

WITH revenue_by_facility AS (
    SELECT f.name,
    SUM(
        CASE WHEN memid = 1
        THEN slots * guestcost
        ELSE slots * membercost
        END
    ) AS revenue
    FROM cd.facilities f
    INNER JOIN cd.bookings b
    USING (facid)
    INNER JOIN cd.members m
    USING (memid)
    GROUP BY f.name
)

SELECT name, RANK() OVER (
    ORDER BY revenue DESC
)
FROM revenue_by_facility
LIMIT 3

20. Classify facilities by value

WITH facility_rank_by_revenue AS (
SELECT name,
NTILE(3) OVER (
    ORDER BY SUM(
        CASE WHEN memid = 1
        THEN guestcost * slots
        ELSE membercost * slots
        END
    ) DESC
) AS revenue
FROM cd.facilities f
INNER JOIN cd.bookings b
USING (facid)
GROUP BY name
ORDER BY revenue, name
)

SELECT name,
    (CASE
    WHEN revenue = 1 THEN 'high'
    WHEN revenue = 2 THEN 'average'
    ELSE 'low'
    END) AS revenue
FROM facility_rank_by_revenue

21. Calculate the payback time for each facility

WITH facility_cost_profile AS (
SELECT DISTINCT name, initialoutlay, monthlymaintenance, SUM((
    CASE
    WHEN memid = 0 THEN guestcost * slots
    ELSE membercost * slots
    END) / 3)
OVER (
    ORDER BY name
) AS monthly_revenue
FROM cd.facilities f
INNER JOIN cd.bookings b
USING (facid)
)

SELECT name, (
    initialoutlay / (monthly_revenue - monthlymaintenance)
) as months
FROM facility_cost_profile
ORDER BY name

22. Calculate a rolling average of total revenue

WITH daily_revenue AS (
    SELECT DISTINCT starttime::date as date,
    SUM(
        CASE
        WHEN memid = 0 THEN guestcost * slots
        ELSE membercost * slots
        END
    ) AS revenue
    FROM cd.bookings b
    INNER JOIN cd.facilities f
    USING (facid)
    GROUP BY date
    ORDER BY date
), avg_daily_revenue AS (
    SELECT date,
    AVG(revenue) OVER (
        ROWS BETWEEN 14 PRECEDING AND CURRENT ROW
    ) AS revenue
    FROM daily_revenue
)

SELECT * FROM avg_daily_revenue
WHERE date BETWEEN '2012-08-01' AND '2012-08-31'

3. Generate a list of all the dates in October 2012

SELECT * FROM generate_series(timestamp '2012-10-01 00:00:00', timestamp '2012-10-31 00:00:00', '1 day')

## Date
1. Produce a timestamp for 1 a.m. on the 31st of August 2012

-- Two approaches

-- Use a function
SELECT make_timestamp(2012, 8, 31, 1, 0, 0)

-- Create a record with a timestamp data type
SELECT timestamp '2012-08-31 01:00:00'

2. Subtract timestamps from each other

SELECT timestamp '2012-08-31 01:00:00' - timestamp '2012-07-30 01:00:00' AS interval

3. Generate a list of all the dates in October 2012

SELECT * FROM generate_series(timestamp '2012-10-01 00:00:00', timestamp '2012-10-31 00:00:00', '1 day')

4. Get the day of the month from a timestamp

SELECT date_part('day', timestamp '2012-08-31')

OR
-- extract and date_part are equivalent
SELECT extract('day', timestamp '2012-08-31')

5. Work out the number of seconds between timestamps

SELECT EXTRACT(EPOCH FROM timestamp '2012-09-02 00:00:00' - timestamp '2012-08-31 01:00:00')

6. Work out the number of days in each month of 2012

SELECT date_part('month', current_month) AS current_month,
(current_month + interval '1 month') - current_month AS length
FROM generate_series(timestamp '2012-01-01 00:00', timestamp '2012-12-31 23:59', '1 month') AS current_month

7. Work out the number of days remaining in the month

SELECT (date_trunc('month', timestamp '2012-02-11 01:00:00') + '1 month') -
  date_trunc('day', timestamp '2012-02-11 01:00:00') AS remaining

  -- or using a subquery

SELECT (date_trunc('month', ts) + '1 month') - date_trunc('day', ts)
FROM (SELECT timestamp '2012-02-11 01:00:00' As ts) AS my_time

8. Work out the end time of bookings

SELECT starttime, (starttime + slots * interval '30 minutes') As endtime
FROM cd.bookings
ORDER BY endtime DESC, starttime DESC
LIMIT 10

9. Return a count of bookings for each month

SELECT date_trunc('month', starttime) AS month, COUNT(*)
FROM cd.bookings
GROUP BY month
ORDER BY month

10. Work out the utilisation percentage for each facility by month

WITH booking_detail AS (
    SELECT name, date_trunc('month', starttime) AS month, starttime, slots,
    starttime + (slots * INTERVAL '30 minutes') AS endtime
    FROM cd.facilities
    INNER JOIN cd.bookings
    USING (facid)
), time_booked_per_appt AS (
    SELECT *,
    (endtime - starttime) AS booking_duration,
    EXTRACT(EPOCH FROM (endtime - starttime)) AS booking_duration_in_sec
    FROM booking_detail
), monthly_availability AS (
    SELECT month, (month + INTERVAL '1 month') - month AS length,
    EXTRACT(EPOCH FROM make_time(20, 30, 0) - make_time(8, 0, 0)) AS daily_availability
    FROM booking_detail
    GROUP BY month
), time_utilized_by_month AS (
    SELECT name, month, SUM(booking_duration_in_sec) AS time_utilized
    FROM time_booked_per_appt
    GROUP BY name, month
)

SELECT name, tubm.month,
round(CAST (time_utilized / (extract('day' from length) * daily_availability) * 100 AS numeric), 1) AS utilization
FROM time_utilized_by_month tubm
INNER JOIN monthly_availability ma
ON tubm.month = ma.month
ORDER BY name, month

## Joins and Subqueries
1. Retrieve the start times of members' bookings

SELECT b.starttime
FROM cd.members m
INNER JOIN cd.bookings b
ON m.memid = b.memid
WHERE m.firstname = 'David' AND m.surname = 'Farrell'

2. Work out the start times of bookings for tennis courts

SELECT starttime AS start, name
FROM cd.bookings b
INNER JOIN cd.facilities f
ON b.facid = f.facid
WHERE name LIKE 'Tennis%'
AND date_trunc('day', b.starttime) = '2012-09-21'
ORDER BY start

3. Produce a list of all members who have recommended another member

-- Method 1: Semi-join

SELECT firstname, surname
FROM cd.members
WHERE memid IN (
    SELECT DISTINCT(recommendedby)
    FROM cd.members
    WHERE recommendedby IS NOT NULL
)
ORDER BY surname, firstname

-- Method 2: Self-join using a FROM clause
SELECT DISTINCT m1.firstname, m1.surname
FROM cd.members m1, cd.members m2
WHERE m1.memid = m2.recommendedby
ORDER BY m1.surname, m1.surname

-- Method 3: Self-join using an INNER JOIN
SELECT DISTINCT m1.firstname, m1.surname
FROM cd.members m1
INNER JOIN cd.members m2
ON m1.memid = m2.recommendedby
ORDER BY m1.surname, m1.surname

4. Produce a list of all members, along with their recommender

SELECT m1.firstname AS memfname, m1.surname AS memsname, m2.firstname AS recfname, m2.surname AS recsname
FROM cd.members m1
LEFT OUTER JOIN cd.members m2
ON m1.recommendedby = m2.memid
ORDER BY memsname, memfname

5. Produce a list of all members who have used a tennis court

SELECT DISTINCT m.firstname || ' ' || m.surname AS member, f.name AS facility
FROM cd.members AS m
INNER JOIN cd.bookings AS b
USING (memid)
INNER JOIN cd.facilities AS f
ON b.facid = f.facid AND f.name LIKE 'Tennis%' -- placing the condition in the join clause but could have used a WHERE clause too
ORDER BY member

6. Produce a list of costly bookings

SELECT m.firstname || ' ' || m.surname AS member, name,
CASE WHEN m.surname = 'GUEST' THEN slots * guestcost
ELSE slots * membercost
END AS cost
FROM cd.members AS m
INNER JOIN cd.bookings AS b
USING (memid)
INNER JOIN cd.facilities AS f
USING (facid)
WHERE date_trunc('day', starttime) = '2012-09-14'
-- the WHERE clause is applied before the SELECT,
-- so we can't refer to the alias name, ie cost > 30
AND (
    CASE WHEN m.surname = 'GUEST' THEN slots * guestcost
    ELSE slots * membercost
    END
) > 30
ORDER BY cost DESC

7. Produce a list of all members, along with their recommender, using no joins.

-- Use a correlated subquery within a SELECT clause to fetch the recommender
-- for each member. The subquery is run for each record in the outer query
SELECT DISTINCT m.firstname || ' ' || m.surname AS member, (
    SELECT recommenders.firstname || ' ' || recommenders.surname
    FROM cd.members AS recommenders
    WHERE m.recommendedby = recommenders.memid) AS recommender
FROM cd.members m
ORDER BY member ASC

8. Produce a list of costly bookings, using a subquery

-- booking costs is a temporary table that computes each bookings' cost.
-- This temporary table is then join to the members table to link it to the member's name
-- I prefer this to the solution given because the booking costs are separate from
-- the members table and there's one less join in the subquery

SELECT concat_ws(' ', m.firstname, m.surname ) AS member, name, cost
FROM cd.members m,
    (SELECT memid, name, starttime,
        CASE WHEN b.memid = 0 THEN (f.guestcost * b.slots)
        ELSE (f.membercost * b.slots)
        END AS cost
     FROM cd.bookings b
     INNER JOIN cd.facilities f
     USING (facid)) AS booking_costs
WHERE m.memid = booking_costs.memid
AND cost > 30
AND date_trunc('day', booking_costs.starttime) = '2012-09-14'
ORDER BY cost DESC

## Modifying Data
1. Insert some data into a table

Approach 1:

INSERT INTO cd.facilities VALUES
    (9, 'Spa', 20, 30, 100000, 800)

Approach 2: Specify column names

INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance)
VALUES
    (9, 'Spa', 20, 30, 100000, 800)

2. Insert multiple rows of data into a table

INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance) VALUES
    (9, 'Spa', 20, 30, 100000, 800),
    (10, 'Squash Court 2', 3.5, 17.5, 5000, 80)

3. Insert calculated data into a table

Approach 1: Use the SELECT in place of VALUES

INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance) SELECT
    (SELECT MAX(facid) FROM cd.facilities) + 1, 'Spa', 20, 30, 100000, 800

Approach 2: Use VALUES with computed data

INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance) VALUES
    ((SELECT MAX(facid) + 1 FROM cd.facilities), 'Spa', 20, 30, 100000, 800)

4. Update some existing data

UPDATE cd.facilities
SET initialoutlay = 10000
WHERE facid = 1

5. Update multiple rows and columns at the same time

UPDATE cd.facilities
SET membercost = 6, guestcost = 30
WHERE name LIKE 'Tennis Court%'

6. Update a row based on the contents of another row

-- Use UPDATE FROM
UPDATE cd.facilities AS court_2
SET membercost = 1.1 * court_1.membercost,
guestcost = 1.1 * court_1.guestcost
FROM (SELECT * FROM cd.facilities WHERE facid = 0) AS court_1
WHERE court_2.facid = 1

7. Delete all bookings

DELETE FROM cd.bookings

 OR

TRUNCATE cd.bookings

8. Delete a member from the cd.members table

DELETE FROM cd.members WHERE memid = 37

9. Delete based on a subquery

DELETE FROM cd.members
WHERE memid NOT IN (
	SELECT DISTINCT memid
	FROM cd.bookings
)

## Recursive Queries
1. Find the upward recommendation chain for member ID 27

-- Solution 1: Use a INNER JOIN

WITH RECURSIVE recommenders(recommender) AS (
        -- Return the first record that will be UNIONed with subsequent iterations
        SELECT recommendedby AS recommender
        FROM cd.members
        WHERE memid = 27
    UNION ALL
        -- Add another record during each iteration by joining the recommendedby
        -- ID in the table above with the memid of the table below
        SELECT m.recommendedby AS recommender
        FROM recommenders r
        INNER JOIN cd.members m
        ON r.recommender = m.memid
)

-- JOIN the ids of the recommenders table to the members table to get info on them
SELECT recommender, firstname, surname
FROM recommenders r
INNER JOIN cd.members m
ON r.recommender = m.memid
ORDER BY recommender DESC

-- Solution 2: Use a subquery in the table that is UNIONed
WITH RECURSIVE recommenders(recommender) AS (
        SELECT recommendedby AS recommender
        FROM cd.members
        WHERE memid = 27
    UNION ALL
        SELECT m.recommendedby AS recommender
        FROM recommenders r, cd.members m
        WHERE r.recommender = m.memid
)

SELECT recommender, firstname, surname
FROM recommenders r
INNER JOIN cd.members m
ON r.recommender = m.memid
ORDER BY recommender DESC

2. Find the downward recommendation chain for member ID 1

-- Here I'm using a subquery in the FROM statement, but we can
-- just as easily use an INNER JOIN
WITH RECURSIVE recommendees(memid, firstname, surname) AS (
        SELECT memid, firstname, surname
        FROM cd.members
        WHERE recommendedby = 1
    UNION ALL
        SELECT m.memid, m.firstname, m.surname
        FROM recommendees r, cd.members m
        WHERE r.memid = m.recommendedby
)

SELECT * FROM recommendees
ORDER BY memid ASC

3. Produce a CTE that can return the upward recommendation chain for any member

WITH RECURSIVE recommenders(memid, recommender) AS (
        SELECT memid, recommendedby AS recommender
        FROM cd.members
    UNION ALL
        SELECT r.memid, recommendedby
        FROM recommenders r, cd.members m
        WHERE r.recommender = m.memid
)

SELECT r.memid, recommender, firstname, surname
FROM recommenders r
INNER JOIN cd.members m
ON r.recommender = m.memid
WHERE r.memid IN (12, 22)
ORDER BY r.memid ASC, r.recommender DESC

## Simple SQL Queries
1. Retrieve everything from a table

SELECT * FROM cd.facilities

2. Retrieve specific columns from a table

SELECT name, membercost FROM cd.facilities

3. Control which rows are retrieved

SELECT * FROM cd.facilities
WHERE membercost > 0

4. Control which rows are retrieved - part 2

SELECT facid, name, membercost, monthlymaintenance FROM cd.facilities
WHERE membercost > 0
AND membercost < (monthlymaintenance / 50)

5. Basic string searches

# Using LIKE

SELECT *
FROM cd.facilities
WHERE name LIKE '%Tennis%'

# Using SIMILAR TO

SELECT *
FROM cd.facilities
WHERE name SIMILAR TO '%Tennis%'

# Using Regular Expressions

SELECT *
FROM cd.facilities
WHERE name ~ '.*Tennis.*'

7. Classify results into buckets

SELECT name,
CASE
WHEN monthlymaintenance > 100 THEN 'expensive'
ELSE 'cheap' END AS cost
FROM cd.facilities

8. Working with dates

SELECT m.memid, m.surname, m.firstname, m.joindate
FROM cd.members AS m
WHERE m.joindate >= date '2012-09-01' -- explicitly cast to a date type

9. Removing duplicates, and ordering results

SELECT DISTINCT(surname)
FROM cd.members
ORDER BY surname
LIMIT 10

10. Combining results from multiple queries

SELECT surname
FROM cd.members
UNION
SELECT name
FROM cd.facilities

11. Simple aggregation

SELECT MAX(m.joindate) AS lastest_signup_date
FROM cd.members m

12. More aggregation

-- We first do a subquery to get the latest signup date for all members. Then, we use HAVING to limit the
aggregation to values greater than or equal to this value. Also note that using the primary key of the
members table (memid) allows us to avoid putting unnecessary columns in the GROUP BY that are in the SELECT,
but only for presentation purposes.

SELECT m.firstname, m.surname, MAX(m.joindate) AS latest_signup_date
FROM cd.members m
GROUP BY m.memid
HAVING m.joindate >= (
    SELECT MAX(m2.joindate)
    FROM cd.members m2
)

## String Operation
1. Format the names of members

-- using the concat_ws function
SELECT concat_ws(', ', surname, firstname) AS name
FROM cd.members

-- we can also use the concat function

SELECT concat(surname, ', ', firstname) AS name
FROM cd.members

-- or the || operator

SELECT surname || ', ' || firstname AS name
FROM cd.members

2. Find facilities by a name prefix

-- using LIKE

SELECT *
FROM cd.facilities
WHERE name LIKE 'Tennis%'

-- using  regexes

SELECT *
FROM cd.facilities
WHERE name ~ '^Tennis'

3. Perform a case-insensitive search

-- using regexs
SELECT *
FROM cd.facilities
WHERE name ~* '^tennis'

-- using ILIKE

SELECT *
FROM cd.facilities
WHERE name ILIKE 'Tennis%'

-- using lower()

SELECT *
FROM cd.facilities
WHERE lower(name) LIKE 'tennis%'

-- using upper()

SELECT *
FROM cd.facilities
WHERE upper(name) LIKE 'TENNIS%'

4. Find telephone numbers with parentheses

SELECT memid, telephone
FROM cd.members
WHERE telephone SIMILAR TO '\([0-9]{3}\)%'
ORDER BY memid

5. Pad zip codes with leading zeroes

-- using to_char

SELECT to_char(zipcode, '09999') AS formatted_zip_code
FROM cd.members
ORDER BY formatted_zip_code

-- Postgres-specific casting

SELECT lpad(zipcode::text, 5, '0') AS formatted_zip_code
FROM cd.members
ORDER BY formatted_zip_code

-- SQL-compliant casting
SELECT lpad(cast(zipcode AS char(5)), 5, '0') AS formatted_zip_code
FROM cd.members
ORDER BY formatted_zip_code

6. Count the number of members whose surname starts with each letter of the alphabet

SELECT upper(left(surname, 1)) AS letter, COUNT(*)
FROM cd.members
GROUP BY letter
ORDER BY letter

7. Clean up telephone numbers

-- using regex_replace

SELECT memid, regexp_replace(telephone, '[\s\(\)-]', '', 'g') AS telephone
FROM cd.members
ORDER BY memid

-- using translate

SELECT memid, translate(telephone, ' ()-', '') AS telephone
FROM cd.members
ORDER BY memid
	1. Count the number of facilities

	SELECT COUNT(*)
	FROM cd.facilities

	2. Count the number of expensive facilities

	SELECT COUNT(*)
	FROM cd.facilities
	WHERE guestcost > 10

	3. Count the number of recommendations each member makes.

	SELECT recommendedby, COUNT(*)
	FROM cd.members
	WHERE recommendedby IS NOT NULL
	GROUP BY recommendedby
	ORDER BY recommendedby

	4. List the total slots booked per facility

	SELECT facid, SUM(slots) AS total_slots
	FROM cd.bookings
	GROUP BY facid
	ORDER BY facid

	5. List the total slots booked per facility in a given month

	SELECT facid, SUM(slots) AS "Total Slots"
	FROM cd.bookings
	WHERE starttime BETWEEN '2012-09-01' AND '2012-10-01'
	GROUP BY facid
	ORDER BY "Total Slots"

	6. List the total slots booked per facility per month

	SELECT facid, date_part('month', starttime) AS month, SUM(slots) AS "Total Slots"
	FROM cd.bookings
	WHERE starttime BETWEEN '2012-01-01' AND '2012-12-31'
	GROUP BY facid, month
	ORDER BY facid, month

	7. Find the count of members who have made at least one booking

	-- we don't necessarily need to also select from the members table
	-- but it ensures all members are connected to a booking
	SELECT COUNT(DISTINCT m.memid)
	FROM cd.members m, cd.bookings b
	WHERE m.memid = b.memid

	8. List facilities with more than 1000 slots booked

	SELECT facid, SUM(slots) AS total_slots
	FROM cd.bookings
	GROUP BY facid
	HAVING SUM(slots) > 1000
	ORDER BY facid

	9. Find the total revenue of each facility

	SELECT f.name, SUM(
	CASE
	WHEN memid = 0
	THEN guestcost * slots
	ELSE membercost * slots
	END
	) AS revenue
	FROM cd.facilities f
	INNER JOIN cd.bookings b
	USING (facid)
	GROUP BY f.name
	ORDER BY revenue

	10. Find facilities with a total revenue less than 1000

	-- Approach 1: Use HAVING
	SELECT name, SUM(
	CASE
	WHEN memid = 0 THEN slots * guestcost
	ELSE slots * membercost
	END
	) AS revenue
	FROM cd.facilities f
	INNER JOIN cd.bookings b
	USING (facid)
	GROUP BY facid
	HAVING SUM(
	CASE
	WHEN memid = 0 THEN slots * guestcost
	ELSE slots * membercost
	END
	) < 1000
	ORDER BY revenue

	-- Approach 2: Using a subquery and then apply a WHERE clause outside the subquery
	SELECT name, revenue FROM (
	SELEct name,
	SUM(
	CASE
	WHEN memid = 0 THEN slots * guestcost
	ELSE slots * membercost
	END
	) AS revenue
	FROM cd.facilities f
	INNER JOIN cd.bookings b
	USING (facid)
	GROUP BY facid
	) as revenue_subquery
	WHERE revenue < 1000
	ORDER BY revenue

	11. Output the facility id that has the highest number of slots booked

	-- Select facilities whose total slots match the highest from the collection of facilities
	WITH slots_booked_per_facility AS (
	SELECT facid, SUM(slots) as total_slots
	FROM cd.bookings
	GROUP BY facid
	)
	SELECT *
	FROM slots_booked_per_facility
	WHERE total_slots = (
	SELECT MAX(total_slots) FROM slots_booked_per_facility
	)

	12. List the total slots booked per facility per month, part 2

	SELECT facid, date_part('month', starttime) as month,
	SUM(slots) as total_slots
	FROM cd.bookings
	WHERE date_part('year', starttime) = 2012
	-- This will generate aggregate stats for unique combinations
	-- of facid and month, including when the month column iS NULL (resulting
	-- in a sum of all months for a given facility, and also when the facid and
	-- month columns are NULL, resulting in total for all months across all facilities
	GROUP BY ROLLUP (facid, month)
	ORDER BY facid, month

	13. List the total hours booked per named facility

	SELECT f.facid, name, to_char(SUM(slots) * 0.5, '99,999.99') as total_hours
	FROM cd.bookings b, cd.facilities f
	WHERE b.facid = f.facid
	GROUP BY f.facid, name
	ORDER BY facid

	14. List each member's first booking after September 1st 2012

	SELECT surname, firstname, memid, MIN(starttime) as starttime
	FROM cd.bookings b
	INNER JOIN cd.members m
	USING (memid)
	WHERE starttime >= '2012-09-01'
	GROUP BY memid, surname, firstname
	ORDER BY memid

	15. Produce a list of member names, with each row containing the total member count

	SELECT COUNT(*) OVER(), firstname, surname
	FROM cd.members
	ORDER BY joindate

	16. Produce a numbered list of members

	SELECT ROW_NUMBER() OVER (
	ORDER BY joindate
	),
	firstname, surname
	FROM cd.members

	17. Output the facility id that has the highest number of slots booked, again

	-- RANK() will output identical numbers with gaps in the event of ties
	-- So in this example, it is preferable to DENSE_RANK()
	WITH ranking_of_facility_bookings AS (
	SELECT RANK() OVER (
	ORDER BY SUM(slots) DESC
	), facid, SUM(slots) AS total
	FROM cd.facilities f
	INNER JOIN cd.bookings b
	USING (facid)
	GROUP BY facid
	)

	SELECT facid, total
	FROM ranking_of_facility_bookings
	WHERE rank = 1

	18. Rank members by (rounded) hours used

	SELECT firstname, surname, round(SUM(slots * 0.5), -1) As hours,
	RANK() OVER (
	ORDER BY round(SUM(slots * 0.5), -1) DESC
	) AS rank
	FROM cd.members m
	INNER JOIN cd.bookings b
	USING (memid)
	GROUP BY memid
	ORDER BY rank, surname, firstname

	19. Find the top three revenue generating facilities

	WITH revenue_by_facility AS (
	SELECT f.name,
	SUM(
	CASE WHEN memid = 1
	THEN slots * guestcost
	ELSE slots * membercost
	END
	) AS revenue
	FROM cd.facilities f
	INNER JOIN cd.bookings b
	USING (facid)
	INNER JOIN cd.members m
	USING (memid)
	GROUP BY f.name
	)

	SELECT name, RANK() OVER (
	ORDER BY revenue DESC
	)
	FROM revenue_by_facility
	LIMIT 3

	20. Classify facilities by value

	WITH facility_rank_by_revenue AS (
	SELECT name,
	NTILE(3) OVER (
	ORDER BY SUM(
	CASE WHEN memid = 1
	THEN guestcost * slots
	ELSE membercost * slots
	END
	) DESC
	) AS revenue
	FROM cd.facilities f
	INNER JOIN cd.bookings b
	USING (facid)
	GROUP BY name
	ORDER BY revenue, name
	)

	SELECT name,
	(CASE
	WHEN revenue = 1 THEN 'high'
	WHEN revenue = 2 THEN 'average'
	ELSE 'low'
	END) AS revenue
	FROM facility_rank_by_revenue

	21. Calculate the payback time for each facility

	WITH facility_cost_profile AS (
	SELECT DISTINCT name, initialoutlay, monthlymaintenance, SUM((
	CASE
	WHEN memid = 0 THEN guestcost * slots
	ELSE membercost * slots
	END) / 3)
	OVER (
	ORDER BY name
	) AS monthly_revenue
	FROM cd.facilities f
	INNER JOIN cd.bookings b
	USING (facid)
	)

	SELECT name, (
	initialoutlay / (monthly_revenue - monthlymaintenance)
	) as months
	FROM facility_cost_profile
	ORDER BY name

	22. Calculate a rolling average of total revenue

	WITH daily_revenue AS (
	SELECT DISTINCT starttime::date as date,
	SUM(
	CASE
	WHEN memid = 0 THEN guestcost * slots
	ELSE membercost * slots
	END
	) AS revenue
	FROM cd.bookings b
	INNER JOIN cd.facilities f
	USING (facid)
	GROUP BY date
	ORDER BY date
	), avg_daily_revenue AS (
	SELECT date,
	AVG(revenue) OVER (
	ROWS BETWEEN 14 PRECEDING AND CURRENT ROW
	) AS revenue
	FROM daily_revenue
	)

	SELECT * FROM avg_daily_revenue
	WHERE date BETWEEN '2012-08-01' AND '2012-08-31'

	3. Generate a list of all the dates in October 2012

	SELECT * FROM generate_series(timestamp '2012-10-01 00:00:00', timestamp '2012-10-31 00:00:00', '1 day')
	1. Produce a timestamp for 1 a.m. on the 31st of August 2012

	-- Two approaches

	-- Use a function
	SELECT make_timestamp(2012, 8, 31, 1, 0, 0)

	-- Create a record with a timestamp data type
	SELECT timestamp '2012-08-31 01:00:00'

	2. Subtract timestamps from each other

	SELECT timestamp '2012-08-31 01:00:00' - timestamp '2012-07-30 01:00:00' AS interval

	3. Generate a list of all the dates in October 2012

	SELECT * FROM generate_series(timestamp '2012-10-01 00:00:00', timestamp '2012-10-31 00:00:00', '1 day')

	4. Get the day of the month from a timestamp

	SELECT date_part('day', timestamp '2012-08-31')

	OR
	-- extract and date_part are equivalent
	SELECT extract('day', timestamp '2012-08-31')

	5. Work out the number of seconds between timestamps

	SELECT EXTRACT(EPOCH FROM timestamp '2012-09-02 00:00:00' - timestamp '2012-08-31 01:00:00')

	6. Work out the number of days in each month of 2012

	SELECT date_part('month', current_month) AS current_month,
	(current_month + interval '1 month') - current_month AS length
	FROM generate_series(timestamp '2012-01-01 00:00', timestamp '2012-12-31 23:59', '1 month') AS current_month

	7. Work out the number of days remaining in the month

	SELECT (date_trunc('month', timestamp '2012-02-11 01:00:00') + '1 month') -
	date_trunc('day', timestamp '2012-02-11 01:00:00') AS remaining

	-- or using a subquery

	SELECT (date_trunc('month', ts) + '1 month') - date_trunc('day', ts)
	FROM (SELECT timestamp '2012-02-11 01:00:00' As ts) AS my_time

	8. Work out the end time of bookings

	SELECT starttime, (starttime + slots * interval '30 minutes') As endtime
	FROM cd.bookings
	ORDER BY endtime DESC, starttime DESC
	LIMIT 10

	9. Return a count of bookings for each month

	SELECT date_trunc('month', starttime) AS month, COUNT(*)
	FROM cd.bookings
	GROUP BY month
	ORDER BY month

	10. Work out the utilisation percentage for each facility by month

	WITH booking_detail AS (
	SELECT name, date_trunc('month', starttime) AS month, starttime, slots,
	starttime + (slots * INTERVAL '30 minutes') AS endtime
	FROM cd.facilities
	INNER JOIN cd.bookings
	USING (facid)
	), time_booked_per_appt AS (
	SELECT *,
	(endtime - starttime) AS booking_duration,
	EXTRACT(EPOCH FROM (endtime - starttime)) AS booking_duration_in_sec
	FROM booking_detail
	), monthly_availability AS (
	SELECT month, (month + INTERVAL '1 month') - month AS length,
	EXTRACT(EPOCH FROM make_time(20, 30, 0) - make_time(8, 0, 0)) AS daily_availability
	FROM booking_detail
	GROUP BY month
	), time_utilized_by_month AS (
	SELECT name, month, SUM(booking_duration_in_sec) AS time_utilized
	FROM time_booked_per_appt
	GROUP BY name, month
	)

	SELECT name, tubm.month,
	round(CAST (time_utilized / (extract('day' from length) * daily_availability) * 100 AS numeric), 1) AS utilization
	FROM time_utilized_by_month tubm
	INNER JOIN monthly_availability ma
	ON tubm.month = ma.month
	ORDER BY name, month
	1. Retrieve the start times of members' bookings

	SELECT b.starttime
	FROM cd.members m
	INNER JOIN cd.bookings b
	ON m.memid = b.memid
	WHERE m.firstname = 'David' AND m.surname = 'Farrell'

	2. Work out the start times of bookings for tennis courts

	SELECT starttime AS start, name
	FROM cd.bookings b
	INNER JOIN cd.facilities f
	ON b.facid = f.facid
	WHERE name LIKE 'Tennis%'
	AND date_trunc('day', b.starttime) = '2012-09-21'
	ORDER BY start

	3. Produce a list of all members who have recommended another member

	-- Method 1: Semi-join

	SELECT firstname, surname
	FROM cd.members
	WHERE memid IN (
	SELECT DISTINCT(recommendedby)
	FROM cd.members
	WHERE recommendedby IS NOT NULL
	)
	ORDER BY surname, firstname

	-- Method 2: Self-join using a FROM clause
	SELECT DISTINCT m1.firstname, m1.surname
	FROM cd.members m1, cd.members m2
	WHERE m1.memid = m2.recommendedby
	ORDER BY m1.surname, m1.surname

	-- Method 3: Self-join using an INNER JOIN
	SELECT DISTINCT m1.firstname, m1.surname
	FROM cd.members m1
	INNER JOIN cd.members m2
	ON m1.memid = m2.recommendedby
	ORDER BY m1.surname, m1.surname

	4. Produce a list of all members, along with their recommender

	SELECT m1.firstname AS memfname, m1.surname AS memsname, m2.firstname AS recfname, m2.surname AS recsname
	FROM cd.members m1
	LEFT OUTER JOIN cd.members m2
	ON m1.recommendedby = m2.memid
	ORDER BY memsname, memfname

	5. Produce a list of all members who have used a tennis court

	SELECT DISTINCT m.firstname \|\| ' ' \|\| m.surname AS member, f.name AS facility
	FROM cd.members AS m
	INNER JOIN cd.bookings AS b
	USING (memid)
	INNER JOIN cd.facilities AS f
	ON b.facid = f.facid AND f.name LIKE 'Tennis%' -- placing the condition in the join clause but could have used a WHERE clause too
	ORDER BY member

	6. Produce a list of costly bookings

	SELECT m.firstname \|\| ' ' \|\| m.surname AS member, name,
	CASE WHEN m.surname = 'GUEST' THEN slots * guestcost
	ELSE slots * membercost
	END AS cost
	FROM cd.members AS m
	INNER JOIN cd.bookings AS b
	USING (memid)
	INNER JOIN cd.facilities AS f
	USING (facid)
	WHERE date_trunc('day', starttime) = '2012-09-14'
	-- the WHERE clause is applied before the SELECT,
	-- so we can't refer to the alias name, ie cost > 30
	AND (
	CASE WHEN m.surname = 'GUEST' THEN slots * guestcost
	ELSE slots * membercost
	END
	) > 30
	ORDER BY cost DESC

	7. Produce a list of all members, along with their recommender, using no joins.

	-- Use a correlated subquery within a SELECT clause to fetch the recommender
	-- for each member. The subquery is run for each record in the outer query
	SELECT DISTINCT m.firstname \|\| ' ' \|\| m.surname AS member, (
	SELECT recommenders.firstname \|\| ' ' \|\| recommenders.surname
	FROM cd.members AS recommenders
	WHERE m.recommendedby = recommenders.memid) AS recommender
	FROM cd.members m
	ORDER BY member ASC

	8. Produce a list of costly bookings, using a subquery

	-- booking costs is a temporary table that computes each bookings' cost.
	-- This temporary table is then join to the members table to link it to the member's name
	-- I prefer this to the solution given because the booking costs are separate from
	-- the members table and there's one less join in the subquery

	SELECT concat_ws(' ', m.firstname, m.surname ) AS member, name, cost
	FROM cd.members m,
	(SELECT memid, name, starttime,
	CASE WHEN b.memid = 0 THEN (f.guestcost * b.slots)
	ELSE (f.membercost * b.slots)
	END AS cost
	FROM cd.bookings b
	INNER JOIN cd.facilities f
	USING (facid)) AS booking_costs
	WHERE m.memid = booking_costs.memid
	AND cost > 30
	AND date_trunc('day', booking_costs.starttime) = '2012-09-14'
	ORDER BY cost DESC
	1. Insert some data into a table

	Approach 1:

	INSERT INTO cd.facilities VALUES
	(9, 'Spa', 20, 30, 100000, 800)

	Approach 2: Specify column names

	INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance)
	VALUES
	(9, 'Spa', 20, 30, 100000, 800)

	2. Insert multiple rows of data into a table

	INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance) VALUES
	(9, 'Spa', 20, 30, 100000, 800),
	(10, 'Squash Court 2', 3.5, 17.5, 5000, 80)

	3. Insert calculated data into a table

	Approach 1: Use the SELECT in place of VALUES

	INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance) SELECT
	(SELECT MAX(facid) FROM cd.facilities) + 1, 'Spa', 20, 30, 100000, 800

	Approach 2: Use VALUES with computed data

	INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance) VALUES
	((SELECT MAX(facid) + 1 FROM cd.facilities), 'Spa', 20, 30, 100000, 800)

	4. Update some existing data

	UPDATE cd.facilities
	SET initialoutlay = 10000
	WHERE facid = 1

	5. Update multiple rows and columns at the same time

	UPDATE cd.facilities
	SET membercost = 6, guestcost = 30
	WHERE name LIKE 'Tennis Court%'

	6. Update a row based on the contents of another row

	-- Use UPDATE FROM
	UPDATE cd.facilities AS court_2
	SET membercost = 1.1 * court_1.membercost,
	guestcost = 1.1 * court_1.guestcost
	FROM (SELECT * FROM cd.facilities WHERE facid = 0) AS court_1
	WHERE court_2.facid = 1

	7. Delete all bookings

	DELETE FROM cd.bookings

	OR

	TRUNCATE cd.bookings

	8. Delete a member from the cd.members table

	DELETE FROM cd.members WHERE memid = 37

	9. Delete based on a subquery

	DELETE FROM cd.members
	WHERE memid NOT IN (
	SELECT DISTINCT memid
	FROM cd.bookings
	)
	1. Find the upward recommendation chain for member ID 27

	-- Solution 1: Use a INNER JOIN

	WITH RECURSIVE recommenders(recommender) AS (
	-- Return the first record that will be UNIONed with subsequent iterations
	SELECT recommendedby AS recommender
	FROM cd.members
	WHERE memid = 27
	UNION ALL
	-- Add another record during each iteration by joining the recommendedby
	-- ID in the table above with the memid of the table below
	SELECT m.recommendedby AS recommender
	FROM recommenders r
	INNER JOIN cd.members m
	ON r.recommender = m.memid
	)

	-- JOIN the ids of the recommenders table to the members table to get info on them
	SELECT recommender, firstname, surname
	FROM recommenders r
	INNER JOIN cd.members m
	ON r.recommender = m.memid
	ORDER BY recommender DESC

	-- Solution 2: Use a subquery in the table that is UNIONed
	WITH RECURSIVE recommenders(recommender) AS (
	SELECT recommendedby AS recommender
	FROM cd.members
	WHERE memid = 27
	UNION ALL
	SELECT m.recommendedby AS recommender
	FROM recommenders r, cd.members m
	WHERE r.recommender = m.memid
	)

	SELECT recommender, firstname, surname
	FROM recommenders r
	INNER JOIN cd.members m
	ON r.recommender = m.memid
	ORDER BY recommender DESC

	2. Find the downward recommendation chain for member ID 1

	-- Here I'm using a subquery in the FROM statement, but we can
	-- just as easily use an INNER JOIN
	WITH RECURSIVE recommendees(memid, firstname, surname) AS (
	SELECT memid, firstname, surname
	FROM cd.members
	WHERE recommendedby = 1
	UNION ALL
	SELECT m.memid, m.firstname, m.surname
	FROM recommendees r, cd.members m
	WHERE r.memid = m.recommendedby
	)

	SELECT * FROM recommendees
	ORDER BY memid ASC

	3. Produce a CTE that can return the upward recommendation chain for any member

	WITH RECURSIVE recommenders(memid, recommender) AS (
	SELECT memid, recommendedby AS recommender
	FROM cd.members
	UNION ALL
	SELECT r.memid, recommendedby
	FROM recommenders r, cd.members m
	WHERE r.recommender = m.memid
	)

	SELECT r.memid, recommender, firstname, surname
	FROM recommenders r
	INNER JOIN cd.members m
	ON r.recommender = m.memid
	WHERE r.memid IN (12, 22)
	ORDER BY r.memid ASC, r.recommender DESC
	1. Retrieve everything from a table

	SELECT * FROM cd.facilities

	2. Retrieve specific columns from a table

	SELECT name, membercost FROM cd.facilities

	3. Control which rows are retrieved

	SELECT * FROM cd.facilities
	WHERE membercost > 0

	4. Control which rows are retrieved - part 2

	SELECT facid, name, membercost, monthlymaintenance FROM cd.facilities
	WHERE membercost > 0
	AND membercost < (monthlymaintenance / 50)

	5. Basic string searches

	# Using LIKE

	SELECT *
	FROM cd.facilities
	WHERE name LIKE '%Tennis%'

	# Using SIMILAR TO

	SELECT *
	FROM cd.facilities
	WHERE name SIMILAR TO '%Tennis%'

	# Using Regular Expressions

	SELECT *
	FROM cd.facilities
	WHERE name ~ '.Tennis.'

	7. Classify results into buckets

	SELECT name,
	CASE
	WHEN monthlymaintenance > 100 THEN 'expensive'
	ELSE 'cheap' END AS cost
	FROM cd.facilities

	8. Working with dates

	SELECT m.memid, m.surname, m.firstname, m.joindate
	FROM cd.members AS m
	WHERE m.joindate >= date '2012-09-01' -- explicitly cast to a date type

	9. Removing duplicates, and ordering results

	SELECT DISTINCT(surname)
	FROM cd.members
	ORDER BY surname
	LIMIT 10

	10. Combining results from multiple queries

	SELECT surname
	FROM cd.members
	UNION
	SELECT name
	FROM cd.facilities

	11. Simple aggregation

	SELECT MAX(m.joindate) AS lastest_signup_date
	FROM cd.members m

	12. More aggregation

	-- We first do a subquery to get the latest signup date for all members. Then, we use HAVING to limit the
	aggregation to values greater than or equal to this value. Also note that using the primary key of the
	members table (memid) allows us to avoid putting unnecessary columns in the GROUP BY that are in the SELECT,
	but only for presentation purposes.

	SELECT m.firstname, m.surname, MAX(m.joindate) AS latest_signup_date
	FROM cd.members m
	GROUP BY m.memid
	HAVING m.joindate >= (
	SELECT MAX(m2.joindate)
	FROM cd.members m2
	)
	1. Format the names of members

	-- using the concat_ws function
	SELECT concat_ws(', ', surname, firstname) AS name
	FROM cd.members

	-- we can also use the concat function

	SELECT concat(surname, ', ', firstname) AS name
	FROM cd.members

	-- or the \|\| operator

	SELECT surname \|\| ', ' \|\| firstname AS name
	FROM cd.members

	2. Find facilities by a name prefix

	-- using LIKE

	SELECT *
	FROM cd.facilities
	WHERE name LIKE 'Tennis%'

	-- using regexes

	SELECT *
	FROM cd.facilities
	WHERE name ~ '^Tennis'

	3. Perform a case-insensitive search

	-- using regexs
	SELECT *
	FROM cd.facilities
	WHERE name ~* '^tennis'

	-- using ILIKE

	SELECT *
	FROM cd.facilities
	WHERE name ILIKE 'Tennis%'

	-- using lower()

	SELECT *
	FROM cd.facilities
	WHERE lower(name) LIKE 'tennis%'

	-- using upper()

	SELECT *
	FROM cd.facilities
	WHERE upper(name) LIKE 'TENNIS%'

	4. Find telephone numbers with parentheses

	SELECT memid, telephone
	FROM cd.members
	WHERE telephone SIMILAR TO '\([0-9]{3}\)%'
	ORDER BY memid

	5. Pad zip codes with leading zeroes

	-- using to_char

	SELECT to_char(zipcode, '09999') AS formatted_zip_code
	FROM cd.members
	ORDER BY formatted_zip_code

	-- Postgres-specific casting

	SELECT lpad(zipcode::text, 5, '0') AS formatted_zip_code
	FROM cd.members
	ORDER BY formatted_zip_code

	-- SQL-compliant casting
	SELECT lpad(cast(zipcode AS char(5)), 5, '0') AS formatted_zip_code
	FROM cd.members
	ORDER BY formatted_zip_code

	6. Count the number of members whose surname starts with each letter of the alphabet

	SELECT upper(left(surname, 1)) AS letter, COUNT(*)
	FROM cd.members
	GROUP BY letter
	ORDER BY letter

	7. Clean up telephone numbers

	-- using regex_replace

	SELECT memid, regexp_replace(telephone, '[\s\(\)-]', '', 'g') AS telephone
	FROM cd.members
	ORDER BY memid

	-- using translate

	SELECT memid, translate(telephone, ' ()-', '') AS telephone
	FROM cd.members
	ORDER BY memid