Skip to content

Instantly share code, notes, and snippets.

@paulghaddad
Last active August 15, 2018 12:18
Show Gist options
  • Save paulghaddad/9a257f24429f81ea1f573a871aa6ddf6 to your computer and use it in GitHub Desktop.
Save paulghaddad/9a257f24429f81ea1f573a871aa6ddf6 to your computer and use it in GitHub Desktop.
PG Exercises
1. Count the number of facilities
SELECT COUNT(*)
FROM cd.facilities
2. Count the number of expensive facilities
SELECT COUNT(*)
FROM cd.facilities
WHERE guestcost > 10
3. Count the number of recommendations each member makes.
SELECT recommendedby, COUNT(*)
FROM cd.members
WHERE recommendedby IS NOT NULL
GROUP BY recommendedby
ORDER BY recommendedby
4. List the total slots booked per facility
SELECT facid, SUM(slots) AS total_slots
FROM cd.bookings
GROUP BY facid
ORDER BY facid
5. List the total slots booked per facility in a given month
SELECT facid, SUM(slots) AS "Total Slots"
FROM cd.bookings
WHERE starttime BETWEEN '2012-09-01' AND '2012-10-01'
GROUP BY facid
ORDER BY "Total Slots"
6. List the total slots booked per facility per month
SELECT facid, date_part('month', starttime) AS month, SUM(slots) AS "Total Slots"
FROM cd.bookings
WHERE starttime BETWEEN '2012-01-01' AND '2012-12-31'
GROUP BY facid, month
ORDER BY facid, month
7. Find the count of members who have made at least one booking
-- we don't necessarily need to also select from the members table
-- but it ensures all members are connected to a booking
SELECT COUNT(DISTINCT m.memid)
FROM cd.members m, cd.bookings b
WHERE m.memid = b.memid
8. List facilities with more than 1000 slots booked
SELECT facid, SUM(slots) AS total_slots
FROM cd.bookings
GROUP BY facid
HAVING SUM(slots) > 1000
ORDER BY facid
9. Find the total revenue of each facility
SELECT f.name, SUM(
CASE
WHEN memid = 0
THEN guestcost * slots
ELSE membercost * slots
END
) AS revenue
FROM cd.facilities f
INNER JOIN cd.bookings b
USING (facid)
GROUP BY f.name
ORDER BY revenue
10. Find facilities with a total revenue less than 1000
-- Approach 1: Use HAVING
SELECT name, SUM(
CASE
WHEN memid = 0 THEN slots * guestcost
ELSE slots * membercost
END
) AS revenue
FROM cd.facilities f
INNER JOIN cd.bookings b
USING (facid)
GROUP BY facid
HAVING SUM(
CASE
WHEN memid = 0 THEN slots * guestcost
ELSE slots * membercost
END
) < 1000
ORDER BY revenue
-- Approach 2: Using a subquery and then apply a WHERE clause outside the subquery
SELECT name, revenue FROM (
SELEct name,
SUM(
CASE
WHEN memid = 0 THEN slots * guestcost
ELSE slots * membercost
END
) AS revenue
FROM cd.facilities f
INNER JOIN cd.bookings b
USING (facid)
GROUP BY facid
) as revenue_subquery
WHERE revenue < 1000
ORDER BY revenue
11. Output the facility id that has the highest number of slots booked
-- Select facilities whose total slots match the highest from the collection of facilities
WITH slots_booked_per_facility AS (
SELECT facid, SUM(slots) as total_slots
FROM cd.bookings
GROUP BY facid
)
SELECT *
FROM slots_booked_per_facility
WHERE total_slots = (
SELECT MAX(total_slots) FROM slots_booked_per_facility
)
12. List the total slots booked per facility per month, part 2
SELECT facid, date_part('month', starttime) as month,
SUM(slots) as total_slots
FROM cd.bookings
WHERE date_part('year', starttime) = 2012
-- This will generate aggregate stats for unique combinations
-- of facid and month, including when the month column iS NULL (resulting
-- in a sum of all months for a given facility, and also when the facid and
-- month columns are NULL, resulting in total for all months across all facilities
GROUP BY ROLLUP (facid, month)
ORDER BY facid, month
13. List the total hours booked per named facility
SELECT f.facid, name, to_char(SUM(slots) * 0.5, '99,999.99') as total_hours
FROM cd.bookings b, cd.facilities f
WHERE b.facid = f.facid
GROUP BY f.facid, name
ORDER BY facid
14. List each member's first booking after September 1st 2012
SELECT surname, firstname, memid, MIN(starttime) as starttime
FROM cd.bookings b
INNER JOIN cd.members m
USING (memid)
WHERE starttime >= '2012-09-01'
GROUP BY memid, surname, firstname
ORDER BY memid
15. Produce a list of member names, with each row containing the total member count
SELECT COUNT(*) OVER(), firstname, surname
FROM cd.members
ORDER BY joindate
16. Produce a numbered list of members
SELECT ROW_NUMBER() OVER (
ORDER BY joindate
),
firstname, surname
FROM cd.members
17. Output the facility id that has the highest number of slots booked, again
-- RANK() will output identical numbers with gaps in the event of ties
-- So in this example, it is preferable to DENSE_RANK()
WITH ranking_of_facility_bookings AS (
SELECT RANK() OVER (
ORDER BY SUM(slots) DESC
), facid, SUM(slots) AS total
FROM cd.facilities f
INNER JOIN cd.bookings b
USING (facid)
GROUP BY facid
)
SELECT facid, total
FROM ranking_of_facility_bookings
WHERE rank = 1
18. Rank members by (rounded) hours used
SELECT firstname, surname, round(SUM(slots * 0.5), -1) As hours,
RANK() OVER (
ORDER BY round(SUM(slots * 0.5), -1) DESC
) AS rank
FROM cd.members m
INNER JOIN cd.bookings b
USING (memid)
GROUP BY memid
ORDER BY rank, surname, firstname
19. Find the top three revenue generating facilities
WITH revenue_by_facility AS (
SELECT f.name,
SUM(
CASE WHEN memid = 1
THEN slots * guestcost
ELSE slots * membercost
END
) AS revenue
FROM cd.facilities f
INNER JOIN cd.bookings b
USING (facid)
INNER JOIN cd.members m
USING (memid)
GROUP BY f.name
)
SELECT name, RANK() OVER (
ORDER BY revenue DESC
)
FROM revenue_by_facility
LIMIT 3
20. Classify facilities by value
WITH facility_rank_by_revenue AS (
SELECT name,
NTILE(3) OVER (
ORDER BY SUM(
CASE WHEN memid = 1
THEN guestcost * slots
ELSE membercost * slots
END
) DESC
) AS revenue
FROM cd.facilities f
INNER JOIN cd.bookings b
USING (facid)
GROUP BY name
ORDER BY revenue, name
)
SELECT name,
(CASE
WHEN revenue = 1 THEN 'high'
WHEN revenue = 2 THEN 'average'
ELSE 'low'
END) AS revenue
FROM facility_rank_by_revenue
21. Calculate the payback time for each facility
WITH facility_cost_profile AS (
SELECT DISTINCT name, initialoutlay, monthlymaintenance, SUM((
CASE
WHEN memid = 0 THEN guestcost * slots
ELSE membercost * slots
END) / 3)
OVER (
ORDER BY name
) AS monthly_revenue
FROM cd.facilities f
INNER JOIN cd.bookings b
USING (facid)
)
SELECT name, (
initialoutlay / (monthly_revenue - monthlymaintenance)
) as months
FROM facility_cost_profile
ORDER BY name
22. Calculate a rolling average of total revenue
WITH daily_revenue AS (
SELECT DISTINCT starttime::date as date,
SUM(
CASE
WHEN memid = 0 THEN guestcost * slots
ELSE membercost * slots
END
) AS revenue
FROM cd.bookings b
INNER JOIN cd.facilities f
USING (facid)
GROUP BY date
ORDER BY date
), avg_daily_revenue AS (
SELECT date,
AVG(revenue) OVER (
ROWS BETWEEN 14 PRECEDING AND CURRENT ROW
) AS revenue
FROM daily_revenue
)
SELECT * FROM avg_daily_revenue
WHERE date BETWEEN '2012-08-01' AND '2012-08-31'
3. Generate a list of all the dates in October 2012
SELECT * FROM generate_series(timestamp '2012-10-01 00:00:00', timestamp '2012-10-31 00:00:00', '1 day')
1. Produce a timestamp for 1 a.m. on the 31st of August 2012
-- Two approaches
-- Use a function
SELECT make_timestamp(2012, 8, 31, 1, 0, 0)
-- Create a record with a timestamp data type
SELECT timestamp '2012-08-31 01:00:00'
2. Subtract timestamps from each other
SELECT timestamp '2012-08-31 01:00:00' - timestamp '2012-07-30 01:00:00' AS interval
3. Generate a list of all the dates in October 2012
SELECT * FROM generate_series(timestamp '2012-10-01 00:00:00', timestamp '2012-10-31 00:00:00', '1 day')
4. Get the day of the month from a timestamp
SELECT date_part('day', timestamp '2012-08-31')
OR
-- extract and date_part are equivalent
SELECT extract('day', timestamp '2012-08-31')
5. Work out the number of seconds between timestamps
SELECT EXTRACT(EPOCH FROM timestamp '2012-09-02 00:00:00' - timestamp '2012-08-31 01:00:00')
6. Work out the number of days in each month of 2012
SELECT date_part('month', current_month) AS current_month,
(current_month + interval '1 month') - current_month AS length
FROM generate_series(timestamp '2012-01-01 00:00', timestamp '2012-12-31 23:59', '1 month') AS current_month
7. Work out the number of days remaining in the month
SELECT (date_trunc('month', timestamp '2012-02-11 01:00:00') + '1 month') -
date_trunc('day', timestamp '2012-02-11 01:00:00') AS remaining
-- or using a subquery
SELECT (date_trunc('month', ts) + '1 month') - date_trunc('day', ts)
FROM (SELECT timestamp '2012-02-11 01:00:00' As ts) AS my_time
8. Work out the end time of bookings
SELECT starttime, (starttime + slots * interval '30 minutes') As endtime
FROM cd.bookings
ORDER BY endtime DESC, starttime DESC
LIMIT 10
9. Return a count of bookings for each month
SELECT date_trunc('month', starttime) AS month, COUNT(*)
FROM cd.bookings
GROUP BY month
ORDER BY month
10. Work out the utilisation percentage for each facility by month
WITH booking_detail AS (
SELECT name, date_trunc('month', starttime) AS month, starttime, slots,
starttime + (slots * INTERVAL '30 minutes') AS endtime
FROM cd.facilities
INNER JOIN cd.bookings
USING (facid)
), time_booked_per_appt AS (
SELECT *,
(endtime - starttime) AS booking_duration,
EXTRACT(EPOCH FROM (endtime - starttime)) AS booking_duration_in_sec
FROM booking_detail
), monthly_availability AS (
SELECT month, (month + INTERVAL '1 month') - month AS length,
EXTRACT(EPOCH FROM make_time(20, 30, 0) - make_time(8, 0, 0)) AS daily_availability
FROM booking_detail
GROUP BY month
), time_utilized_by_month AS (
SELECT name, month, SUM(booking_duration_in_sec) AS time_utilized
FROM time_booked_per_appt
GROUP BY name, month
)
SELECT name, tubm.month,
round(CAST (time_utilized / (extract('day' from length) * daily_availability) * 100 AS numeric), 1) AS utilization
FROM time_utilized_by_month tubm
INNER JOIN monthly_availability ma
ON tubm.month = ma.month
ORDER BY name, month
1. Retrieve the start times of members' bookings
SELECT b.starttime
FROM cd.members m
INNER JOIN cd.bookings b
ON m.memid = b.memid
WHERE m.firstname = 'David' AND m.surname = 'Farrell'
2. Work out the start times of bookings for tennis courts
SELECT starttime AS start, name
FROM cd.bookings b
INNER JOIN cd.facilities f
ON b.facid = f.facid
WHERE name LIKE 'Tennis%'
AND date_trunc('day', b.starttime) = '2012-09-21'
ORDER BY start
3. Produce a list of all members who have recommended another member
-- Method 1: Semi-join
SELECT firstname, surname
FROM cd.members
WHERE memid IN (
SELECT DISTINCT(recommendedby)
FROM cd.members
WHERE recommendedby IS NOT NULL
)
ORDER BY surname, firstname
-- Method 2: Self-join using a FROM clause
SELECT DISTINCT m1.firstname, m1.surname
FROM cd.members m1, cd.members m2
WHERE m1.memid = m2.recommendedby
ORDER BY m1.surname, m1.surname
-- Method 3: Self-join using an INNER JOIN
SELECT DISTINCT m1.firstname, m1.surname
FROM cd.members m1
INNER JOIN cd.members m2
ON m1.memid = m2.recommendedby
ORDER BY m1.surname, m1.surname
4. Produce a list of all members, along with their recommender
SELECT m1.firstname AS memfname, m1.surname AS memsname, m2.firstname AS recfname, m2.surname AS recsname
FROM cd.members m1
LEFT OUTER JOIN cd.members m2
ON m1.recommendedby = m2.memid
ORDER BY memsname, memfname
5. Produce a list of all members who have used a tennis court
SELECT DISTINCT m.firstname || ' ' || m.surname AS member, f.name AS facility
FROM cd.members AS m
INNER JOIN cd.bookings AS b
USING (memid)
INNER JOIN cd.facilities AS f
ON b.facid = f.facid AND f.name LIKE 'Tennis%' -- placing the condition in the join clause but could have used a WHERE clause too
ORDER BY member
6. Produce a list of costly bookings
SELECT m.firstname || ' ' || m.surname AS member, name,
CASE WHEN m.surname = 'GUEST' THEN slots * guestcost
ELSE slots * membercost
END AS cost
FROM cd.members AS m
INNER JOIN cd.bookings AS b
USING (memid)
INNER JOIN cd.facilities AS f
USING (facid)
WHERE date_trunc('day', starttime) = '2012-09-14'
-- the WHERE clause is applied before the SELECT,
-- so we can't refer to the alias name, ie cost > 30
AND (
CASE WHEN m.surname = 'GUEST' THEN slots * guestcost
ELSE slots * membercost
END
) > 30
ORDER BY cost DESC
7. Produce a list of all members, along with their recommender, using no joins.
-- Use a correlated subquery within a SELECT clause to fetch the recommender
-- for each member. The subquery is run for each record in the outer query
SELECT DISTINCT m.firstname || ' ' || m.surname AS member, (
SELECT recommenders.firstname || ' ' || recommenders.surname
FROM cd.members AS recommenders
WHERE m.recommendedby = recommenders.memid) AS recommender
FROM cd.members m
ORDER BY member ASC
8. Produce a list of costly bookings, using a subquery
-- booking costs is a temporary table that computes each bookings' cost.
-- This temporary table is then join to the members table to link it to the member's name
-- I prefer this to the solution given because the booking costs are separate from
-- the members table and there's one less join in the subquery
SELECT concat_ws(' ', m.firstname, m.surname ) AS member, name, cost
FROM cd.members m,
(SELECT memid, name, starttime,
CASE WHEN b.memid = 0 THEN (f.guestcost * b.slots)
ELSE (f.membercost * b.slots)
END AS cost
FROM cd.bookings b
INNER JOIN cd.facilities f
USING (facid)) AS booking_costs
WHERE m.memid = booking_costs.memid
AND cost > 30
AND date_trunc('day', booking_costs.starttime) = '2012-09-14'
ORDER BY cost DESC
1. Insert some data into a table
Approach 1:
INSERT INTO cd.facilities VALUES
(9, 'Spa', 20, 30, 100000, 800)
Approach 2: Specify column names
INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance)
VALUES
(9, 'Spa', 20, 30, 100000, 800)
2. Insert multiple rows of data into a table
INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance) VALUES
(9, 'Spa', 20, 30, 100000, 800),
(10, 'Squash Court 2', 3.5, 17.5, 5000, 80)
3. Insert calculated data into a table
Approach 1: Use the SELECT in place of VALUES
INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance) SELECT
(SELECT MAX(facid) FROM cd.facilities) + 1, 'Spa', 20, 30, 100000, 800
Approach 2: Use VALUES with computed data
INSERT INTO cd.facilities (facid, name, membercost, guestcost, initialoutlay, monthlymaintenance) VALUES
((SELECT MAX(facid) + 1 FROM cd.facilities), 'Spa', 20, 30, 100000, 800)
4. Update some existing data
UPDATE cd.facilities
SET initialoutlay = 10000
WHERE facid = 1
5. Update multiple rows and columns at the same time
UPDATE cd.facilities
SET membercost = 6, guestcost = 30
WHERE name LIKE 'Tennis Court%'
6. Update a row based on the contents of another row
-- Use UPDATE FROM
UPDATE cd.facilities AS court_2
SET membercost = 1.1 * court_1.membercost,
guestcost = 1.1 * court_1.guestcost
FROM (SELECT * FROM cd.facilities WHERE facid = 0) AS court_1
WHERE court_2.facid = 1
7. Delete all bookings
DELETE FROM cd.bookings
OR
TRUNCATE cd.bookings
8. Delete a member from the cd.members table
DELETE FROM cd.members WHERE memid = 37
9. Delete based on a subquery
DELETE FROM cd.members
WHERE memid NOT IN (
SELECT DISTINCT memid
FROM cd.bookings
)
1. Find the upward recommendation chain for member ID 27
-- Solution 1: Use a INNER JOIN
WITH RECURSIVE recommenders(recommender) AS (
-- Return the first record that will be UNIONed with subsequent iterations
SELECT recommendedby AS recommender
FROM cd.members
WHERE memid = 27
UNION ALL
-- Add another record during each iteration by joining the recommendedby
-- ID in the table above with the memid of the table below
SELECT m.recommendedby AS recommender
FROM recommenders r
INNER JOIN cd.members m
ON r.recommender = m.memid
)
-- JOIN the ids of the recommenders table to the members table to get info on them
SELECT recommender, firstname, surname
FROM recommenders r
INNER JOIN cd.members m
ON r.recommender = m.memid
ORDER BY recommender DESC
-- Solution 2: Use a subquery in the table that is UNIONed
WITH RECURSIVE recommenders(recommender) AS (
SELECT recommendedby AS recommender
FROM cd.members
WHERE memid = 27
UNION ALL
SELECT m.recommendedby AS recommender
FROM recommenders r, cd.members m
WHERE r.recommender = m.memid
)
SELECT recommender, firstname, surname
FROM recommenders r
INNER JOIN cd.members m
ON r.recommender = m.memid
ORDER BY recommender DESC
2. Find the downward recommendation chain for member ID 1
-- Here I'm using a subquery in the FROM statement, but we can
-- just as easily use an INNER JOIN
WITH RECURSIVE recommendees(memid, firstname, surname) AS (
SELECT memid, firstname, surname
FROM cd.members
WHERE recommendedby = 1
UNION ALL
SELECT m.memid, m.firstname, m.surname
FROM recommendees r, cd.members m
WHERE r.memid = m.recommendedby
)
SELECT * FROM recommendees
ORDER BY memid ASC
3. Produce a CTE that can return the upward recommendation chain for any member
WITH RECURSIVE recommenders(memid, recommender) AS (
SELECT memid, recommendedby AS recommender
FROM cd.members
UNION ALL
SELECT r.memid, recommendedby
FROM recommenders r, cd.members m
WHERE r.recommender = m.memid
)
SELECT r.memid, recommender, firstname, surname
FROM recommenders r
INNER JOIN cd.members m
ON r.recommender = m.memid
WHERE r.memid IN (12, 22)
ORDER BY r.memid ASC, r.recommender DESC
1. Retrieve everything from a table
SELECT * FROM cd.facilities
2. Retrieve specific columns from a table
SELECT name, membercost FROM cd.facilities
3. Control which rows are retrieved
SELECT * FROM cd.facilities
WHERE membercost > 0
4. Control which rows are retrieved - part 2
SELECT facid, name, membercost, monthlymaintenance FROM cd.facilities
WHERE membercost > 0
AND membercost < (monthlymaintenance / 50)
5. Basic string searches
# Using LIKE
SELECT *
FROM cd.facilities
WHERE name LIKE '%Tennis%'
# Using SIMILAR TO
SELECT *
FROM cd.facilities
WHERE name SIMILAR TO '%Tennis%'
# Using Regular Expressions
SELECT *
FROM cd.facilities
WHERE name ~ '.*Tennis.*'
7. Classify results into buckets
SELECT name,
CASE
WHEN monthlymaintenance > 100 THEN 'expensive'
ELSE 'cheap' END AS cost
FROM cd.facilities
8. Working with dates
SELECT m.memid, m.surname, m.firstname, m.joindate
FROM cd.members AS m
WHERE m.joindate >= date '2012-09-01' -- explicitly cast to a date type
9. Removing duplicates, and ordering results
SELECT DISTINCT(surname)
FROM cd.members
ORDER BY surname
LIMIT 10
10. Combining results from multiple queries
SELECT surname
FROM cd.members
UNION
SELECT name
FROM cd.facilities
11. Simple aggregation
SELECT MAX(m.joindate) AS lastest_signup_date
FROM cd.members m
12. More aggregation
-- We first do a subquery to get the latest signup date for all members. Then, we use HAVING to limit the
aggregation to values greater than or equal to this value. Also note that using the primary key of the
members table (memid) allows us to avoid putting unnecessary columns in the GROUP BY that are in the SELECT,
but only for presentation purposes.
SELECT m.firstname, m.surname, MAX(m.joindate) AS latest_signup_date
FROM cd.members m
GROUP BY m.memid
HAVING m.joindate >= (
SELECT MAX(m2.joindate)
FROM cd.members m2
)
1. Format the names of members
-- using the concat_ws function
SELECT concat_ws(', ', surname, firstname) AS name
FROM cd.members
-- we can also use the concat function
SELECT concat(surname, ', ', firstname) AS name
FROM cd.members
-- or the || operator
SELECT surname || ', ' || firstname AS name
FROM cd.members
2. Find facilities by a name prefix
-- using LIKE
SELECT *
FROM cd.facilities
WHERE name LIKE 'Tennis%'
-- using regexes
SELECT *
FROM cd.facilities
WHERE name ~ '^Tennis'
3. Perform a case-insensitive search
-- using regexs
SELECT *
FROM cd.facilities
WHERE name ~* '^tennis'
-- using ILIKE
SELECT *
FROM cd.facilities
WHERE name ILIKE 'Tennis%'
-- using lower()
SELECT *
FROM cd.facilities
WHERE lower(name) LIKE 'tennis%'
-- using upper()
SELECT *
FROM cd.facilities
WHERE upper(name) LIKE 'TENNIS%'
4. Find telephone numbers with parentheses
SELECT memid, telephone
FROM cd.members
WHERE telephone SIMILAR TO '\([0-9]{3}\)%'
ORDER BY memid
5. Pad zip codes with leading zeroes
-- using to_char
SELECT to_char(zipcode, '09999') AS formatted_zip_code
FROM cd.members
ORDER BY formatted_zip_code
-- Postgres-specific casting
SELECT lpad(zipcode::text, 5, '0') AS formatted_zip_code
FROM cd.members
ORDER BY formatted_zip_code
-- SQL-compliant casting
SELECT lpad(cast(zipcode AS char(5)), 5, '0') AS formatted_zip_code
FROM cd.members
ORDER BY formatted_zip_code
6. Count the number of members whose surname starts with each letter of the alphabet
SELECT upper(left(surname, 1)) AS letter, COUNT(*)
FROM cd.members
GROUP BY letter
ORDER BY letter
7. Clean up telephone numbers
-- using regex_replace
SELECT memid, regexp_replace(telephone, '[\s\(\)-]', '', 'g') AS telephone
FROM cd.members
ORDER BY memid
-- using translate
SELECT memid, translate(telephone, ' ()-', '') AS telephone
FROM cd.members
ORDER BY memid
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment