Skip to content

Instantly share code, notes, and snippets.

@ciardullo-apps
Created October 11, 2019 00:26
Show Gist options
  • Save ciardullo-apps/bfbe81beab28d48d0908915de5dc8893 to your computer and use it in GitHub Desktop.
Save ciardullo-apps/bfbe81beab28d48d0908915de5dc8893 to your computer and use it in GitHub Desktop.
chinook-2.sql
-- ------------------------
-- Demonstrates SQL statements in MySQL, such as
-- Subqueries
-- JOIN and GROUP BY
-- OUTER JOIN
-- Named subqueries
-- Correlated subqueries
-- CASE Statements
-- Window Functions
-- ROLLUP extension for GROUP BY
-- ------------------------
-- ------------------------
-- Subqueries
-- ------------------------
-- Show count of unsold tracks
SELECT count(t.TrackId)
FROM Track t
WHERE t.TrackId NOT IN (SELECT DISTINCT TrackId FROM InvoiceLine);
-- ------------------------
-- JOIN and GROUP BY
-- ------------------------
-- Show count of unsold tracks using an outer join
SELECT count(t.TrackId)
FROM Track t LEFT OUTER JOIN InvoiceLine l USING (TrackId)
WHERE l.Trackid IS NULL;
-- Show all tracks and total sales, 0 for unsold tracks
SELECT t.TrackId, SUM(IFNULL(Quantity, 0)), SUM(iFnull(l.UnitPrice, 0) * iFnull(Quantity, 0))
FROM Track t
LEFT OUTER JOIN InvoiceLine l USING (TrackId)
GROUP BY t.TrackId
ORDER BY 1 desc;
-- Show sales by month and year
select year(invoicedate), month(invoicedate), count(*), sum(total)
from Invoice
group by year(invoicedate), month(invoicedate);
-- List number of tracks sold and total sales for each country
SELECT c.Country, COUNT(*) as ct, sum(Quantity), sum(Total) as tt
From Customer c JOIN Invoice i on i.CustomerId = c.CustomerId
JOIN InvoiceLine l ON i.InvoiceId = l.InvoiceId
GROUP BY c.Country;
-- List number of tracks sold and total sales for each genre
-- Join with subquery for name
SELECT Name, ct, tt
FROM Genre JOIN (
SELECT g.GenreId, COUNT(*) as ct, SUM(Quantity), SUM(l.UnitPrice * Quantity) as tt
From Genre g JOIN Track t on t.GenreId = g.GenreId
JOIN InvoiceLine l on l.TrackId = t.TrackId
GROUP BY GenreId
) gg ON Genre.GenreId = gg.GenreId;
-- Which customer bought the most TV Shows songs?
SELECT CustomerId, COUNT(*) as totes
FROM Customer c
JOIN Invoice i USING(CustomerId)
JOIN InvoiceLine l USING (InvoiceId)
JOIN Track t USING (TrackId)
JOIN Genre g USING (GenreId)
WHERE g.Name = 'TV Shows'
GROUP BY CustomerId
ORDER BY totes desc LIMIT 1;
-- Which sales agent made the most in sales over all?
-- Join with subquery for name
SELECT firstname, lastname, total_sales
FROM Employee e
JOIN (
SELECT SupportRepId, SUM(Total) as total_sales
FROM Customer c
JOIN Invoice i on c.CustomerId = i.CustomerId
GROUP BY SupportRepId) a
ON e.EmployeeId = a.SupportRepId;
-- Show all albums with no tracks sold
SELECT m.AlbumId, count(l.TrackId)
FROM InvoiceLine l
JOIN Track t USING(TrackId)
RIGHT OUTER JOIN Album m using(AlbumId)
GROUP BY m.AlbumId
HAVING COUNT(l.TrackId) = 0;
-- Show tracks from Album ID 1 and number of times they sold, 0 if unsold
-- Outer Join with named subquery
select name, total_tracks_sold
from Track JOIN (
select t.trackid, count(il.InvoiceLineId) as total_tracks_sold
From Track t left outer join InvoiceLine il
on t.trackid = il.trackid
WHERE AlbumId = 1
group by t.trackid) a ON Track.trackid = a.trackid;
-- All Customer First name and Invoice total for invoices not containing tracks in genres Rock, Metal, Alternative & Punk, or Latin
SELECT c.CustomerId, c.FirstName, SUM(Total)
FROM Customer c JOIN Invoice i on c.CustomerId = i.CustomerId
JOIN InvoiceLine l on i.InvoiceId = l.InvoiceId
Join Track t on l.TrackId = t.TrackId
JOIN Genre g on t.GenreId = g.GenreId
WHERE g.Name NOT IN ('Rock', 'Metal', 'Alternative & Punk', 'Latin')
GROUP BY c.CustomerId, c.FirstName;
-- Does every album have at least one track sold?
-- No, there are 43 albums with no tracks sold
SELECT COUNT(*) FROM Album; -- Returns 347
SELECT COUNT(DISTINCT m.AlbumId) -- Returns 304
FROM Album m
JOIN Track t USING(AlbumId)
JOIN InvoiceLine l USING(TrackId);
-- ------------------------
-- Correlated Subquery
-- ------------------------
-- All Customer First name and Invoice total for customers whose sales rep is not Jane (emp id 3) or Margaret (4).
-- Should return only customers whose salesrep is Steve (5)
SELECT c1.FirstName, i.Total, i.InvoiceId
FROM Invoice i
JOIN Customer c1 on i.CustomerId = c1.CustomerId
WHERE NOT EXISTS (
SELECT *
FROM Customer c2
JOIN Employee e on c2.SupportRepId = e.EmployeeId
WHERE c1.CustomerId = c2.CustomerId
AND e.FirstName IN ('Jane', 'Margaret')
)
ORDER BY InvoiceId;
-- All Customer First name for customers who have never bought a Latin track
SELECT c.CustomerId, c.FirstName
FROM Customer c
WHERE NOT EXISTS (
SELECT *
FROM Invoice i
JOIN InvoiceLine l on i.InvoiceId = l.InvoiceId
JOIN Track t on l.TrackId = t.TrackId
JOIN Genre g on t.GenreId = g.GenreId
WHERE c.CustomerId = i.CustomerId AND
g.Name IN ('Latin')
);
-- Test for above query. The customerids in the where clause were returned by the above query,
-- You should not see the Genre specified in the where clause of the above query
select distinct g.name from Invoice i
join InvoiceLine l on i.invoiceid = l.invoiceid
join Track t on l.TrackId = t.TrackId
join Genre g on t.GenreId = g.GenreId
where customerid in (8, 30, 43)
order by 1;
-- Correlated Subquery
-- Every track and total sales for all tracks not Rock, Metal, Alternative & Punk, or Latin
SELECT t1.TrackId, count(*), SUM(l.UnitPrice * Quantity)
FROM Track t1 JOIN InvoiceLine l on t1.TrackId = l.TrackId
WHERE NOT EXISTS (
SELECT *
FROM Track t2 JOIN Genre g on t2.GenreId = g.GenreId
WHERE t1.TrackId = t2.TrackId
AND g.Name IN ('Rock', 'Metal', 'Alternative & Punk', 'Latin')
)
GROUP BY t1.TrackId
order by 1;
-- ------------------------
-- CASE Statements
-- ------------------------
-- SEARCHED CASE statement
SELECT c.FirstName,
c.Country,
CASE
WHEN c.Country IN ('USA', 'Canada', 'Mexico') THEN 'North America'
ELSE 'Elsewhere'
END as Continent
FROM Customer c;
-- SEARCHED CASE statement
SELECT Name,
CASE
WHEN UnitPrice < 1.00 THEN 'Economy'
WHEN UnitPrice BETWEEN 1.00 and 2.00 THEN 'Average'
ELSE 'Expensive'
END as 'Category'
FROM Track;
-- SIMPLE CASE
SELECT Quantity,
CASE Quantity
WHEN 1 THEN 'ONE'
WHEN 2 THEN 'TWO'
ELSE 'MORE'
END as ASD
FROM InvoiceLine;
-- ------------------------
-- DATE functions in MySQL
-- ------------------------
-- Show sales by day of week
select dayofweek(invoicedate) as dow, count(*) as ct, sum(total) as sm
from Invoice
group by dayofweek(invoicedate)
order by 1;
-- Show sales by day of week name (uses above as named subquery)
select
CASE dow
WHEN 1 THEN 'Sunday'
WHEN 2 THEN 'Monday'
WHEN 3 THEN 'Tuesday'
WHEN 4 THEN 'Wednesday'
WHEN 5 THEN 'Thursday'
WHEN 6 THEN 'Friday'
WHEN 7 THEN 'Saturday'
END as ddow,
ct, sm
FROM (
select dayofweek(invoicedate) as dow, count(*) as ct, sum(total) as sm
from Invoice
group by dayofweek(invoicedate)
) a
order by dow;
-- ------------------------
-- Window Functions
-- ------------------------
-- Customer activity by year/month/customer with totals per customer
SELECT customerid, year(invoicedate), month(invoicedate), count(*), sum(total), sum(sum(total)) over(partition by customerid)
FROM Invoice
GROUP BY customerid, year(invoicedate), month(invoicedate)
order by CustomerId, year(invoicedate), month(invoicedate);
-- Window Functions: shows sales by year/month with additional column for totals per year (the WINDOW)
SELECT year(invoicedate), month(invoicedate), count(*), sum(total), sum(sum(total)) over(partition by year(invoicedate))
FROM Invoice
GROUP BY year(invoicedate), month(invoicedate)
order by year(invoicedate), month(invoicedate);
-- ------------------------
-- ROLLUP
-- ------------------------
-- ROLLUP: shows totals by year and month with additional rows for totals by year and grand totals
SELECT year(invoicedate) as yy, month(invoicedate) as mm, sum(total) as tot
FROM Invoice
GROUP BY year(invoicedate), month(invoicedate) WITH ROLLUP
ORDER BY -yy desc, -mm desc; -- for nulls last, prefix column with minus and switch to desc
-- Which customer has the most diverse taste in music? I.e. bought songs in the most genres?
-- Customer 57 bought songs in 12 of the 25 genres.
SELECT CustomerId, COUNT(DISTINCT g.GenreId) as totes
FROM Customer c
JOIN Invoice i USING(CustomerId)
JOIN InvoiceLine l USING (InvoiceId)
JOIN Track t USING (TrackId)
JOIN Genre g USING (GenreId)
GROUP BY CustomerId
ORDER BY totes desc LIMIT 1;
-- Test for above - returns the same number of rows as the count from the above query
SELECT g.Name, count(*)
FROM Customer c
JOIN Invoice i USING(CustomerId)
JOIN InvoiceLine l USING (InvoiceId)
JOIN Track t USING (TrackId)
JOIN Genre g USING (GenreId)
WHERE c.CustomerId = 57
GROUP BY g.Name;
-- ------------------------
-- Putting it all together with...Led Zeppelin!
-- ------------------------
-- Show all Led Zeppelin tracks sold
SELECT DISTINCT t.trackid
FROM Album m
JOIN Track t ON m.albumid = t.albumid
JOIN InvoiceLine il ON t.trackid = il.trackid
JOIN Artist r ON m.artistid = r.artistid
WHERE r.name = 'Led Zeppelin';
-- Show all Led Zeppelin songs that have sold more than once
SELECT l.TrackId, COUNT(*)
FROM Artist r
JOIN Album m USING(ArtistId)
JOIN Track t USING(AlbumId)
JOIN InvoiceLine l USING(TrackId)
WHERE r.name = 'Led Zeppelin'
GROUP BY l.TrackId
HAVING COUNT(*) > 1
ORDER BY l.TrackId;
-- Show all Led Zeppelin albums with at least one track being sold more than once
SELECT albumid, count(*) as TotalTracksSold, count(DISTINCT l.TrackId) as UniqueTotalTracksSold
from Artist r
JOIN Album m USING(ArtistId)
JOIN Track t using(AlbumId)
JOIN InvoiceLine l USING(TrackId)
WHERE r.name = 'Led Zeppelin'
GROUP BY AlbumId
HAVING count(*) != count(DISTINCT l.TrackId);
-- All artists with more albums than Led Zeppelin
-- JOIN with GROUP BY and HAVING with subquery, use named subquery for name
SELECT name, NumAlbums
FROM Artist
JOIN (
SELECT ArtistId, COUNT(AlbumId) NumAlbums
FROM Album
GROUP BY ArtistId
HAVING COUNT(AlbumId) > (
SELECT COUNT(al.AlbumId)
FROM Album al JOIN Artist ar ON al.ArtistId = ar.ArtistId
WHERE ar.Name IN ('Led Zeppelin')
GROUP BY ar.ArtistId )
) b ON Artist.artistid = b.artistid;
-- Use correlated subquery to show all unsold Led Zeppelin track ids with album ids
SELECT m.AlbumId, t.TrackId
FROM Artist r
JOIN Album m USING(ArtistId)
JOIN Track t using(AlbumId)
WHERE r.name = 'Led Zeppelin'
AND NOT EXISTS
(SELECT *
FROM InvoiceLine l
WHERE l.TrackId = t.TrackId);
-- Same query as above, with album and track names
select m2.Title, t2.TrackId, t2.Name
from Album m2
JOIN Track t2 USING(AlbumId)
JOIN (
SELECT m.AlbumId, t.TrackId
FROM Artist r
JOIN Album m USING(ArtistId)
JOIN Track t using(AlbumId)
WHERE r.name = 'Led Zeppelin'
AND NOT EXISTS
(SELECT *
FROM InvoiceLine l
WHERE l.TrackId = t.TrackId)
) a
ON t2.TrackId = a.TrackId;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment