-
-
Save ciardullo-apps/bfbe81beab28d48d0908915de5dc8893 to your computer and use it in GitHub Desktop.
chinook-2.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- ------------------------ | |
-- Demonstrates SQL statements in MySQL, such as | |
-- Subqueries | |
-- JOIN and GROUP BY | |
-- OUTER JOIN | |
-- Named subqueries | |
-- Correlated subqueries | |
-- CASE Statements | |
-- Window Functions | |
-- ROLLUP extension for GROUP BY | |
-- ------------------------ | |
-- ------------------------ | |
-- Subqueries | |
-- ------------------------ | |
-- Show count of unsold tracks | |
SELECT count(t.TrackId) | |
FROM Track t | |
WHERE t.TrackId NOT IN (SELECT DISTINCT TrackId FROM InvoiceLine); | |
-- ------------------------ | |
-- JOIN and GROUP BY | |
-- ------------------------ | |
-- Show count of unsold tracks using an outer join | |
SELECT count(t.TrackId) | |
FROM Track t LEFT OUTER JOIN InvoiceLine l USING (TrackId) | |
WHERE l.Trackid IS NULL; | |
-- Show all tracks and total sales, 0 for unsold tracks | |
SELECT t.TrackId, SUM(IFNULL(Quantity, 0)), SUM(iFnull(l.UnitPrice, 0) * iFnull(Quantity, 0)) | |
FROM Track t | |
LEFT OUTER JOIN InvoiceLine l USING (TrackId) | |
GROUP BY t.TrackId | |
ORDER BY 1 desc; | |
-- Show sales by month and year | |
select year(invoicedate), month(invoicedate), count(*), sum(total) | |
from Invoice | |
group by year(invoicedate), month(invoicedate); | |
-- List number of tracks sold and total sales for each country | |
SELECT c.Country, COUNT(*) as ct, sum(Quantity), sum(Total) as tt | |
From Customer c JOIN Invoice i on i.CustomerId = c.CustomerId | |
JOIN InvoiceLine l ON i.InvoiceId = l.InvoiceId | |
GROUP BY c.Country; | |
-- List number of tracks sold and total sales for each genre | |
-- Join with subquery for name | |
SELECT Name, ct, tt | |
FROM Genre JOIN ( | |
SELECT g.GenreId, COUNT(*) as ct, SUM(Quantity), SUM(l.UnitPrice * Quantity) as tt | |
From Genre g JOIN Track t on t.GenreId = g.GenreId | |
JOIN InvoiceLine l on l.TrackId = t.TrackId | |
GROUP BY GenreId | |
) gg ON Genre.GenreId = gg.GenreId; | |
-- Which customer bought the most TV Shows songs? | |
SELECT CustomerId, COUNT(*) as totes | |
FROM Customer c | |
JOIN Invoice i USING(CustomerId) | |
JOIN InvoiceLine l USING (InvoiceId) | |
JOIN Track t USING (TrackId) | |
JOIN Genre g USING (GenreId) | |
WHERE g.Name = 'TV Shows' | |
GROUP BY CustomerId | |
ORDER BY totes desc LIMIT 1; | |
-- Which sales agent made the most in sales over all? | |
-- Join with subquery for name | |
SELECT firstname, lastname, total_sales | |
FROM Employee e | |
JOIN ( | |
SELECT SupportRepId, SUM(Total) as total_sales | |
FROM Customer c | |
JOIN Invoice i on c.CustomerId = i.CustomerId | |
GROUP BY SupportRepId) a | |
ON e.EmployeeId = a.SupportRepId; | |
-- Show all albums with no tracks sold | |
SELECT m.AlbumId, count(l.TrackId) | |
FROM InvoiceLine l | |
JOIN Track t USING(TrackId) | |
RIGHT OUTER JOIN Album m using(AlbumId) | |
GROUP BY m.AlbumId | |
HAVING COUNT(l.TrackId) = 0; | |
-- Show tracks from Album ID 1 and number of times they sold, 0 if unsold | |
-- Outer Join with named subquery | |
select name, total_tracks_sold | |
from Track JOIN ( | |
select t.trackid, count(il.InvoiceLineId) as total_tracks_sold | |
From Track t left outer join InvoiceLine il | |
on t.trackid = il.trackid | |
WHERE AlbumId = 1 | |
group by t.trackid) a ON Track.trackid = a.trackid; | |
-- All Customer First name and Invoice total for invoices not containing tracks in genres Rock, Metal, Alternative & Punk, or Latin | |
SELECT c.CustomerId, c.FirstName, SUM(Total) | |
FROM Customer c JOIN Invoice i on c.CustomerId = i.CustomerId | |
JOIN InvoiceLine l on i.InvoiceId = l.InvoiceId | |
Join Track t on l.TrackId = t.TrackId | |
JOIN Genre g on t.GenreId = g.GenreId | |
WHERE g.Name NOT IN ('Rock', 'Metal', 'Alternative & Punk', 'Latin') | |
GROUP BY c.CustomerId, c.FirstName; | |
-- Does every album have at least one track sold? | |
-- No, there are 43 albums with no tracks sold | |
SELECT COUNT(*) FROM Album; -- Returns 347 | |
SELECT COUNT(DISTINCT m.AlbumId) -- Returns 304 | |
FROM Album m | |
JOIN Track t USING(AlbumId) | |
JOIN InvoiceLine l USING(TrackId); | |
-- ------------------------ | |
-- Correlated Subquery | |
-- ------------------------ | |
-- All Customer First name and Invoice total for customers whose sales rep is not Jane (emp id 3) or Margaret (4). | |
-- Should return only customers whose salesrep is Steve (5) | |
SELECT c1.FirstName, i.Total, i.InvoiceId | |
FROM Invoice i | |
JOIN Customer c1 on i.CustomerId = c1.CustomerId | |
WHERE NOT EXISTS ( | |
SELECT * | |
FROM Customer c2 | |
JOIN Employee e on c2.SupportRepId = e.EmployeeId | |
WHERE c1.CustomerId = c2.CustomerId | |
AND e.FirstName IN ('Jane', 'Margaret') | |
) | |
ORDER BY InvoiceId; | |
-- All Customer First name for customers who have never bought a Latin track | |
SELECT c.CustomerId, c.FirstName | |
FROM Customer c | |
WHERE NOT EXISTS ( | |
SELECT * | |
FROM Invoice i | |
JOIN InvoiceLine l on i.InvoiceId = l.InvoiceId | |
JOIN Track t on l.TrackId = t.TrackId | |
JOIN Genre g on t.GenreId = g.GenreId | |
WHERE c.CustomerId = i.CustomerId AND | |
g.Name IN ('Latin') | |
); | |
-- Test for above query. The customerids in the where clause were returned by the above query, | |
-- You should not see the Genre specified in the where clause of the above query | |
select distinct g.name from Invoice i | |
join InvoiceLine l on i.invoiceid = l.invoiceid | |
join Track t on l.TrackId = t.TrackId | |
join Genre g on t.GenreId = g.GenreId | |
where customerid in (8, 30, 43) | |
order by 1; | |
-- Correlated Subquery | |
-- Every track and total sales for all tracks not Rock, Metal, Alternative & Punk, or Latin | |
SELECT t1.TrackId, count(*), SUM(l.UnitPrice * Quantity) | |
FROM Track t1 JOIN InvoiceLine l on t1.TrackId = l.TrackId | |
WHERE NOT EXISTS ( | |
SELECT * | |
FROM Track t2 JOIN Genre g on t2.GenreId = g.GenreId | |
WHERE t1.TrackId = t2.TrackId | |
AND g.Name IN ('Rock', 'Metal', 'Alternative & Punk', 'Latin') | |
) | |
GROUP BY t1.TrackId | |
order by 1; | |
-- ------------------------ | |
-- CASE Statements | |
-- ------------------------ | |
-- SEARCHED CASE statement | |
SELECT c.FirstName, | |
c.Country, | |
CASE | |
WHEN c.Country IN ('USA', 'Canada', 'Mexico') THEN 'North America' | |
ELSE 'Elsewhere' | |
END as Continent | |
FROM Customer c; | |
-- SEARCHED CASE statement | |
SELECT Name, | |
CASE | |
WHEN UnitPrice < 1.00 THEN 'Economy' | |
WHEN UnitPrice BETWEEN 1.00 and 2.00 THEN 'Average' | |
ELSE 'Expensive' | |
END as 'Category' | |
FROM Track; | |
-- SIMPLE CASE | |
SELECT Quantity, | |
CASE Quantity | |
WHEN 1 THEN 'ONE' | |
WHEN 2 THEN 'TWO' | |
ELSE 'MORE' | |
END as ASD | |
FROM InvoiceLine; | |
-- ------------------------ | |
-- DATE functions in MySQL | |
-- ------------------------ | |
-- Show sales by day of week | |
select dayofweek(invoicedate) as dow, count(*) as ct, sum(total) as sm | |
from Invoice | |
group by dayofweek(invoicedate) | |
order by 1; | |
-- Show sales by day of week name (uses above as named subquery) | |
select | |
CASE dow | |
WHEN 1 THEN 'Sunday' | |
WHEN 2 THEN 'Monday' | |
WHEN 3 THEN 'Tuesday' | |
WHEN 4 THEN 'Wednesday' | |
WHEN 5 THEN 'Thursday' | |
WHEN 6 THEN 'Friday' | |
WHEN 7 THEN 'Saturday' | |
END as ddow, | |
ct, sm | |
FROM ( | |
select dayofweek(invoicedate) as dow, count(*) as ct, sum(total) as sm | |
from Invoice | |
group by dayofweek(invoicedate) | |
) a | |
order by dow; | |
-- ------------------------ | |
-- Window Functions | |
-- ------------------------ | |
-- Customer activity by year/month/customer with totals per customer | |
SELECT customerid, year(invoicedate), month(invoicedate), count(*), sum(total), sum(sum(total)) over(partition by customerid) | |
FROM Invoice | |
GROUP BY customerid, year(invoicedate), month(invoicedate) | |
order by CustomerId, year(invoicedate), month(invoicedate); | |
-- Window Functions: shows sales by year/month with additional column for totals per year (the WINDOW) | |
SELECT year(invoicedate), month(invoicedate), count(*), sum(total), sum(sum(total)) over(partition by year(invoicedate)) | |
FROM Invoice | |
GROUP BY year(invoicedate), month(invoicedate) | |
order by year(invoicedate), month(invoicedate); | |
-- ------------------------ | |
-- ROLLUP | |
-- ------------------------ | |
-- ROLLUP: shows totals by year and month with additional rows for totals by year and grand totals | |
SELECT year(invoicedate) as yy, month(invoicedate) as mm, sum(total) as tot | |
FROM Invoice | |
GROUP BY year(invoicedate), month(invoicedate) WITH ROLLUP | |
ORDER BY -yy desc, -mm desc; -- for nulls last, prefix column with minus and switch to desc | |
-- Which customer has the most diverse taste in music? I.e. bought songs in the most genres? | |
-- Customer 57 bought songs in 12 of the 25 genres. | |
SELECT CustomerId, COUNT(DISTINCT g.GenreId) as totes | |
FROM Customer c | |
JOIN Invoice i USING(CustomerId) | |
JOIN InvoiceLine l USING (InvoiceId) | |
JOIN Track t USING (TrackId) | |
JOIN Genre g USING (GenreId) | |
GROUP BY CustomerId | |
ORDER BY totes desc LIMIT 1; | |
-- Test for above - returns the same number of rows as the count from the above query | |
SELECT g.Name, count(*) | |
FROM Customer c | |
JOIN Invoice i USING(CustomerId) | |
JOIN InvoiceLine l USING (InvoiceId) | |
JOIN Track t USING (TrackId) | |
JOIN Genre g USING (GenreId) | |
WHERE c.CustomerId = 57 | |
GROUP BY g.Name; | |
-- ------------------------ | |
-- Putting it all together with...Led Zeppelin! | |
-- ------------------------ | |
-- Show all Led Zeppelin tracks sold | |
SELECT DISTINCT t.trackid | |
FROM Album m | |
JOIN Track t ON m.albumid = t.albumid | |
JOIN InvoiceLine il ON t.trackid = il.trackid | |
JOIN Artist r ON m.artistid = r.artistid | |
WHERE r.name = 'Led Zeppelin'; | |
-- Show all Led Zeppelin songs that have sold more than once | |
SELECT l.TrackId, COUNT(*) | |
FROM Artist r | |
JOIN Album m USING(ArtistId) | |
JOIN Track t USING(AlbumId) | |
JOIN InvoiceLine l USING(TrackId) | |
WHERE r.name = 'Led Zeppelin' | |
GROUP BY l.TrackId | |
HAVING COUNT(*) > 1 | |
ORDER BY l.TrackId; | |
-- Show all Led Zeppelin albums with at least one track being sold more than once | |
SELECT albumid, count(*) as TotalTracksSold, count(DISTINCT l.TrackId) as UniqueTotalTracksSold | |
from Artist r | |
JOIN Album m USING(ArtistId) | |
JOIN Track t using(AlbumId) | |
JOIN InvoiceLine l USING(TrackId) | |
WHERE r.name = 'Led Zeppelin' | |
GROUP BY AlbumId | |
HAVING count(*) != count(DISTINCT l.TrackId); | |
-- All artists with more albums than Led Zeppelin | |
-- JOIN with GROUP BY and HAVING with subquery, use named subquery for name | |
SELECT name, NumAlbums | |
FROM Artist | |
JOIN ( | |
SELECT ArtistId, COUNT(AlbumId) NumAlbums | |
FROM Album | |
GROUP BY ArtistId | |
HAVING COUNT(AlbumId) > ( | |
SELECT COUNT(al.AlbumId) | |
FROM Album al JOIN Artist ar ON al.ArtistId = ar.ArtistId | |
WHERE ar.Name IN ('Led Zeppelin') | |
GROUP BY ar.ArtistId ) | |
) b ON Artist.artistid = b.artistid; | |
-- Use correlated subquery to show all unsold Led Zeppelin track ids with album ids | |
SELECT m.AlbumId, t.TrackId | |
FROM Artist r | |
JOIN Album m USING(ArtistId) | |
JOIN Track t using(AlbumId) | |
WHERE r.name = 'Led Zeppelin' | |
AND NOT EXISTS | |
(SELECT * | |
FROM InvoiceLine l | |
WHERE l.TrackId = t.TrackId); | |
-- Same query as above, with album and track names | |
select m2.Title, t2.TrackId, t2.Name | |
from Album m2 | |
JOIN Track t2 USING(AlbumId) | |
JOIN ( | |
SELECT m.AlbumId, t.TrackId | |
FROM Artist r | |
JOIN Album m USING(ArtistId) | |
JOIN Track t using(AlbumId) | |
WHERE r.name = 'Led Zeppelin' | |
AND NOT EXISTS | |
(SELECT * | |
FROM InvoiceLine l | |
WHERE l.TrackId = t.TrackId) | |
) a | |
ON t2.TrackId = a.TrackId; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment