Skip to content

Instantly share code, notes, and snippets.

@rorymalcolm
Created May 1, 2023 14:29
Show Gist options
  • Save rorymalcolm/8c85672f2538b79d01f287fcd5844d50 to your computer and use it in GitHub Desktop.
Save rorymalcolm/8c85672f2538b79d01f287fcd5844d50 to your computer and use it in GitHub Desktop.
CREATE OR REPLACE TABLE listening_history_with_county AS SELECT listening_history_cleaned.*, CASE WHEN county_visits.county_name IS NULL THEN 'Midlothian' ELSE county_visits.county_name END as county_name FROM listening_history_cleaned LEFT JOIN county_visits ON listening_history_cleaned.timestamp BETWEEN county_visits.start_time AND county_visits.end_time;
--- now we want the top songs listened to in each county by play_time_ms, and only the top song for each county
CREATE OR REPLACE VIEW total_playtime_ms_by_county AS SELECT county_name, artist, track, SUM(play_time_ms) as total_play_time FROM listening_history_with_county GROUP BY county_name, artist, track;
---- now create a cte to rank the songs by play time in each county, and only select the top song for each county
WITH ranked_songs_by_county AS (
SELECT
county_name
, artist
, track
, total_play_time
, RANK() OVER (PARTITION BY county_name ORDER BY total_play_time DESC) as rank
FROM total_playtime_ms_by_county
)
SELECT
county_name
, artist
, track
, total_play_time
FROM ranked_songs_by_county
WHERE rank = 1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment