Last active
October 21, 2020 02:17
-
-
Save nrktkt/56b50a23087a35d8a2e0198e83520cb3 to your computer and use it in GitHub Desktop.
cheat sheet with postgres queries for common analytics metrics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- based off the metrics on the dashboard of the mixpanel music finder demo | |
create table events ( | |
id bigserial primary key, | |
user_id varchar, | |
created_at timestamptz not null default now(), | |
name varchar not null, | |
properties jsonb not null default '{}' | |
); | |
insert into events (user_id, name, properties, created_at) values | |
('1', 'signUp', '{"planType": "free", "city": "San Francisco", "region": "California"}', '2020-10-19T22:42:26Z'), | |
('2', 'signUp', '{"planType": "paid", "city": "San Diego", "region": "California"}', '2020-10-19T22:42:26Z'), | |
('3', 'signUp', '{"planType": "paid", "city": "New York", "region": "New York"}', '2020-10-19T22:42:26Z'), | |
('4', 'signUp', '{"planType": "free", "city": "Austin", "region": "Texas"}', '2020-10-19T22:42:26Z'), | |
('5', 'signUp', '{"planType": "free", "city": "San Francisco", "region": "California"}', '2019-12-18T22:42:26Z') | |
; | |
insert into events (user_id, name, properties, created_at) values | |
('1', 'songPlay', '{"genre": "Country"}', '2020-10-20T22:42:26Z'), | |
('1', 'songPlay', '{"genre": "Rock"}', '2020-09-20T22:42:26Z'), | |
('1', 'songPlay', '{"genre": "Gospel"}', '2020-10-25T22:42:26Z'), | |
('2', 'songPlay', '{"genre": "Pop"}', '2020-08-20T22:42:26Z'), | |
('2', 'songPlay', '{"genre": "Rock"}', '2020-09-20T22:42:26Z'), | |
('2', 'songPlay', '{"genre": "Hip-Hop"}', '2020-10-20T22:42:26Z'), | |
('3', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T22:42:26Z'), | |
('3', 'songPlay', '{"genre": "Pop"}', '2020-10-20T22:42:26Z'), | |
('3', 'songPlay', '{"genre": "Country"}', '2020-10-15T22:42:26Z'), | |
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T01:42:26Z'), | |
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T03:42:26Z'), | |
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T05:42:26Z'), | |
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T06:42:26Z'), | |
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T07:42:26Z'), | |
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T08:42:26Z'), | |
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T15:42:26Z'), | |
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T22:42:26Z'), | |
('5', 'songPlay', '{"genre": "Pop"}', '2019-12-19T22:42:26Z'); | |
insert into events (user_id, name, created_at) values | |
('-1', 'signupPageView', '2020-10-19T22:42:26Z'), | |
('1', 'signupPageView', '2020-09-18T22:42:26Z'), | |
('2', 'signupPageView', '2020-10-18T22:42:26Z'), | |
('3', 'signupPageView', '2020-10-15T22:42:26Z'), | |
('4', 'signupPageView', '2020-10-18T22:42:26Z'), | |
('5', 'signupPageView', '2019-12-17T22:42:26Z'); | |
-- See the events most frequently triggered in your product | |
-- What are the most common user actions [in the last month]? | |
select name, count(*) as count | |
from events | |
where (now() - INTERVAL '1 MONTH') < created_at | |
group by name | |
order by count desc; | |
-- Filter down to a cohort and segment by a different property | |
-- In what regions are people signing up for the free plan? | |
select properties ->> 'region' as region, count(*) | |
from events | |
where name = 'signUp' and properties ->> 'planType' = 'free' | |
group by region; | |
-- Select an event to see how it trends over time across properties | |
-- What genres are people listening to? | |
select | |
DATE_TRUNC('day', created_at) as date, | |
properties ->> 'genre' as genre, | |
count(*) | |
from events | |
where name = 'songPlay' | |
group by genre, date; | |
-- Add a dimension to better understand each property | |
-- What plan types are most common in different cities? | |
select | |
properties ->> 'city' as city, | |
properties ->> 'planType' as plan_type, | |
count(*) | |
from events | |
where name = 'signUp' | |
group by rollup (city, plan_type); | |
-- View MAU over time | |
-- Monthly Active Users triggering "Song Play" | |
-- is this efficient? is there a pg10 window function that would be better here? | |
select | |
DATE_TRUNC('day', e.created_at) as day, | |
count(distinct e2.user_id) | |
from events e | |
join events e2 on | |
e2.created_at >= e.created_at - INTERVAL '1 MONTH' | |
and e2.created_at <= e.created_at | |
and e.name = e2.name | |
where e.name = 'songPlay' | |
group by day | |
order by day; | |
-- DAU/MAU | |
-- Stickiness (DAU/MAU) of users triggering "Song Play." | |
select dau.day, dau.count::float / mau.count as "dau/mau" | |
from ( | |
select | |
DATE_TRUNC('day', e.created_at) as day, | |
count(distinct e2.user_id) as count | |
from events e | |
join events e2 on | |
e2.created_at >= e.created_at - INTERVAL '1 MONTH' | |
and e2.created_at <= e.created_at | |
and e.name = e2.name | |
where e.name = 'songPlay' | |
group by day | |
) mau join | |
( | |
select | |
DATE_TRUNC('day', created_at) as day, | |
count(distinct user_id) as count | |
from events | |
where name = 'songPlay' | |
group by day | |
) dau on mau.day = dau.day; | |
-- View conversion rates through key user flows | |
-- How many new users play a song within 30 days? | |
-- The first three columns return the total number of users who participated in each step of the flow | |
-- The last two columns show the number of users who converted from one stage to the next | |
-- wip: need to add 30 day restriction | |
select | |
count(signup_page_view) as signupPageView, | |
count(sign_up) as signUp, | |
count(song_play) as songPlay, | |
sum(case when (signup_page_view < sign_up) then 1 else 0 end) as conversion_1_2, | |
sum(case when (signup_page_view < sign_up and sign_up < song_play) | |
then 1 else 0 end | |
) as conversion_2_3 | |
from | |
( | |
select | |
max(case when (name = 'signupPageView') then e1 end) as signup_page_view, | |
max(case when (name = 'signUp') then e1 end) as sign_up, | |
max(case when (name = 'songPlay') then e1 end) as song_play | |
from ( | |
select user_id, name, min(created_at) as e1 | |
from events | |
where name in ('signupPageView', 'signUp', 'songPlay') | |
group by user_id, name | |
) a | |
group by user_id | |
) b | |
; | |
-- Analyze changes in conversion rates over time | |
-- How has the percentage of new users who play a song within 30 days changed over time? | |
-- wip: actually shows the percentage of users who converted in a given month | |
select "month", signupPageView::float / conversion_2_3 * 100 as conversion_rate | |
from ( | |
select | |
"month", | |
count(signup_page_view) as signupPageView, | |
sum(case when (signup_page_view < sign_up and sign_up < song_play) | |
then 1 else 0 end | |
) as conversion_2_3 | |
from ( | |
select | |
"month", | |
max(case when (name = 'signupPageView') then e1 end) as signup_page_view, | |
max(case when (name = 'signUp') then e1 end) as sign_up, | |
max(case when (name = 'songPlay') then e1 end) as song_play | |
from ( | |
select | |
user_id, | |
name, | |
date_trunc('month', created_at) as "month", | |
min(created_at) as e1 | |
from events | |
where name in ('signupPageView', 'signUp', 'songPlay') | |
group by user_id, name, "month" | |
) a | |
group by "month", user_id | |
) b | |
group by "month" | |
) c | |
; | |
-- Analyze user retention | |
-- How many users play a song N weeks later? | |
-- Understand how frequently users perform key events | |
-- How many users did Song Play at least X days in a week? |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment