Skip to content

Instantly share code, notes, and snippets.

@nrktkt
Last active October 21, 2020 02:17
Show Gist options
  • Save nrktkt/56b50a23087a35d8a2e0198e83520cb3 to your computer and use it in GitHub Desktop.
Save nrktkt/56b50a23087a35d8a2e0198e83520cb3 to your computer and use it in GitHub Desktop.
cheat sheet with postgres queries for common analytics metrics
-- based off the metrics on the dashboard of the mixpanel music finder demo
create table events (
id bigserial primary key,
user_id varchar,
created_at timestamptz not null default now(),
name varchar not null,
properties jsonb not null default '{}'
);
insert into events (user_id, name, properties, created_at) values
('1', 'signUp', '{"planType": "free", "city": "San Francisco", "region": "California"}', '2020-10-19T22:42:26Z'),
('2', 'signUp', '{"planType": "paid", "city": "San Diego", "region": "California"}', '2020-10-19T22:42:26Z'),
('3', 'signUp', '{"planType": "paid", "city": "New York", "region": "New York"}', '2020-10-19T22:42:26Z'),
('4', 'signUp', '{"planType": "free", "city": "Austin", "region": "Texas"}', '2020-10-19T22:42:26Z'),
('5', 'signUp', '{"planType": "free", "city": "San Francisco", "region": "California"}', '2019-12-18T22:42:26Z')
;
insert into events (user_id, name, properties, created_at) values
('1', 'songPlay', '{"genre": "Country"}', '2020-10-20T22:42:26Z'),
('1', 'songPlay', '{"genre": "Rock"}', '2020-09-20T22:42:26Z'),
('1', 'songPlay', '{"genre": "Gospel"}', '2020-10-25T22:42:26Z'),
('2', 'songPlay', '{"genre": "Pop"}', '2020-08-20T22:42:26Z'),
('2', 'songPlay', '{"genre": "Rock"}', '2020-09-20T22:42:26Z'),
('2', 'songPlay', '{"genre": "Hip-Hop"}', '2020-10-20T22:42:26Z'),
('3', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T22:42:26Z'),
('3', 'songPlay', '{"genre": "Pop"}', '2020-10-20T22:42:26Z'),
('3', 'songPlay', '{"genre": "Country"}', '2020-10-15T22:42:26Z'),
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T01:42:26Z'),
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T03:42:26Z'),
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T05:42:26Z'),
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T06:42:26Z'),
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T07:42:26Z'),
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T08:42:26Z'),
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T15:42:26Z'),
('4', 'songPlay', '{"genre": "Electronic"}', '2020-10-20T22:42:26Z'),
('5', 'songPlay', '{"genre": "Pop"}', '2019-12-19T22:42:26Z');
insert into events (user_id, name, created_at) values
('-1', 'signupPageView', '2020-10-19T22:42:26Z'),
('1', 'signupPageView', '2020-09-18T22:42:26Z'),
('2', 'signupPageView', '2020-10-18T22:42:26Z'),
('3', 'signupPageView', '2020-10-15T22:42:26Z'),
('4', 'signupPageView', '2020-10-18T22:42:26Z'),
('5', 'signupPageView', '2019-12-17T22:42:26Z');
-- See the events most frequently triggered in your product
-- What are the most common user actions [in the last month]?
select name, count(*) as count
from events
where (now() - INTERVAL '1 MONTH') < created_at
group by name
order by count desc;
-- Filter down to a cohort and segment by a different property
-- In what regions are people signing up for the free plan?
select properties ->> 'region' as region, count(*)
from events
where name = 'signUp' and properties ->> 'planType' = 'free'
group by region;
-- Select an event to see how it trends over time across properties
-- What genres are people listening to?
select
DATE_TRUNC('day', created_at) as date,
properties ->> 'genre' as genre,
count(*)
from events
where name = 'songPlay'
group by genre, date;
-- Add a dimension to better understand each property
-- What plan types are most common in different cities?
select
properties ->> 'city' as city,
properties ->> 'planType' as plan_type,
count(*)
from events
where name = 'signUp'
group by rollup (city, plan_type);
-- View MAU over time
-- Monthly Active Users triggering "Song Play"
-- is this efficient? is there a pg10 window function that would be better here?
select
DATE_TRUNC('day', e.created_at) as day,
count(distinct e2.user_id)
from events e
join events e2 on
e2.created_at >= e.created_at - INTERVAL '1 MONTH'
and e2.created_at <= e.created_at
and e.name = e2.name
where e.name = 'songPlay'
group by day
order by day;
-- DAU/MAU
-- Stickiness (DAU/MAU) of users triggering "Song Play."
select dau.day, dau.count::float / mau.count as "dau/mau"
from (
select
DATE_TRUNC('day', e.created_at) as day,
count(distinct e2.user_id) as count
from events e
join events e2 on
e2.created_at >= e.created_at - INTERVAL '1 MONTH'
and e2.created_at <= e.created_at
and e.name = e2.name
where e.name = 'songPlay'
group by day
) mau join
(
select
DATE_TRUNC('day', created_at) as day,
count(distinct user_id) as count
from events
where name = 'songPlay'
group by day
) dau on mau.day = dau.day;
-- View conversion rates through key user flows
-- How many new users play a song within 30 days?
-- The first three columns return the total number of users who participated in each step of the flow
-- The last two columns show the number of users who converted from one stage to the next
-- wip: need to add 30 day restriction
select
count(signup_page_view) as signupPageView,
count(sign_up) as signUp,
count(song_play) as songPlay,
sum(case when (signup_page_view < sign_up) then 1 else 0 end) as conversion_1_2,
sum(case when (signup_page_view < sign_up and sign_up < song_play)
then 1 else 0 end
) as conversion_2_3
from
(
select
max(case when (name = 'signupPageView') then e1 end) as signup_page_view,
max(case when (name = 'signUp') then e1 end) as sign_up,
max(case when (name = 'songPlay') then e1 end) as song_play
from (
select user_id, name, min(created_at) as e1
from events
where name in ('signupPageView', 'signUp', 'songPlay')
group by user_id, name
) a
group by user_id
) b
;
-- Analyze changes in conversion rates over time
-- How has the percentage of new users who play a song within 30 days changed over time?
-- wip: actually shows the percentage of users who converted in a given month
select "month", signupPageView::float / conversion_2_3 * 100 as conversion_rate
from (
select
"month",
count(signup_page_view) as signupPageView,
sum(case when (signup_page_view < sign_up and sign_up < song_play)
then 1 else 0 end
) as conversion_2_3
from (
select
"month",
max(case when (name = 'signupPageView') then e1 end) as signup_page_view,
max(case when (name = 'signUp') then e1 end) as sign_up,
max(case when (name = 'songPlay') then e1 end) as song_play
from (
select
user_id,
name,
date_trunc('month', created_at) as "month",
min(created_at) as e1
from events
where name in ('signupPageView', 'signUp', 'songPlay')
group by user_id, name, "month"
) a
group by "month", user_id
) b
group by "month"
) c
;
-- Analyze user retention
-- How many users play a song N weeks later?
-- Understand how frequently users perform key events
-- How many users did Song Play at least X days in a week?
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment