Skip to content

Instantly share code, notes, and snippets.

@danthegoodman1
Last active September 6, 2023 16:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save danthegoodman1/a8148382996fbb4b300155f011d9f353 to your computer and use it in GitHub Desktop.
Save danthegoodman1/a8148382996fbb4b300155f011d9f353 to your computer and use it in GitHub Desktop.
ClickHouse rolling net dollar retention
-- Rolling ndr
with tbl as (
-- fake table
select c1 as usr, c2 as price, c3 as d from VALUES (
('a', 10, toDate('2023-08-09'))
, ('a', 10, toDate('2023-08-10'))
, ('a', 8, toDate('2023-08-11'))
, ('a', 11, toDate('2023-08-12'))
, ('a', 12, toDate('2023-08-13'))
, ('b', 10, toDate('2023-08-09'))
, ('b', 10, toDate('2023-08-10'))
, ('b', 9, toDate('2023-08-11'))
, ('b', 11, toDate('2023-08-12'))
, ('b', 12, toDate('2023-08-13'))
, ('c', 11, toDate('2023-08-11'))
, ('c', 11, toDate('2023-08-12'))
, ('c', 15, toDate('2023-08-13'))
, ('c', 15, toDate('2023-08-15')) -- a non-contig day
)
)
, mins as (
-- get the first date and price we saw them at (aggregate down if needed)
select usr, min(d) as first_d
from tbl
group by usr
order by usr asc
)
, buckets as (
-- aggregate the days based on the delta to the first date
select usr, sum(price) as price, mins.first_d, dateDiff('day', mins.first_d, d) as delta
from tbl
left join mins on mins.usr = tbl.usr
group by usr, delta, mins.first_d
)
-- select * from first_deltas;
, coalesce(lagInFrame(price, 1) over (partition by buckets.usr order by delta), 1) as lag_price -- reusable statements :D
, per_user as (
-- get each user's ndr
select buckets.usr as usr
, mins.first_d as start
, delta
, if (lag_price = 0, 1, price/lag_price) as ndr -- prevent divide by 0 (filtered out later)
from buckets
left join mins on mins.usr = buckets.usr
order by buckets.usr asc, start asc, delta asc with fill
)
-- select * from per_user where delta != 0;
, aggs as (
-- get the average ndr for each start date
-- could use this as final select to have tabular format
select
start -- could do toStartOfInterval here since that only changes the shown start, but not the deltas
, delta
-- , uniq(if(ndr = 0, null, usr)) as users -- don't count the user if they didn't do anything this delta
, uniq(usr) as users -- count them regardless of whether the earned anything
, round(avg(ndr), 2) as ndr_a
from per_user
where delta != 0
group by start, delta
order by start asc, delta asc
)
-- select * from aggs;
-- group up in arrays for each start date
select start, groupArray(ndr_a) as ndr_a, groupArray(users) as users_a, groupArray(delta) as delta_a
from aggs
group by start
;
@danthegoodman1
Copy link
Author

This is used when you are observing the delta between the previous interval and the current, rather than the first and the current.

This makes it resistant to the "honeymoon period". You can verify the values by observing that delta=1 is the same value as the relative chart, but the following columns have different values.

delta=0 is omitted as it's always 1

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment