Last active
August 29, 2015 14:25
-
-
Save quarterdome/60ef51dd4a6cae3a92aa to your computer and use it in GitHub Desktop.
Defining a session from Redshift events table
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
with | |
-- First add a column to tracks table with the timestamp of | |
-- previous event (using lag window function). Note, that | |
-- lag() will return null if it is first event. | |
linked_tracks as ( | |
select | |
*, | |
lag(sent_at, 1) over | |
(partition by user_id order by sent_at) as last_event_at | |
from web.tracks | |
), | |
-- Now add another integer column with 1 if it is first event in | |
-- session, and 0 if it is not. Session break is determined by | |
-- the interval between event sent_at and last_event_at. | |
new_session_tracks as ( | |
select | |
*, | |
case | |
when | |
sent_at - last_event_at >= '60 minutes'::interval | |
or | |
last_event_at is null | |
then | |
1 | |
else | |
0 | |
end as is_new_session | |
from linked_tracks | |
) | |
-- Finally, compute the rolling sum of is_new_session in sent_at order: | |
--- * The global rolling sum, gives globally unique session id. | |
--- * Resetting sum for each user, gives user specific unique session id. | |
-- * user_session_id is also a sequential number of users session, which is convenient for some queries. | |
select | |
*, | |
sum(is_new_session) over | |
( order by user_id, sent_at | |
rows between unbounded preceding and current row ) as global_session_id, | |
sum(is_new_session) over | |
( partition by user_id | |
order by sent_at | |
rows between unbounded preceding and current row ) as user_session_id | |
from new_session_tracks | |
; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment