Last active
March 27, 2020 08:24
-
-
Save markrittman/6e9d87db3e66c5819242f4e975f650a2 to your computer and use it in GitHub Desktop.
SQL script for use with Redshift as a Segment warehouse destination. Aggregates page views into user sessions with 60 seconds cut-off and adds count of page views in session, time between sessions, bounced session flag, session length and truncates session start time to the hour
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT | |
date_trunc('hours'::text, session_start_ts) AS session_start_hour, | |
session_start_ts, | |
date_diff ('seconds'::text, | |
pg_catalog.lead(session_start_ts, 1) OVER (PARTITION BY user_id ORDER BY session_start_ts DESC), | |
session_start_ts) AS secs_between_sessions, | |
user_id, | |
global_session_id, | |
user_session_id, | |
page_views_in_session, | |
CASE WHEN (page_views_in_session = 1) THEN | |
1 | |
ELSE | |
0 | |
END AS bounced_sessions, | |
session_length_ts, | |
entrance_url | |
FROM ( | |
SELECT | |
id, | |
received_at, | |
uuid, | |
context_library_name, | |
context_page_path, | |
"path", | |
referrer, | |
context_ip, | |
context_page_title, | |
sent_at, | |
"timestamp", | |
title, | |
uuid_ts, | |
context_page_referrer, | |
context_library_version, | |
context_page_url, | |
context_user_agent, | |
url, | |
user_id, | |
anonymous_id, | |
original_timestamp, | |
context_page_search, | |
search, | |
context_protocols_source_id, | |
context_locale, | |
context_campaign_source, | |
context_campaign_content, | |
context_campaign_medium, | |
context_campaign_name, | |
context_campaign_term, | |
last_event, | |
is_new_session, | |
is_session_entrance_page, | |
global_session_id, | |
user_session_id, | |
count(*) OVER (PARTITION BY user_id, | |
user_session_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS page_views_in_session, | |
min(received_at) OVER (PARTITION BY user_id, | |
user_session_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS session_start_ts, | |
"max" (received_at) OVER (PARTITION BY user_id, | |
user_session_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS session_end_ts, | |
date_diff (('seconds'::character varying)::text, min(received_at) OVER (PARTITION BY user_id,user_session_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING), | |
max (received_at) OVER (PARTITION BY user_id,user_session_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) AS session_length_ts, | |
first_value ((url)::text) OVER (PARTITION BY user_id, user_session_id ORDER BY received_at ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS entrance_url | |
FROM ( | |
SELECT | |
id, | |
received_at, | |
uuid, | |
context_library_name, | |
context_page_path, | |
"path", | |
referrer, | |
context_ip, | |
context_page_title, | |
sent_at, | |
"timestamp", | |
title, | |
uuid_ts, | |
context_page_referrer, | |
context_library_version, | |
context_page_url, | |
context_user_agent, | |
url, | |
user_id, | |
anonymous_id, | |
original_timestamp, | |
context_page_search, | |
search, | |
context_protocols_source_id, | |
context_locale, | |
context_campaign_source, | |
context_campaign_content, | |
context_campaign_medium, | |
context_campaign_name, | |
context_campaign_term, | |
last_event, | |
is_new_session, | |
(is_new_session = 1) AS is_session_entrance_page, | |
sum(is_new_session) OVER (ORDER BY user_id, | |
received_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS global_session_id, | |
sum(is_new_session) OVER (PARTITION BY user_id ORDER BY received_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS user_session_id | |
FROM ( | |
SELECT | |
id, | |
received_at, | |
uuid, | |
context_library_name, | |
context_page_path, | |
"path", | |
referrer, | |
context_ip, | |
context_page_title, | |
sent_at, | |
"timestamp", | |
title, | |
uuid_ts, | |
context_page_referrer, | |
context_library_version, | |
context_page_url, | |
context_user_agent, | |
url, | |
user_id, | |
anonymous_id, | |
original_timestamp, | |
context_page_search, | |
search, | |
context_protocols_source_id, | |
context_locale, | |
context_campaign_source, | |
context_campaign_content, | |
context_campaign_medium, | |
context_campaign_name, | |
context_campaign_term, | |
last_event, | |
CASE WHEN ((("date_part" (('EPOCH'::character varying)::text,received_at) - "date_part" (('EPOCH'::character varying)::text,last_event)) >= (60 * 10)) | |
OR(last_event IS NULL)) THEN 1 | |
ELSE | |
0 | |
END AS is_new_session | |
FROM ( | |
SELECT | |
id, | |
received_at, | |
uuid, | |
context_library_name, | |
context_page_path, | |
"path", | |
referrer, | |
context_ip, | |
context_page_title, | |
sent_at, | |
"timestamp", | |
title, | |
uuid_ts, | |
context_page_referrer, | |
context_library_version, | |
context_page_url, | |
context_user_agent, | |
url, | |
user_id, | |
anonymous_id, | |
original_timestamp, | |
context_page_search, | |
search, | |
context_protocols_source_id, | |
context_locale, | |
context_campaign_source, | |
context_campaign_content, | |
context_campaign_medium, | |
context_campaign_name, | |
context_campaign_term, | |
pg_catalog.lead(received_at, 1) OVER (PARTITION BY user_id ORDER BY received_at DESC) AS last_event | |
FROM | |
javascript.pages) "last") final) pages) derived_table1 | |
GROUP BY | |
date_trunc('hours'::text, session_start_ts), | |
session_start_ts, | |
user_id, | |
global_session_id, | |
user_session_id, | |
page_views_in_session, | |
CASE WHEN (page_views_in_session = 1) THEN | |
1 | |
ELSE | |
0 | |
END, | |
session_length_ts, | |
entrance_url | |
ORDER BY | |
user_id, | |
user_session_id; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment