Skip to content

Instantly share code, notes, and snippets.

@markrittman
Last active March 27, 2020 08:24
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markrittman/6e9d87db3e66c5819242f4e975f650a2 to your computer and use it in GitHub Desktop.
Save markrittman/6e9d87db3e66c5819242f4e975f650a2 to your computer and use it in GitHub Desktop.
SQL script for use with Redshift as a Segment warehouse destination. Aggregates page views into user sessions with 60 seconds cut-off and adds count of page views in session, time between sessions, bounced session flag, session length and truncates session start time to the hour
SELECT
date_trunc('hours'::text, session_start_ts) AS session_start_hour,
session_start_ts,
date_diff ('seconds'::text,
pg_catalog.lead(session_start_ts, 1) OVER (PARTITION BY user_id ORDER BY session_start_ts DESC),
session_start_ts) AS secs_between_sessions,
user_id,
global_session_id,
user_session_id,
page_views_in_session,
CASE WHEN (page_views_in_session = 1) THEN
1
ELSE
0
END AS bounced_sessions,
session_length_ts,
entrance_url
FROM (
SELECT
id,
received_at,
uuid,
context_library_name,
context_page_path,
"path",
referrer,
context_ip,
context_page_title,
sent_at,
"timestamp",
title,
uuid_ts,
context_page_referrer,
context_library_version,
context_page_url,
context_user_agent,
url,
user_id,
anonymous_id,
original_timestamp,
context_page_search,
search,
context_protocols_source_id,
context_locale,
context_campaign_source,
context_campaign_content,
context_campaign_medium,
context_campaign_name,
context_campaign_term,
last_event,
is_new_session,
is_session_entrance_page,
global_session_id,
user_session_id,
count(*) OVER (PARTITION BY user_id,
user_session_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS page_views_in_session,
min(received_at) OVER (PARTITION BY user_id,
user_session_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS session_start_ts,
"max" (received_at) OVER (PARTITION BY user_id,
user_session_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS session_end_ts,
date_diff (('seconds'::character varying)::text, min(received_at) OVER (PARTITION BY user_id,user_session_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
max (received_at) OVER (PARTITION BY user_id,user_session_id ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)) AS session_length_ts,
first_value ((url)::text) OVER (PARTITION BY user_id, user_session_id ORDER BY received_at ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS entrance_url
FROM (
SELECT
id,
received_at,
uuid,
context_library_name,
context_page_path,
"path",
referrer,
context_ip,
context_page_title,
sent_at,
"timestamp",
title,
uuid_ts,
context_page_referrer,
context_library_version,
context_page_url,
context_user_agent,
url,
user_id,
anonymous_id,
original_timestamp,
context_page_search,
search,
context_protocols_source_id,
context_locale,
context_campaign_source,
context_campaign_content,
context_campaign_medium,
context_campaign_name,
context_campaign_term,
last_event,
is_new_session,
(is_new_session = 1) AS is_session_entrance_page,
sum(is_new_session) OVER (ORDER BY user_id,
received_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS global_session_id,
sum(is_new_session) OVER (PARTITION BY user_id ORDER BY received_at ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS user_session_id
FROM (
SELECT
id,
received_at,
uuid,
context_library_name,
context_page_path,
"path",
referrer,
context_ip,
context_page_title,
sent_at,
"timestamp",
title,
uuid_ts,
context_page_referrer,
context_library_version,
context_page_url,
context_user_agent,
url,
user_id,
anonymous_id,
original_timestamp,
context_page_search,
search,
context_protocols_source_id,
context_locale,
context_campaign_source,
context_campaign_content,
context_campaign_medium,
context_campaign_name,
context_campaign_term,
last_event,
CASE WHEN ((("date_part" (('EPOCH'::character varying)::text,received_at) - "date_part" (('EPOCH'::character varying)::text,last_event)) >= (60 * 10))
OR(last_event IS NULL)) THEN 1
ELSE
0
END AS is_new_session
FROM (
SELECT
id,
received_at,
uuid,
context_library_name,
context_page_path,
"path",
referrer,
context_ip,
context_page_title,
sent_at,
"timestamp",
title,
uuid_ts,
context_page_referrer,
context_library_version,
context_page_url,
context_user_agent,
url,
user_id,
anonymous_id,
original_timestamp,
context_page_search,
search,
context_protocols_source_id,
context_locale,
context_campaign_source,
context_campaign_content,
context_campaign_medium,
context_campaign_name,
context_campaign_term,
pg_catalog.lead(received_at, 1) OVER (PARTITION BY user_id ORDER BY received_at DESC) AS last_event
FROM
javascript.pages) "last") final) pages) derived_table1
GROUP BY
date_trunc('hours'::text, session_start_ts),
session_start_ts,
user_id,
global_session_id,
user_session_id,
page_views_in_session,
CASE WHEN (page_views_in_session = 1) THEN
1
ELSE
0
END,
session_length_ts,
entrance_url
ORDER BY
user_id,
user_session_id;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment