Skip to content

Instantly share code, notes, and snippets.

@dwl285
Last active March 16, 2020 16:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dwl285/cca564669e52c8495e112e647fa2f35f to your computer and use it in GitHub Desktop.
Save dwl285/cca564669e52c8495e112e647fa2f35f to your computer and use it in GitHub Desktop.
Sessionisation from Looker to Dataform
config {
type: "view"
}
select distinct
timestamp,
user_id,
name as event_name,
is as event_id
from
source.events_raw
where
event_name not in ('test_event', 'clicked_logout_button_old')
config {
type: "view"
}
with session_starts as (
select
*,
coalesce(
(
timestamp_diff(timestamp,
lag(timestamp) over (partition by user_id order by timestamp asc),
second) >= 30*60, -- 30 minute session timeout
true
) as session_start_event
from
${ref("events_cleaned")}
),
session_index as (
-- add a session_index (users first session = 1, users second session = 2 etc)
select
*,
sum(if (session_start_event, 1, 0)) over (
partition by user_id
order by
timestamp asc
) as session_index
from
session_starts
),
session_id as (
-- add a unique session_id to each session
select
*,
farm_fingerprint(
concat(
cast(session_index as string),
"|",
cast(user_id as string)
)
) as session_id
from
session_index
config {
type: "table",
name: "sessions",
schema: "analytics"
}
select
session_id,
user_id,
session_index,
min(timestamp) as session_start_timestamp,
max(timestamp) as session_end_timestamp,
count(distinct event_id) as events_in_session
from
session_id
group by
session_id, session_index
view: sessions {
derived_table: {
sql_create:
with events_cleaned as (
-- filter out invalid / old events and remove duplicates
select distinct
timestamp,
user_id,
name as event_name,
id as event_id
from
source.events_raw
where
event_name not in ('test_event', 'clicked_logout_button_old')
),
session_starts as (
-- label the event that starts the session
select
*,
coalesce(
(
timestamp_diff(timestamp,
lag(timestamp) over (partition by user_id order by timestamp asc),
second) >= 30*60, -- 30 minute session timeout
true
) as session_start_event
from
events_cleaned
),
session_index as (
-- add a session_index (users first session = 1, users second session = 2 etc)
select
*,
sum(if (session_start_event, 1, 0)) over (
partition by user_id
order by
timestamp asc
) as session_index
from
session_starts
),
session_id as (
-- add a unique session_id to each session
select
*,
farm_fingerprint(
concat(
cast(session_index as string),
"|",
cast(user_id as string)
)
) as session_id
from
session_index
)
select
session_id,
user_id,
session_index,
min(timestamp) as session_start_timestamp,
max(timestamp) as session_end_timestamp,
count(distinct event_id) as events_in_session
from
session_id
group by
session_id, session_index
;;
}
dimension: session_id {
type: string
sql: ${TABLE}.session_id ;;
}
dimension: user_id {
type: string
sql: ${TABLE}.user_id ;;
}
dimension: session_index {
type: number
sql: ${TABLE}.session_index ;;
}
dimension: events_in_session {
type: number
sql: ${TABLE}.events_in_session ;;
}
dimension_group: session_start_timestamp {
type: time
sql: ${TABLE}.session_start_timestamp ;;
timeframes: [
date,
time,
raw
]
}
dimension_group: session_end_timestamp {
type: time
sql: ${TABLE}.session_end_timestamp ;;
timeframes: [
date,
time,
raw
]
}
measure: sessions {
type: count
}
measure: users {
type: count_distinct
sql: ${user_id} ;;
}
measure: events {
type: sum
sql: ${events_in_session} ;;
}
}
view: sessions {
sql_table_name: analytics.sessions ;;
}
dimension: session_id {
type: string
sql: ${TABLE}.session_id ;;
}
dimension: user_id {
type: string
sql: ${TABLE}.user_id ;;
}
dimension: session_index {
type: number
sql: ${TABLE}.session_index ;;
}
dimension: events_in_session {
type: number
sql: ${TABLE}.events_in_session ;;
}
dimension_group: session_start_timestamp {
type: time
sql: ${TABLE}.session_start_timestamp ;;
timeframes: [
date,
time,
raw
]
}
dimension_group: session_end_timestamp {
type: time
sql: ${TABLE}.session_end_timestamp ;;
timeframes: [
date,
time,
raw
]
}
measure: sessions {
type: count
}
measure: users {
type: count_distinct
sql: ${user_id} ;;
}
measure: events {
type: sum
sql: ${events_in_session} ;;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment