Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markrittman/d61ab26cdc392524d4ce7835c8f15b7f to your computer and use it in GitHub Desktop.
Save markrittman/d61ab26cdc392524d4ce7835c8f15b7f to your computer and use it in GitHub Desktop.
Segment Events unioned with Identify Event, ID Stitched, User Details Backfilled and Events Grouped into User Consideration-Stage Journey
with events as (
select
anonymous_id,
user_id,
cast(null as string) as name,
cast(null as string) as email,
timestamp,
'page_view' as event_type,
concat(split(context_ip,'.')[safe_offset(0)],'.***.***.',split(context_ip,'.')[safe_offset(3)]) as context_ip,
context_page_path,
context_page_referrer,
context_page_title,
context_user_agent,
context_campaign_source,
context_campaign_medium,
context_campaign_name,
cast (null as string) as requirement
from
`ra-development.company_website.pages`
union all
select
anonymous_id,
user_id,
cast(null as string) as name,
cast(null as string) as email,
timestamp,
event as event_type,
concat(split(context_ip,'.')[safe_offset(0)],'.***.***.',split(context_ip,'.')[safe_offset(3)]) as context_ip,
context_page_path,
context_page_referrer,
context_page_title,
context_user_agent,
context_campaign_source,
context_campaign_medium,
context_campaign_name,
cast (null as string) as requirement
from
`ra-development.company_website.tracks`
where
event in ('podcast_episode_played',
'pricing_link_clicked',
'hero_image_clicked',
'contact_us_submitted',
'collateral_viewed',
'clicked_email_link',
'clicked_email',
'casestudy_clicked',
'booked_a_meeting',
'about_us_clicked',
'pressed_button',
'pressed_a_button',
'contact_us_pressed')
union all
select
anonymous_id,
user_id,
cast(null as string) as name,
cast(null as string) as email,
timestamp,
event as event_type,
cast(null as string) as context_ip,
cast(null as string),
cast(null as string),
cast(null as string),
cast(null as string),
utm_source as context_campaign_source,
cast(null as string) as context_campaign_medium,
utm_campaign as context_campaign_name,
meeting_purpose as requirement
from
`ra-development.zapier_source.meeting_booked`
where
anonymous_id is not null
union all
select
anonymous_id,
user_id,
name,
email,
timestamp,
'identify' as event_type,
cast(null as string) as context_ip,
cast(null as string),
cast(null as string),
cast(null as string),
cast(null as string),
cast(null as string) as context_campaign_source,
cast(null as string) as context_campaign_medium,
cast(null as string) as context_campaign_name,
cast(null as string) as requirement
from
`ra-development.zapier_source.identifies`
where
anonymous_id is not null
),
id_stitching as (
select
distinct anonymous_id as anonymous_id,
last_value(user_id ignore nulls) over (partition by anonymous_id order by timestamp rows between unbounded preceding and unbounded following ) as user_id,
min(timestamp) over (partition by anonymous_id ) as first_seen_at,
max(timestamp) over (partition by anonymous_id ) as last_seen_at
from
events ),
mapped as (
select
coalesce(i.user_id,
e.anonymous_id) as blended_user_id,
e.*
from
events e
left join
id_stitching i
using
(anonymous_id)
),
names_backfilled as (
select
* except (name,
email),
last_value(email ignore nulls) over (partition by blended_user_id order by timestamp rows between unbounded preceding and unbounded following ) as email,
last_value(name ignore nulls) over (partition by blended_user_id order by timestamp rows between unbounded preceding and unbounded following ) as name
from
mapped )
select
blended_user_id,
name,
email,
array_agg(
struct(
timestamp,
event_type,
context_ip,
anonymous_id,
user_id,
context_page_path,
context_page_referrer,
context_page_title,
context_user_agent,
context_campaign_source,
context_campaign_medium,
context_campaign_source,
requirement)
order by timestamp
) event
from names_backfilled
group by
1,2,3
order by
1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment