Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save markrittman/d14a40a0b4e44cc89379100955aa3ae8 to your computer and use it in GitHub Desktop.
Save markrittman/d14a40a0b4e44cc89379100955aa3ae8 to your computer and use it in GitHub Desktop.
User journey stitching with inferred identify for off-platform activity
with events as (
select
anonymous_id,
user_id,
cast(null as string) as name,
cast(null as string) as email,
timestamp,
'page_view' as event_type,
concat(split(context_ip,'.')[safe_offset(0)],'.***.***.',split(context_ip,'.')[safe_offset(3)]) as context_ip,
context_page_path,
context_page_referrer,
context_page_title,
context_user_agent,
context_campaign_source,
context_campaign_medium,
context_campaign_name,
cast (null as string) as requirement
from
`ra-development.company_website.pages`
union all
select
anonymous_id,
coalesce(
split(
split(
context_page_url,'ajs_aid=')[safe_offset(1)]
,'&')[safe_offset(0)]
,user_id) as user_id,
cast(null as string) as name,
cast(null as string) as email,
timestamp,
event as event_type,
concat(split(context_ip,'.')[safe_offset(0)],'.***.***.',split(context_ip,'.')[safe_offset(3)]) as context_ip,
context_page_path,
context_page_referrer,
context_page_title,
context_user_agent,
context_campaign_source,
context_campaign_medium,
context_campaign_name,
cast (null as string) as requirement
from
`ra-development.company_website.tracks`
where
event in ('podcast_episode_played',
'pricing_link_clicked',
'hero_image_clicked',
'contact_us_submitted',
'collateral_viewed',
'clicked_email_link',
'clicked_email',
'casestudy_clicked',
'booked_a_meeting',
'about_us_clicked',
'pressed_button',
'pressed_a_button',
'contact_us_pressed')
and
user_id != '5d2bb0e8-ba41-463c-a438-27bf8b3c3e35'
union all
select
anonymous_id,
user_id,
cast(null as string) as name,
cast(null as string) as email,
timestamp,
event as event_type,
cast(null as string) as context_ip,
cast(null as string) as context_page_path,
cast(null as string) as context_page_referrer,
cast(null as string) as context_page_title,
cast(null as string) as context_user_agent,
utm_source as context_campaign_source,
cast(null as string) as context_campaign_medium,
utm_campaign as context_campaign_name,
meeting_purpose as requirement
from
`ra-development.zapier_source.meeting_booked`
where
anonymous_id is not null
union all
select
anonymous_id,
user_id,
name,
email,
timestamp,
'identify' as event_type,
cast(null as string) as context_ip,
cast(null as string) as context_page_path,
cast(null as string) as context_page_referrer,
cast(null as string) as context_page_title,
cast(null as string) as context_user_agent,
cast(null as string) as context_campaign_source,
cast(null as string) as context_campaign_medium,
cast(null as string) as context_campaign_name,
cast(null as string) as requirement
from
`ra-development.zapier_source.identifies`
where
anonymous_id is not null
),
social_media as (
select
l.profileurl as anonymous_id,
cast(coalesce(p.email,e.email) as string) as user_id,
l.fullName as name,
cast(coalesce(p.email,e.email) as string) as email,
cast(parse_timestamp('%b %Y',l.followedat) as timestamp) as timestamp,
'profile_followed' as event_type,
cast(null as string) as context_ip,
cast(null as string) as context_page_path,
cast(null as string) as context_page_referrer,
cast(null as string) as context_page_title,
cast(null as string) as context_user_agent,
cast(null as string) as context_campaign_source,
cast(null as string) as context_campaign_medium,
cast(null as string) as context_campaign_name,
cast(null as string) as requirement,
true as is_inferred_identity
from
`ra-development.coupler_socialmedia.linkedin_followers` l
left join
`ra-development.coupler_socialmedia.linkedin_profiles` p
on
l.profileurl = p.linkedinprofile
left join
events e -- if we have a user name in linkedin, we try and supplement with email from our own data
on
l.fullName = e.name
group by
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
union all
-- add linkedin post likers now, and supplement with email address from our own data if we can match on name
select
l.profilelink as anonymous_id,
cast(coalesce(p.email,e.email) as string) as user_id,
l.name as name,
cast(coalesce(p.email,e.email) as string) as email,
l.timestamp as timestamp,
'article_liked' as event_type,
cast(null as string) as context_ip,
cast(null as string) as context_page_path,
cast(null as string) as context_page_referrer,
cast(null as string) as context_page_title,
cast(null as string) as context_user_agent,
cast(null as string) as context_campaign_source,
cast(null as string) as context_campaign_medium,
cast(null as string) as context_campaign_name,
cast(null as string) as requirement,
true as is_inferred_identity
from
`ra-development.coupler_socialmedia.linkedin_post_likers` l
left join
`ra-development.coupler_socialmedia.linkedin_profiles` p
on
l.profilelink = p.linkedinprofile
left join
events e
on
l.name = e.name
where
l.profilelink is not null
group by
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16),
all_events as (
select
*,
false as is_inferred_identity
from
events
union all
select
*
from
social_media
),
id_stitching as (
select
distinct anonymous_id as anonymous_id,
last_value(user_id ignore nulls) over (partition by anonymous_id order by timestamp rows between unbounded preceding and unbounded following ) as user_id,
min(timestamp) over (partition by anonymous_id ) as first_seen_at,
max(timestamp) over (partition by anonymous_id ) as last_seen_at
from
all_events ),
mapped as (
select
coalesce(i.user_id,
e.anonymous_id) as blended_user_id,
e.*
from
all_events e
left join
id_stitching i
using
(anonymous_id)
),
names_backfilled as (
select
* except (name,
email),
last_value(email ignore nulls) over (partition by blended_user_id order by timestamp rows between unbounded preceding and unbounded following ) as email,
last_value(name ignore nulls) over (partition by blended_user_id order by timestamp rows between unbounded preceding and unbounded following ) as name
from
mapped )
select
blended_user_id,
name,
email,
logical_or(is_inferred_identity) as is_inferred_identity,
array_agg(
struct(
timestamp,
event_type,
context_ip,
anonymous_id,
user_id,
is_inferred_identity,
context_page_path,
context_page_referrer,
context_page_title,
context_user_agent,
context_campaign_source,
context_campaign_medium,
context_campaign_source,
requirement)
order by timestamp
) event
from names_backfilled
group by 1,2,3
order by
1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment