Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save quarterdome/c0e8c5c230190fd24b8a to your computer and use it in GitHub Desktop.
Save quarterdome/c0e8c5c230190fd24b8a to your computer and use it in GitHub Desktop.
-- This SQL snippet attributes an interest to traffic source and campaign.
-- Any interest event is attributed to FIRST traffic_source event for that
-- user.
with
-- first order traffic_sources per user, and number them with row_number
ordered_traffic_sources as (
select
*,
row_number() over
( partition by user_id
order by sent_at) as rn
from web.traffic_source
),
-- drop all traffic sources but first one
first_traffic_sources as (
select *
from ordered_traffic_sources
where rn = 1
),
-- Merge traffic_sources and interest events in one event pool.
-- * pad with 'null' every field that is not common between these two events
-- * add a new field 'type' with event type: interest or traffic_source.
merged as (
select
'traffic_source' as type,
sent_at,
user_id,
name as traffic_source_name,
campaign,
null as interest_id,
null as state,
null as action,
null as contract,
null as source_app
from first_traffic_sources
union
select
'interest' as type,
sent_at,
user_id,
null as traffic_source_name,
null as campaign,
interest_id,
state,
action,
contract,
source_app
from api.interest
where
source_app in ('TabletApartmentList', 'ApartmentList', 'MobileApartmentList')
and
sent_at > '2015-06-20' -- traffic sources where not recorded before 06/15, giving couple days buffer
),
-- Copy traffic_source campaign and name to all subsequent events.
attributed as (
select
*,
coalesce( campaign,
lag(campaign) ignore nulls over
( partition by user_id
order by sent_at ) ) as interest_campaign,
coalesce( traffic_source_name,
lag(traffic_source_name) ignore nulls over
( partition by user_id
order by sent_at ) ) as interest_traffic_source
from merged
)
-- Filter out traffic_source events, and leave only interests
select
user_id,
sent_at,
interest_id,
action,
state,
contract,
source_app,
interest_traffic_source as traffic_source,
interest_campaign as campaign
from attributed
where type = 'interest'
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment