Last active
August 29, 2015 14:26
-
-
Save quarterdome/c0e8c5c230190fd24b8a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This SQL snippet attributes an interest to traffic source and campaign. | |
-- Any interest event is attributed to FIRST traffic_source event for that | |
-- user. | |
with | |
-- first order traffic_sources per user, and number them with row_number | |
ordered_traffic_sources as ( | |
select | |
*, | |
row_number() over | |
( partition by user_id | |
order by sent_at) as rn | |
from web.traffic_source | |
), | |
-- drop all traffic sources but first one | |
first_traffic_sources as ( | |
select * | |
from ordered_traffic_sources | |
where rn = 1 | |
), | |
-- Merge traffic_sources and interest events in one event pool. | |
-- * pad with 'null' every field that is not common between these two events | |
-- * add a new field 'type' with event type: interest or traffic_source. | |
merged as ( | |
select | |
'traffic_source' as type, | |
sent_at, | |
user_id, | |
name as traffic_source_name, | |
campaign, | |
null as interest_id, | |
null as state, | |
null as action, | |
null as contract, | |
null as source_app | |
from first_traffic_sources | |
union | |
select | |
'interest' as type, | |
sent_at, | |
user_id, | |
null as traffic_source_name, | |
null as campaign, | |
interest_id, | |
state, | |
action, | |
contract, | |
source_app | |
from api.interest | |
where | |
source_app in ('TabletApartmentList', 'ApartmentList', 'MobileApartmentList') | |
and | |
sent_at > '2015-06-20' -- traffic sources where not recorded before 06/15, giving couple days buffer | |
), | |
-- Copy traffic_source campaign and name to all subsequent events. | |
attributed as ( | |
select | |
*, | |
coalesce( campaign, | |
lag(campaign) ignore nulls over | |
( partition by user_id | |
order by sent_at ) ) as interest_campaign, | |
coalesce( traffic_source_name, | |
lag(traffic_source_name) ignore nulls over | |
( partition by user_id | |
order by sent_at ) ) as interest_traffic_source | |
from merged | |
) | |
-- Filter out traffic_source events, and leave only interests | |
select | |
user_id, | |
sent_at, | |
interest_id, | |
action, | |
state, | |
contract, | |
source_app, | |
interest_traffic_source as traffic_source, | |
interest_campaign as campaign | |
from attributed | |
where type = 'interest' | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment