Last active
August 29, 2015 14:25
-
-
Save quarterdome/292785b60a685a600cba to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This SQL snippet attributes an interest to traffic source. Any interest | |
-- event is attributed to previous traffic_source event. | |
with | |
-- Merge traffic_sources and interest events in one event pool. | |
-- * pad with 'null' every field that is not common between these two events | |
-- * add a new field 'is_first_event'; set it to 1 for traffic_source events | |
-- and to 0 for interest events | |
-- * add a new field 'type' with event type: interest or traffic_source. | |
merged as ( | |
select | |
'traffic_source' as type, | |
sent_at, | |
user_id, | |
name as traffic_source_name, | |
campaign, | |
null as interest_id, | |
null as state, | |
null as action, | |
null as contract, | |
null as source_app, | |
1 as is_first_event | |
from web.traffic_source | |
union | |
select | |
'interest' as type, | |
sent_at, | |
user_id, | |
null as traffic_source_name, | |
null as campaign, | |
interest_id, | |
state, | |
action, | |
contract, | |
source_app, | |
0 as is_first_event | |
from api.interest | |
where | |
source_app in ('TabletApartmentList', 'ApartmentList', 'MobileApartmentList') | |
), | |
-- Add a new column with session_id per user. Partition table by user id, | |
-- and calculare running sum of is_first_event. This gives you a unique id. | |
interest_with_session as ( | |
select | |
*, | |
sum(is_first_event) over | |
( partition by user_id | |
order by sent_at | |
rows between unbounded preceding and current row ) as session_id | |
from merged | |
), | |
-- Copy traffic_source campaign and name to all subsequent events. | |
-- * Now partition by user_id, and session_id | |
-- * Use first_value() window function, to copy the value from first | |
-- event in partition to all other events. | |
interest_with_attribution as ( | |
select | |
*, | |
first_value(campaign) over | |
( partition by user_id, session_id | |
order by sent_at | |
rows between unbounded preceding and current row) as interest_campaign, | |
first_value(traffic_source_name) over | |
( partition by user_id, session_id | |
order by sent_at | |
rows between unbounded preceding and current row) as interest_traffic_source | |
from interest_with_session | |
) | |
-- Now filter out irrelevant columns, and filter keep only interest rows. | |
select | |
sent_at, | |
user_id, | |
interest_id, | |
state, | |
action, | |
contract, | |
interest_traffic_source, | |
interest_campaign | |
from interest_with_attribution | |
where type = 'interest' | |
; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment