Last active
August 29, 2015 14:25
-
-
Save quarterdome/b62f9a05daff4754e0e9 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- This SQL snippet attributes an interest to traffic source and campaign. | |
-- Any interest event is attributed to previous traffic_source event. | |
-- | |
-- The approach used here, uses 'lag() ignore nulls'. This is supported | |
-- on Redshift, but not in PostgreSQL. This approach yields a shorter, | |
-- more readable, and likely faster query, but it is less portable. | |
with | |
-- Merge traffic_sources and interest events in one event pool. | |
-- * pad with 'null' every field that is not common between these two events | |
-- * add a new field 'type' with event type: interest or traffic_source. | |
merged as ( | |
select | |
'traffic_source' as type, | |
sent_at, | |
user_id, | |
name as traffic_source_name, | |
campaign, | |
null as interest_id, | |
null as state, | |
null as action, | |
null as contract, | |
null as source_app | |
from web.traffic_source | |
union | |
select | |
'interest' as type, | |
sent_at, | |
user_id, | |
null as traffic_source_name, | |
null as campaign, | |
interest_id, | |
state, | |
action, | |
contract, | |
source_app | |
from api.interest | |
where | |
source_app in ('TabletApartmentList', 'ApartmentList', 'MobileApartmentList') | |
and | |
sent_at > '2015-06-20' -- traffic sources where not recorded before 06/15, giving couple days buffer | |
), | |
-- Copy traffic_source campaign and name to all subsequent events. | |
attributed as ( | |
select | |
*, | |
coalesce( campaign, | |
lag(campaign) ignore nulls over | |
( partition by user_id | |
order by sent_at ) ) as interest_campaign, | |
coalesce( traffic_source_name, | |
lag(traffic_source_name) ignore nulls over | |
( partition by user_id | |
order by sent_at ) ) as interest_traffic_source | |
from merged | |
) | |
-- Filter out traffic_source events, and leave only interests | |
select | |
user_id, | |
sent_at, | |
interest_id, | |
action, | |
state, | |
contract, | |
source_app, | |
interest_traffic_source as traffic_source, | |
interest_campaign as campaign | |
from attributed | |
where type = 'interest' | |
; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment