Skip to content

Instantly share code, notes, and snippets.

@jonhilgart22
Last active September 10, 2018 20:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jonhilgart22/ace8b4136559604be122d1072beb6277 to your computer and use it in GitHub Desktop.
Save jonhilgart22/ace8b4136559604be122d1072beb6277 to your computer and use it in GitHub Desktop.
-- TRAINING ---
with messager_message_and_webpage as (
select
mm.identity_id,
s.created_at as sub_start_date,
-- If the identity has subscribed, what emails did we send to them beforehand?
count(distinct case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then mm.messager_message_id else null end) as number_of_emails_sent,
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) and mm.email_outbound_link_opened_at is not null then 1 else null end) as number_of_emails_clicked,
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) and mm.email_outbound_sendgrid_first_open_at is not null then 1 else null end) as number_of_emails_opened,
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) and mm.user_replied_to_message is not null then 1 else null end) as number_of_emails_replied_to,
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_message_id else null end)as first_messager_message_id_sent,
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_message_id else null end) as last_messager_message_id_sent,
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_campaign_id else null end) as oldest_messager_campaign_sent,
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_campaign_id else null end) as newest_messager_campaign_sent,
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_campaign_variant_id else null end) as oldest_messager_campaign_variant_id_sent,
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_campaign_variant_id else null end) as newest_messager_campaign_variant_id_sent,
count(distinct case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_campaign_id else null end) as number_distinct_campaigns_sent
from analytics.messager_message_results mm
left join subscriptions s
-- find the emails sent before this user signed up
on s.user_id = mm.user_id
and s.created_at between getdate() - '8 months'::interval and getdate() - '3 months'::interval
where mm.messager_message_created_at between getdate() - '8 months'::interval and getdate() - '3 months'::interval -- training
group by 1,2
),
webpage_opens as (
select
u.user_id,
count(*) as n_webpages_opened
from user_events u
left join subscriptions s
on s.user_id = u.user_id
where
u.event_name = 'External Page Opened'
and u.created_at between getdate() - '8 months'::interval and coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval)
group by 1
)
select
i.identity_id,
i.identity_name,
i.canonical_email,
i.identity_domain as canonical_domain,
i.known_by_user_count,
case when duplicate_of_identity_id is not null then 1.0 else 0.0 end as has_duplicate_identity,
i.email_address_valid,
i.identity_imported_from,
i.gmail_verified_domain,
-- bias towards older
case when f_time_interval('month', i.last_email_sent_by_identity_to_another_user_at, getdate() - '3 months'::interval) is null then 99999 else f_time_interval('month', i.last_email_sent_by_identity_to_another_user_at, getdate() - '3 months'::interval) end as months_sent_last_email_sent_by_identity_to_another_user,
f_time_interval('month', i.identity_created_at , getdate() - '3 months'::interval) as age_in_months_of_identity,
-- bias towards older
f_time_interval('day', coalesce(i.last_email_sent_at, '2017-01-01'), getdate() - '3 months'::interval) as days_since_last_email_sent,
case when i.identity_state is null then 'null' else i.identity_state end as identity_state,
case when i.identity_country is null then 'null' else i.identity_country end as identity_country,
case when i.identity_gender is null then 'null' else i.identity_gender end as identity_gender,
case when i.identity_employment_title is null then 'null' else i.identity_employment_title end as identity_employment_title,
case when coalesce(i.company_subindustry, i.company_industry) is null then 'null' else coalesce(i.company_subindustry, i.company_industry) end as company_industry,
company_employees,
coalesce(m.number_of_emails_sent, 0) as number_of_emails_sent,
coalesce(m.number_of_emails_clicked, 0) as number_of_emails_clicked,
coalesce(m.number_of_emails_opened, 0) as number_of_emails_opened,
coalesce(m.number_of_emails_replied_to, 0) as number_of_emails_replied_to,
coalesce(m.first_messager_message_id_sent, 0) as first_messager_message_id_sent,
coalesce(m.last_messager_message_id_sent, 0) as last_messager_message_id_sent,
coalesce(m.oldest_messager_campaign_sent, 0) as oldest_messager_campaign_sent,
coalesce(m.newest_messager_campaign_sent, 0) as newest_messager_campaign_sent,
coalesce(m.oldest_messager_campaign_variant_id_sent, 0) as oldest_messager_campaign_variant_id_sent,
coalesce(m.newest_messager_campaign_variant_id_sent, 0) as newest_messager_campaign_variant_id_sent,
coalesce(m.number_distinct_campaigns_sent, 0) as number_distinct_campaigns_sent,
-- pick up on these states before they happen, not during signup
case when first_survey_completed_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_completed_survey,
case when first_user_created_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_created_user,
case when first_terms_agreed_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_terms_agreed,
coalesce(w.n_webpages_opened, 0) as n_webpages_opened,
len(split_part(identity_name, ' ', 1)) as first_name_length,
len(split_part(identity_name, ' ', 2)) as last_name_length,
-- for training data
case when has_subscribed is True then True else False end as has_subscribed
from identity_info_and_state i
left join messager_message_and_webpage m
on i.identity_id = m.identity_id
left join webpage_opens w
on w.user_id = i.user_id
where
i.email_address_valid is not null
and i.has_unsubscribed_from_emails is False
and i.identity_name is not null
and i.identity_created_at between getdate() - '8 months'::interval and getdate() - '3 months'::interval-- training
------- TESTING -------
with messager_message_and_webpage as (
select
mm.identity_id,
s.created_at as sub_start_date,
-- If the identity has subscribed, what emails did we send to them beforehand?
count(distinct case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then mm.messager_message_id else null end) as number_of_emails_sent,
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) and mm.email_outbound_link_opened_at is not null then 1 else null end) as number_of_emails_clicked,
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) and mm.email_outbound_sendgrid_first_open_at is not null then 1 else null end) as number_of_emails_opened,
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) and mm.user_replied_to_message is not null then 1 else null end) as number_of_emails_replied_to,
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_message_id else null end)as first_messager_message_id_sent,
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_message_id else null end) as last_messager_message_id_sent,
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_campaign_id else null end) as oldest_messager_campaign_sent,
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_campaign_id else null end) as newest_messager_campaign_sent,
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_campaign_variant_id else null end) as oldest_messager_campaign_variant_id_sent,
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_campaign_variant_id else null end) as newest_messager_campaign_variant_id_sent,
count(distinct case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_campaign_id else null end) as number_distinct_campaigns_sent
from analytics.messager_message_results mm
left join subscriptions s
-- find the emails sent before this user signed up
on s.user_id = mm.user_id
and s.created_at between getdate() - '3 months'::interval and getdate()
where mm.messager_message_created_at between getdate() - '3 months'::interval and getdate() -- training
group by 1,2
),
webpage_opens as (
select
u.user_id,
count(*) as n_webpages_opened
from user_events u
left join subscriptions s
on s.user_id = u.user_id
where
u.event_name = 'External Page Opened'
and u.created_at between getdate() - '3 months'::interval and coalesce(s.created_at - '1 day'::interval, getdate())
group by 1
)
select
i.identity_id,
i.identity_name,
i.canonical_email,
i.identity_domain as canonical_domain,
i.known_by_user_count,
case when duplicate_of_identity_id is not null then 1.0 else 0.0 end as has_duplicate_identity,
i.email_address_valid,
i.identity_imported_from,
i.gmail_verified_domain,
-- bias towards older
case when f_time_interval('month', i.last_email_sent_by_identity_to_another_user_at, getdate()) is null then 99999 else f_time_interval('month', i.last_email_sent_by_identity_to_another_user_at, getdate()) end as months_sent_last_email_sent_by_identity_to_another_user,
f_time_interval('month', i.identity_created_at , getdate()) as age_in_months_of_identity,
-- bias towards older
f_time_interval('day', coalesce(i.last_email_sent_at, '2017-01-01'), getdate()) as days_since_last_email_sent,
case when i.identity_state is null then 'null' else i.identity_state end as identity_state,
case when i.identity_country is null then 'null' else i.identity_country end as identity_country,
case when i.identity_gender is null then 'null' else i.identity_gender end as identity_gender,
case when i.identity_employment_title is null then 'null' else i.identity_employment_title end as identity_employment_title,
case when coalesce(i.company_subindustry, i.company_industry) is null then 'null' else coalesce(i.company_subindustry, i.company_industry) end as company_industry,
company_employees,
coalesce(m.number_of_emails_sent, 0) as number_of_emails_sent,
coalesce(m.number_of_emails_clicked, 0) as number_of_emails_clicked,
coalesce(m.number_of_emails_opened, 0) as number_of_emails_opened,
coalesce(m.number_of_emails_replied_to, 0) as number_of_emails_replied_to,
coalesce(m.first_messager_message_id_sent, 0) as first_messager_message_id_sent,
coalesce(m.last_messager_message_id_sent, 0) as last_messager_message_id_sent,
coalesce(m.oldest_messager_campaign_sent, 0) as oldest_messager_campaign_sent,
coalesce(m.newest_messager_campaign_sent, 0) as newest_messager_campaign_sent,
coalesce(m.oldest_messager_campaign_variant_id_sent, 0) as oldest_messager_campaign_variant_id_sent,
coalesce(m.newest_messager_campaign_variant_id_sent, 0) as newest_messager_campaign_variant_id_sent,
coalesce(m.number_distinct_campaigns_sent, 0) as number_distinct_campaigns_sent,
-- pick up on these states before they happen, not during signup
case when first_survey_completed_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_completed_survey,
case when first_user_created_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_created_user,
case when first_terms_agreed_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_terms_agreed,
coalesce(w.n_webpages_opened, 0) as n_webpages_opened,
len(split_part(identity_name, ' ', 1)) as first_name_length,
len(split_part(identity_name, ' ', 2)) as last_name_length,
-- for training/test data
case when has_subscribed is True then True else False end as has_subscribed
from identity_info_and_state i
left join messager_message_and_webpage m
on i.identity_id = m.identity_id
left join webpage_opens w
on w.user_id = i.user_id
where
i.email_address_valid is not null
and i.has_unsubscribed_from_emails is False
and i.identity_name is not null
and i.identity_created_at between getdate() - '3 months'::interval and getdate()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment