Last active
September 10, 2018 20:01
-
-
Save jonhilgart22/ace8b4136559604be122d1072beb6277 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- TRAINING --- | |
with messager_message_and_webpage as ( | |
select | |
mm.identity_id, | |
s.created_at as sub_start_date, | |
-- If the identity has subscribed, what emails did we send to them beforehand? | |
count(distinct case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then mm.messager_message_id else null end) as number_of_emails_sent, | |
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) and mm.email_outbound_link_opened_at is not null then 1 else null end) as number_of_emails_clicked, | |
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) and mm.email_outbound_sendgrid_first_open_at is not null then 1 else null end) as number_of_emails_opened, | |
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) and mm.user_replied_to_message is not null then 1 else null end) as number_of_emails_replied_to, | |
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_message_id else null end)as first_messager_message_id_sent, | |
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_message_id else null end) as last_messager_message_id_sent, | |
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_campaign_id else null end) as oldest_messager_campaign_sent, | |
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_campaign_id else null end) as newest_messager_campaign_sent, | |
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_campaign_variant_id else null end) as oldest_messager_campaign_variant_id_sent, | |
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_campaign_variant_id else null end) as newest_messager_campaign_variant_id_sent, | |
count(distinct case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) then messager_campaign_id else null end) as number_distinct_campaigns_sent | |
from analytics.messager_message_results mm | |
left join subscriptions s | |
-- find the emails sent before this user signed up | |
on s.user_id = mm.user_id | |
and s.created_at between getdate() - '8 months'::interval and getdate() - '3 months'::interval | |
where mm.messager_message_created_at between getdate() - '8 months'::interval and getdate() - '3 months'::interval -- training | |
group by 1,2 | |
), | |
webpage_opens as ( | |
select | |
u.user_id, | |
count(*) as n_webpages_opened | |
from user_events u | |
left join subscriptions s | |
on s.user_id = u.user_id | |
where | |
u.event_name = 'External Page Opened' | |
and u.created_at between getdate() - '8 months'::interval and coalesce(s.created_at - '1 day'::interval, getdate() - '3 months'::interval) | |
group by 1 | |
) | |
select | |
i.identity_id, | |
i.identity_name, | |
i.canonical_email, | |
i.identity_domain as canonical_domain, | |
i.known_by_user_count, | |
case when duplicate_of_identity_id is not null then 1.0 else 0.0 end as has_duplicate_identity, | |
i.email_address_valid, | |
i.identity_imported_from, | |
i.gmail_verified_domain, | |
-- bias towards older | |
case when f_time_interval('month', i.last_email_sent_by_identity_to_another_user_at, getdate() - '3 months'::interval) is null then 99999 else f_time_interval('month', i.last_email_sent_by_identity_to_another_user_at, getdate() - '3 months'::interval) end as months_sent_last_email_sent_by_identity_to_another_user, | |
f_time_interval('month', i.identity_created_at , getdate() - '3 months'::interval) as age_in_months_of_identity, | |
-- bias towards older | |
f_time_interval('day', coalesce(i.last_email_sent_at, '2017-01-01'), getdate() - '3 months'::interval) as days_since_last_email_sent, | |
case when i.identity_state is null then 'null' else i.identity_state end as identity_state, | |
case when i.identity_country is null then 'null' else i.identity_country end as identity_country, | |
case when i.identity_gender is null then 'null' else i.identity_gender end as identity_gender, | |
case when i.identity_employment_title is null then 'null' else i.identity_employment_title end as identity_employment_title, | |
case when coalesce(i.company_subindustry, i.company_industry) is null then 'null' else coalesce(i.company_subindustry, i.company_industry) end as company_industry, | |
company_employees, | |
coalesce(m.number_of_emails_sent, 0) as number_of_emails_sent, | |
coalesce(m.number_of_emails_clicked, 0) as number_of_emails_clicked, | |
coalesce(m.number_of_emails_opened, 0) as number_of_emails_opened, | |
coalesce(m.number_of_emails_replied_to, 0) as number_of_emails_replied_to, | |
coalesce(m.first_messager_message_id_sent, 0) as first_messager_message_id_sent, | |
coalesce(m.last_messager_message_id_sent, 0) as last_messager_message_id_sent, | |
coalesce(m.oldest_messager_campaign_sent, 0) as oldest_messager_campaign_sent, | |
coalesce(m.newest_messager_campaign_sent, 0) as newest_messager_campaign_sent, | |
coalesce(m.oldest_messager_campaign_variant_id_sent, 0) as oldest_messager_campaign_variant_id_sent, | |
coalesce(m.newest_messager_campaign_variant_id_sent, 0) as newest_messager_campaign_variant_id_sent, | |
coalesce(m.number_distinct_campaigns_sent, 0) as number_distinct_campaigns_sent, | |
-- pick up on these states before they happen, not during signup | |
case when first_survey_completed_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_completed_survey, | |
case when first_user_created_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_created_user, | |
case when first_terms_agreed_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_terms_agreed, | |
coalesce(w.n_webpages_opened, 0) as n_webpages_opened, | |
len(split_part(identity_name, ' ', 1)) as first_name_length, | |
len(split_part(identity_name, ' ', 2)) as last_name_length, | |
-- for training data | |
case when has_subscribed is True then True else False end as has_subscribed | |
from identity_info_and_state i | |
left join messager_message_and_webpage m | |
on i.identity_id = m.identity_id | |
left join webpage_opens w | |
on w.user_id = i.user_id | |
where | |
i.email_address_valid is not null | |
and i.has_unsubscribed_from_emails is False | |
and i.identity_name is not null | |
and i.identity_created_at between getdate() - '8 months'::interval and getdate() - '3 months'::interval-- training | |
------- TESTING ------- | |
with messager_message_and_webpage as ( | |
select | |
mm.identity_id, | |
s.created_at as sub_start_date, | |
-- If the identity has subscribed, what emails did we send to them beforehand? | |
count(distinct case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then mm.messager_message_id else null end) as number_of_emails_sent, | |
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) and mm.email_outbound_link_opened_at is not null then 1 else null end) as number_of_emails_clicked, | |
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) and mm.email_outbound_sendgrid_first_open_at is not null then 1 else null end) as number_of_emails_opened, | |
sum(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) and mm.user_replied_to_message is not null then 1 else null end) as number_of_emails_replied_to, | |
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_message_id else null end)as first_messager_message_id_sent, | |
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_message_id else null end) as last_messager_message_id_sent, | |
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_campaign_id else null end) as oldest_messager_campaign_sent, | |
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_campaign_id else null end) as newest_messager_campaign_sent, | |
min(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_campaign_variant_id else null end) as oldest_messager_campaign_variant_id_sent, | |
max(case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_campaign_variant_id else null end) as newest_messager_campaign_variant_id_sent, | |
count(distinct case when mm.messager_message_created_at <= coalesce(s.created_at - '1 day'::interval, getdate()) then messager_campaign_id else null end) as number_distinct_campaigns_sent | |
from analytics.messager_message_results mm | |
left join subscriptions s | |
-- find the emails sent before this user signed up | |
on s.user_id = mm.user_id | |
and s.created_at between getdate() - '3 months'::interval and getdate() | |
where mm.messager_message_created_at between getdate() - '3 months'::interval and getdate() -- training | |
group by 1,2 | |
), | |
webpage_opens as ( | |
select | |
u.user_id, | |
count(*) as n_webpages_opened | |
from user_events u | |
left join subscriptions s | |
on s.user_id = u.user_id | |
where | |
u.event_name = 'External Page Opened' | |
and u.created_at between getdate() - '3 months'::interval and coalesce(s.created_at - '1 day'::interval, getdate()) | |
group by 1 | |
) | |
select | |
i.identity_id, | |
i.identity_name, | |
i.canonical_email, | |
i.identity_domain as canonical_domain, | |
i.known_by_user_count, | |
case when duplicate_of_identity_id is not null then 1.0 else 0.0 end as has_duplicate_identity, | |
i.email_address_valid, | |
i.identity_imported_from, | |
i.gmail_verified_domain, | |
-- bias towards older | |
case when f_time_interval('month', i.last_email_sent_by_identity_to_another_user_at, getdate()) is null then 99999 else f_time_interval('month', i.last_email_sent_by_identity_to_another_user_at, getdate()) end as months_sent_last_email_sent_by_identity_to_another_user, | |
f_time_interval('month', i.identity_created_at , getdate()) as age_in_months_of_identity, | |
-- bias towards older | |
f_time_interval('day', coalesce(i.last_email_sent_at, '2017-01-01'), getdate()) as days_since_last_email_sent, | |
case when i.identity_state is null then 'null' else i.identity_state end as identity_state, | |
case when i.identity_country is null then 'null' else i.identity_country end as identity_country, | |
case when i.identity_gender is null then 'null' else i.identity_gender end as identity_gender, | |
case when i.identity_employment_title is null then 'null' else i.identity_employment_title end as identity_employment_title, | |
case when coalesce(i.company_subindustry, i.company_industry) is null then 'null' else coalesce(i.company_subindustry, i.company_industry) end as company_industry, | |
company_employees, | |
coalesce(m.number_of_emails_sent, 0) as number_of_emails_sent, | |
coalesce(m.number_of_emails_clicked, 0) as number_of_emails_clicked, | |
coalesce(m.number_of_emails_opened, 0) as number_of_emails_opened, | |
coalesce(m.number_of_emails_replied_to, 0) as number_of_emails_replied_to, | |
coalesce(m.first_messager_message_id_sent, 0) as first_messager_message_id_sent, | |
coalesce(m.last_messager_message_id_sent, 0) as last_messager_message_id_sent, | |
coalesce(m.oldest_messager_campaign_sent, 0) as oldest_messager_campaign_sent, | |
coalesce(m.newest_messager_campaign_sent, 0) as newest_messager_campaign_sent, | |
coalesce(m.oldest_messager_campaign_variant_id_sent, 0) as oldest_messager_campaign_variant_id_sent, | |
coalesce(m.newest_messager_campaign_variant_id_sent, 0) as newest_messager_campaign_variant_id_sent, | |
coalesce(m.number_distinct_campaigns_sent, 0) as number_distinct_campaigns_sent, | |
-- pick up on these states before they happen, not during signup | |
case when first_survey_completed_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_completed_survey, | |
case when first_user_created_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_created_user, | |
case when first_terms_agreed_event_at is not null and first_survey_completed_event_at <= sub_start_date - '1 days'::interval then 1.0 else 0.0 end as has_terms_agreed, | |
coalesce(w.n_webpages_opened, 0) as n_webpages_opened, | |
len(split_part(identity_name, ' ', 1)) as first_name_length, | |
len(split_part(identity_name, ' ', 2)) as last_name_length, | |
-- for training/test data | |
case when has_subscribed is True then True else False end as has_subscribed | |
from identity_info_and_state i | |
left join messager_message_and_webpage m | |
on i.identity_id = m.identity_id | |
left join webpage_opens w | |
on w.user_id = i.user_id | |
where | |
i.email_address_valid is not null | |
and i.has_unsubscribed_from_emails is False | |
and i.identity_name is not null | |
and i.identity_created_at between getdate() - '3 months'::interval and getdate() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment