DECLARE MIN_ACTIVITY_DATE STRING;
SET MIN_ACTIVITY_DATE = '20220101'; -- Cut off date for subscriber activity.
create or replace table `palringo-data-staging.analytics.churn_prediction_w1` (
user_id STRING
,first_seen DATE
,last_seen DATE
,days_inactive INT64
,churned_u INT64
,w1_gr_join INT64
,day123_gr_join INT64
,w1_chat INT64
,day123_chat INT64
,w1_avatar INT64
,day123_avatar INT64
,tag STRING
,IsPremium STRING
,platform STRING
,day123_messages INT64
,w1_messages INT64
,day123_events_watched INT64
,w1_events_watched INT64
,day123_credit_purchase_cnt INT64
,w1_credit_purchase_cnt INT64
)
PARTITION BY DATE_TRUNC(first_seen, month)
CLUSTER BY user_id
;
truncate table `palringo-data-staging.analytics.churn_prediction_w1`
;
insert `palringo-data-staging.analytics.churn_prediction_w1`
with fs as (
SELECT
c.first_seen
, c.user_id
, c.last_seen
FROM
`palringo-data.analytics.churn` c
WHERE
first_seen >= PARSE_DATE('%Y%m%d', MIN_ACTIVITY_DATE) -- pick only users for this year and remove users with multiple accounts and devices
and
multiple_accounts = false -- this can be another model feature.
-- and
-- multiple_devices = false -- multiple devices is fine as often it would be a re-install.
and user_id is not null
)
-- user daily session data
, s as (
select
connection_dt as dt
, cast(subscriberId as string) as user_id
, sum(messagesTotal) as messages
, sum(array_length(eventsWatched)) as events_watched
from `palringo-data.analytics.user_daily_session`
where
connection_dt >= PARSE_DATE('%Y%m%d', MIN_ACTIVITY_DATE)
group by 1,2
)
-- credit spend
, c as (
select
date(ts) as dt
,cast(subscriberid as string) as user_id
,count(creditPurchaseItemId) as items
from `palringo-data.production.credit_purchase_ro`
where date(_partitiontime) >= PARSE_DATE('%Y%m%d', MIN_ACTIVITY_DATE)
group by 1,2
)
-- add user activity data
, ua as (
select
ua.dt
, ua.user_id
, ua.platform
, ua.tag
, ua.IsPremium
, ua.event_name
, ua.eventCount
, s.messages
, s.events_watched
, c.items as credit_purchase_cnt
from
`palringo-data.analytics.fb_event_aggregates` ua
left join s on
s.dt = ua.dt
and ua.user_id = s.user_id
left join c on
c.dt = ua.dt
and ua.user_id = c.user_id
where
ua.dt >= PARSE_DATE('%Y%m%d', MIN_ACTIVITY_DATE)
and ua.user_id is not null
)
, d as (
select
fs.user_id
, fs.first_seen
, fs.last_seen
, IF( fs.last_seen < DATE_SUB(current_date(),INTERVAL 30 DAY) , 1, 0) AS churned_u
, DATE_DIFF(current_date(), fs.last_seen, day) AS days_inactive
, ua.dt
, DATE_DIFF(ua.dt, fs.first_seen, DAY) AS day_number
, ua.event_name
, ua.eventCount
, ua.tag
, ua.IsPremium
, ua.platform
, ua.messages
, ua.events_watched
, ua.credit_purchase_cnt
from fs
left join ua on ua.user_id = fs.user_id
order by
1,2,4
)
select
user_id
, first_seen
, last_seen
, days_inactive -- wasn't active N days -> Churn?
, churned_u
-- event counts:
-- join group.
, sum(IF(day_number in (0,1,2,3,4,5,6) and upper(event_name) in ('JOIN_GROUP','GROUP_JOIN'), eventCount,0)) as w1_gr_join
, sum(IF(day_number in (0,1,2) and upper(event_name) in ('JOIN_GROUP','GROUP_JOIN'), eventCount,0)) as day123_gr_join
--Open_Chat:
, sum(IF(day_number in (0,1,2,3,4,5,6) and upper(event_name) in ('OPEN_CHAT'), eventCount,0)) as w1_chat
, sum(IF(day_number in (0,1,2) and upper(event_name) in ('OPEN_CHAT'), eventCount,0)) as d123_chat
, sum(IF(day_number in (0,1,2,3,4,5,6) and upper(event_name) in ('SET_AVATAR'), eventCount,0)) as w1_set_avatar
, sum(IF(day_number in (0,1,2) and upper(event_name) in ('SET_AVATAR'), eventCount,0)) as d123_set_avatar
, max(IFNULL(tag, 'None')) tag
, max(IFNULL(IsPremium, 'False')) IsPremium
, max(IFNULL(platform, 'unknown')) platform
-- messages ssent
, sum(IF(day_number in (0,1,2) , messages,0)) as d123_messages
, sum(IF(day_number in (0,1,2,3,4,5,6) , messages,0)) as w1_messages
-- events watched
, sum(IF(day_number in (0,1,2) , events_watched,0)) as d123_events_watched
, sum(IF(day_number in (0,1,2,3,4,5,6) , events_watched,0)) as w1_events_watched
-- credit purchase count
, sum(IF(day_number in (0,1,2) , credit_purchase_cnt,0)) as d123_credit_purchase_cnt
, sum(IF(day_number in (0,1,2,3,4,5,6) , credit_purchase_cnt,0)) as w1_credit_purchase_cnt
from d
where
first_seen >= PARSE_DATE('%Y%m%d', MIN_ACTIVITY_DATE)
and user_id not in (select cast(subscriber_id as string) FROM `palringo-data.production.bot_ro_bot_v`)
group by
user_id
, first_seen
, last_seen
, days_inactive -- wasn't active N days -> Churn?
, churned_u
order by
user_id
, days_inactive desc