-- tag: create churn model - tag ends/
-- 1. Split train and test data:
CREATE OR REPLACE TABLE `palringo-data-staging.model_v2.churn_prediction_test_data_w1` AS
SELECT *
FROM `palringo-data-staging.analytics.churn_prediction_w1`
WHERE
MOD(ABS(FARM_FINGERPRINT(CAST(user_id AS STRING))), 5) = 0
AND
first_seen < DATE_SUB(current_date(),INTERVAL 30 DAY) -- user has 30 days to churn / not churn.
-- exclude outliers
and day123_messages <400
and w1_messages < 800
and w1_events_watched < 15
and w1_gr_join < 48
;
CREATE OR REPLACE TABLE `palringo-data-staging.model_v2.churn_prediction_train_data_w1` as
SELECT *
FROM
`palringo-data-staging.analytics.churn_prediction_w1`
WHERE
NOT user_id IN (
SELECT DISTINCT user_id FROM `palringo-data-staging.model_v2.churn_prediction_test_data_w1`
)
AND
first_seen < DATE_SUB(current_date(),INTERVAL 30 DAY) -- user has 30 days to churn / not churn.
-- exclude outliers
and day123_messages <400
and w1_messages < 800
and w1_events_watched < 15
and w1_gr_join < 48
;
-- 2. Check that the random sample did the right thing:
SELECT
countif( churned_u=0)
, countif( churned_u=1)
, countif( churned_u=0) / countif( churned_u=1)
FROM
`palringo-data-staging.analytics.churn_prediction_w1`
WHERE
first_seen < DATE_SUB(current_date(),INTERVAL 30 DAY)
and day123_messages <400
and w1_messages < 800
and w1_events_watched < 15
and w1_gr_join < 48
;
-- 3. Create base model
CREATE OR REPLACE MODEL `palringo-data-staging.model_v2.churn_model_w1`
OPTIONS (
model_type='logistic_reg',
input_label_cols=['churned_u']
-- auto_class_weights = TRUE
) AS
SELECT
churned_u
, platform
, IsPremium
, tag
, w1_gr_join
, w1_chat
, w1_avatar
, w1_messages
, w1_events_watched
, w1_credit_purchase_cnt
FROM
`palringo-data-staging.model_v2.churn_prediction_train_data_w1`
;
-- Day 1,2 and 3 only model.
CREATE OR REPLACE MODEL `palringo-data-staging.model_v2.churn_model_day123`
OPTIONS (
model_type='logistic_reg',
input_label_cols=['churned_u']
-- auto_class_weights = TRUE
) AS
SELECT
churned_u
, platform
, IsPremium
, tag
, day123_gr_join
, day123_chat
, day123_avatar
, day123_messages
, day123_events_watched
, day123_credit_purchase_cnt
FROM
`palringo-data-staging.model_v2.churn_prediction_train_data_w1`
;
-- activity only model
CREATE OR REPLACE MODEL `palringo-data-staging.model_v2.churn_model_w1_activity_only`
OPTIONS (
model_type='logistic_reg',
input_label_cols=['churned_u']
-- auto_class_weights = TRUE
) AS
SELECT
churned_u
, w1_gr_join
, w1_chat
, w1_avatar
, w1_messages
, w1_events_watched
, w1_credit_purchase_cnt
FROM
`palringo-data-staging.model_v2.churn_prediction_train_data_w1`
;
-- Trying to deal with imbalanced dataset
-- data_split_method='seq',
-- data_split_col='w1_chat'
-- https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-create-glm#auto_class_weights
CREATE OR REPLACE MODEL `palringo-data-staging.model_v2.churn_model_w1_imbalanced`
OPTIONS (
model_type='logistic_reg',
input_label_cols=['churned_u'],
auto_class_weights = TRUE,
data_split_method='RANDOM'
) AS
SELECT
churned_u
, w1_gr_join
, w1_chat
, w1_avatar
FROM
`palringo-data-staging.analytics.churn_prediction_w1`
WHERE
first_seen < DATE_SUB(current_date(),INTERVAL 30 DAY)
;
-- 4. Evaluate Model fit on train data
-- Base model
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_w1`,
(
SELECT
churned_u
, platform
, IsPremium
, tag
, w1_gr_join
, w1_chat
, w1_avatar
, w1_messages
, w1_events_watched
, w1_credit_purchase_cnt
FROM
`palringo-data-staging.model_v2.churn_prediction_train_data_w1`
)
)
;
-- churn_model_day123
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_day123`,
(
SELECT
churned_u
, platform
, IsPremium
, tag
, day123_gr_join
, day123_chat
, day123_avatar
, day123_messages
, day123_events_watched
, day123_credit_purchase_cnt
FROM
`palringo-data-staging.model_v2.churn_prediction_train_data_w1`
)
)
;
-- activity only model
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_w1_activity_only`,
(
SELECT
churned_u
, w1_gr_join
, w1_chat
, w1_avatar
, w1_messages
, w1_events_watched
, w1_credit_purchase_cnt
FROM
`palringo-data-staging.model_v2.churn_prediction_train_data_w1`
)
)
;
-- imbalanced model
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_w1_imbalanced`,
(
SELECT
churned_u
, w1_gr_join
, w1_chat
, w1_avatar
FROM
`palringo-data-staging.model_v2.churn_prediction_train_data_w1`
)
)
;
-- 5. Model fit on test data
-- Base model
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_w1`,
(
SELECT
churned_u
, platform
, IsPremium
, tag
, w1_gr_join
, w1_chat
, w1_avatar
FROM
`palringo-data-staging.model_v2.churn_prediction_test_data_w1`
)
)
;
-- Day 1,2,3 model
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_day123`,
(
SELECT
churned_u
, platform
, IsPremium
, tag
, day123_gr_join
, day123_chat
, day123_avatar
FROM
`palringo-data-staging.model_v2.churn_prediction_test_data_w1`
)
)
;
-- 6. Display your linear beta-values
SELECT * from ML.WEIGHTS(MODEL `palringo-data-staging.model_v2.churn_model_w1`)
;
-- 7. To get test data confusion matrix
SELECT *
FROM ML.CONFUSION_MATRIX(MODEL `palringo-data-staging.model_v2.churn_model_w1`,
(
SELECT churned_u
, platform
, IsPremium
, tag
, w1_gr_join
, w1_chat
, w1_avatar
FROM
`palringo-data-staging.model_v2.churn_prediction_test_data_w1`
)
)
-- 8. Predictions:
SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_w1`, (
SELECT
'8000111' user_id
, 0 churned_u
, 'IOS' platform
, 'True' IsPremium
, 'Staff' tag
, 0 w1_gr_join
, 3 w1_chat
, 0 w1_avatar
, 10 w1_messages
, 2 w1_events_watched
, 300 credit_purchase_cnt
))
;
SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_w1`, (
SELECT
'8000111' user_id
, 0 churned_u
, 'IOS' platform
, 'True' IsPremium
, 'None' tag
, 1 w1_gr_join
, 13 w1_chat
, 2 w1_avatar
, 10 w1_messages
, 2 w1_events_watched
, 300 w1_credit_purchase_cnt
))
;
SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_w1`, (
SELECT
'8000111' user_id
, 0 churned_u
, 'IOS' platform
, 'False' IsPremium
, 'None' tag
, 2 w1_gr_join
, 13 w1_chat
, 2 w1_avatar
, 10 w1_messages
, 2 w1_events_watched
, 300 w1_credit_purchase_cnt
))
;
-- activity only
SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_w1_activity_only`, (
SELECT
'8000111' user_id
, 0 churned_u
, 2 w1_gr_join
, 13 w1_chat
, 2 w1_avatar
, 140 w1_messages
, 70 w1_events_watched
, 300 w1_credit_purchase_cnt
))
;
SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_day123`, (
SELECT
'8000111' user_id
, 0 churned_u
, 'IOS' platform
, 'False' IsPremium
, 'None' tag
, 7 day123_gr_join
, 113 day123_chat
, 5 day123_avatar
, 120 day123_messages
, 2 day123_events_watched
, 300 day123_credit_purchase_cnt
))
;
SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_day123`, (
SELECT
'8000111' user_id
, 0 churned_u
, 'IOS' platform
, 'True' IsPremium
, 'None' tag
, 7 day123_gr_join
, 113 day123_chat
, 5 day123_avatar
, 220 day123_messages
, 2 day123_events_watched
, 300 day123_credit_purchase_cnt
))
;