Skip to content

Instantly share code, notes, and snippets.

@mshakhomirov
Created December 9, 2022 10:27
Show Gist options
  • Save mshakhomirov/a92cb344d7bcd5c9f73f75926b95e2f5 to your computer and use it in GitHub Desktop.
Save mshakhomirov/a92cb344d7bcd5c9f73f75926b95e2f5 to your computer and use it in GitHub Desktop.

New churn_model based on subscriber_id (not deviced)

-- tag: create churn model - tag ends/

-- 1. Split train and test data:
CREATE OR REPLACE TABLE `palringo-data-staging.model_v2.churn_prediction_test_data_w1` AS 
SELECT *
FROM `palringo-data-staging.analytics.churn_prediction_w1`
WHERE 
    MOD(ABS(FARM_FINGERPRINT(CAST(user_id AS STRING))), 5) = 0
    AND 
    first_seen < DATE_SUB(current_date(),INTERVAL 30 DAY) -- user has 30 days to churn / not churn.
-- exclude outliers
and day123_messages <400
and w1_messages < 800
and w1_events_watched < 15
and w1_gr_join < 48
;

CREATE OR REPLACE TABLE `palringo-data-staging.model_v2.churn_prediction_train_data_w1` as
SELECT *
FROM
    `palringo-data-staging.analytics.churn_prediction_w1`
WHERE 
    NOT user_id IN (
    SELECT DISTINCT user_id FROM `palringo-data-staging.model_v2.churn_prediction_test_data_w1`
    )
AND 
    first_seen < DATE_SUB(current_date(),INTERVAL 30 DAY) -- user has 30 days to churn / not churn.
-- exclude outliers
and day123_messages <400
and w1_messages < 800
and w1_events_watched < 15
and w1_gr_join < 48
;

-- 2. Check that the random sample did the right thing:

SELECT  
      countif( churned_u=0)
    , countif( churned_u=1)
    , countif( churned_u=0)   /  countif( churned_u=1)
FROM
    `palringo-data-staging.analytics.churn_prediction_w1`
WHERE 
first_seen < DATE_SUB(current_date(),INTERVAL 30 DAY)
and day123_messages <400
and w1_messages < 800
and w1_events_watched < 15
and w1_gr_join < 48
;

-- 3. Create base model
CREATE OR REPLACE MODEL `palringo-data-staging.model_v2.churn_model_w1`
OPTIONS (
    model_type='logistic_reg',
    input_label_cols=['churned_u']
    -- auto_class_weights = TRUE

) AS

SELECT 
      churned_u
    , platform
    , IsPremium
    , tag
    , w1_gr_join
    , w1_chat
    , w1_avatar
    , w1_messages
    , w1_events_watched
    , w1_credit_purchase_cnt

FROM
    `palringo-data-staging.model_v2.churn_prediction_train_data_w1`
;

-- Day 1,2 and 3 only model.
CREATE OR REPLACE MODEL `palringo-data-staging.model_v2.churn_model_day123`
OPTIONS (
    model_type='logistic_reg',
    input_label_cols=['churned_u']
    -- auto_class_weights = TRUE

) AS

SELECT 
      churned_u
    , platform
    , IsPremium
    , tag
    , day123_gr_join
    , day123_chat
    , day123_avatar
    , day123_messages
    , day123_events_watched
    , day123_credit_purchase_cnt

FROM
    `palringo-data-staging.model_v2.churn_prediction_train_data_w1`
;

-- activity only model
CREATE OR REPLACE MODEL `palringo-data-staging.model_v2.churn_model_w1_activity_only`
OPTIONS (
    model_type='logistic_reg',
    input_label_cols=['churned_u']
    -- auto_class_weights = TRUE

) AS

SELECT 
      churned_u
    , w1_gr_join
    , w1_chat
    , w1_avatar
    , w1_messages
    , w1_events_watched
    , w1_credit_purchase_cnt

FROM
    `palringo-data-staging.model_v2.churn_prediction_train_data_w1`
;

-- Trying to deal with imbalanced dataset
    -- data_split_method='seq',
    -- data_split_col='w1_chat'
-- https://cloud.google.com/bigquery-ml/docs/reference/standard-sql/bigqueryml-syntax-create-glm#auto_class_weights

CREATE OR REPLACE MODEL `palringo-data-staging.model_v2.churn_model_w1_imbalanced`
OPTIONS (
    model_type='logistic_reg',
    input_label_cols=['churned_u'],
    auto_class_weights = TRUE, 
    data_split_method='RANDOM'


) AS

SELECT 
        churned_u
        , w1_gr_join
        , w1_chat
        , w1_avatar
FROM
    `palringo-data-staging.analytics.churn_prediction_w1`
WHERE
    first_seen < DATE_SUB(current_date(),INTERVAL 30 DAY)
;





-- 4. Evaluate Model fit on train data
-- Base model
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_w1`,
    (
    SELECT 
        churned_u
        , platform
        , IsPremium
        , tag
        , w1_gr_join
        , w1_chat
        , w1_avatar
        , w1_messages
        , w1_events_watched
        , w1_credit_purchase_cnt
    FROM
        `palringo-data-staging.model_v2.churn_prediction_train_data_w1`
    )
)
;

-- churn_model_day123
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_day123`,
    (
    SELECT 
        churned_u
        , platform
        , IsPremium
        , tag
        , day123_gr_join
        , day123_chat
        , day123_avatar
        , day123_messages
        , day123_events_watched
        , day123_credit_purchase_cnt
    FROM
        `palringo-data-staging.model_v2.churn_prediction_train_data_w1`
    )
)
;

-- activity only model
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_w1_activity_only`,
    (
    SELECT 
    churned_u
    , w1_gr_join
    , w1_chat
    , w1_avatar
    , w1_messages
    , w1_events_watched
    , w1_credit_purchase_cnt
    FROM
        `palringo-data-staging.model_v2.churn_prediction_train_data_w1`
    )
)
;
-- imbalanced model
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_w1_imbalanced`,
    (
    SELECT 
        churned_u
        , w1_gr_join
        , w1_chat
        , w1_avatar
    FROM
        `palringo-data-staging.model_v2.churn_prediction_train_data_w1`
    )
)
;

-- 5. Model fit on test data
-- Base model
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_w1`,
    (
    SELECT 
          churned_u
        , platform
        , IsPremium
        , tag
        , w1_gr_join
        , w1_chat
        , w1_avatar
    FROM
        `palringo-data-staging.model_v2.churn_prediction_test_data_w1`
    )
)
;

-- Day 1,2,3 model
SELECT *
FROM ML.EVALUATE(MODEL `palringo-data-staging.model_v2.churn_model_day123`,
    (
    SELECT 
          churned_u
        , platform
        , IsPremium
        , tag
        , day123_gr_join
        , day123_chat
        , day123_avatar
    FROM
        `palringo-data-staging.model_v2.churn_prediction_test_data_w1`
    )
)
;

-- 6. Display your linear beta-values
SELECT * from ML.WEIGHTS(MODEL `palringo-data-staging.model_v2.churn_model_w1`)
;

-- 7. To get test data confusion matrix
SELECT *
FROM ML.CONFUSION_MATRIX(MODEL `palringo-data-staging.model_v2.churn_model_w1`,
    (
  SELECT churned_u
, platform
, IsPremium
, tag
, w1_gr_join
, w1_chat
, w1_avatar
  FROM
    `palringo-data-staging.model_v2.churn_prediction_test_data_w1`
    )
)

-- 8. Predictions:

SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_w1`, (
SELECT 
 '8000111'                  user_id
, 0                         churned_u
, 'IOS'                     platform
, 'True'                    IsPremium
, 'Staff'                   tag
, 0                         w1_gr_join
, 3                         w1_chat
, 0                         w1_avatar
, 10                        w1_messages
, 2                         w1_events_watched
, 300                       credit_purchase_cnt
))
;

SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_w1`, (
SELECT 
 '8000111'                  user_id
, 0                         churned_u
, 'IOS'                     platform
, 'True'                    IsPremium
, 'None'                   tag
, 1                         w1_gr_join
, 13                         w1_chat
, 2                         w1_avatar
, 10                        w1_messages
, 2                         w1_events_watched
, 300                       w1_credit_purchase_cnt
))
;
SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_w1`, (
SELECT 
 '8000111'                  user_id
, 0                         churned_u
, 'IOS'                     platform
, 'False'                   IsPremium
, 'None'                    tag
, 2                         w1_gr_join
, 13                        w1_chat
, 2                         w1_avatar
, 10                        w1_messages
, 2                         w1_events_watched
, 300                       w1_credit_purchase_cnt
))
;

-- activity only
SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_w1_activity_only`, (
SELECT 
 '8000111'                  user_id
, 0                         churned_u
, 2                         w1_gr_join
, 13                        w1_chat
, 2                         w1_avatar
, 140                       w1_messages
, 70                        w1_events_watched
, 300                       w1_credit_purchase_cnt
))
;



SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_day123`, (
SELECT 
 '8000111'                  user_id
, 0                         churned_u
, 'IOS'                     platform
, 'False'                   IsPremium
, 'None'                    tag
, 7                         day123_gr_join
, 113                       day123_chat
, 5                         day123_avatar
, 120                       day123_messages
, 2                         day123_events_watched
, 300                       day123_credit_purchase_cnt
))
;

SELECT * FROM ML.PREDICT(MODEL `palringo-data-staging.model_v2.churn_model_day123`, (
SELECT 
 '8000111'                  user_id
, 0                         churned_u
, 'IOS'                     platform
, 'True'                   IsPremium
, 'None'                    tag
, 7                         day123_gr_join
, 113                       day123_chat
, 5                         day123_avatar
, 220                       day123_messages
, 2                         day123_events_watched
, 300                       day123_credit_purchase_cnt
))
;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment