Mike Shakhomirov mshakhomirov

## 20230209_export_data.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mshakhomirov
                / 20230209_export_data.md
            
            
              Created
              March 7, 2023 14:36
            
          
    -- (Re)create an external table over the Avro files under the GCS prefix below,
    -- exposing the hive-style path segments as the partition columns dt and lang.
    CREATE OR REPLACE EXTERNAL TABLE source.custom_hive_partitioned_table
    WITH PARTITION COLUMNS (
      -- Column order must match the order of the partition keys in the external path.
      dt   STRING,
      lang STRING
    )
    OPTIONS (
      format = 'AVRO',
      uris = ['gs://events-export-avro/public-project/avro_external_test/*'],
      hive_partition_uri_prefix = 'gs://events-export-avro/public-project/avro_external_test',
      -- Queries are allowed without a predicate on the partition columns.
      require_hive_partition_filter = false
    );

  
## simple_stack_with_lambda.yaml
AWSTemplateFormatVersion: '2010-09-09'
Description: AWS S3 data lake stack.
Parameters:

  SourceDataBucketName:
    Description: Data lake bucket with source data files.
    Type: String
    Default: datalake.staging.aws

Resources:

## simple_stack_with_lambda.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mshakhomirov
                / simple_stack_with_lambda.md
            
            
              Created
              March 6, 2023 15:09
            
          
    AWSTemplateFormatVersion: '2010-09-09'
Description: AWS S3 data lake stack.
Parameters:
  # SourceDataBucketName must be nested under Parameters, and its attributes
  # nested under it; at column 0 they would be parsed as top-level template
  # keys (and Description would collide with the template-level Description).
  SourceDataBucketName:
    Description: Data lake bucket with source data files.
    Type: String
    Default: datalake.staging.aws
Resources:

  
## simple_stack_with_lambda.yaml
AWSTemplateFormatVersion: '2010-09-09'
Description: AWS S3 data lake stack.
Parameters:

  SourceDataBucketName:
    Description: Data lake bucket with source data files.
    Type: String
    Default: datalake.staging.aws

  StackPackageS3Key:

## 20220109_deleted_barred_subscribers.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mshakhomirov
                / 20220109_deleted_barred_subscribers.md
            
            
              Created
              January 11, 2023 09:13
            
          
    Full delete

curl -v -X DELETE "http://subscriber.test.palringo.aws/subscriber/78034896/delete" -H "Content-Type:application/json" -H "Palringo-RequesterId:78034896" --data '{"reason":"test", "author": 78034896, "source": 78034896}' 

curl -v -X DELETE "http://subscriber.test.palringo.aws/subscriber/78034896/delete" -H "Content-Type:application/json" -H "Palringo-RequesterId:78034896" --data '{"reason":"test", "author": 78034896, "source": 78034896}' 
Undo full delete

curl -v -X POST "http://subscriber.test.palringo.aws/subscriber/1/undelete" -H "Content-Type:application/json" -H "Palringo-RequesterId:78034896"

  
## 20230110_testing_dryRun.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mshakhomirov
                / 20230110_testing_dryRun.md
            
            
              Created
              January 10, 2023 09:06
            
          
    DryRun testing in staging

curl -v -X PUT "http://bigquery-data.test.palringo.aws/job/create/" -H "Content-Type:application/json" -H "Palringo-RequesterId:78034896" --data '{"criteria": {"groupMembership": {"idList": [896]}}, "pageSize": 10,"code": "myJobId202301100857", "dryRun": true}'
curl -v -X GET "http://bigquery-data.test.palringo.aws/job/myJobId202301100857/1"

  
## 20221207_new_churn_model_w1.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mshakhomirov
                / 20221207_new_churn_model_w1.md
            
            
              Created
              December 9, 2022 10:27
            
          
    New churn_model based on subscriber_id (not on device)

-- tag: create churn model - tag ends/

-- 1. Split train and test data:
CREATE OR REPLACE TABLE `palringo-data-staging.model_v2.churn_prediction_test_data_w1` AS 
SELECT *
FROM `palringo-data-staging.analytics.churn_prediction_w1`
WHERE 

  
## 20221207_new_churn_prediction_dataset.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                mshakhomirov
                / 20221207_new_churn_prediction_dataset.md
            
            
              Created
              December 9, 2022 10:18
            
          
    Suggested tweaks for churn model / dataset churn_predictions:


Change last_seen_d_u (based on device) to last_seen (subscriber_id) > churned_u
Use d1, d2, d0 for one model and week_1 activity for another for model training and prediction.
Change MIN_ACTIVITY_DATE = '20220501'
Remove user_pseudo_id and use user_id
added features from daily_session table
added credit spend
excluded bots
excluded outliers


## sh.sh
# Run only the Dataform actions tagged "unit_tests", together with their
# dependencies, passing "tests" as the schema suffix for this run.
dataform run --tags="unit_tests" --include-deps=true --schema-suffix=tests

## cange.sql
...
select
     cast(json_extract(src,'$.user_id') as string)                as user_id
...
	AWSTemplateFormatVersion: '2010-09-09'
	Description: AWS S3 data lake stack.
	Parameters:

	SourceDataBucketName:
	Description: Data lake bucket with source data files.
	Type: String
	Default: datalake.staging.aws

	Resources:
	...
	select
	cast(json_extract(src,'$.user_id') as string) as user_id
	...