Skip to content

Instantly share code, notes, and snippets.

@lacerogers20
Created March 28, 2022 20:07
Show Gist options
  • Save lacerogers20/20eaf3211c9130fe06ba1d96e9c28492 to your computer and use it in GitHub Desktop.
Save lacerogers20/20eaf3211c9130fe06ba1d96e9c28492 to your computer and use it in GitHub Desktop.
# checks for the ga4 table
DECLARE ga4_table_updated date;
#checks if the table has been updated and contains data within it
DECLARE secondary_table INT64;
#selects the date the updates start from
DECLARE secondary_table_update date;
# selects table name
DECLARE dest_table_name string;
# selects table name
DECLARE source_table_name string;
# selects date_var in your destination table
DECLARE date_var string;
/******************************************************/
#MANUAL UPDATES
/******************************************************/
SET
dest_table_name = 'testing.exist_tests' ;
SET
source_table_name = 'mydatasource.analytics_222222222' ;
SET
date_var = 'event_date';
/******************************************************/
#CHECK THE DESTINATION TABLE MAX DATE
/******************************************************/
EXECUTE IMMEDIATE (
'SELECT MAX('|| date_var ||' ) FROM '|| dest_table_name
) INTO secondary_table_update;
/******************************************************/
#CREATE VARIABLE 1 OR 0 TO RUN OR NOT DEPENDANT ON IF TABLE HAS ALREADY BEEN UPDATED
/******************************************************/
SET
secondary_table =
(SELECT
(CASE WHEN secondary_table_update = (DATE_SUB(DATE(current_date()), INTERVAL 1 day)) THEN 1 ELSE 0 END)) ;
/******************************************************/
#IF THE TABLE HAS NOT BEEN UPDATED TODAY IT WILL RUN THE REMAINDER OF THE CODE;
/******************************************************/
IF secondary_table =1 THEN
/******************************************************/
#THIS STEP CHECKS THE LATEST FILE AVAILABLE
/******************************************************/
EXECUTE IMMEDIATE (
'SELECT MAX(PARSE_DATE("%Y%m%d",'|| "REGEXP_REPLACE(table_name,r'[^0-9.]', ''))) AS date_value FROM `"|| source_table_name
||"`.INFORMATION_SCHEMA.TABLES WHERE table_name LIKE 'events_2%'" )
INTO ga4_table_updated;
/******************************************************/
# ONLY WILL RUN THIS PART IF THE GA4 TABLE IS GREATER THEN THE TABLE UPDATES
/******************************************************/
IF ga4_table_updated > secondary_table_update THEN
/******************************************************/
#ADD QUERY HERE (MUST HAVE A DATE VARIABLE)
/******************************************************/
CREATE OR REPLACE TABLE
testing.exist_tests AS
SELECT
DISTINCT DATE(TIMESTAMP_MICROS(event_timestamp)) AS event_date,
stream_id
FROM
`mydatasource.analytics_222222222.events_*`
where _table_suffix BETWEEN FORMAT_DATE('%Y%m%d', DATE_ADD(secondary_table_update, INTERVAL 1 DAY))
AND FORMAT_DATE('%Y%m%d',ga4_table_updated) ;
/******************************************************/
#ENDS THE IF;
/******************************************************/
END IF ;
END IF;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment