Last active
June 11, 2021 14:47
-
-
Save cinaglia/da3be250140ef8fcb3fb5b62137f999b to your computer and use it in GitHub Desktop.
Cloud SQL & BigQuery Sync
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Check if table exists. If it does, compare schemas. | |
-- If schema is equal, then perform updates | |
-- If schema is different, CREATE OR REPLACE | |
-- Store schema for comparison | |
CREATE OR REPLACE TABLE `temp.users_schema` AS | |
SELECT * FROM EXTERNAL_QUERY( | |
'bigquery-exploration-278323.us.bigquery-exploration', | |
'DESCRIBE users;' | |
) ORDER BY Field; | |
-- Compare remote schema with current definition | |
-- https://medium.com/google-cloud/bigquery-table-comparison-cea802a3c64d | |
WITH | |
LeftData AS ( | |
SELECT | |
a AS data, | |
FARM_FINGERPRINT(FORMAT("%T", a)) AS h | |
FROM | |
`temp.users_schema` AS a | |
), | |
RightData AS ( | |
SELECT | |
b AS data, | |
FARM_FINGERPRINT(FORMAT("%T", b)) AS h | |
FROM | |
( | |
SELECT * FROM EXTERNAL_QUERY( | |
'bigquery-exploration-278323.us.bigquery-exploration', | |
'DESCRIBE users;' | |
) ORDER BY Field | |
) AS b | |
) | |
SELECT | |
IF(l.h IS NULL,"New on right","New on left") AS Change, | |
IF(l.h IS NULL,r.data,l.data).* | |
FROM | |
LeftData l | |
FULL OUTER JOIN RightData r | |
ON l.h = r.h | |
WHERE | |
l.h IS NULL OR | |
r.h IS NULL; | |
-- Initial data load, auto-discovers table schema | |
CREATE OR REPLACE TABLE `temp.users` AS | |
SELECT * FROM EXTERNAL_QUERY( | |
'bigquery-exploration-278323.us.bigquery-exploration', | |
'SELECT * FROM users;' | |
); | |
-- Verify users were created | |
SELECT * FROM `temp.users`; | |
-- Get max updated time, can be optimized by storing it somewhere | |
SELECT FORMAT_TIMESTAMP('%Y-%m-%d %H:%M:%S', MAX(created_at)) FROM `temp.users`; | |
-- Insert records for all records updated since the last update | |
INSERT INTO `temp.users` | |
SELECT * FROM EXTERNAL_QUERY( | |
'bigquery-exploration-278323.us.bigquery-exploration', | |
'SELECT * FROM users WHERE created_at > "2020-05-26 02:32:06"' | |
); | |
-- Create View if schema has changed | |
SELECT * | |
FROM ( | |
SELECT | |
*, | |
ROW_NUMBER() OVER (PARTITION BY id ORDER BY created_at DESC) rownum | |
FROM | |
`temp.users` | |
) | |
WHERE rownum = 1; | |
-- Select from view | |
SELECT * FROM `temp.users_view`; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment