Skip to content

Instantly share code, notes, and snippets.

@cinaglia
Last active June 11, 2021 14:47
Show Gist options
  • Save cinaglia/da3be250140ef8fcb3fb5b62137f999b to your computer and use it in GitHub Desktop.
Save cinaglia/da3be250140ef8fcb3fb5b62137f999b to your computer and use it in GitHub Desktop.
Cloud SQL & BigQuery Sync
-- Check if table exists. If it does, compare schemas.
-- If schema is equal, then perform updates
-- If schema is different, CREATE OR REPLACE
-- Store schema for comparison
CREATE OR REPLACE TABLE `temp.users_schema` AS
SELECT * FROM EXTERNAL_QUERY(
'bigquery-exploration-278323.us.bigquery-exploration',
'DESCRIBE users;'
) ORDER BY Field;
-- Compare remote schema with current definition
-- https://medium.com/google-cloud/bigquery-table-comparison-cea802a3c64d
WITH
LeftData AS (
SELECT
a AS data,
FARM_FINGERPRINT(FORMAT("%T", a)) AS h
FROM
`temp.users_schema` AS a
),
RightData AS (
SELECT
b AS data,
FARM_FINGERPRINT(FORMAT("%T", b)) AS h
FROM
(
SELECT * FROM EXTERNAL_QUERY(
'bigquery-exploration-278323.us.bigquery-exploration',
'DESCRIBE users;'
) ORDER BY Field
) AS b
)
SELECT
IF(l.h IS NULL,"New on right","New on left") AS Change,
IF(l.h IS NULL,r.data,l.data).*
FROM
LeftData l
FULL OUTER JOIN RightData r
ON l.h = r.h
WHERE
l.h IS NULL OR
r.h IS NULL;
-- Initial data load, auto-discovers table schema
CREATE OR REPLACE TABLE `temp.users` AS
SELECT * FROM EXTERNAL_QUERY(
'bigquery-exploration-278323.us.bigquery-exploration',
'SELECT * FROM users;'
);
-- Verify users were created
SELECT * FROM `temp.users`;
-- Get max updated time, can be optimized by storing it somewhere
SELECT FORMAT_TIMESTAMP('%Y-%m-%d %H:%M:%S', MAX(created_at)) FROM `temp.users`;
-- Insert records for all records updated since the last update
INSERT INTO `temp.users`
SELECT * FROM EXTERNAL_QUERY(
'bigquery-exploration-278323.us.bigquery-exploration',
'SELECT * FROM users WHERE created_at > "2020-05-26 02:32:06"'
);
-- Create View if schema has changed
SELECT *
FROM (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY id ORDER BY created_at DESC) rownum
FROM
`temp.users`
)
WHERE rownum = 1;
-- Select from view
SELECT * FROM `temp.users_view`;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment