Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Gets a list of source/target tables for any query (only available if bundle 2021_10 is enabled)
WITH table_lineage_history AS (
SELECT r.value:"objectName"::varchar AS upstream_table_name
, r.value:"objectDomain"::varchar AS upstream_table_domain
, r.value:"columns" AS upstream_table_columns
, w.value:"objectName"::varchar AS downstream_table_name
, w.value:"objectDomain"::varchar AS downstream_table_domain
, w.value:"columns" AS downstream_table_columns
, t.query_start_time AS query_start_time
, t.QUERY_ID AS query_id
FROM SNOWFLAKE.ACCOUNT_USAGE.ACCESS_HISTORY t
, LATERAL FLATTEN(INPUT => t.BASE_OBJECTS_ACCESSED) r
, LATERAL FLATTEN(INPUT => t.OBJECTS_MODIFIED) w
WHERE r.value:"objectId" IS NOT NULL
AND w.value:"objectId" IS NOT NULL
AND t.QUERY_START_TIME >= (CURRENT_DATE - 1)::timestamp -- filtering query_history a little
)
SELECT QH.SESSION_ID
, QH.QUERY_ID
, QH.QUERY_TYPE
, QH.QUERY_TEXT
, lin.upstream_table_name
, lin.downstream_table_name
, lin.upstream_table_columns
, lin.downstream_table_columns
FROM table_lineage_history lin
INNER JOIN SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY QH
ON QH.QUERY_ID = lin.query_id
AND qh.START_TIME >= (CURRENT_DATE - 1)::timestamp -- filtering query_history a little
WHERE lin.downstream_table_domain = 'Table' -- only interested in tables as target
AND QH.QUERY_TYPE != 'DELETE' -- DELETES are not really lineage
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment