Skip to content

Instantly share code, notes, and snippets.

@PedroMartinSteenstrup
Created June 11, 2020 14:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PedroMartinSteenstrup/2f728973b6a378f0348374dba4be0add to your computer and use it in GitHub Desktop.
Save PedroMartinSteenstrup/2f728973b6a378f0348374dba4be0add to your computer and use it in GitHub Desktop.
USE ROLE AP_TOOLS;
-- CAVEAT Nr 1: Snowflake only gets the history_id from Looker passed as context, so we can link to a user efficiently.
-- -- -- -- -- -- This is then linked to workday's data
-- CAVEAT Nr 2: Looker's history_id can have several Snowflake queries linked to, who each have a cost associated to.
-- -- -- -- -- -- therefore there is a danger of double counting. History is augmented by Looker data.
-- CAVEAT Nr 3: Workday's data seems to not systematically have the cost center input, though it makes little sense
-- -- -- -- -- -- some departments might be under-represented until we figure out why it's missing
-- CAVEAT Nr 4: Snowflake credits seem to be off compared to the rest of the figures
-- -- -- -- -- -- might have to tweak the calculation
select *
from ANALYTICS_DB.REPORTS.report_looker_usage;
CREATE TABLE IF NOT EXISTS ANALYTICS_DB.REPORTS.report_looker_usage
(
SNOWFLAKE_QUERY_ID VARCHAR PRIMARY KEY,
START_TIME TIMESTAMP_LTZ,
EXECUTION_STATUS VARCHAR,
QUERY_TYPE VARCHAR,
EXECUTION_TIME VARCHAR,
COMPILATION_TIME NUMBER,
QUEUED_OVERLOAD_TIME NUMBER,
BYTES_SCANNED NUMBER,
ERROR_MESSAGE VARCHAR,
CREDITS_USED_CLOUD_SERVICES FLOAT,
LOOKER_QUERY_ID NUMBER,
HISTORY_ID NUMBER,
EMAIL VARCHAR,
USER_ID NUMBER,
SOURCE VARCHAR,
CONNECTION_NAME VARCHAR,
MODEL VARCHAR,
LOOKER_VIEW VARCHAR,
FIELDS VARCHAR,
DASHBOARD_ID VARCHAR,
RESULT_SOURCE VARCHAR,
STATUS VARCHAR,
HISTORY_RUNTIME DOUBLE,
QUERY_RUNTIME DOUBLE,
WORKER_TITLE VARCHAR,
COST_CENTER_LVL_1 VARCHAR,
COST_CENTER_LVL_2 VARCHAR,
COST_CENTER_CODE VARCHAR,
WORKER_LOCATION VARCHAR,
WORKER_NAME VARCHAR
);
-- Get all Looker queries from Snowflake
MERGE INTO ANALYTICS_DB.REPORTS.report_looker_usage target USING (
select JSON_EXTRACT_PATH_TEXT(looker_data, 'user_id')::int user_id,
JSON_EXTRACT_PATH_TEXT(looker_data, 'history_id')::int history_id,
QUERY_ID AS SNOWFLAKE_QUERY_ID,
QUERY_TYPE,
START_TIME,
EXECUTION_STATUS,
EXECUTION_TIME,
COMPILATION_TIME,
QUEUED_OVERLOAD_TIME,
BYTES_SCANNED,
ERROR_MESSAGE,
CREDITS_USED_CLOUD_SERVICES
from (
select parse_json(split_part(qh.QUERY_TEXT, '-- Looker Query Context ', 2)) looker_data,
qh.QUERY_ID,
qh.START_TIME,
qh.EXECUTION_STATUS,
qh.EXECUTION_TIME,
qh.QUEUED_OVERLOAD_TIME,
qh.BYTES_SCANNED,
qh.ERROR_MESSAGE,
qh.COMPILATION_TIME,
qh.CREDITS_USED_CLOUD_SERVICES,
qh.QUERY_TYPE
from SNOWFLAKE.ACCOUNT_USAGE.QUERY_HISTORY qh
where qh.USER_NAME IN ('LOOKER_OBFUSCATED', 'LOOKER_UNOBFUSCATED', 'LOOKER_MONITOR')
and qh.QUERY_TEXT ilike '%Looker Query Context%'
AND START_TIME < date_trunc(day, current_timestamp)
AND START_TIME > DATEADD(day, -3, date_trunc(day, current_timestamp))
) data
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
) source ON source.history_id = target.HISTORY_ID
AND source.user_id = target.USER_ID
AND source.SNOWFLAKE_QUERY_ID = target.SNOWFLAKE_QUERY_ID
WHEN NOT MATCHED THEN INSERT
(user_id, history_id, SNOWFLAKE_QUERY_ID, QUERY_TYPE, START_TIME,
EXECUTION_STATUS, EXECUTION_TIME, COMPILATION_TIME, QUEUED_OVERLOAD_TIME,
BYTES_SCANNED, ERROR_MESSAGE, CREDITS_USED_CLOUD_SERVICES)
VALUES (source.user_id, source.history_id, source.SNOWFLAKE_QUERY_ID, source.QUERY_TYPE, source.START_TIME,
source.EXECUTION_STATUS, source.EXECUTION_TIME, source.COMPILATION_TIME, source.QUEUED_OVERLOAD_TIME,
source.BYTES_SCANNED, source.ERROR_MESSAGE, source.CREDITS_USED_CLOUD_SERVICES);
MERGE INTO ANALYTICS_DB.REPORTS.report_looker_usage target USING (
select q.ID as LOOKER_QUERY_ID,
h.id as history_id,
u.email,
u.id as user_id,
h.source,
h.CONNECTION_NAME,
q.MODEL,
q.VIEW AS looker_view,
q.fields,
h.DASHBOARD_ID,
h.RESULT_SOURCE,
h.STATUS,
h.RUNTIME history_runtime,
q.RUNTIME query_runtime
from ANALYTICS_DB.looker.HISTORY h
LEFT JOIN LOOKER.QUERY q ON q.id = h.QUERY_ID
LEFT JOIN LOOKER.USER u ON u.ID = h.USER_ID
where 1 = 1
AND h.CREATED_AT < date_trunc(day, current_timestamp)
AND h.CREATED_AT > DATEADD(day, -4, date_trunc(day, current_timestamp))
GROUP BY 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14
) source ON source.history_id = target.HISTORY_ID
AND source.user_id = target.USER_ID
WHEN MATCHED THEN UPDATE SET
LOOKER_QUERY_ID = source.LOOKER_QUERY_ID,
history_id = source.history_id,
email = source.email,
user_id = source.user_id,
source = source.source,
CONNECTION_NAME = source.CONNECTION_NAME,
MODEL = source.MODEL,
looker_view = source.looker_view,
fields = source.fields,
DASHBOARD_ID = source.DASHBOARD_ID,
RESULT_SOURCE = source.RESULT_SOURCE,
STATUS = source.STATUS,
history_runtime = source.history_runtime,
query_runtime = source.query_runtime
;
MERGE INTO ANALYTICS_DB.REPORTS.report_looker_usage target USING (
SELECT *
FROM (
select people.BUSINESS_TITLE as WORKER_TITLE,
regexp_replace(
regexp_replace(people.HIERARCHY_LEVEL_03,
regexp_substr(people.HIERARCHY_LEVEL_03, '\\(.*\\)'), ''), ' \\(\\)',
'') as COST_CENTER_LVL_1,
regexp_replace(people.COST_CENTER_NAME, '\\(DO NOT USE\\)', '') as COST_CENTER_LVL_2,
people.COST_CENTER_ID as COST_CENTER_CODE,
people.PRIMARY_WORK_EMAIL as WORKER_EMAIL,
people.LOCATION as WORKER_LOCATION,
people.WORKER as WORKER_NAME,
row_number()
over (partition by PRIMARY_WORK_EMAIL order by REPORT_EFFECTIVE_DATE_AND_TIME desc) rownr
from ANALYTICS_DB.WORKDAY.HEADCOUNT people
group by 1, 2, 3, 4, 5, 6, 7, REPORT_EFFECTIVE_DATE_AND_TIME)
WHERE rownr = 1
) source ON lower(source.WORKER_EMAIL) = lower(target.EMAIL) AND target.WORKER_NAME is null
WHEN MATCHED THEN UPDATE SET
WORKER_TITLE = source.WORKER_TITLE,
COST_CENTER_LVL_1 = source.COST_CENTER_LVL_1,
COST_CENTER_LVL_2 = source.COST_CENTER_LVL_2,
COST_CENTER_CODE = source.COST_CENTER_CODE,
WORKER_LOCATION = source.WORKER_LOCATION,
WORKER_NAME = source.WORKER_NAME;
GRANT SELECT ON table reports.report_looker_usage TO ROLE LOOKER_ROLE_MONITOR;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment