Skip to content

Instantly share code, notes, and snippets.

@geoffmc
Created May 25, 2020 18:00
Show Gist options
  • Save geoffmc/6d0309773c8c1a8d81aa538f5695b345 to your computer and use it in GitHub Desktop.
Save geoffmc/6d0309773c8c1a8d81aa538f5695b345 to your computer and use it in GitHub Desktop.
#!/bin/bash
if [ -z "${PROJECT}" -o -z "${DATASET}" ]; then
echo "Please set \$PROJECT and \$DATASET."
exit 1
fi
IFS='' read -d '' Q <<EOF
#standardSQL
/*
* Script: BQ Load Audit - INFORMATION_SCHEMA
* Description:
*
* NOTE: this runs against `region-us`.INFORMATION_SCHEMA.JOBS_BY_USER, you should use JOBS_BY_PROJET if you have access
*
* Creates a user friendly view for querying the
* BigQuery load jobs using INFORMATION_SCHEMA.
*/
/* Create a user friendly view. */
SELECT
creation_time,
project_id,
user_email,
job_id,
reservation_id,
start_time,
STRUCT( EXTRACT(MINUTE
FROM
creation_time) AS minute_of_day,
EXTRACT(HOUR
FROM
creation_time) AS hour_of_day,
EXTRACT(DAYOFWEEK
FROM
creation_time) - 1 AS day_of_week ) AS date,
end_time,
TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) AS runtime_ms,
TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) / 1000 AS runtime_seconds,
total_slot_ms,
total_slot_ms / TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) AS avg_slots,
destination_table,
error_result.reason AS error_result_reason,
CASE
WHEN error_result.reason IS NOT NULL THEN TRUE
ELSE
FALSE
END
AS is_error,
1 AS num_loads
FROM
`region-us`.INFORMATION_SCHEMA.JOBS_BY_USER
WHERE
job_type = 'LOAD'
AND state = 'DONE'
EOF
bq mk --project_id $PROJECT --view="${Q}" $DATASET.bq_load_info_schema
#!/bin/bash
if [ -z "${PROJECT}" -o -z "${DATASET}" ]; then
echo "Please set \$PROJECT and \$DATASET."
exit 1
fi
IFS='' read -d '' Q <<EOF
#standardSQL
/*
* Script: BQ Query Audit - INFORMATION_SCHEMA
* Description:
*
* NOTE: this runs against `region-us`.INFORMATION_SCHEMA.JOBS_BY_USER, you should use JOBS_BY_PROJET if you have access
*
* Creates a user friendly view for querying the
* BigQuery query jobs using INFORMATION_SCHEMA.
*/
/* Create a user friendly view. */
SELECT
creation_time,
project_id,
user_email,
job_id,
job_type,
statement_type,
start_time,
STRUCT( EXTRACT(MINUTE
FROM
creation_time) AS minute_of_day,
EXTRACT(HOUR
FROM
creation_time) AS hour_of_day,
EXTRACT(DAYOFWEEK
FROM
creation_time) - 1 AS day_of_week ) AS date,
end_time,
TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) AS runtime_ms,
TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) / 1000 AS runtime_seconds,
query,
state,
reservation_id,
total_bytes_processed,
total_bytes_processed / (1000 * 1000 * 1000 * 1000) AS total_bytes_terabytes,
(total_bytes_processed / (1000 * 1000 * 1000 * 1000)) * 5 AS estimated_cost_usd,
total_slot_ms,
total_slot_ms / TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) AS avg_slots,
error_result.reason AS error_result_reason,
CASE
WHEN error_result.reason IS NOT NULL THEN TRUE
ELSE
FALSE
END
AS is_error,
cache_hit,
referenced_tables,
ARRAY_LENGTH(referenced_tables) AS total_tables_processed,
CASE
WHEN REGEXP_CONTAINS(referenced_tables[safe_OFFSET(0)].table_id, r'^INFORMATION_SCHEMA.JOBS_BY_*') THEN TRUE
ELSE
FALSE
END
AS is_info_schema_query,
1 AS queries
FROM
`region-us`.INFORMATION_SCHEMA.JOBS_BY_USER
WHERE
job_type = 'QUERY'
AND state = 'DONE'
AND statement_type = 'SELECT'
EOF
bq mk --project_id $PROJECT --view="${Q}" $DATASET.bq_query_info_schema
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment