Google Cloud log sink to BigQuery, exported to Cloud Storage, and queried with a BigQuery federated query against the GCS bucket
#!/usr/bin/env bash
# set vars
export PROJECT_ID=$(gcloud config get-value project)
export PROJECT_USER=$(gcloud config get-value core/account) # set current user
export PROJECT_NUMBER=$(gcloud projects describe $PROJECT_ID --format="value(projectNumber)")
export IDNS=${PROJECT_ID}.svc.id.goog # workload identity domain
export REGION="us-central1"
export ZONE="us-central1-a"
export NETWORK_NAME="default"
export DATASET_NAME="logs_from_sink"
export TEST_TABLE_NAME="stderr" # stderr | stdout | kubelet | events | cloudaudit_googleapis_com | node_problem_detector
export BUCKET_NAME="gs://compressed-logs-demo2"
export SINK_NAME="logs-to-bq"
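# optional sanity check (illustrative, not part of the original gist): fail fast if no
# default project is configured before creating any resources
if [[ -z "$PROJECT_ID" ]]; then
  echo "No default project set; run: gcloud config set project <PROJECT_ID>" >&2
  exit 1
fi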
# enable apis
gcloud services enable compute.googleapis.com \
  storage.googleapis.com \
  bigquery.googleapis.com \
  logging.googleapis.com
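# optional (illustrative): confirm the required APIs show up as enabled before continuing
gcloud services list --enabled | grep -E "bigquery|logging|storage"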
# create BigQuery dataset (default partition expiration of one day)
bq mk -d --location=US \
  --default_partition_expiration 86400 \
  --description "Logs shipped from log sink." \
  $DATASET_NAME
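# optional (illustrative): confirm the dataset exists and shows the partition expiration
bq show --format=prettyjson ${PROJECT_ID}:${DATASET_NAME}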
# create GCS bucket
gsutil mb $BUCKET_NAME
# create log sink to BigQuery (*** add --log-filter to manage cost ***)
gcloud logging sinks create $SINK_NAME \
  bigquery.googleapis.com/projects/${PROJECT_ID}/datasets/${DATASET_NAME} \
  --project $PROJECT_ID
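# illustrative variant (commented out): the same sink restricted with --log-filter to
# manage cost; the filter expression below is only an example, not from the original gist
# gcloud logging sinks create $SINK_NAME \
#   bigquery.googleapis.com/projects/${PROJECT_ID}/datasets/${DATASET_NAME} \
#   --log-filter='resource.type="k8s_container" AND severity>=ERROR' \
#   --project $PROJECT_ID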
# get sink writer identity SA
# Ref: https://cloud.google.com/logging/docs/export/configure_export_v2#dest-auth
export SINK_WRITER_IDENTITY=$(gcloud logging sinks describe $SINK_NAME --format="value(writerIdentity)")
gcloud projects add-iam-policy-binding $PROJECT_ID \
  --member=$SINK_WRITER_IDENTITY \
  --role=roles/bigquery.dataEditor
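# optional (illustrative): verify the writer identity now holds roles/bigquery.dataEditor
gcloud projects get-iam-policy $PROJECT_ID \
  --flatten="bindings[].members" \
  --filter="bindings.members:${SINK_WRITER_IDENTITY}" \
  --format="table(bindings.role)"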
# export table data to Cloud Storage
bq extract \
  --compression GZIP \
  --destination_format "NEWLINE_DELIMITED_JSON" \
  ${DATASET_NAME}.${TEST_TABLE_NAME} \
  $BUCKET_NAME/test-file.json
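# note (illustrative): tables larger than 1 GB cannot be exported to a single file; a
# wildcard URI shards the extract across multiple files, e.g.:
# bq extract \
#   --compression GZIP \
#   --destination_format "NEWLINE_DELIMITED_JSON" \
#   ${DATASET_NAME}.${TEST_TABLE_NAME} \
#   "$BUCKET_NAME/test-file-*.json"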
# confirm file is in bucket
gsutil ls $BUCKET_NAME
# query Cloud Storage in BigQuery using temp tables
# Ref: https://cloud.google.com/bigquery/external-data-cloud-storage#temporary-tables
bq query \
  --external_table_definition=test_logs::logName:STRING@NEWLINE_DELIMITED_JSON=$BUCKET_NAME/test-file.json \
  'SELECT
    *
  FROM
    test_logs'
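# alternative (illustrative): create a permanent external table over the same file so it
# can be queried repeatedly instead of redefining a temp table each run; the table name
# test_logs_external is just an example
# bq mk --external_table_definition=logName:STRING@NEWLINE_DELIMITED_JSON=$BUCKET_NAME/test-file.json \
#   ${DATASET_NAME}.test_logs_external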