@yptheangel
Created August 4, 2021 10:04
List BigQuery table metadata (size in GB, row count, creation and last-modified timestamps) for every dataset in a project and save it to a CSV using a pandas DataFrame.
from google.cloud import bigquery
import os
import pandas as pd

# Point the client at a service account key and the target project.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "YOURGCPSERVICEACCOUNTKEY.json"
GCP_PROJECT_ID = "YOURGCPPROJECT"

client = bigquery.Client(project=GCP_PROJECT_ID)
datasets = list(client.list_datasets())
project = client.project

if datasets:
    print(f"Datasets in project {project}:")
    frames = []
    for dataset in datasets:
        print(f"Dataset: {dataset.dataset_id}")
        # The __TABLES__ meta-table exposes per-table size, row count, and timestamps.
        query_job = client.query(
            f"""
            SELECT
              project_id, dataset_id, table_id,
              size_bytes/POW(1024,3) AS size_GB,
              TIMESTAMP_MILLIS(creation_time) AS creation_time,
              TIMESTAMP_MILLIS(last_modified_time) AS last_modified_time,
              row_count
            FROM `{GCP_PROJECT_ID}.{dataset.dataset_id}`.__TABLES__
            """
        )
        frames.append(query_job.to_dataframe())
    # DataFrame.append is deprecated; concatenate the per-dataset results instead.
    df = pd.concat(frames, ignore_index=True)
    df.to_csv("bqtables.csv", index=False)
else:
    print(f"{project} does not contain any datasets.")