Skip to content

Instantly share code, notes, and snippets.

@justinmklam
Last active June 2, 2022 19:51
Show Gist options
  • Save justinmklam/47f1fe77bd0339003a34e005d77352cb to your computer and use it in GitHub Desktop.
Saves a csv file of recent bigquery jobs, ordered by bytes billed.
import csv
from datetime import datetime
from argparse import ArgumentParser
from google.cloud import bigquery
# Save a CSV of recent BigQuery jobs for a project, ordered by bytes billed.
parser = ArgumentParser(
    description="Saves a csv file of recent bigquery jobs, ordered by bytes billed."
)
parser.add_argument("project_id")
parser.add_argument("--output_filename", default=None)
parser.add_argument("--num_jobs", default=10000, type=int)
args = parser.parse_args()

# Default filename embeds the project id and a timestamp so repeated runs
# don't overwrite each other.
if not args.output_filename:
    output_filename = f"bigquery-jobs-{args.project_id}-{datetime.now().isoformat()}.csv"
else:
    output_filename = args.output_filename

client = bigquery.Client(project=args.project_id)

jobs = []
for job in client.list_jobs(max_results=args.num_jobs, all_users=True, state_filter="DONE"):
    # Only query-like jobs carry total_bytes_billed; skip jobs (e.g. load/copy)
    # that lack the attribute or billed zero bytes.
    if hasattr(job, "total_bytes_billed") and job.total_bytes_billed is not None and job.total_bytes_billed > 0:
        jobs.append({
            "created": job.created,
            "job_id": job.job_id,
            "user_email": job.user_email,
            # 1 << 30 bytes per GiB
            "total_gigabytes_billed": job.total_bytes_billed / float(1 << 30),
            "dml_stats": job.dml_stats,
            "query": job.query,
        })

# sort the job_sizes by decreasing size
sorted_job_sizes = sorted(jobs, key=lambda x: x["total_gigabytes_billed"], reverse=True)

# Guard the empty case: the original indexed sorted_job_sizes[0] for the CSV
# header and raised IndexError when no billed jobs were found.
if not sorted_job_sizes:
    print("No billed jobs found; nothing to save.")
else:
    # newline="" per the csv module docs, so rows aren't double-spaced on Windows.
    with open(output_filename, "w", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=sorted_job_sizes[0].keys())
        writer.writeheader()
        writer.writerows(sorted_job_sizes)
    print(f"Saved to {output_filename}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment