Skip to content

Instantly share code, notes, and snippets.

@tswast
Forked from crwilcox/bigquery_github.py
Created September 12, 2018 21:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tswast/71cd4adb5b2308d67639fe6324357d31 to your computer and use it in GitHub Desktop.
Save tswast/71cd4adb5b2308d67639fe6324357d31 to your computer and use it in GitHub Desktop.
Scan GitHub using BigQuery
from google.cloud import bigquery
import json
GITHUB_USERNAME = 'crwilcox'
START_DATE = "2018-03-05"
END_DATE = "2018-08-31"
client = bigquery.client.Client()
query = f"""SELECT repository, type, event AS status, COUNT(*) AS count
FROM (
SELECT type, repo.name as repository, actor.login,
JSON_EXTRACT(payload, '$.action') AS event, created_at
FROM `githubarchive.day.20*`
WHERE actor.login = '{GITHUB_USERNAME}' AND
created_at BETWEEN TIMESTAMP('{START_DATE}') AND
TIMESTAMP('{END_DATE}') AND
_TABLE_SUFFIX BETWEEN '{START_DATE[2:].replace('-', '')}' AND
'{END_DATE[2:].replace('-', '')}'
)
GROUP BY repository, type, status ORDER BY repository, type, status;
"""
print(query)
query_job = client.query(query)
results = [i for i in query_job.result()]
# results = {}
# with open('query_result.json', 'r') as js:
# results = json.load(js)
# # print(results)
# Releases made
count = [int(row.count) for row in results
if row.type == 'ReleaseEvent']
print(f"{sum(count)} Releases across {len(count)} repos")
# PRs Made
count = [int(row.count) for row in results
if row.type == 'PullRequestEvent' and
row.status == "\"opened\""]
print(f"{sum(count)} PRs opened across {len(count)} repos")
# PR Comments Left
count = [int(row.count) for row in results
if row.type == 'PullRequestReviewCommentEvent']
print(f"{sum(count)} PR comments across {len(count)} repos")
# Issues Created
count = [int(row.count) for row in results
if row.type == 'IssuesEvent' and
row.status == "\"opened\""]
print(f"{sum(count)} issues opened across {len(count)} repos")
# Issues Closed
count = [int(row.count) for row in results
if row.type == 'IssuesEvent' and
row.status == "\"closed\""]
print(f"{sum(count)} issues closed across {len(count)} repos")
# Issue Comments
count = [int(row.count) for row in results
if row.type == 'IssueCommentEvent']
print(f"{sum(count)} issue comments across {len(count)} repos")
# Push Events
count = [int(row.count) for row in results
if row.type == 'PushEvent']
print(f"{sum(count)} pushes across {len(count)} repos")
#!/bin/bash
enddate=`date +%Y-%m-%d -d "yesterday"`
startdate=`date +%Y-%m-%d -d "7 days ago"`
ghusername='crwilcox'
startdate='2018-03-05'
echo "query GitHub for user $ghusername from $startdate to $enddate"
bq query "SELECT repository, type, event AS status, COUNT(*) AS count \
FROM ( \
SELECT type, repo.name as repository, actor.login, JSON_EXTRACT(payload, '\$.action') AS event, \
FROM (TABLE_DATE_RANGE([githubarchive:day.], TIMESTAMP('$startdate'), TIMESTAMP('$enddate') )) \
WHERE actor.login = '$ghusername' ) \
GROUP BY repository, type, status ORDER BY repository, type, status;"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment