Skip to content

Instantly share code, notes, and snippets.

@batemapf
Created March 30, 2017 16:43
Show Gist options
  • Save batemapf/191218bc3a05d3650648584a490bc07d to your computer and use it in GitHub Desktop.
Save batemapf/191218bc3a05d3650648584a490bc07d to your computer and use it in GitHub Desktop.
#! /usr/bin/env python
import requests
import os, json
import datetime as dt
class DataSet:
    """In-memory list of records with small inspection and export helpers.

    Attributes are documented here rather than inline:
    ``created`` is the local time the object was constructed and ``data``
    is the list of records (expected to be JSON-serialisable dicts).
    """

    def __init__(self):
        self.created = dt.datetime.now()
        self.data = []

    def length(self):
        """Return the number of records held in ``data``."""
        return len(self.data)

    def keys(self):
        """Return the keys of the first record, or an empty view if no data.

        Only the first record is inspected; other records are not checked
        for having the same keys.
        """
        if not self.data:
            # A freshly constructed DataSet has no records; report "no keys"
            # instead of failing with IndexError on data[0].
            return {}.keys()
        return self.data[0].keys()

    def print_item(self, index):
        """Pretty-print the record at ``index`` as indented JSON.

        Raises IndexError if ``index`` is out of range. Returns None.
        """
        print(json.dumps(self.data[index], indent=4))

    def dump_json(self, name):
        """Write all records to ``<name>_<UTC timestamp>.json``.

        The file is created in the current working directory. Returns the
        number of characters written.
        """
        # NOTE(review): the timestamp is str(datetime), which contains a
        # space and colons; such names are not valid on every filesystem.
        filename = '{}_{}.json'.format(name, dt.datetime.utcnow())
        with open(filename, 'w') as outfile:
            return outfile.write(json.dumps(self.data, indent=4))
class GitHubDataSet(DataSet):
    """DataSet populated from a GitHub REST API endpoint.

    Constructing an instance performs the HTTP request(s): the endpoint is
    fetched, pagination is followed, and the decoded JSON is stored in
    ``data``. The headers of the first response are kept in ``headers``.
    The access token is read from the ``GITHUB_TOKEN`` environment variable.
    """

    base_url = 'https://api.github.com'

    def __init__(self, endpoint, params=None):
        """Fetch ``endpoint`` (a path such as ``/orgs/18f/repos``).

        ``params`` is an optional dict of query-string parameters sent with
        every request made through ``github_get``.
        """
        # Sets ``created`` (and an empty ``data``) like any other DataSet.
        super().__init__()
        # A ``{}`` default would be one dict shared by every instance.
        self.params = {} if params is None else params
        self.endpoint = endpoint
        self.token = os.getenv('GITHUB_TOKEN')
        first_response = self.github_get()
        self.headers = first_response.headers
        # Reuse the response we already have instead of fetching page 1 twice.
        self.data = self.get_data(first_response)

    def _auth_headers(self):
        """Return the Authorization header, or no headers if no token is set."""
        if not self.token:
            # Sending the literal string "token None" would be rejected;
            # fall back to an unauthenticated request instead.
            return {}
        return {'Authorization': 'token {}'.format(self.token)}

    def github_get(self, diff_endpoint=None):
        """GET ``diff_endpoint`` (or ``self.endpoint`` when it is falsy).

        The path is appended to ``base_url``; ``self.params`` is sent as the
        query string. Returns the ``requests`` response object.
        """
        return requests.get(
            '{}{}'.format(self.base_url, diff_endpoint or self.endpoint),
            headers=self._auth_headers(),
            params=self.params,
        )

    def get_data(self, first_response=None):
        """Return the endpoint's decoded JSON, concatenating every page.

        ``first_response`` is an already-fetched first page; when omitted a
        fresh request is made. A non-paginated response is returned as
        decoded (list or dict). Raises ``requests.HTTPError`` if a follow-up
        page request fails.
        """
        response = first_response
        if response is None:
            response = self.github_get()
        payload = response.json()
        # No rel="next" link means a single page; a non-list payload (e.g. an
        # error object) cannot be concatenated, so hand it back unchanged.
        if 'next' not in response.links or not isinstance(payload, list):
            return payload

        items = list(payload)
        # Follow the server-provided rel="next" URLs rather than building
        # page numbers by hand, so no page is skipped or requested twice.
        while 'next' in response.links:
            response = requests.get(
                response.links['next']['url'],
                headers=self._auth_headers(),
            )
            # Fail loudly instead of mixing an error object into the records.
            response.raise_for_status()
            items.extend(response.json())
        return items
def check_rate_limit():
    """Return the number of remaining calls in GitHub's ``core`` rate limit.

    Queries ``/rate_limit`` using the token in the ``GITHUB_TOKEN``
    environment variable; when the variable is unset the request is sent
    without an Authorization header.
    """
    token = os.getenv('GITHUB_TOKEN')
    # Avoid sending the literal string "token None" when no token is set.
    headers = {'Authorization': 'token {}'.format(token)} if token else {}
    response = requests.get(
        'https://api.github.com/rate_limit',
        headers=headers,
    )
    return response.json()['resources']['core']['remaining']
def get_org_repos(team):
    """Build a GitHubDataSet for an organisation's repository listing.

    ``team`` is the plain-language organisation name, e.g. ``18f``; it is
    substituted into the ``/orgs/<team>/repos`` endpoint.
    """
    endpoint = '/orgs/{}/repos'.format(team)
    org_repos = GitHubDataSet(endpoint)
    return org_repos
def get_bulk_commit_history(weeks, repos, org='18F'):
    """Fetch commit history for every repository in ``repos``.

    ``weeks`` is the number of prior weeks of history sought. ``repos`` is
    an object whose ``data`` attribute is an iterable of records that each
    have a ``'name'`` key (e.g. the GitHubDataSet from ``get_org_repos``).
    ``org`` is the owner segment used in the ``/repos/<org>/<name>/commits``
    path.

    The window ends on the most recent Sunday strictly before today and
    starts ``weeks`` weeks earlier.

    Returns a tuple ``(datasets, end, start)`` where ``datasets`` is a list
    with one GitHubDataSet per repository.
    """
    # Read the clock once so both uses agree even across midnight.
    today = dt.date.today()
    # weekday() is 0 for Monday, so subtracting weekday() + 1 days always
    # lands on the previous Sunday (a full week back when today is Sunday).
    end = today - dt.timedelta(days=today.weekday() + 1)
    start = end - dt.timedelta(weeks=weeks)
    datasets = [
        GitHubDataSet(
            '/repos/{}/{}/commits'.format(org, repo['name']),
            params={'since': start, 'until': end},
        )
        for repo in repos.data
    ]
    return datasets, end, start
def get_18fs_commits(weeks):
    """Export the last ``weeks`` weeks of commits for every 18F repository.

    Prints the remaining API quota, fetches the commit history of each
    repository in the ``18f`` organisation, writes the result to a
    ``commits_<timestamp>.json`` file, and prints how many repositories
    were written. Returns None.
    """
    print('{} calls remaining...'.format(check_rate_limit()))
    commits, _end, _start = get_bulk_commit_history(weeks, get_org_repos('18f'))
    to_file = DataSet()
    # One entry per repository, each holding that repository's commit list.
    to_file.data = [repo_commits.data for repo_commits in commits]
    to_file.dump_json('commits')
    # length is a method; it must be called to report the count rather than
    # the bound-method repr.
    print('History for {} repos written to file.'.format(to_file.length()))
# Run the 52-week export only when executed as a script, so that importing
# this module does not trigger network requests or file writes.
if __name__ == '__main__':
    get_18fs_commits(52)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment