Skip to content

Instantly share code, notes, and snippets.

@sveinungkb
Last active October 22, 2015 17:34
Show Gist options
  • Save sveinungkb/20c0677433fce43df11c to your computer and use it in GitHub Desktop.
Save sveinungkb/20c0677433fce43df11c to your computer and use it in GitHub Desktop.
Simple script that uses GitHub's API to read the commit history of a number of repos into .csv files so they can be processed in other tools (Excel, R, Matlab)
import requests
import re
import os
import datetime
# Token sent as "Authorization: token <value>" on every API request.
# Replace the placeholder with a real token before running.
OAUTH_TOKEN = 'YOUR-TOKEN'
# Organization whose repositories are queried.
ORGANIZATION = "org"
# Repositories to export; each one produces a <repo>.csv file.
REPOS = ["repo1", "repo2"]
# Uncomment to get all repos for org
# REPOS = []
# Commits older than this (relative to the time of the run) are discarded.
HISTORY_WINDOW = datetime.timedelta(days=30)
import requests
import re
import os
import datetime
import time
# Start from a clean slate: remove every .csv file in the working directory,
# because processCommits() opens its output in append mode.
stale_reports = [entry for entry in os.listdir('.') if entry.endswith('.csv')]
for stale in stale_reports:
    print("Cleaned up file %s" % stale)
    os.remove(stale)
# Running total of CSV rows written across all repositories.
commitsProcessed = 0


def processCommits(repo, commits):
    """Append one page of commits to ``<repo>.csv``.

    The CSV header is written first if the file is still empty. Each commit
    then becomes one ``sha,date,user,avatar`` row. Commits whose top-level
    ``committer`` entry is empty are skipped. Processing stops at the first
    commit older than HISTORY_WINDOW; later commits in the page are not
    examined.

    Args:
        repo: Repository name; ``repo + '.csv'`` is the output path.
        commits: Iterable of commit dicts decoded from the API response.

    Returns:
        True if a commit older than the window was reached (the caller should
        stop paging), False if the whole page was consumed.
    """
    global commitsProcessed
    path = repo + '.csv'
    # Commit timestamps carry a "Z" (UTC) suffix, so the cutoff must be
    # computed in UTC as well; it is also loop-invariant, so compute it once.
    cutoff = datetime.datetime.utcnow() - HISTORY_WINDOW
    # The context manager closes the file even if a malformed commit raises.
    with open(path, 'a') as target:
        if os.stat(path).st_size == 0:
            target.write("sha,date,user,avatar\n")
        for commit in commits:
            committer = commit['committer']
            if not committer:
                # No user attached to this commit: nothing to attribute.
                continue
            dateString = commit['commit']['committer']['date']
            committed = datetime.datetime.strptime(dateString, "%Y-%m-%dT%H:%M:%SZ")
            if committed < cutoff:
                print("Reached the end for %s at: %s (discarded)" % (repo, committed))
                return True
            commitsProcessed += 1
            line = "%s,%s,%s,%s" % (commit['sha'], dateString, committer['login'], committer['avatar_url'])
            target.write(line + "\n")
            print("Adding commit: %s" % line)
    return False
def getCommitsPage(repo, url):
    """Download commit pages for ``repo`` starting at ``url``.

    Each page is handed to processCommits(). Paging continues through the
    response's ``rel="next"`` link until processCommits() reports that the
    history window is exhausted, there is no next link, or a request fails.

    Args:
        repo: Repository name, passed through to processCommits().
        url: URL of the first commits page to fetch.
    """
    headers = {"Authorization": "token " + OAUTH_TOKEN}
    # Iterate instead of recursing so long histories cannot exhaust the stack.
    while url:
        print("Get commit history at %s" % url)
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            # Presumably an error payload rather than a list of commits;
            # feeding it to processCommits() would crash, so skip this repo.
            print("Skipping %s: HTTP %s for %s" % (repo, response.status_code, url))
            return
        finished = processCommits(repo, response.json())
        if 'Link' not in response.headers:
            return
        print("API calls remaining: %s/%s" % (
            response.headers.get('X-RateLimit-Remaining', '?'),
            response.headers.get('X-RateLimit-Limit', '?')))
        if finished:
            return
        # response.links parses the Link header per relation. The previous
        # greedy '<(.*)>' regex spanned from the first '<' to the last '>'
        # and so produced a broken URL whenever several links were present.
        url = response.links.get('next', {}).get('url')
def getHistory(repo):
    """Export the commit history of ``repo`` in ORGANIZATION via getCommitsPage()."""
    api_root = 'https://api.github.com/repos/'
    first_page = api_root + ORGANIZATION + '/' + repo + '/commits'
    getCommitsPage(repo, first_page)
def getReposForUrl(url, repos):
    """Append the name of every repository listed at ``url`` to ``repos``.

    Follows the ``rel="next"`` link of each response until no further page is
    advertised.

    Args:
        url: URL of the first repository-listing page.
        repos: List that is extended in place with repository names.

    Raises:
        requests.HTTPError: If a page request returns an error status.
    """
    headers = {"Authorization": "token " + OAUTH_TOKEN}
    # Iterate instead of recursing; one loop pass per result page.
    while url:
        response = requests.get(url, headers=headers)
        # Without the repository list nothing else can run, so fail loudly
        # instead of tripping over an unexpected payload below.
        response.raise_for_status()
        for repo in response.json():
            repos.append(repo['name'])
            print("Added repo: %d %s" % (len(repos), repo['name']))
        # A single-page listing has no Link header; the old
        # response.headers['Link'] lookup raised KeyError in that case.
        # response.links is empty then, and it also avoids the greedy
        # '<(.*)>' regex that mangled multi-link headers.
        url = response.links.get('next', {}).get('url')
def getRepos(org):
    """Return a list with the names of all repositories of ``org``."""
    print("Getting all repos for %s" % org)
    names = []
    # getReposForUrl() fills the list in place.
    getReposForUrl('https://api.github.com/orgs/' + org + '/repos', names)
    return names
# Script entry: export every selected repository and report the elapsed time.
start = time.time()
# An empty REPOS list means "export every repository of the organization".
repos = REPOS if len(REPOS) != 0 else getRepos(ORGANIZATION)
for repo in repos:
    getHistory(repo)
elapsed = time.time() - start
print("Processed %s commits in %d seconds" % (commitsProcessed, elapsed))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment