Count all the lines I've ever changed on GitHub
#!/usr/local/bin/python3
"""
Count all the lines I've ever changed on GitHub

Setup:
    pip install requests tqdm GitPython

Usage:
    ./commits.py
"""
import fnmatch
import functools
import json
import os
import subprocess

import git
import requests
import tqdm
# Root URL that every API path passed to github_fetch() is appended to.
GITHUB_BASE_URL = 'https://api.github.com'
# GitHub login used as the ``author`` filter when listing commits.
AUTHOR = 'ryanwilsonperkin'
# fnmatch-style patterns matched against a repo's "full_name"; matches are skipped.
IGNORED_REPOS = ('thalmic-alpha/*', )
@functools.lru_cache()
def get_token():
    """Load the GitHub token from the environment.

    Returns:
        The value of the ``GITHUB_TOKEN`` environment variable, or ``None``
        when it is not set. The result is memoized by ``lru_cache``, so the
        environment is only consulted on the first call.
    """
    return os.environ.get('GITHUB_TOKEN')
def mkdir(d):
    """Make directory ``d`` if it doesn't exist.

    Missing parent directories are created too, and an already existing
    directory is left alone. Doing this in a single ``os.makedirs`` call
    avoids the window between a separate existence check and the creation.

    Args:
        d: Path of the directory to create.

    Raises:
        FileExistsError: If ``d`` exists but is not a directory.
    """
    os.makedirs(d, exist_ok=True)
def github_fetch(api, **kwargs):
    """Make a GitHub API call.

    Args:
        api: API path (starting with ``/``) appended to ``GITHUB_BASE_URL``.
        **kwargs: Sent as query-string parameters of the request.

    Returns:
        The ``requests`` response object. The HTTP status is not checked
        here; callers decide how to treat error responses.
    """
    token = get_token()
    # GitHub no longer accepts the token as an ``access_token`` query
    # parameter; it has to be sent in the Authorization header instead.
    # Without a token the request is made unauthenticated.
    headers = {'Authorization': f'token {token}'} if token else {}
    return requests.get(
        f'{GITHUB_BASE_URL}{api}',
        params=kwargs,
        headers=headers,
    )
def github_get(api, **kwargs):
    """Fetch a single value from GitHub.

    Args:
        api: API path handed to ``github_fetch``.
        **kwargs: Query parameters handed to ``github_fetch``.

    Returns:
        The decoded JSON body of the response.
    """
    response = github_fetch(api, **kwargs)
    return response.json()
def github_list(api, **kwargs):
    """Fetch a list of values from GitHub, handling pagination.

    Pages are requested one after another, starting at ``page=1``, until a
    response no longer advertises a ``next`` link.

    Args:
        api: API path handed to ``github_fetch``.
        **kwargs: Extra query parameters sent with every page request.

    Returns:
        A single list with the items of all pages, in page order.

    Raises:
        requests.HTTPError: If any page comes back with a 4xx/5xx status.
    """
    data = []
    page = 1
    while True:
        response = github_fetch(api, page=page, **kwargs)
        # An error response carries a JSON object rather than a list;
        # extending ``data`` with it would silently add its keys as items.
        response.raise_for_status()
        data.extend(response.json())
        if 'next' not in response.links:
            return data
        page += 1
def get_my_repos():
    """Fetch all repos I have access to from GitHub or from local cache.

    The result of listing ``/user/repos`` is stored as JSON in
    ``/tmp/repos.json``; when that file already exists it is returned
    as-is and GitHub is not contacted.

    Returns:
        The list of repo objects as decoded from JSON.
    """
    cache_path = '/tmp/repos.json'
    if os.path.exists(cache_path):
        with open(cache_path, 'r', encoding='utf-8') as f:
            return json.load(f)

    repos = github_list('/user/repos')
    with open(cache_path, 'w', encoding='utf-8') as f:
        json.dump(repos, f)
    return repos
def get_my_commits(repo):
    """Fetch all my commits from GitHub or from local cache.

    Commits are listed from ``/repos/{repo}/commits`` filtered by
    ``AUTHOR`` and cached per repo under ``/tmp/commits``; an existing
    cache file is returned without contacting GitHub.

    Args:
        repo: Repo name in ``owner/name`` form.

    Returns:
        The list of commit objects as decoded from JSON.
    """
    # '/' cannot appear in a file name, so flatten "owner/name".
    repo_cache = '/tmp/commits/{}.json'.format(repo.replace('/', '__'))
    mkdir('/tmp/commits')
    if os.path.exists(repo_cache):
        with open(repo_cache, 'r', encoding='utf-8') as f:
            return json.load(f)

    commits = github_list(f'/repos/{repo}/commits', author=AUTHOR)
    with open(repo_cache, 'w', encoding='utf-8') as f:
        json.dump(commits, f)
    return commits
def get_commit_details(repo, commits):
    """Load commit details from locally cloned project.

    Results are cached per repo under ``/tmp/commit_details``; an existing
    cache file is returned without opening the clone.

    Args:
        repo: Repo name in ``owner/name`` form; its clone is read from
            ``/tmp/repos/{repo}``.
        commits: Iterable of commit objects, each with a ``'sha'`` entry.

    Returns:
        A dict mapping each commit sha to the ``stats.total`` value that
        GitPython reports for that commit.
    """
    # '/' cannot appear in a file name, so flatten "owner/name".
    repo_cache = '/tmp/commit_details/{}.json'.format(repo.replace('/', '__'))
    mkdir('/tmp/commit_details')
    if os.path.exists(repo_cache):
        with open(repo_cache, 'r', encoding='utf-8') as f:
            return json.load(f)

    # Use a separate name rather than rebinding the ``repo`` argument.
    local_repo = git.Repo(f'/tmp/repos/{repo}')
    commit_details = {
        commit['sha']: local_repo.commit(commit['sha']).stats.total
        for commit in commits
    }
    with open(repo_cache, 'w', encoding='utf-8') as f:
        json.dump(commit_details, f)
    return commit_details
def clone_repo(repo):
    """Clone a repo from GitHub.

    The clone goes to ``/tmp/repos/{repo}`` over SSH. Nothing is done when
    that path already exists.

    Args:
        repo: Repo name in ``owner/name`` form.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero; the
            captured output is attached to the exception.
    """
    owner = repo.split('/')[0]
    destination = f'/tmp/repos/{repo}'
    mkdir('/tmp/repos')
    mkdir(f'/tmp/repos/{owner}')
    if os.path.exists(destination):
        return
    # Capture git's output so it doesn't get interleaved with the caller's
    # own terminal output.
    subprocess.run(
        ['git', 'clone', f'git@github.com:{repo}.git', destination],
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
def filter_repos(repos, ignored=None):
    """Filter out any repos matching the ignore patterns.

    Args:
        repos: Iterable of repo objects, each with a ``'full_name'`` entry.
        ignored: Optional iterable of fnmatch-style patterns. Defaults to
            the module-level ``IGNORED_REPOS`` when omitted.

    Yields:
        Each repo whose ``'full_name'`` matches none of the patterns, in
        the original order.
    """
    patterns = IGNORED_REPOS if ignored is None else ignored
    for repo in repos:
        full_name = repo['full_name']
        if not any(fnmatch.fnmatch(full_name, pattern) for pattern in patterns):
            yield repo
def main():
    """Count and print the lines changed across all of my GitHub repos."""
    # One mapping of commit sha -> stats for every repo; a sha that shows
    # up in more than one repo is therefore only counted once.
    commits = {}

    # Load a list of repos to check
    repos = list(filter_repos(get_my_repos()))

    # Setup a nice progress bar for monitoring status
    progress = tqdm.tqdm(repos)
    for repo in progress:
        repo_name = repo['full_name']

        # Fetch the commits I've made to this repo
        progress.set_description(f'{repo_name}: Fetching commits')
        repo_commits = get_my_commits(repo_name)

        # Skip if I haven't committed to this repo
        if not repo_commits:
            continue

        # Clone the repo for commit analysis
        progress.set_description(f'{repo_name}: Cloning repo')
        clone_repo(repo_name)

        # Load stats about all the commits I made to this repo
        progress.set_description(f'{repo_name}: Loading stats')
        commit_details = get_commit_details(repo_name, repo_commits)

        # Keep track in one giant mapping
        commits.update(commit_details)

    # Summarize the total number of lines I've ever modified
    lines_modified = sum(commit['lines'] for commit in commits.values())
    print(f'I have changed {lines_modified:,} lines in my GitHub lifetime.')


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment