Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Count all the lines I've ever changed on GitHub
#!/usr/local/bin/python3
"""
Count all the lines I've ever changed on GitHub.

Setup:
    pip install requests tqdm GitPython

    The GitHub token is read from the GITHUB_TOKEN environment variable.

Usage:
    ./commits.py
"""
import fnmatch
import functools
import git
import json
import os
import requests
import subprocess
import tqdm
# Root URL that every API path passed to github_fetch() is appended to.
GITHUB_BASE_URL = 'https://api.github.com'
# GitHub login sent as the `author` filter when listing a repo's commits.
AUTHOR = 'ryanwilsonperkin'
# fnmatch-style patterns matched against a repo's `full_name`;
# repos matching any pattern are skipped by filter_repos().
IGNORED_REPOS = ('thalmic-alpha/*', )
@functools.lru_cache()
def get_token():
    """Return the GitHub token taken from the ``GITHUB_TOKEN`` environment variable.

    The lookup result (``None`` when the variable is unset) is memoised, so
    later changes to the environment are not picked up.
    """
    token = os.getenv('GITHUB_TOKEN')
    return token
def mkdir(d):
    """Make directory ``d`` if it doesn't exist.

    Missing parent directories are created as well, and an already existing
    directory is left untouched.

    Args:
        d: Path of the directory to create.

    Raises:
        FileExistsError: If ``d`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists()
    # first, and makedirs also copes with a missing parent directory.
    os.makedirs(d, exist_ok=True)
def github_fetch(api, **kwargs):
    """Make a GitHub API call.

    Args:
        api: API path starting with ``/``; it is appended to GITHUB_BASE_URL.
        **kwargs: Sent as URL query parameters.

    Returns:
        The ``requests.Response`` for the GET request. The status code is not
        checked here.
    """
    token = get_token()
    # Send the token in the Authorization header: GitHub no longer accepts
    # the `access_token` query parameter, and this keeps the secret out of
    # request URLs. Without a token the call is made unauthenticated.
    headers = {'Authorization': f'token {token}'} if token else {}
    return requests.get(
        f'{GITHUB_BASE_URL}{api}',
        params=kwargs,
        headers=headers,
    )
def github_get(api, **kwargs):
    """Call the GitHub API once and return the decoded JSON body.

    ``api`` and ``kwargs`` are forwarded unchanged to github_fetch().
    """
    response = github_fetch(api, **kwargs)
    payload = response.json()
    return payload
def github_list(api, **kwargs):
    """Fetch a list of values from GitHub, handling pagination.

    Pages are requested one after another (starting at ``page=1``) for as
    long as the response advertises a ``next`` link.

    Args:
        api: API path, forwarded to github_fetch().
        **kwargs: Extra query parameters, forwarded to github_fetch().

    Returns:
        A single list with the items of every page, in order.

    Raises:
        requests.HTTPError: If GitHub answers with an error status.
        ValueError: If a page's JSON body is not a list.
    """
    items = []
    page = 1
    while True:
        response = github_fetch(api, page=page, **kwargs)
        # An error response carries a JSON object; extending the list with it
        # would silently append its keys, so fail loudly instead.
        response.raise_for_status()
        payload = response.json()
        if not isinstance(payload, list):
            raise ValueError(
                f'Expected a JSON list from {api}, got {type(payload).__name__}'
            )
        items.extend(payload)
        if 'next' not in response.links:
            return items
        page += 1
def get_my_repos():
    """Return the repos listed by ``/user/repos``, using a local JSON cache.

    When the cache file exists its contents are returned as-is; otherwise
    the list is fetched from GitHub, written to the cache and returned.
    """
    cache_path = '/tmp/repos.json'
    if not os.path.exists(cache_path):
        # Cache miss: ask GitHub, then remember the answer for the next run.
        fetched = github_list('/user/repos')
        with open(cache_path, 'w') as cache_file:
            json.dump(fetched, cache_file)
        return fetched
    with open(cache_path, 'r') as cache_file:
        return json.load(cache_file)
def get_my_commits(repo):
    """Return the commits authored by AUTHOR in ``repo``, using a local JSON cache.

    ``repo`` is the ``owner/name`` string; the cache file name is derived
    from it by replacing ``/`` with ``__``.
    """
    cache_name = repo.replace('/', '__')
    cache_path = '/tmp/commits/{}.json'.format(cache_name)
    mkdir('/tmp/commits')
    if not os.path.exists(cache_path):
        # Cache miss: list the commits via the API and store them.
        fetched = github_list(f'/repos/{repo}/commits', author=AUTHOR)
        with open(cache_path, 'w') as cache_file:
            json.dump(fetched, cache_file)
        return fetched
    with open(cache_path, 'r') as cache_file:
        return json.load(cache_file)
def get_commit_details(repo, commits):
    """Return per-commit stats for ``commits``, read from the local clone of ``repo``.

    The result maps each commit's ``sha`` to GitPython's ``stats.total`` for
    that commit. It is cached as JSON per repo; when the cache file exists
    it is returned without opening the clone.
    """
    cache_name = repo.replace('/', '__')
    cache_path = '/tmp/commit_details/{}.json'.format(cache_name)
    mkdir('/tmp/commit_details')
    if not os.path.exists(cache_path):
        # The clone is expected under /tmp/repos/<owner>/<name>.
        local_repo = git.Repo(f'/tmp/repos/{repo}')
        details = {}
        for entry in commits:
            sha = entry['sha']
            details[sha] = local_repo.commit(sha).stats.total
        with open(cache_path, 'w') as cache_file:
            json.dump(details, cache_file)
        return details
    with open(cache_path, 'r') as cache_file:
        return json.load(cache_file)
def clone_repo(repo):
    """Clone ``repo`` (``owner/name``) from GitHub into ``/tmp/repos``.

    The owner directory is created first; nothing is cloned when the
    destination path already exists.
    """
    owner = repo.partition('/')[0]
    mkdir('/tmp/repos')
    mkdir(f'/tmp/repos/{owner}')
    destination = f'/tmp/repos/{repo}'
    if os.path.exists(destination):
        return
    remote = f'git@github.com:{repo}.git'
    # Both stdout and stderr are captured, so git's output is not shown.
    subprocess.check_output(
        ['git', 'clone', remote, destination],
        stderr=subprocess.PIPE,
    )
def filter_repos(repos):
    """Yield the repos whose ``full_name`` matches none of IGNORED_REPOS.

    Patterns are compared with ``fnmatch.fnmatch``; input order is kept.
    """
    for candidate in repos:
        if any(fnmatch.fnmatch(candidate['full_name'], glob) for glob in IGNORED_REPOS):
            # Matches an ignore pattern: skip it.
            continue
        yield candidate
if __name__ == "__main__":
    # Stats for every commit seen so far, keyed by commit SHA
    all_stats = {}

    # Repos to inspect, minus the ignored ones
    candidates = list(filter_repos(get_my_repos()))

    # Progress bar whose description shows the current repo and step
    progress = tqdm.tqdm(candidates)
    for entry in progress:
        repo_name = entry['full_name']

        progress.set_description(f'{repo_name}: Fetching commits')
        authored = get_my_commits(repo_name)

        # Only repos I have committed to need a clone and stats
        if authored:
            progress.set_description(f'{repo_name}: Cloning repo')
            clone_repo(repo_name)

            progress.set_description(f'{repo_name}: Loading stats')
            all_stats.update(get_commit_details(repo_name, authored))

    # Add up the 'lines' figure of every collected commit
    lines_modified = 0
    for stats in all_stats.values():
        lines_modified += stats['lines']
    print(f'I have changed {lines_modified:,} lines in my GitHub lifetime.')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment