Skip to content

Instantly share code, notes, and snippets.

@chadwhitacre
Last active January 2, 2019 20:41
Show Gist options
  • Save chadwhitacre/d4cb21261e41a2da1dc3bc7f86e6879c to your computer and use it in GitHub Desktop.
Save chadwhitacre/d4cb21261e41a2da1dc3bc7f86e6879c to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""Usage: harvest_email_from_github.py <username>
E.g.:
$ ./harvest_email_from_github.py chadwhitacre
7 chad@zetaweb.com Chad Whitacre
$
Set GITHUB_TOKEN in the environment to increase your rate limit. Get a token here:
https://github.com/settings/tokens/new
Or use as a library:
>>> from harvest_email_from_github import harvest_one
>>> harvest_one('chadwhitacre')
({'chad@zetaweb.com': [7, {'Chad Whitacre'}]}, 1, 16)
>>>
Requires the Requests library:
https://pypi.org/project/requests/
"""
import os
import sys
from collections import defaultdict
import requests
def log(*a):
    """Print the given values to stderr, flushing immediately.

    Diagnostics go to stderr so that stdout carries only the harvested
    results.
    """
    print(*a, file=sys.stderr, flush=True)


# Configure
# =========
# Read once at import time; used as the default API token for harvest_one().
TOKEN = os.environ.get('GITHUB_TOKEN', '')
def harvest_one(username, token=TOKEN, timeout=30):
    """Tally commit-author email addresses from a GitHub user's public events.

    Fetches ``/users/<username>/events/public`` from the GitHub API and counts
    the author of every commit listed under an event's ``payload['commits']``
    (events without that key contribute nothing).

    Args:
        username: GitHub login whose public events are fetched.
        token: API token sent in the ``Authorization`` header when non-empty.
            Defaults to the module-level ``TOKEN``.
        timeout: seconds passed to ``requests.get`` so a stalled connection
            cannot hang the caller indefinitely.

    Returns a 3-tuple:

    - a dict mapping email addresses to [count, {author name, ...}] 2-lists
    - a column width for displaying counts
    - a column width for displaying email addresses

    Raises:
        SystemExit: on a 403 response (after logging the rate-limit headers)
            or on any other non-200 response.
    """

    # Hit GitHub API
    # ==============

    response = requests.get(
        f'https://api.github.com/users/{username}/events/public',
        headers={'Authorization': f'token {token}'} if token else None,
        timeout=timeout,
    )

    if response.status_code == 403:
        for k, v in response.headers.items():
            # Header names are case-insensitive, so match on the lowered name.
            if k.lower().startswith('x-ratelimit'):
                log(f'{k:<16}: {v}')
        # Check the token actually used for this request, not the global.
        if not token:
            log()
            log('Set the GITHUB_TOKEN environment variable to increase your rate limit.')
            log('Get a token here: https://github.com/settings/tokens/new.')
        # A failed harvest must not look like success to the calling shell.
        sys.exit(1)
    elif response.status_code != 200:
        sys.exit(f'request failed with {response.status_code}')

    # Parse results
    # =============

    counts = defaultdict(lambda: [0, set()])
    for event in response.json():
        for commit in event['payload'].get('commits', []):
            author = commit['author']
            name, email = author['name'], author['email'].lower()
            counts[email][0] += 1
            counts[email][1].add(name)

    # Counts only grow, so the widest running count is the widest final count.
    w1 = max((len(str(n)) for n, _ in counts.values()), default=0)
    w2 = max((len(email) for email in counts), default=0)

    return dict(counts), w1, w2
# Display
# =======

def format_rows(counts, w1, w2):
    """Return one display line per email address, highest commit count first.

    Args:
        counts: dict mapping email -> [count, {author name, ...}], as returned
            by harvest_one().
        w1: column width for the right-aligned count.
        w2: column width for the left-aligned email address.

    Rows with equal counts keep the iteration order of ``counts``, and the
    author names on each row are sorted, so the output is deterministic.
    """
    # Sort on the count alone: comparing the whole [count, set] value would
    # break ties with set "<", which is a subset test, not a total order.
    ranked = sorted(counts.items(), key=lambda item: item[1][0], reverse=True)
    return [
        f" {n:>{w1}} {email:<{w2}} {'; '.join(sorted(names))}"
        for email, (n, names) in ranked
    ]


if __name__ == '__main__':
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} <username>')
    username = sys.argv[1]
    counts, w1, w2 = harvest_one(username)
    for row in format_rows(counts, w1, w2):
        print(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment