-
-
Save chadwhitacre/d4cb21261e41a2da1dc3bc7f86e6879c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Usage: harvest_email_from_github.py <username>

E.g.:

    $ ./harvest_email_from_github.py chadwhitacre
     7 chad@zetaweb.com Chad Whitacre
    $

Set GITHUB_TOKEN in the environment to increase your rate limit. Get a token here:

    https://github.com/settings/tokens/new

Or use as a library:

    >>> from harvest_email_from_github import harvest_one
    >>> harvest_one('chadwhitacre')
    ({'chad@zetaweb.com': [7, {'Chad Whitacre'}]}, 1, 16)
    >>>

Requires the Requests library:

    https://pypi.org/project/requests/

"""
import os | |
import sys | |
from collections import defaultdict | |
import requests | |
def log(*a):
    """Print the given values to stderr, flushing immediately.

    Diagnostics go to stderr so that stdout carries only the harvested
    results.
    """
    print(*a, file=sys.stderr, flush=True)
# Configuration
# =============
# Optional GitHub API token, read once at import time. An empty string means
# requests are sent without an Authorization header.
TOKEN = os.getenv('GITHUB_TOKEN', '')
def harvest_one(username, token=TOKEN):
    """Given a username on GitHub, return a 3-tuple:

    - a dict mapping email addresses to [count, {author name, ...}] 2-lists
    - a column width for displaying counts
    - a column width for displaying email addresses

    A single request is made to the user's public-events endpoint, and the
    authors of every commit listed in those events are tallied. Email
    addresses are lower-cased before counting, so differently-cased spellings
    of one address are merged.

    Args:
        username: GitHub login whose public events are fetched.
        token: Optional API token sent in the ``Authorization`` header.
            Defaults to the module-level ``TOKEN``; a falsy value sends no
            header.

    Raises:
        SystemExit: on a 403 response (after logging the ``X-RateLimit*``
            response headers to stderr), or on any other non-200 response.
    """

    # Hit GitHub API
    # ==============

    response = requests.get(
        f'https://api.github.com/users/{username}/events/public',
        headers={'Authorization': f'token {token}'} if token else None,
    )
    if response.status_code == 403:
        for k, v in response.headers.items():
            if k.startswith('X-RateLimit'):
                log(f'{k:<16}: {v}')
        # Check the token actually used for this call, not the module
        # default, so the hint is only shown when no token was sent.
        if not token:
            log()
            log('Set the GITHUB_TOKEN environment variable to increase your rate limit.')
            log('Get a token here: https://github.com/settings/tokens/new.')
        # Non-zero status: this is a failure, and sys.exit (unlike the
        # site-provided exit) is always available.
        sys.exit(1)
    elif response.status_code != 200:
        sys.exit(f'request failed with {response.status_code}')

    # Parse results
    # =============

    counts = defaultdict(lambda: [0, set()])
    for event in response.json():
        # Events without a 'commits' list in their payload contribute nothing.
        for commit in event['payload'].get('commits', []):
            author = commit['author']
            name, email = author['name'], author['email'].lower()
            counts[email][0] += 1
            counts[email][1].add(name)

    # Column widths: the widest count and the widest email (0 when empty).
    w1 = max((len(str(n)) for n, _ in counts.values()), default=0)
    w2 = max((len(email) for email in counts), default=0)

    return dict(counts), w1, w2
# Display | |
# ======= | |
if __name__ == '__main__':
    # Command-line entry point: print one line per harvested email address.
    if len(sys.argv) < 2:
        sys.exit(f'usage: {sys.argv[0]} <username>')
    username = sys.argv[1]
    counts, w1, w2 = harvest_one(username)

    # Sort on the count alone (highest first), breaking ties by email.
    # Sorting on the whole [count, names] list would compare the name sets
    # when counts tie, and set comparison is a subset test, not a total order.
    rows = sorted(counts.items(), key=lambda item: (-item[1][0], item[0]))
    for email, (n, names) in rows:
        # Sort the names so the output does not depend on set iteration order.
        print(f" {n:>{w1}} {email:<{w2}} {'; '.join(sorted(names))}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment