-
-
Save non/bc0cac3053b4cb532b09 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# by Erik Osheim
#
# Reads README.md, and writes a README.md.new. If the format of
# README.md changes, this script may need modifications.
#
# Currently it rewrites each section, doing the following:
# 1. alphabetizing
# 2. querying GitHub for watchers, stars, days since active
# 3. formatting the link title to show this info
# 4. bolding projects with lots of stars
#
# Once README.md has the stars/days info in the links, the
# repo_regex will need slight modification.
#
# Currently, the lack of OAuth2 + GH developer keys means you exceed
# GitHub's hourly limit before you even finish. I have no idea how to
# set up OAuth2 so I'm going to post this as a Gist and see if someone
# else has an idea.
import contextlib
import datetime
import json
import random
import re
import urllib2
# Line classifiers for README.md. Each pattern is applied with .match() to one
# line of the file. The trailing newline is optional so that a final line
# lacking a line terminator is classified the same as any other line.

# A blank line (nothing but spaces).
empty_regex = re.compile(r"^ *\n?$")
# A "## Section" heading; group 1 is the section title.
section_regex = re.compile(r"^## (.+)\n?$")
# A "* [name](link) - description" entry; groups are (name, link, description).
repo_regex = re.compile(r"^\* \[(.+?)\]\((.+?)\) - (.+)\n?$")
# A top-level "# Heading", which marks the end of the sections being rewritten.
end_regex = re.compile(r"^# .+\n?$")
# A link to a GitHub repository root; groups are (owner, repo). The dot is
# escaped so only the real host matches, and the repo group excludes "/" so a
# trailing slash is not captured into the name. Links that point below the
# repository root (e.g. .../tree/master) deliberately do not match.
github_regex = re.compile(r"^https://github\.com/([^/]+)/([^/]+?)/?$")
# Read the whole README up front. The context manager guarantees the input
# handle is closed even if reading raises.
with open('README.md', 'r') as inf:
    lines = list(inf)

# Destination for the rewritten README; the rest of the script writes to it.
outf = open('README.md.new', 'w')

# use fake to avoid hitting github API
# useful when over the rate limit (i.e. always)
fake = True
# TODO: need to do Oauth2 stuff here to avoid GitHub's rate limit. | |
def query(owner, name):
    """Fetch popularity/activity numbers for one GitHub repository.

    When the module-level ``fake`` flag is true, no network request is made
    and random placeholder values are returned instead.

    Args:
        owner: account or organization part of the repository path.
        name: repository name.

    Returns:
        A ``(watchers, stars, days)`` tuple, where ``days`` is the number of
        whole days since the repository's ``updated_at`` timestamp, or
        ``None`` if the API answered with an HTTP error (a message is
        printed in that case).
    """
    if fake:
        return (random.randint(1, 100), random.randint(1, 1000), random.randint(1, 300))

    url = 'https://api.github.com/repos/%s/%s' % (owner, name)
    try:
        # closing() releases the HTTP response even if JSON decoding fails.
        with contextlib.closing(urllib2.urlopen(url)) as u:
            j = json.load(u)
    except urllib2.HTTPError:
        print("%s/%s: ERROR" % (owner, name))
        return None

    t = datetime.datetime.strptime(j['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
    # The timestamp carries a "Z" (UTC) suffix, so compare it against the
    # current UTC time rather than local time to avoid an offset-sized skew.
    dt = datetime.datetime.utcnow() - t
    # A single pre-formatted argument prints identically under the print
    # statement and the print function.
    print("%s/%s: ok" % (owner, name))
    return (j['watchers_count'], j['stargazers_count'], dt.days)
def flush_section(outf, section, sdesc, repos, bold_threshold=500):
    """Write one README section with its entries alphabetized and annotated.

    Args:
        outf: writable file-like object receiving the output.
        section: the section heading line, written as-is and followed by a
            newline.
        sdesc: optional section description line; skipped when empty/None.
        repos: iterable of ``(name, link, description)`` tuples. It is not
            modified; entries are emitted sorted case-insensitively by name.
        bold_threshold: entries with more stars than this get a bold title.

    Each entry is written as ``* [title](link) - description``. For links
    matching ``github_regex`` the title carries the star count and days since
    last activity returned by ``query``; when the link is not a GitHub
    repository or the lookup fails, both numbers are shown as ``?``.
    """
    outf.write(section)
    outf.write('\n')
    if sdesc:
        outf.write(sdesc)
        outf.write('\n')

    # sorted() leaves the caller's list untouched (list.sort would reorder it).
    for name, link, rdesc in sorted(repos, key=lambda t: t[0].lower()):
        m = github_regex.match(link)
        res = query(m.group(1), m.group(2)) if m else None
        if res is None:
            title = '%s ★ ? ⧗ ?' % name
        else:
            _watchers, stars, days = res
            title = '%s ★ %d ⧗ %d' % (name, stars, int(days))
            if stars > bold_threshold:
                # Markdown bold needs double asterisks; single ones are italic.
                title = '**' + title + '**'
        outf.write('* [%s](%s) - %s\n' % (title, link, rdesc))
    outf.write('\n')
# Single pass over the README as a small state machine:
#   - before the first "## " heading: copy lines through unchanged;
#   - between the first "## " heading and the next "# " heading: collect each
#     section's description and entries, and emit the section via
#     flush_section whenever the next heading is reached;
#   - after that "# " heading: copy lines through unchanged.
started = False    # True once the first "## " section heading has been seen
finished = False   # True once the terminating "# " heading has been seen
section = None     # heading line of the section currently being collected
sdesc = None       # optional description line of the current section
repos = []         # (name, link, description) entries of the current section

try:
    for line in lines:
        if finished:
            outf.write(line)
        elif started:
            if end_regex.match(line):
                flush_section(outf, section, sdesc, repos)
                outf.write(line)
                finished = True
            elif empty_regex.match(line):
                # Blank lines inside sections are dropped; flush_section
                # writes its own spacing.
                continue
            elif section_regex.match(line):
                flush_section(outf, section, sdesc, repos)
                section = line
                sdesc = None
                repos = []
            else:
                m = repo_regex.match(line)
                if m:
                    name, link, rdesc = m.groups()
                    repos.append((name, link, rdesc))
                elif sdesc is None:
                    # The first non-entry line in a section is taken as its
                    # description.
                    sdesc = line
                else:
                    raise Exception("cannot parse %r" % line)
        else:
            if section_regex.match(line):
                section = line
                started = True
            else:
                outf.write(line)

    # If the README ends without a closing "# " heading, the last collected
    # section is still pending; write it out instead of silently dropping it.
    if started and not finished:
        flush_section(outf, section, sdesc, repos)
finally:
    # Close explicitly so buffered output reaches README.md.new.
    outf.close()
@bzz Aha! That's a great idea, thanks! I'll try that out and see how it goes.
@bzz seems pretty simple! Good tip.
Does this mean you'd have anyone wanting to contribute have to set up auth tokens? Maybe this is something better handled by a bot somewhere? Dunno, just tossing out thoughts since I'd like to lower the barrier to contribution as much as possible.
Perhaps I am overthinking it and this could be run by someone (like a collaborator) once in a while.
(I am under the assumption that this is run per-PR by a potential contributor - though it's late for me and this is making less sense now...maybe run just once in a while locally)
@longcao -- I think this is maybe something that we would run ourselves periodically to keep things up-to-date.
(although I could imagine a mode where it only runs for "empty" entries, and that could possibly run without authentication.)
@bzz Thanks again for your help! I felt really blocked, although getting basic auth working was super easy.
@longcao I have a PR for this, see: lauris/awesome-scala#139
It might be simpler to skip the full OAuth flow here and just implement simple 'basic auth' support with an OAuth token passed through CLI args, as in the cURL example from the link below, so that the user takes care of creating a personal token manually and then just passes it on.
https://developer.github.com/v3/auth/#via-oauth-tokens