non/updater.py

## updater.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# by Erik Osheim
#
# Reads README.md, and writes a README.md.new. If the format of
# README.md changes, this script may need modifications.
#
# Currently it rewrites each section, doing the following:
#  1. alphabetizing
#  2. querying GitHub for watchers, stars, days since active
#  3. formatting the link title to show this info
#  4. bolding projects with lots of stars
#
# Once README.md has the stars/days info in the links, the
# repo_regex will need slight modification.
#
# Currently, the lack of OAuth2 + GH developer keys means you exceed
# GitHub's hourly limit before you even finish. I have no idea how to
# set up OAuth2 so I'm going to post this as a Gist and see if someone
# else has an idea.

import datetime
import json
import random
import re
import urllib2

# Line shapes of README.md that this script understands. The first four
# are matched against whole lines read from the file, newline included.
empty_regex = re.compile(r"^ *\n$")          # blank or spaces-only line
section_regex = re.compile(r"^## (.+)\n$")   # "## Section" heading
# "* [name](link) - description" bullet
repo_regex = re.compile(r"^\* \[(.+?)\]\((.+?)\) - (.+)\n$")
end_regex = re.compile(r"^# .+\n$")          # "# Heading" line
# Matched against a bullet's link; captures owner and repository name.
# The dot is escaped so that only the literal host github.com matches.
github_regex = re.compile(r"^https://github\.com/(.+?)/(.+)$")

# Read the whole README up front; `with` closes the file even when
# reading raises.
with open('README.md', 'r') as inf:
    lines = list(inf)

outf = open('README.md.new', 'w')

# use fake to avoid hitting github API
# useful when over the rate limit (i.e. always)
fake = True

# TODO: need to do Oauth2 stuff here to avoid GitHub's rate limit.
def query(owner, name):
    """Return ``(watchers, stars, days)`` for the GitHub repo owner/name.

    While the module-level ``fake`` flag is true no request is made and
    three random integers are returned instead. Otherwise the GitHub
    repository API is queried and ``days`` is the number of whole days
    elapsed since the repository's ``updated_at`` timestamp.

    Returns None, after printing an ERROR line, when the request fails
    with an HTTP error.
    """
    if fake:
        return (random.randint(1, 100), random.randint(1, 1000), random.randint(1, 300))

    try:
        u = urllib2.urlopen('https://api.github.com/repos/%s/%s' % (owner, name))
    except urllib2.HTTPError:
        print("%s/%s: ERROR" % (owner, name))
        return None

    try:
        j = json.load(u)
    finally:
        # Release the connection as soon as the body has been read.
        u.close()

    # The trailing "Z" marks updated_at as UTC, so it must be compared
    # with the current UTC time; local now() would skew the age by the
    # machine's UTC offset.
    t = datetime.datetime.strptime(j['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
    dt = datetime.datetime.utcnow() - t
    print("%s/%s: ok" % (owner, name))
    return (j['watchers_count'], j['stargazers_count'], dt.days)

def flush_section(outf, section, sdesc, repos):
    """Write one rewritten README section to ``outf``.

    Writes the heading ``section`` and, when present, the description
    ``sdesc``, each followed by an extra newline, then one bullet per
    ``(name, link, description)`` tuple in ``repos`` ordered
    case-insensitively by name, and finally a blank line.

    For links that match ``github_regex`` the repository is looked up
    with ``query()`` and the link title becomes "name ★ stars ⧗ days";
    titles of repositories with more than 500 stars are set in bold.
    Links that are not on GitHub, or whose lookup returned None, get
    "?" placeholders instead.
    """
    outf.write(section)
    outf.write('\n')
    if sdesc:
        outf.write(sdesc)
        outf.write('\n')

    # sorted() rather than list.sort() so the caller's list is not
    # reordered as a side effect.
    for name, link, rdesc in sorted(repos, key=lambda t: t[0].lower()):
        m = github_regex.match(link)
        res = query(m.group(1), m.group(2)) if m else None
        if res is None:
            outf.write('* [%s ★ ? ⧗ ?](%s) - %s\n' % (name, link, rdesc))
            continue

        _, stars, days = res
        title = '%s ★ %d ⧗ %d' % (name, stars, int(days))
        if stars > 500:
            # Markdown bold is a double asterisk; a single asterisk
            # only produces emphasis (italic).
            title = '**' + title + '**'
        outf.write('* [%s](%s) - %s\n' % (title, link, rdesc))
    outf.write('\n')

# Walk README.md in three phases:
#   1. before the first "## " heading: copy lines through unchanged;
#   2. from that heading up to the next "# " heading: collect each
#      section (heading, optional description, repo bullets) and let
#      flush_section() rewrite it;
#   3. from that "# " heading on: copy lines through unchanged again.
started = False
finished = False
section = None
sdesc = None
repos = []
for line in lines:
    if finished:
        outf.write(line)
    elif started:
        if end_regex.match(line):
            flush_section(outf, section, sdesc, repos)
            outf.write(line)
            finished = True
        elif empty_regex.match(line):
            # Blank lines are dropped; flush_section() adds its own.
            continue
        elif section_regex.match(line):
            flush_section(outf, section, sdesc, repos)
            section = line
            sdesc = None
            repos = []
        else:
            m = repo_regex.match(line)
            if m:
                name, link, rdesc = m.groups()
                repos.append((name, link, rdesc))
            elif sdesc is None:
                # First non-bullet line of a section is its description.
                sdesc = line
            else:
                raise Exception("cannot parse %r" % line)
    else:
        if section_regex.match(line):
            section = line
            started = True
        else:
            outf.write(line)

# A README that ends without a closing "# " heading still has one
# section buffered; write it out instead of silently dropping it.
if started and not finished:
    flush_section(outf, section, sdesc, repos)

# Close explicitly so everything is flushed to README.md.new.
outf.close()
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	#
	# by Erik Osheim
	#
	# Reads README.md, and writes a README.md.new. If the format of
	# README.md changes, this script may need modifications.
	#
	# Currently it rewrites each section, doing the following:
	# 1. alphabetizing
	# 2. querying GitHub for watchers, stars, days since active
	# 3. formatting the link title to show this info
	# 4. bolding projects with lots of stars
	#
	# Once README.md has the stars/days info in the links, the
	# repo_regex will need slight modification.
	#
	# Currently, the lack of OAuth2 + GH developer keys means you exceed
	# GitHub's hourly limit before you even finish. I have no idea how to
	# set up OAuth2 so I'm going to post this as a Gist and see if someone
	# else has an idea.

	import datetime
	import json
	import random
	import re
	import urllib2

	# Line shapes of README.md that this script understands. The first four
	# are matched against whole lines read from the file, newline included.
	empty_regex = re.compile(r"^ *\n$")          # blank or spaces-only line
	section_regex = re.compile(r"^## (.+)\n$")   # "## Section" heading
	# "* [name](link) - description" bullet
	repo_regex = re.compile(r"^\* \[(.+?)\]\((.+?)\) - (.+)\n$")
	end_regex = re.compile(r"^# .+\n$")          # "# Heading" line
	# Matched against a bullet's link; captures owner and repository name.
	# The dot is escaped so that only the literal host github.com matches.
	github_regex = re.compile(r"^https://github\.com/(.+?)/(.+)$")

	# Read the whole README up front; `with` closes the file even when
	# reading raises.
	with open('README.md', 'r') as inf:
	    lines = list(inf)

	outf = open('README.md.new', 'w')

	# use fake to avoid hitting github API
	# useful when over the rate limit (i.e. always)
	fake = True

	# TODO: need to do Oauth2 stuff here to avoid GitHub's rate limit.
	def query(owner, name):
	    """Return ``(watchers, stars, days)`` for the GitHub repo owner/name.

	    While the module-level ``fake`` flag is true no request is made and
	    three random integers are returned instead. Otherwise the GitHub
	    repository API is queried and ``days`` is the number of whole days
	    elapsed since the repository's ``updated_at`` timestamp.

	    Returns None, after printing an ERROR line, when the request fails
	    with an HTTP error.
	    """
	    if fake:
	        return (random.randint(1, 100), random.randint(1, 1000), random.randint(1, 300))

	    try:
	        u = urllib2.urlopen('https://api.github.com/repos/%s/%s' % (owner, name))
	    except urllib2.HTTPError:
	        print("%s/%s: ERROR" % (owner, name))
	        return None

	    try:
	        j = json.load(u)
	    finally:
	        # Release the connection as soon as the body has been read.
	        u.close()

	    # The trailing "Z" marks updated_at as UTC, so it must be compared
	    # with the current UTC time; local now() would skew the age by the
	    # machine's UTC offset.
	    t = datetime.datetime.strptime(j['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
	    dt = datetime.datetime.utcnow() - t
	    print("%s/%s: ok" % (owner, name))
	    return (j['watchers_count'], j['stargazers_count'], dt.days)

	def flush_section(outf, section, sdesc, repos):
	    """Write one rewritten README section to ``outf``.

	    Writes the heading ``section`` and, when present, the description
	    ``sdesc``, each followed by an extra newline, then one bullet per
	    ``(name, link, description)`` tuple in ``repos`` ordered
	    case-insensitively by name, and finally a blank line.

	    For links that match ``github_regex`` the repository is looked up
	    with ``query()`` and the link title becomes "name ★ stars ⧗ days";
	    titles of repositories with more than 500 stars are set in bold.
	    Links that are not on GitHub, or whose lookup returned None, get
	    "?" placeholders instead.
	    """
	    outf.write(section)
	    outf.write('\n')
	    if sdesc:
	        outf.write(sdesc)
	        outf.write('\n')

	    # sorted() rather than list.sort() so the caller's list is not
	    # reordered as a side effect.
	    for name, link, rdesc in sorted(repos, key=lambda t: t[0].lower()):
	        m = github_regex.match(link)
	        res = query(m.group(1), m.group(2)) if m else None
	        if res is None:
	            outf.write('* [%s ★ ? ⧗ ?](%s) - %s\n' % (name, link, rdesc))
	            continue

	        _, stars, days = res
	        title = '%s ★ %d ⧗ %d' % (name, stars, int(days))
	        if stars > 500:
	            # Markdown bold is a double asterisk; a single asterisk
	            # only produces emphasis (italic).
	            title = '**' + title + '**'
	        outf.write('* [%s](%s) - %s\n' % (title, link, rdesc))
	    outf.write('\n')

	# Walk README.md in three phases:
	#   1. before the first "## " heading: copy lines through unchanged;
	#   2. from that heading up to the next "# " heading: collect each
	#      section (heading, optional description, repo bullets) and let
	#      flush_section() rewrite it;
	#   3. from that "# " heading on: copy lines through unchanged again.
	started = False
	finished = False
	section = None
	sdesc = None
	repos = []
	for line in lines:
	    if finished:
	        outf.write(line)
	    elif started:
	        if end_regex.match(line):
	            flush_section(outf, section, sdesc, repos)
	            outf.write(line)
	            finished = True
	        elif empty_regex.match(line):
	            # Blank lines are dropped; flush_section() adds its own.
	            continue
	        elif section_regex.match(line):
	            flush_section(outf, section, sdesc, repos)
	            section = line
	            sdesc = None
	            repos = []
	        else:
	            m = repo_regex.match(line)
	            if m:
	                name, link, rdesc = m.groups()
	                repos.append((name, link, rdesc))
	            elif sdesc is None:
	                # First non-bullet line of a section is its description.
	                sdesc = line
	            else:
	                raise Exception("cannot parse %r" % line)
	    else:
	        if section_regex.match(line):
	            section = line
	            started = True
	        else:
	            outf.write(line)

	# A README that ends without a closing "# " heading still has one
	# section buffered; write it out instead of silently dropping it.
	if started and not finished:
	    flush_section(outf, section, sdesc, repos)

	# Close explicitly so everything is flushed to README.md.new.
	outf.close()