Skip to content

Instantly share code, notes, and snippets.

@non
Created July 28, 2015 22:57
Show Gist options
  • Save non/bc0cac3053b4cb532b09 to your computer and use it in GitHub Desktop.
Save non/bc0cac3053b4cb532b09 to your computer and use it in GitHub Desktop.
Script to use with [awesome-scala](https://github.com/lauris/awesome-scala) to format README.md and add useful metadata.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# by Erik Osheim
#
# Reads README.md, and writes a README.md.new. If the format of
# README.md changes, this script may need modifications.
#
# Currently it rewrites each section, doing the following:
# 1. alphabetizing
# 2. querying GitHub for watchers, stars, days since active
# 3. formatting the link title to show this info
# 4. bolding projects with lots of stars
#
# Once README.md has the stars/days info in the links, the
# repo_regex will need slight modification.
#
# Currently, the lack of OAuth2 + GH developer keys means you exceed
# GitHub's hourly limit before you even finish. I have no idea how to
# set up OAuth2 so I'm going to post this as a Gist and see if someone
# else has an idea.
import datetime
import json
import random
import re
import urllib2
# Line classifiers for README.md. Each pattern is anchored on the trailing
# newline because lines are matched exactly as they are read from the file.
empty_regex = re.compile(r"^ *\n$")  # blank (or spaces-only) line
section_regex = re.compile(r"^## (.+)\n$")  # "## Section" heading
# "* [name](link) - description" list entry
repo_regex = re.compile(r"^\* \[(.+?)\]\((.+?)\) - (.+)\n$")
end_regex = re.compile(r"^# .+\n$")  # "# " heading that ends the section list
# Splits a GitHub repository URL into (owner, name). The dot is escaped so
# that only the literal host "github.com" matches.
github_regex = re.compile(r"^https://github\.com/(.+?)/(.+)$")

# Read the whole input up front; the context manager closes the file even
# if reading fails part-way through.
with open('README.md', 'r') as inf:
    lines = list(inf)

outf = open('README.md.new', 'w')

# use fake to avoid hitting github API
# useful when over the rate limit (i.e. always)
fake = True
# TODO: need to do Oauth2 stuff here to avoid GitHub's rate limit.
def query(owner, name):
    """Look up popularity and activity figures for a GitHub repository.

    When the module-level ``fake`` flag is true, no request is made and
    random placeholder numbers are returned instead.

    Args:
        owner: account that owns the repository.
        name: repository name.

    Returns:
        A ``(watchers, stars, days)`` tuple, where ``days`` is the number of
        whole days since the repository's ``updated_at`` timestamp, or
        ``None`` if GitHub answered with an HTTP error.
    """
    if fake:
        return (random.randint(1, 100), random.randint(1, 1000), random.randint(1, 300))

    try:
        u = urllib2.urlopen('https://api.github.com/repos/%s/%s' % (owner, name))
        try:
            j = json.load(u)
        finally:
            # Release the connection even when the body is not valid JSON.
            u.close()
    except urllib2.HTTPError:
        print("%s/%s: ERROR" % (owner, name))
        return None

    t = datetime.datetime.strptime(j['updated_at'], "%Y-%m-%dT%H:%M:%SZ")
    # The trailing "Z" marks the timestamp as UTC, so it must be compared
    # against the current UTC time rather than local time.
    dt = datetime.datetime.utcnow() - t
    print("%s/%s: ok" % (owner, name))
    # NOTE(review): GitHub's API may report the same number for
    # watchers_count and stargazers_count - confirm before relying on it.
    return (j['watchers_count'], j['stargazers_count'], dt.days)
def flush_section(outf, section, sdesc, repos):
    """Write one README section with its entries sorted and annotated.

    Each entry is written as a Markdown list item whose link title carries
    the star count and days-since-update returned by ``query``. Entries that
    are not GitHub links, or whose lookup failed, get ``?`` placeholders.
    Titles of repositories with more than 500 stars are set in bold.

    Args:
        outf: writable file-like object receiving the output.
        section: the section heading line, written verbatim.
        sdesc: optional section description line; skipped when falsy.
        repos: iterable of ``(name, link, description)`` tuples. It is not
            modified.
    """
    outf.write(section)
    outf.write('\n')
    if sdesc:
        outf.write(sdesc)
        outf.write('\n')

    # Sort a copy, case-insensitively by name, so the caller's list keeps
    # its original order.
    for name, link, rdesc in sorted(repos, key=lambda t: t[0].lower()):
        m = github_regex.match(link)
        res = query(m.group(1), m.group(2)) if m else None
        if res is None:
            # Not a GitHub link, or the lookup failed: leave placeholders.
            outf.write('* [%s ★ ? ⧗ ?](%s) - %s\n' % (name, link, rdesc))
            continue

        _, stars, days = res
        title = '%s ★ %d ⧗ %d' % (name, stars, days)
        if stars > 500:
            # "**" is Markdown strong emphasis (bold); a single "*" would
            # only italicize the title.
            title = '**' + title + '**'
        outf.write('* [%s](%s) - %s\n' % (title, link, rdesc))
    outf.write('\n')
# Parser state for the line-by-line rewrite below.
started = False   # True once the first "## " section heading has been seen
finished = False  # True once a "# " heading has closed the section list
section = None    # heading line of the section being collected
sdesc = None      # optional description line of that section
repos = []        # (name, link, description) entries of that section

try:
    for line in lines:
        if finished:
            # Everything after the section list is copied through untouched.
            outf.write(line)
        elif started:
            if end_regex.match(line):
                flush_section(outf, section, sdesc, repos)
                outf.write(line)
                finished = True
            elif empty_regex.match(line):
                # Blank lines are dropped; flush_section writes its own
                # spacing around each section.
                continue
            elif section_regex.match(line):
                # A new section begins: emit the previous one and reset.
                flush_section(outf, section, sdesc, repos)
                section = line
                sdesc = None
                repos = []
            else:
                m = repo_regex.match(line)
                if m:
                    repos.append(m.groups())
                elif sdesc is None:
                    # The first non-entry line of a section is taken as its
                    # description.
                    sdesc = line
                else:
                    raise Exception("cannot parse %r" % line)
        elif section_regex.match(line):
            section = line
            started = True
        else:
            # Preamble before the first section is copied through untouched.
            outf.write(line)

    if started and not finished:
        # The input ended without a closing "# " heading; without this the
        # last collected section would be silently lost.
        flush_section(outf, section, sdesc, repos)
finally:
    # Always close the output so buffered data reaches the file.
    outf.close()
@non
Copy link
Author

non commented Jul 29, 2015

(although i could imagine a mode where it only runs for "empty" entries, and that could possibly run without authentication.)

@non
Copy link
Author

non commented Jul 29, 2015

@bzz Thanks again for your help! I felt really blocked, although getting basic auth working was super easy.

@longcao I have a PR for this, see: lauris/awesome-scala#139

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment