Skip to content

Instantly share code, notes, and snippets.

@TJSomething
Last active December 25, 2015 05:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TJSomething/6924624 to your computer and use it in GitHub Desktop.
Save TJSomething/6924624 to your computer and use it in GitHub Desktop.
Uses Wilson confidence intervals to rank the results of this Hacker News language poll (https://news.ycombinator.com/item?id=6527104).
#!/usr/bin/python3
import re
from math import sqrt
import urllib.request
# Thanks to http://possiblywrong.wordpress.com/2011/06/05/reddits-comment-ranking-algorithm/
def confidence_fixed(ups, downs, z=1.64485):
    """Return the lower bound of the Wilson score interval for a vote tally.

    Args:
        ups: Number of positive votes.
        downs: Number of negative votes.
        z: Standard-normal quantile that sets the confidence level. The
            default 1.64485 is the 95th percentile (one-sided 95%
            confidence); 1.0 corresponds to roughly 85%.

    Returns:
        The Wilson lower bound on the fraction of positive votes when
        ``ups`` is non-zero, otherwise ``-downs``.
    """
    if ups == 0:
        # With no upvotes the formula below evaluates to 0 for any number
        # of downvotes (and divides by zero when downs is 0 as well), so
        # such tallies are ordered by their downvote count instead.
        return -downs
    n = ups + downs
    phat = ups / n  # observed fraction of positive votes
    z2 = z * z
    centre = phat + z2 / (2 * n)
    margin = z * sqrt((phat * (1 - phat) + z2 / (4 * n)) / n)
    return (centre - margin) / (1 + z2 / n)
# Download the poll page. The context manager closes the HTTP response as
# soon as the body has been read, instead of leaving the connection open.
with urllib.request.urlopen("https://news.ycombinator.com/item?id=6527104") as response:
    raw_data = response.read().decode("utf-8")

# Each poll language contributes a "<name> - Like ... N points" entry followed
# by a "<name> - Dislike ... N points" entry; one match captures the language
# name together with both vote counts.
entry_regex = re.compile(r"""(?P<language>[^>-]+) - Like.*?(?P<upvotes>[0-9]+) points.*? - Dislike.*?(?P<downvotes>[0-9]+) points""")
matched_data = entry_regex.finditer(raw_data)

# Score every language with confidence_fixed on its like/dislike counts.
results = []
for entry in matched_data:
    language = entry.group("language")
    upvotes = float(entry.group("upvotes"))
    downvotes = float(entry.group("downvotes"))
    confidence = confidence_fixed(upvotes, downvotes)
    results.append((language, confidence))

# Highest score first; list.sort stays stable with reverse=True, so tied
# entries keep the order in which they appeared on the page.
results.sort(key=lambda item: item[1], reverse=True)

# One line per language: 1-based rank, name padded to 16 columns, score.
for rank, entry in enumerate(results):
    print("%d\t%s%f" % (rank+1, entry[0].ljust(16), entry[1]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment