Skip to content

Instantly share code, notes, and snippets.

@ChronoMonochrome
Last active June 12, 2018 21:56
Show Gist options
  • Save ChronoMonochrome/5e16e4860d38732b7d4a3ad4aa5104da to your computer and use it in GitHub Desktop.
Save ChronoMonochrome/5e16e4860d38732b7d4a3ad4aa5104da to your computer and use it in GitHub Desktop.
Wilson score calculation for anime titles on shikimori.org
#!/usr/bin/env python
import operator
import json
from bs4 import BeautifulSoup
from urllib.request import urlopen
from functools import reduce, lru_cache
from math import sqrt
def sum_prod(lists):
    """Return the sum of the element-wise products of several sequences.

    The sequences in ``lists`` are walked in lockstep; at each position the
    items are multiplied together and the products are added up.

    Args:
        lists: An iterable of iterables of numbers.

    Returns:
        The accumulated sum. ``zip`` stops at the shortest input, so longer
        sequences are silently truncated; an empty ``lists`` yields ``0``.
    """
    # A generator avoids materialising a throwaway list just to sum it.
    return sum(reduce(operator.mul, column, 1) for column in zip(*lists))
# taken from https://habr.com/company/darudar/blog/143188
def _wilson_score(sum_rating, n, votes_range = [0, 1]):
z = 1.64485
v_min = min(votes_range)
v_width = float(max(votes_range) - v_min)
phat = (sum_rating - n * v_min) / v_width / float(n)
rating = (phat+z*z/(2*n)-z*sqrt((phat*(1-phat)+z*z/(4*n))/n))/(1+z*z/n)
return rating * v_width + v_min
def wilson_score(votes, votes_range=(1, 10)):
    """Return the Wilson lower-bound score for a histogram of votes.

    Args:
        votes: Vote counts per grade, ordered from the lowest grade to the
            highest; ``votes[0]`` is the count for the lowest grade.
        votes_range: Sequence whose minimum and maximum are the lowest and
            highest grade. The grades in between are assumed to be
            consecutive integers.

    Returns:
        ``0`` when there are no votes at all, otherwise the value computed
        by ``_wilson_score`` for the weighted sum of grades.
    """
    # Use min/max (as _wilson_score does) rather than the first/last item,
    # so both functions agree even if votes_range is not sorted.
    lowest, highest = min(votes_range), max(votes_range)
    grades = range(lowest, highest + 1)
    sum_rating = sum_prod((votes, grades))
    n = sum(votes)
    if n == 0:
        # No data: avoid dividing by zero in the helper.
        return 0
    return _wilson_score(sum_rating, n, votes_range)
def my_score(votes, threshold=300):
    """Return a weighted mean grade that favours high grades.

    Each grade ``g`` in 1..10 is multiplied by its vote count and by a
    factor ``1 + 2**(g - 1) / 1024`` (from about 1.001 for grade 1 up to
    1.5 for grade 10). The total is divided by the number of votes and
    scaled by 2/3, the reciprocal of the largest factor, so that a
    unanimous grade of 10 still scores 10.

    Args:
        votes: Vote counts for grades 1 through 10, in that order.
        threshold: Minimum total number of votes required for a score.

    Returns:
        ``0`` when the total is below ``threshold`` (or there are no votes
        at all), otherwise the weighted score as a float.
    """
    # Same values as the hand-written table 1/1024, 2/1024, ..., 512/1024,
    # each already shifted by one.
    factors = [1 + 2 ** k / 1024. for k in range(10)]
    norm = 2 / 3.
    n = sum(votes)
    # The n == 0 check prevents a ZeroDivisionError when threshold <= 0.
    if n < threshold or n == 0:
        return 0
    return norm * sum_prod((votes, range(1, 11), factors)) / n
@lru_cache(maxsize=None)
def get_stats(url):
    """Fetch a title page and return its vote histogram.

    The page is downloaded, the element with id ``rates_scores_stats`` is
    located, and the JSON in its ``data-stats`` attribute is decoded.

    Results are memoised per URL with an unbounded cache, so repeated calls
    return the *same* list object; callers should not mutate it.

    Args:
        url: Address of the page to download.

    Returns:
        A list of 10 vote counts where index ``i`` holds the count for
        grade ``i + 1``. All zeros when the page has no stats element.
    """
    res = [0] * 10
    # Close the HTTP response deterministically instead of leaking it.
    with urlopen(url) as response:
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        page = BeautifulSoup(response.read(), "html.parser")
    try:
        node = page.find(attrs={"id": "rates_scores_stats"})
        stats = json.loads(node.get("data-stats"))
        for stat in stats:
            # NOTE(review): assumes every entry is a mapping with exactly
            # two values, in order (1-based integer grade, vote count) --
            # confirm against the live markup.
            key, val = stat.values()
            res[key - 1] = val
    except AttributeError:
        # Best effort: find() returned None (no stats element) or an entry
        # was not a mapping; return whatever has been collected so far.
        pass
    return res
''' Example of usage:
>>> wilson_score(get_stats('https://shikimori.org/animes/9253-steins-gate'))
9.075422968506256'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment