Skip to content

Instantly share code, notes, and snippets.

@frankgeerlings
Created May 14, 2016 11:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save frankgeerlings/bf9e8931558145451d5557a6f0bc4182 to your computer and use it in GitHub Desktop.
Save frankgeerlings/bf9e8931558145451d5557a6f0bc4182 to your computer and use it in GitHub Desktop.
List the most popular articles on a Wikimedia instance over a given number of past days
from mwviews.api import PageviewsClient
import functools # Voor Python 3 compatibility
import itertools
# Number of past days to aggregate; passed to dagen() as the day count.
days = 100
# Per-day ``limit`` handed to top_articles() in dagart(); also shown in the
# header of the last table column.
topmost = 10000
# Number of rows of the final ranking that are printed.
showtop = 100
# Shared pageviews API client used by dagart().
# NOTE(review): what the positional argument 10 means depends on the
# installed mwviews version -- confirm against its documentation.
p = PageviewsClient(10)
def cleanArticleName(article):
    """Format an article title as a wiki link of the native ``str`` type.

    Underscores in the title are replaced by spaces and the result is
    wrapped in ``[[`` and ``]]``.

    Args:
        article: Article title as a text string.

    Returns:
        The link as the interpreter's native ``str``. A Python 2 ``unicode``
        link is UTF-8 encoded to get there. A link that already is ``str``
        is returned as is, so that on Python 3 the caller receives text
        instead of ``bytes`` (which ``str()`` would render as
        ``b'[[...]]'`` in the generated table).
    """
    link = "[[" + article.replace('_', ' ') + "]]"
    if isinstance(link, str):
        # Already the native string type; encoding would yield bytes on
        # Python 3.
        return link
    return link.encode('utf-8')
def dagart(d):
    """Fetch the top-article list of nl.wikipedia for a single day.

    Args:
        d: Object exposing ``year``, ``month`` and ``day`` attributes
            (dagen() passes ``datetime.date`` values).

    Returns:
        A dict that maps each returned entry's ``'article'`` value to a
        ``(rank, views)`` tuple taken from the entry's ``'rank'`` and
        ``'views'`` values.
    """
    entries = p.top_articles(
        'nl.wikipedia',
        limit=topmost,
        year=d.year,
        month=d.month,
        day=d.day
    )
    per_article = {}
    for entry in entries:
        title = entry['article']
        per_article[title] = (entry['rank'], entry['views'])
    return per_article


# stats() below turns a list of (rank, views) tuples into one summary tuple.
def stats(scores):
    """Summarise the daily listings of one article.

    Args:
        scores: List of ``(rank, views)`` tuples, one per day on which the
            article was listed. It is iterated more than once, so it must
            not be a one-shot iterator.

    Returns:
        A tuple ``(score, minrank, maxrank, views, dayslisted)`` where
        ``score`` is the sum of ``1.0 / rank`` over all tuples, ``minrank``
        and ``maxrank`` are the smallest and largest rank, ``views`` is the
        sum of all view counts and ``dayslisted`` is the number of tuples.

    Raises:
        ValueError: If ``scores`` is empty (there is no minimum rank).
    """
    ranks = [rank for rank, _ in scores]
    total_views = sum(day_views for _, day_views in scores)
    # Each listing contributes the inverse of its rank, so a first place
    # weighs 1.0 and a 100th place 0.01.
    inverse_rank_sum = sum(1.0 / rank for rank in ranks)
    return (inverse_rank_sum, min(ranks), max(ranks), total_views, len(ranks))
from datetime import date,timedelta
def dagen(vandaag, aantal):
    """Lazily yield the dagart() result for each of the days before a date.

    The offset counts down from ``aantal`` to 1, so results come in
    chronological order: first ``vandaag - aantal`` days, last
    ``vandaag - 1`` day. ``vandaag`` itself is never requested.

    Args:
        vandaag: Reference date; must support subtracting a ``timedelta``.
        aantal: How many days back to start.

    Yields:
        Whatever dagart() returns for each day in the window.
    """
    offset = aantal
    while offset > 0:
        day = vandaag - timedelta(days=offset)
        yield dagart(day)
        offset -= 1
# Lazy generator of per-day {article: (rank, views)} dicts; nothing is fetched
# until it is iterated. The reference date is yesterday and dagen() subtracts
# a further 1..days days from it, so the window runs from (days + 1) days ago
# up to and including two days ago.
a = dagen(date.today()-timedelta(days=1), days)
def toegestaan(artikel):
    """Tell whether a listed title should be kept in the ranking.

    Args:
        artikel: Title string as it appears in the daily top list.

    Returns:
        False when the title starts with ``Special:``, ``User:`` or
        ``Speciaal:`` or is exactly ``-``; True otherwise.
    """
    excluded_prefixes = ('Special:', 'User:', 'Speciaal:')
    return not (artikel.startswith(excluded_prefixes) or artikel == '-')
# Group the daily listings per article:
#   resultaat[title] -> [(rank, views), ...], one tuple per day on which the
#   title was listed, in the order the days are produced by ``a``.
# Titles rejected by toegestaan() are skipped.
resultaat = {}
for dag in a:
    for artikel, score in dag.items():
        if toegestaan(artikel):
            # setdefault() replaces the dict.has_key() check, which no
            # longer exists on Python 3.
            resultaat.setdefault(artikel, []).append(score)
class Wikitable:
    """Render rows of values as MediaWiki ``wikitable sortable`` markup.

    Attributes:
        items: Iterable of rows; each row is an iterable of cell values.
        columns: Sequence of header strings.
        columncount: Number of header strings.
    """

    def __init__(self, items, columns):
        """Store the rows and the column headers."""
        self.items = items
        self.columns = columns
        self.columncount = len(columns)

    def lines(self):
        """Yield the markup one line at a time.

        The table opener and the header row come first, then a row
        separator and a cell line per item, then the table terminator.
        Cell values are converted with ``str()``. ``self.items`` is iterated
        on every call, so a one-shot iterator only produces rows the first
        time.
        """
        yield '{| class="wikitable sortable"'
        yield "|-"
        yield "! " + " !! ".join(self.columns)
        for row in self.items:
            yield "|-"
            cells = map(str, row)
            yield "| " + " || ".join(cells)
        yield "|}"

    def __repr__(self):
        """Return the complete table markup joined by newlines."""
        markup = self.lines()
        return "\n".join(markup)
# At this point ``resultaat`` maps article title -> [(rank, views), ...].

# One row per article:
#   (wiki link, score, best rank, worst rank, total views, days listed)
rijen = [(cleanArticleName(k),) + stats(v) for k, v in resultaat.items()]

# Order by score (tuple index 1), highest first. The ascending sort followed
# by reverse() is deliberate: unlike sorted(..., reverse=True) it puts rows
# with equal scores in reverse iteration order.
rijen.sort(key=lambda rij: rij[1])
rijen.reverse()

# Prefix every row with its 1-based position in the overall ranking.
statistiek = [(rang,) + rij for rang, rij in enumerate(rijen, 1)]

# A single parenthesised argument prints identically under the Python 2
# print statement and the Python 3 print function.
print(Wikitable(statistiek[:showtop], ["Rang", "Artikel", "Score", "Hoogste", "Laagste", "Views totaal", "Dagen in top %s" % topmost]))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment