Created
April 2, 2011 07:01
-
-
Save intuited/899299 to your computer and use it in GitHub Desktop.
Show common leading users for 2 different SE sites.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Show common leading users for different SE sites. | |
Pass --grid from the command line to get a grid of common users | |
between askubuntu.com, unix.stackexchange.com, | |
superuser.com, and serverfault.com. | |
For the monthly report, it currently looks like this: | |
s s a u | |
e u s n | |
r p k i | |
v e u x | |
e r b . | |
r u u s | |
f s n t | |
a e t a | |
serverfa 36 2 0 4 | |
superuse 2 36 0 3 | |
askubunt 0 0 36 2 | |
unix.sta 4 3 2 36 | |
""" | |
from lxml import html | |
import textwrap | |
import pprint | |
import argparse | |
# XPath selecting the href of every profile link inside a "user-details" div.
USER_LINKS = '//div[@class="user-details"]/a/@href'

# Default servers compared when none are given on the command line.
AU_SERVER = 'askubuntu.com'
UNIX_SERVER = 'unix.stackexchange.com'
def users_url(server, filter='week'):
    """Return the reputation-tab users URL on `server` for the given filter.

    `server` is the host name placed after 'http://'; `filter` is inserted
    verbatim as the value of the `filter` query parameter.
    """
    template = 'http://{0}/users?tab=reputation&filter={1}'
    return template.format(server, filter)
def username(path):
    """Return the last '/'-separated segment of a SE user path.

    A path without any '/' is returned unchanged; a path ending in '/'
    yields the empty string.
    """
    _head, _slash, last_segment = path.rpartition('/')
    return last_segment
def common_users(url1, url2, xpath=USER_LINKS, parse=html.parse):
    """Return the set of usernames found at `xpath` in both sources.

    Each of `url1` and `url2` is handed to `parse`; the values selected by
    `xpath` from the parsed result are reduced to usernames, and the
    intersection of the two username sets is returned.
    """
    def usernames_at(source):
        # Collect the distinct usernames selected by `xpath` in one source.
        return set(username(href) for href in parse(source).xpath(xpath))

    first = usernames_at(url1)
    second = usernames_at(url2)
    return first & second
def site_top_user_report(url):
    """Print the usernames listed on the top-users page at `url`.

    Writes a heading naming the URL, then the usernames selected by
    USER_LINKS, joined by spaces and wrapped with `textwrap.fill`.
    Returns None.
    """
    # Parenthesised single-argument print works as a statement on Python 2
    # and as a function call on Python 3.
    print("Top users at '{0}':".format(url))
    user_paths = html.parse(url).xpath(USER_LINKS)
    print(textwrap.fill(' '.join(username(path) for path in user_paths)))
def render_grid(server_names, filter='week'):
    """Render a text grid counting top users shared by each pair of servers.

    Each server's top-users page (built with `users_url`) is downloaded and
    parsed exactly once.  Cell (row, column) holds the number of usernames
    present on both servers' pages, so the diagonal is the number of
    distinct usernames found on that server's own page.

    Args:
        server_names: Sequence of server host names, e.g. 'superuser.com'.
        filter: Value passed to `users_url` for the `filter` query parameter.

    Returns:
        A string made of eight header lines spelling the 8-character server
        abbreviations vertically, a blank line, then one row per server.
    """
    # Function-local imports, as in the original; urlopen moved in Python 3.
    import contextlib
    import io
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    def abbrev(server):
        # Truncate or pad the server name to exactly 8 characters.
        return '{0:8s}'.format(server[0:8])

    def row(label, cells):
        # Header and body share this formatter so that every header letter
        # sits directly above the last digit of its column.
        return '{0} {1}'.format(
            label, ' '.join('{0:>3}'.format(cell) for cell in cells))

    def top_usernames(server):
        # Download once and close the connection before parsing.
        with contextlib.closing(urlopen(users_url(server, filter))) as reply:
            page = io.BytesIO(reply.read())
        return set(username(href)
                   for href in html.parse(page).xpath(USER_LINKS))

    server_abbrs = [abbrev(server) for server in server_names]
    header = '\n'.join(row(' ' * 8, [abbr[i] for abbr in server_abbrs])
                       for i in range(8))

    # One parsed username set per server; each cell is a set intersection.
    user_sets = [top_usernames(server) for server in server_names]
    body = '\n'.join(
        row(abbr, [len(users & other) for other in user_sets])
        for abbr, users in zip(server_abbrs, user_sets))

    return header + '\n\n' + body
if __name__ == '__main__':
    # RawDescriptionHelpFormatter keeps the module docstring's sample grid
    # intact in --help instead of re-wrapping it into a single paragraph.
    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('-f', '--filter', default='week',
                        help="value of the users page 'filter' parameter "
                             "(default: %(default)s)")
    parser.add_argument('-1', '--server-1', default=UNIX_SERVER,
                        help='first server to compare '
                             '(default: %(default)s)')
    parser.add_argument('-2', '--server-2', default=AU_SERVER,
                        help='second server to compare '
                             '(default: %(default)s)')
    parser.add_argument('-g', '--grid', action='store_true',
                        help='print a grid of common-user counts for '
                             'serverfault.com, superuser.com, askubuntu.com '
                             'and unix.stackexchange.com')
    options = parser.parse_args()

    if options.grid:
        servers = ['serverfault.com',
                   'superuser.com',
                   'askubuntu.com',
                   'unix.stackexchange.com']
        print(render_grid(servers, options.filter))
    else:
        # Report each server's top users, then the usernames they share.
        first_url = users_url(options.server_1, options.filter)
        second_url = users_url(options.server_2, options.filter)
        site_top_user_report(first_url)
        site_top_user_report(second_url)
        print("Common users:")
        pprint.pprint(common_users(first_url, second_url))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment