Skip to content

Instantly share code, notes, and snippets.

@intuited
Created April 2, 2011 07:01
Show Gist options
  • Save intuited/899299 to your computer and use it in GitHub Desktop.
Save intuited/899299 to your computer and use it in GitHub Desktop.
Show common leading users for 2 different SE sites.
"""Show common leading users for different SE sites.
Pass --grid from the command line to get a grid of common users
between askubuntu.com, unix.stackexchange.com,
superuser.com, and serverfault.com.
For the monthly report, it currently looks like this:
s s a u
e u s n
r p k i
v e u x
e r b .
r u u s
f s n t
a e t a
serverfa 36 2 0 4
superuse 2 36 0 3
askubunt 0 0 36 2
unix.sta 4 3 2 36
"""
from lxml import html
import textwrap
import pprint
import argparse
# Default site for the --server-1 command-line option.
UNIX_SERVER = 'unix.stackexchange.com'
# Default site for the --server-2 command-line option.
AU_SERVER = 'askubuntu.com'
# XPath selecting the href of each <a> directly inside a
# <div class="user-details">; `username` takes the last path segment of
# each matched href.
USER_LINKS = '//div[@class="user-details"]/a/@href'
def users_url(server, filter='week'):
    """Return the reputation-tab users URL for `server`.

    `filter` is inserted verbatim as the value of the `filter` query
    parameter (default 'week').
    """
    query = 'tab=reputation&filter={0}'.format(filter)
    return 'http://{0}/users?{1}'.format(server, query)
def username(path):
    """Return the final '/'-separated segment of `path`.

    A path without any '/' is returned unchanged; a path ending in '/'
    yields the empty string.
    """
    segments = path.rsplit('/', 1)
    return segments[-1]
def common_users(url1, url2, xpath=USER_LINKS, parse=html.parse):
    """Return the set of usernames matched by `xpath` in both documents.

    `url1` and `url2` are each handed to `parse`; the values selected by
    `xpath` in the resulting document are reduced to usernames with
    `username`.  The result is the intersection of the two username sets.
    """
    def names_at(location):
        # Usernames extracted from a single parsed document.
        matches = parse(location).xpath(xpath)
        return set(username(match) for match in matches)

    first = names_at(url1)
    second = names_at(url2)
    return first & second
def site_top_user_report(url):
    """Print the usernames linked from the users page at `url`.

    Writes a heading naming the URL, followed by the usernames joined with
    spaces and wrapped by `textwrap.fill`.
    """
    # Single-argument print(...) produces the same output as the Python 2
    # print statement.
    print("Top users at '{0}':".format(url))
    hrefs = html.parse(url).xpath(USER_LINKS)
    names = [username(href) for href in hrefs]
    print(textwrap.fill(' '.join(names)))
def render_grid(server_names, filter='week'):
    """Render a text grid counting the users shared by each pair of servers.

    For each server in `server_names`, the users page built by `users_url`
    (with `filter`) is downloaded and parsed once, and the usernames
    selected by `USER_LINKS` are collected into a set.  Cell (row, column)
    of the grid is the size of the intersection of the two servers' sets.

    The header spells the first 8 characters of each server name
    vertically, one column per server; each body row starts with the
    8-character abbreviation of the server followed by its counts.

    Returns the header and body as a single string.
    """
    # Function-scope imports, as in the original; the urlopen fallback keeps
    # the Python 2 behaviour while also allowing Python 3.
    import contextlib
    import io
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # Materialise once: the names are walked for the header and again for
    # the body, which would exhaust a one-shot iterator.
    server_names = list(server_names)

    def abbrev(server):
        # Truncate to 8 characters, padding shorter names with spaces.
        return '{0:8s}'.format(server[0:8])

    server_abbrs = [abbrev(server) for server in server_names]

    def header(server_abbrs):
        # NOTE(review): with these literal widths the header letters land on
        # columns 12, 14, 16, ... while each count in the body occupies four
        # columns starting at column 9 -- confirm the intended spacing.
        def line(index):
            return ' ' * 12 + ' '.join(name[index] for name in server_abbrs)
        return '\n'.join(line(i) for i in range(8)) + '\n\n'

    def top_users(server):
        # Download the page, closing the connection as soon as it is read,
        # then parse it a single time.
        with contextlib.closing(urlopen(users_url(server, filter))) as response:
            page = io.BytesIO(response.read())
        return set(username(path)
                   for path in html.parse(page).xpath(USER_LINKS))

    def body(server_names):
        user_sets = [top_users(server) for server in server_names]
        # Intersection is symmetric, so row/column order does not matter.
        counts = [[len(column & row) for column in user_sets]
                  for row in user_sets]

        def line(name, counts):
            counts_str = ' '.join('{0:3d}'.format(c) for c in counts)
            return '{0} {1}'.format(name, counts_str)

        return '\n'.join(line(abbrev(name), row)
                         for name, row in zip(server_names, counts))

    return header(server_abbrs) + body(server_names)
if __name__ == '__main__':
    # Command-line entry point: either the four-site grid (--grid) or a
    # two-site report listing each site's top users and those in common.
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument('-f', '--filter', default='week')
    cli.add_argument('-1', '--server-1', default=UNIX_SERVER)
    cli.add_argument('-2', '--server-2', default=AU_SERVER)
    cli.add_argument('-g', '--grid', action='store_true')
    args = cli.parse_args()

    if not args.grid:
        first_url = users_url(args.server_1, args.filter)
        second_url = users_url(args.server_2, args.filter)
        for report_url in (first_url, second_url):
            site_top_user_report(report_url)
            # Blank separator line; print('') matches a bare Python 2 print.
            print('')
        print("Common users:")
        pprint.pprint(common_users(first_url, second_url))
    else:
        grid_servers = [
            'serverfault.com',
            'superuser.com',
            'askubuntu.com',
            'unix.stackexchange.com',
        ]
        print(render_grid(grid_servers, args.filter))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment