Skip to content

Instantly share code, notes, and snippets.

@normalhuman
Created January 10, 2017 04:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save normalhuman/6bfdce417cf8537a34ba5188286e35b4 to your computer and use it in GitHub Desktop.
Save normalhuman/6bfdce417cf8537a34ba5188286e35b4 to your computer and use it in GitHub Desktop.
Scraping a user's helpful flag counts network-wide. Python 3
import re
import urllib.request
from bs4 import BeautifulSoup
from operator import itemgetter
from time import sleep
# Scrape a Stack Exchange network profile and report the user's helpful flag
# counts per site, grouped into badge-relevant tiers, plus a network-wide total.

account_id = 'YOUR ACCOUNT ID HERE'
network_profile = 'http://stackexchange.com/users/' + account_id + '?tab=accounts'

# Tier lower bounds, highest first. The grouping and summary helpers rely on
# this descending order.
cutoffs = [500, 400, 300, 200, 100]

# The top tier is labelled "Marshal badges" in the summary instead of a range.
MARSHAL_FLAGS = 500


def fetch_soup(url):
    """Download *url* and return its content parsed with BeautifulSoup."""
    with urllib.request.urlopen(url) as response:
        return BeautifulSoup(response.read(), 'html.parser')


def clean_site_name(text):
    """Return a printable site name from the link text.

    Falls back to 'unknown' when the link has no text. A literal '&amp;'
    left in the text is turned back into '&'.
    """
    if not text:
        return 'unknown'
    return text.strip().replace('&amp;', '&')


def collect_site_profiles(soup):
    """Build one ``{'site_name', 'url'}`` dict per site listed in *soup*.

    *soup* is the parsed network profile page (accounts tab).
    """
    profiles = []
    for site in soup.find_all('div', class_='account-site'):
        # NOTE(review): assumes the site link is the second child of the
        # second child of each 'account-site' div -- confirm against the
        # current page markup.
        link = site.contents[1].contents[1]
        print(link)
        profiles.append({
            'site_name': clean_site_name(link.string),
            'url': link['href'] + '?tab=topactivity',
        })
    return profiles


def parse_flag_count(text):
    """Return the leading integer of *text*, e.g. '1,234 helpful flags' -> 1234.

    Returns 0 when *text* is empty/None or does not start with a number, in
    line with how a missing flag icon is treated.
    """
    words = text.split() if text else []
    if not words:
        return 0
    try:
        return int(words[0].replace(',', ''))
    except ValueError:
        return 0


def fetch_flag_count(profile_url):
    """Fetch a per-site profile page and return its helpful flag count.

    Returns 0 when the page has no helpful-flags icon.
    """
    soup = fetch_soup(profile_url)
    icon = soup.find("span", class_="icon-helpful-flags")
    if icon is None:
        return 0
    # The count is read from the node that directly follows the icon.
    sibling = icon.next_sibling
    return parse_flag_count(str(sibling) if sibling is not None else '')


def group_by_cutoff(profiles, cutoffs):
    """Map each cutoff to the names of sites whose count falls in its tier.

    Each profile goes into the first cutoff it reaches, so *cutoffs* must be
    in descending order. Profiles below every cutoff are left out. Names keep
    the order of *profiles*.
    """
    groups = {cut: [] for cut in cutoffs}
    for profile in profiles:
        for cut in cutoffs:
            if profile['flag_count'] >= cut:
                groups[cut].append(profile['site_name'])
                break
    return groups


def summary_lines(groups, cutoffs):
    """Return the summary as a list of strings, one per ``print`` call.

    Empty tiers are skipped. Every range heading runs from its cutoff up to
    one below the preceding cutoff, whether or not that preceding tier had
    any sites.
    """
    lines = []
    prev_cut = MARSHAL_FLAGS
    for cut in cutoffs:
        names = groups[cut]
        if names:
            if cut == MARSHAL_FLAGS:
                heading = '\n### Marshal badges ({})\n'.format(len(names))
            else:
                heading = '\n### {}-{} helpful flags ({})\n'.format(
                    cut, prev_cut - 1, len(names))
            lines.append(heading)
            lines.append(", ".join(names))
        # Advance unconditionally so an empty tier does not widen the next
        # tier's printed range.
        prev_cut = cut
    return lines


def main():
    """Scrape every site profile and print per-site, per-tier and total counts."""
    site_profiles = collect_site_profiles(fetch_soup(network_profile))

    total_flags = 0
    for profile in site_profiles:
        sleep(1)  # pause between requests
        profile['flag_count'] = fetch_flag_count(profile['url'])
        total_flags += profile['flag_count']

    print('*** Flag Counts ***\n')
    site_profiles.sort(key=itemgetter('flag_count'), reverse=True)
    for profile in site_profiles:
        print('{} flags on {}'.format(profile['flag_count'], profile['site_name']))

    print('\n *** Summary ***')
    for line in summary_lines(group_by_cutoff(site_profiles, cutoffs), cutoffs):
        print(line)

    print('\n *** Total ***')
    print('{} helpful flags network-wide'.format(total_flags))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment