Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sainipray/594ebbe94f2518caf2afdb57bfb8691e to your computer and use it in GitHub Desktop.
Save sainipray/594ebbe94f2518caf2afdb57bfb8691e to your computer and use it in GitHub Desktop.
Scrape data from Stackoverflow.com using the Python BeautifulSoup4 library
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
import bs4
import requests
def get_data(url, timeout=10):
    """Download *url* and return its body parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.  Without
            a timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``bs4.BeautifulSoup`` object built from the response text with
        the ``'html.parser'`` parser.

    Raises:
        requests.exceptions.HTTPError: The server answered with a 4xx/5xx
            status (raised by ``raise_for_status``).
        requests.exceptions.Timeout: The server did not respond within
            *timeout* seconds.
    """
    res = requests.get(url, timeout=timeout)
    # Fail loudly on error pages instead of parsing their HTML as data.
    res.raise_for_status()
    return bs4.BeautifulSoup(res.text, 'html.parser')
def get_latest_questions():
    """Print the questions listed on the Stack Overflow front page.

    Each line has the form ``Question N: <title>. <status> by <user>``.

    The title, status and user links are looked up inside each
    ``.question-summary`` element rather than in three page-wide lists
    paired by position.  A summary that lacks a status or user link
    therefore prints an empty field instead of raising ``IndexError`` or
    shifting every following row onto the wrong question.  Summaries with
    no title link are skipped and do not consume a number.
    """
    soup = get_data('https://stackoverflow.com')
    number = 0
    for summary in soup.select('.question-summary'):
        title_links = summary.select('div.summary > h3 > a')
        if not title_links:
            continue
        number += 1
        status_links = summary.select(
            'div.summary > div.started > a.started-link')
        user_links = summary.select(
            'div.summary > div.started > a:nth-of-type(2)')
        status = status_links[0].text.strip() if status_links else ''
        user = user_links[0].text.strip() if user_links else ''
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Question {0}: {1}. {2} by {3}".format(
            number, title_links[0].text.strip(), status, user))
def get_popular_tags():
    """Print the tags listed on the Stack Overflow tags page with their counts.

    Each line has the form ``Tag N: <tag> X <count>``.

    Tags and counts are matched by position in the page.  ``zip`` is used
    so that a page with fewer counts than tags ends the listing instead of
    raising ``IndexError`` part-way through.
    """
    soup = get_data('https://stackoverflow.com/tags')
    tags = soup.select('#tags-browser .post-tag')
    counts = soup.select('#tags-browser span.item-multiplier-count')
    for number, (tag, count) in enumerate(zip(tags, counts), 1):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Tag {0}: {1} X {2}".format(
            number, tag.text.strip(), count.text.strip()))
def get_top_users():
    """Print the users listed on the reputation tab of the users page.

    Each line has the form ``User N: <name> - <reputation>``.

    Names and reputation scores are matched by position in the page.
    ``zip`` is used so that a page with fewer scores than names ends the
    listing instead of raising ``IndexError`` part-way through.
    """
    soup = get_data('https://stackoverflow.com/users?tab=Reputation&filter=all')
    users = soup.select('#user-browser .user-details > a')
    reputation = soup.select('#user-browser .user-details .reputation-score')
    for number, (user, score) in enumerate(zip(users, reputation), 1):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("User {0}: {1} - {2}".format(
            number, user.text.strip(), score.text.strip()))
def _print_tag_ranking(section):
    """Print ``User N: <name> - <value>`` for each user row in *section*.

    The value is the text of the first ``td`` of the table row at the same
    position as the user link; it is left empty when the row has no ``td``.
    """
    users = section.select('.user-details a')
    rows = section.select('tr')
    for number, (user, row) in enumerate(zip(users, rows), 1):
        cells = row.select('td:nth-of-type(1)')
        value = cells[0].text.strip() if cells else ''
        print("User {0}: {1} - {2}".format(number, user.text.strip(), value))


def get_top_users_of_tag():
    """Prompt for a tag name and print its top answerers and top askers.

    The tag is percent-encoded before being placed in the URL, so tags
    such as ``c#`` are requested as typed instead of having everything
    after ``#`` treated as a URL fragment.

    Raises:
        IndexError: The fetched page has fewer than four
            ``#questions div.fl`` sections.
    """
    # Function-scope fallbacks keep this block runnable on Python 2 and 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    tag = read_line('Enter tag name ').strip()
    print("Top Answerers")
    # quote() is given bytes so non-ASCII input is handled identically on
    # both Python versions; it returns a plain ASCII string either way.
    if not isinstance(tag, bytes):
        tag = tag.encode('utf-8')
    soup = get_data(
        'https://stackoverflow.com/tags/' + quote(tag, safe=b'') + '/topusers')

    # Run the selector once; sections 1 and 3 hold the two rankings.
    sections = soup.select('#questions div.fl')
    _print_tag_ranking(sections[1])
    print("\n\nTop Askers")
    _print_tag_ranking(sections[3])
if __name__ == '__main__':
    # Run every report in order, printing its heading first.  Each heading
    # is passed to print(...) as a single argument, which is valid on both
    # Python 2 and Python 3.
    reports = (
        ("Get latest questions\n\n", get_latest_questions),
        ("\n\nGet popular tags\n\n", get_popular_tags),
        ("\n\nGet top users\n\n", get_top_users),
        ("\n\nGet users of tag\n\n", get_top_users_of_tag),
    )
    for heading, report in reports:
        print(heading)
        report()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment