Skip to content

Instantly share code, notes, and snippets.

@geraldbaeck
Created August 7, 2014 10:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save geraldbaeck/3dd915b46b452f478c79 to your computer and use it in GitHub Desktop.
Save geraldbaeck/3dd915b46b452f478c79 to your computer and use it in GitHub Desktop.
This gist fetches every article linked from the front page of profil.at and counts the total number of comments posted as well as the number of unique discussion participants.
#!/usr/bin/python
# -*- coding: utf-8 -*-
from urlparse import urljoin
from bs4 import BeautifulSoup
from requests import get
# Front page that gets scraped; also the base for resolving relative links
# and, with 'articles' appended, the marker used to recognise article URLs.
ROOT_URL = "http://www.profil.at/"
def main():
    """Print comment statistics for the articles linked from ROOT_URL.

    Downloads ROOT_URL, collects every distinct link whose absolute URL
    contains ROOT_URL + 'articles', then downloads each of those pages and
    tallies the <div class="nnw-flog-user"> elements found on them.

    Three summary lines are written to stdout: the number of articles, the
    total number of comment elements, and the number of distinct user names.
    Returns None. Network errors raised by ``get`` are not caught.
    """
    # Name the parser explicitly so results do not depend on which optional
    # parser libraries happen to be installed.
    front_page = BeautifulSoup(get(ROOT_URL).text, "html.parser")

    article_marker = ROOT_URL + 'articles'
    articles = set()
    # href=True skips anchors without an href attribute, which would
    # otherwise raise KeyError on link['href'].
    for link in front_page.find_all('a', href=True):
        url = urljoin(ROOT_URL, link['href'])
        if article_marker in url:
            articles.add(url)

    comment_counter = 0
    user = set()
    for article_url in articles:
        page = BeautifulSoup(get(article_url).text, "html.parser")
        for comment in page.find_all('div', class_="nnw-flog-user"):
            comment_counter += 1
            user.add(_commenter_name(comment.text))

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print("Artikel: %d" % len(articles))
    print("Kommentare: %d" % comment_counter)
    print("Unique Users: %d" % len(user))


def _commenter_name(text):
    """Return the user name portion of a comment header string."""
    # The name is whatever precedes the first comma ...
    name = text.split(',')[0]
    # ... and, when that part contains a pipe, the piece after the first pipe.
    # Testing the pre-comma part (not the whole text) avoids an IndexError
    # when a pipe only occurs later in the header.
    if '|' in name:
        name = name.split('|')[1]
    return name
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment