Skip to content

Instantly share code, notes, and snippets.

@nirvik
Created March 28, 2015 09:12
Show Gist options
  • Save nirvik/f54a84fdee6d9ec2a406 to your computer and use it in GitHub Desktop.
Save nirvik/f54a84fdee6d9ec2a406 to your computer and use it in GitHub Desktop.
Get all the comments from rembook.nitt.edu
import requests
from BeautifulSoup import BeautifulSoup as bs
import re
# Log in to rembook.nitt.edu, read the roll numbers linked from the "alerts"
# panel, then fetch each linked page and collect the four answers shown there.

# Login form fields; fill in 'username' and 'password' before running.
payload = {'username' : '' , 'password': '' ,'login': 'Log In'}
# Maps roll number -> dict holding the author name and the four answers.
container = { }
base_url = 'http://rembook.nitt.edu/home/'

# A session is used so the login cookie is sent with the later GET requests.
session = requests.session()
r = session.post(base_url + 'login', data=payload)
response = bs(r.text)

links_soup = response.find('div',{'class':'panel','id':'alerts'})
if links_soup is None:
    # Without the panel there is nothing to scrape; fail with a clear message
    # instead of an AttributeError on None.
    raise SystemExit('alerts panel not found: login failed or page layout changed')
links = links_soup.findAll('a')

# First run of digits in a link's href is treated as the roll number.
rollno_pattern = re.compile(r'(\d+)')
rollnos = set()
names = {}
for link in links:
    match = rollno_pattern.search(link.get('href') or '')
    if match is None:
        # Link carries no roll number (or no href at all); ignore it.
        continue
    number = match.group()
    rollnos.add(number)
    # Links whose text is literally 'comments' do not carry the author name.
    if link.text != 'comments':
        names[number] = link.text

for rollno in rollnos:
    r = session.get(base_url + '{0}/#comment'.format(rollno))
    soup = bs(r.text)
    comments = [comment.text for comment in soup.findAll('h4')]
    if len(comments) < 5:
        # The answers are read from <h4> elements 1..4 (element 0 is unused),
        # so a page with fewer than five cannot be parsed.
        print('skipping {0}: expected at least 5 <h4> elements, found {1}'.format(rollno, len(comments)))
        continue
    container[rollno] = {
        # Empty author when the only link for this roll number was a
        # 'comments' link and no name was recorded.
        'author': names.get(rollno, ''),
        'people call you': comments[1],
        'you frequently utter': comments[2],
        'your striking features are': comments[3],
        'and you are': comments[4],
    }
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(container[rollno])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment