# scottydelta/RembookScraper.py

## RembookScraper.py
import requests
from BeautifulSoup import BeautifulSoup as bs
import re
import ho.pisa as pisa
import os
import sys
import cgi
import cStringIO
import logging
# Log in to rembook.nitt.edu and collect, from the "alerts" panel of the page
# returned by the login POST, the roll numbers linked there and the link text
# (used as the author name) that goes with each of them.

# Login form fields.  The credentials default to the values this script has
# always used, but can be overridden with the REMBOOK_USERNAME and
# REMBOOK_PASSWORD environment variables so they need not live in the source.
payload = {
    'username': os.environ.get('REMBOOK_USERNAME', '106111062'),
    'password': os.environ.get('REMBOOK_PASSWORD', '5'),
    'login': 'Log In',
}
# rollno -> dict of scraped answers; populated by the scraping loop.
container = {}

base_url = 'http://rembook.nitt.edu/home/'
# A single session is reused for the login POST and the later page GETs.
session = requests.session()
r = session.post(base_url + 'login', data=payload)

response = bs(r.text)
links_soup = response.find('div', {'class': 'panel', 'id': 'alerts'})
if links_soup is None:
    # find() returns None when nothing matches; fail with a readable message
    # instead of an AttributeError on the findAll() call below.
    sys.exit('Could not find the "alerts" panel in the login response '
             '(the login may have failed).')
links = links_soup.findAll('a')

# The first run of digits in a link's href is taken as the roll number.
_rollno_re = re.compile(r'(\d+)')

rollnos = set()
names = {}
for link in links:
    match = _rollno_re.search(link.get('href', ''))
    if match is None:
        # Links without a numeric part carry no roll number; skip them
        # rather than crashing on match.group().
        continue
    rollno = match.group()
    rollnos.add(rollno)
    # Links whose text is literally 'comments' contribute a roll number but
    # not a name.
    if link.text != 'comments':
        names[rollno] = link.text

# Fetch every roll number's page, pull the text of its <h4> headings and turn
# them into one HTML paragraph per roll number.  The result is accumulated in
# `temphtml` (consumed by the PDF step) and mirrored into `container`.
html_parts = ["""<center><h1>My rembook</h1></center>"""]
for rollno in rollnos:
    r = session.get('http://rembook.nitt.edu/home/{0}/#comment'.format(rollno))
    soup = bs(r.text)

    comments = [comment.text for comment in soup.findAll('h4')]
    if len(comments) < 5:
        # Indices 1..4 are read below; a page with fewer headings would raise
        # IndexError and abort the whole run, so skip just this entry.
        logging.warning('Skipping %s: expected at least 5 <h4> headings, found %d',
                        rollno, len(comments))
        continue

    # `rollnos` is built from every alert link but `names` only from links
    # whose text is not 'comments', so a name can be missing; fall back to
    # the roll number instead of raising KeyError.
    author = names.get(rollno, rollno)

    # The first <h4> is not used; the next four are the answers, in order.
    call_you, utter, striking, you_are = comments[1:5]
    container[rollno] = {
        'author': author,
        'people call you': call_you,
        'you frequently utter': utter,
        'your striking features are': striking,
        'and you are': you_are,
    }
    print(container[rollno])
    html_parts.append("""<p><img src="http://rembook.nitt.edu/uploads/%s.jpg"><br>Author: %s<br>People call you: %s<br>You Frequently Utter: %s<br> Your striking feature: %s <br>and You Are: %s</p>""" % (rollno, author, call_you, utter, striking, you_are))

# Join once at the end instead of growing the string inside the loop.
temphtml = ''.join(html_parts)
print(temphtml)
#from fpdf import FPDF, HTMLMixin
#
#class MyFPDF(FPDF, HTMLMixin):
#    pass
#
#pdf=MyFPDF()
##First page
#pdf.add_page()
#pdf.write_html(temphtml)
#pdf.output('html.pdf','F')
# Render the accumulated HTML in `temphtml` to html.pdf with pisa and, on
# success, open the result with pisa's viewer helper.
pdf = None  # stays None if CreatePDF raises, so the check below is safe
try:
    # cStringIO cannot hold unicode text containing non-ASCII characters, so
    # hand pisa UTF-8 bytes and tell it which encoding they are in.
    html_bytes = temphtml.encode('utf-8')
    # The context manager closes (and flushes) html.pdf even on failure.
    with open('html.pdf', 'wb') as pdf_file:
        pdf = pisa.CreatePDF(
            cStringIO.StringIO(html_bytes),
            pdf_file,
            encoding='utf-8'
            )
except Exception as e:
    # Best effort, as before: report the problem rather than a traceback.
    print(e)

if pdf is None:
    # The exception was already printed above; just signal failure.
    sys.exit(1)
elif pdf.err:
    sys.exit('html.pdf was generated with errors (pdf.err = %s)' % pdf.err)
else:
    pisa.startViewer('html.pdf')