Skip to content

Instantly share code, notes, and snippets.

@scottydelta
Forked from nirvik/RembookScraper.py
Last active August 29, 2015 14:18
Show Gist options
  • Save scottydelta/7af852193c5a53ba3956 to your computer and use it in GitHub Desktop.
Save scottydelta/7af852193c5a53ba3956 to your computer and use it in GitHub Desktop.
import requests
from BeautifulSoup import BeautifulSoup as bs
import re
import ho.pisa as pisa
import os
import sys
import cgi
import cStringIO
import logging
# Step 1: log in to rembook and harvest the links in the "alerts" panel.
#
# Login form fields. The credentials can be overridden through the
# REMBOOK_USERNAME / REMBOOK_PASSWORD environment variables so they do not
# have to be edited into the script; the defaults are the original
# hard-coded values.
payload = {
    'username': os.environ.get('REMBOOK_USERNAME', '106111062'),
    'password': os.environ.get('REMBOOK_PASSWORD', '5'),
    'login': 'Log In',
}
# Filled in later: roll number -> dict of the comment fields for that person.
container = {}
base_url = 'http://rembook.nitt.edu/home/'

# A single session is used for the login and for every later page fetch
# (requests sessions keep cookies between requests).
session = requests.session()
r = session.post(base_url + 'login', data=payload)
response = bs(r.text)

links_soup = response.find('div', {'class': 'panel', 'id': 'alerts'})
if links_soup is None:
    # Without this check the script would die with an opaque AttributeError
    # on None. Presumably this means the login was rejected -- TODO confirm.
    sys.exit('alerts panel not found in the login response '
             '(login failed or page layout changed)')
links = links_soup.findAll('a')

# rollnos: every roll number referenced by a link in the panel.
# names:   roll number -> link text, taken only from links whose text is not
#          the literal 'comments'.
rollnos = set()
names = {}
for link in links:
    # The first run of digits in the href is treated as the roll number.
    match = re.search(r'(\d+)', link.get('href') or '')
    if match is None:
        # Link without an href or without digits: nothing to record.
        continue
    rollno = match.group()
    rollnos.add(rollno)
    if link.text != 'comments':
        names[rollno] = link.text
# Step 2: fetch each person's page and render their comments as HTML.
#
# Fragments are collected in a list and joined once at the end instead of
# growing one string with repeated "+=".
html_parts = ["""<center><h1>My rembook</h1></center>"""]
for rollno in rollnos:
    r = session.get('http://rembook.nitt.edu/home/{0}/#comment'.format(rollno))
    soup = bs(r.text)
    comments = [comment.text for comment in soup.findAll('h4')]

    # The four answers are read from the 2nd..5th <h4> elements, so a page
    # with fewer than five would raise IndexError. Report and skip it rather
    # than abort the whole run.
    if len(comments) < 5:
        logging.warning('Skipping %s: expected at least 5 <h4> elements, found %d',
                        rollno, len(comments))
        continue

    # A roll number that only ever appeared in a 'comments' link has no entry
    # in names; fall back to the roll number itself instead of a KeyError.
    author = names.get(rollno, rollno)
    called, utter, features, you_are = comments[1:5]

    container[rollno] = {
        'author': author,
        'people call you': called,
        'you frequently utter': utter,
        'your striking features are': features,
        'and you are': you_are,
    }
    # Single-argument print(...) behaves the same as the print statement
    # under Python 2.
    print(container[rollno])
    html_parts.append("""<p><img src="http://rembook.nitt.edu/uploads/%s.jpg"><br>Author: %s<br>People call you: %s<br>You Frequently Utter: %s<br> Your striking feature: %s <br>and You Are: %s</p>""" % (rollno, author, called, utter, features, you_are))

temphtml = ''.join(html_parts)
# NOTE(review): the original indentation was lost; the full document is
# assumed to be printed once after the loop rather than on every iteration.
print(temphtml)
#from fpdf import FPDF, HTMLMixin
#
#class MyFPDF(FPDF, HTMLMixin):
# pass
#
#pdf=MyFPDF()
##First page
#pdf.add_page()
#pdf.write_html(temphtml)
#pdf.output('html.pdf','F')
# Step 3: render the assembled HTML to html.pdf and open it in a viewer.
#
# pdf stays None when CreatePDF (or opening the output file) raises, so the
# check below can tell "conversion crashed" apart from "conversion reported
# errors" without hitting an unbound name.
pdf = None
try:
    # The with-block closes html.pdf even if the conversion fails.
    with open('html.pdf', 'wb') as output:
        pdf = pisa.CreatePDF(cStringIO.StringIO(temphtml), output)
except Exception as e:
    print(e)

if pdf is None:
    # The exception has already been printed above; there is no usable PDF.
    print('html.pdf was not generated')
elif pdf.err:
    # The original called dumpErrors(), which is not defined or imported
    # anywhere in this file; report the error indicator directly instead.
    print('PDF generation reported errors: %s' % pdf.err)
else:
    pisa.startViewer('html.pdf')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment