Skip to content

Instantly share code, notes, and snippets.

@aniruddha-adhikary
Created December 2, 2013 07:46
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aniruddha-adhikary/7746347 to your computer and use it in GitHub Desktop.
Save aniruddha-adhikary/7746347 to your computer and use it in GitHub Desktop.
Education Board Result Scraper
import urllib2
from BeautifulSoup import BeautifulSoup
def get_html(exam, year, board, roll):
    """Fetch the raw HTML of the result page for one candidate.

    The index page is requested first to obtain the PHPSESSID session
    cookie; that cookie is then sent along with the POSTed search form to
    the result page.

    Args:
        exam: Value sent as the form's ``exam`` field.
        year: Value sent as the form's ``year`` field.
        board: Value sent as the form's ``board`` field.
        roll: Value sent as the form's ``roll`` field.

    Returns:
        The body of the result page response, as returned by ``read()``.

    Raises:
        ValueError: If the index page response sets no PHPSESSID cookie.
    """
    # Python 2 home of urlencode; imported locally so the block does not
    # depend on a new module-level import.
    import urllib

    base_url = "http://www.educationboardresults.gov.bd/regular"
    index_url = base_url + '/' + 'index.php'
    result_url = base_url + '/' + 'result.php'

    index_response = urllib2.urlopen(index_url)
    try:
        # Look the cookie up by header name rather than by header position,
        # so a change in the server's header order cannot break it.
        cookie_headers = index_response.info().getheaders('Set-Cookie')
    finally:
        index_response.close()

    session_cookie = None
    for header_value in cookie_headers:
        # Keep only the leading "name=value" pair; drop attributes such as
        # "path=/" that follow the first ';'.
        name_value = header_value.split(';', 1)[0].strip()
        if name_value.startswith('PHPSESSID='):
            session_cookie = name_value
            break
    if session_cookie is None:
        raise ValueError("index page did not set a PHPSESSID session cookie")

    # A list of pairs keeps the original field order; urlencode escapes any
    # characters that would otherwise corrupt the form body.
    post_data = urllib.urlencode([
        ('sr', 3),
        ('et', 0),
        ('exam', exam),
        ('year', year),
        ('board', board),
        ('roll', roll),
        ('button2', 'Submit'),
    ])

    result_request = urllib2.Request(result_url, headers={
        'Cookie': session_cookie,
    })
    result_response = urllib2.urlopen(result_request, post_data)
    try:
        return result_response.read()
    finally:
        result_response.close()
def get_result(exam, year, board, roll):
    """Fetch a candidate's result page and return it as a dictionary.

    Args:
        exam: Passed through to ``get_html``.
        year: Passed through to ``get_html``.
        board: Passed through to ``get_html``.
        roll: Passed through to ``get_html``.

    Returns:
        A dict with two keys: ``'info'`` (name, group, session,
        registration, student_type and gpa, read from fixed ``<td>``
        positions) and ``'grades'`` (the mapping built by ``dictify`` from
        the cells between the 'Grade' and 'Search Again' cells).

    Raises:
        ValueError: If the page has no 'Grade' or 'Search Again' cell.
        IndexError: If the page has fewer ``<td>`` cells than the fixed
            positions read below.
    """
    html_response = get_html(exam, year, board, roll)
    souped_html = BeautifulSoup(html_response)

    # Reduce every table cell to its text content.
    td_list = [cell.text for cell in souped_html.findAll('td')]

    # The grade cells sit between the 'Grade' header cell and the
    # 'Search Again' cell.
    try:
        grades_start = td_list.index('Grade') + 1
        grades_end = td_list.index('Search Again')
    except ValueError:
        raise ValueError(
            "result page has no 'Grade' / 'Search Again' cells; "
            "the result may not exist or the page layout has changed")
    result_list = td_list[grades_start:grades_end]

    # NOTE(review): these are fixed cell positions, presumably matching the
    # result page layout at the time of writing - confirm before relying on
    # them if the site changes.
    full_dict = {
        'info': {
            'name': td_list[27].lower(),
            'group': td_list[33].lower(),
            'session': td_list[37],
            'registration': td_list[39],
            'student_type': td_list[41].lower(),
            'gpa': td_list[49],
        },
    }
    # Merge in the {'grades': ...} mapping without concatenating item lists.
    full_dict.update(dictify(result_list))
    return full_dict
def dictify(result_list):
    """Build the grades dictionary from the flat list of result cells.

    The list is read in rows of three cells. Within each row the second
    cell is used as the key and the third cell as its value; the first
    cell is ignored.

    Args:
        result_list: Flat sequence of cell values, three per row.

    Returns:
        ``{'grades': {second_cell: third_cell, ...}}``. A trailing row that
        lacks its third cell is skipped rather than raising IndexError.
    """
    grades = {}
    # Stop one short of the end so that key_index + 1 is always a valid
    # index, even when the final row is incomplete.
    for key_index in range(1, len(result_list) - 1, 3):
        grades[result_list[key_index]] = result_list[key_index + 1]
    return {'grades': grades}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment