Skip to content

Instantly share code, notes, and snippets.

@nrathnam
Last active August 25, 2016 20:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nrathnam/79d03213c53449bb5039efd95bd97057 to your computer and use it in GitHub Desktop.
Save nrathnam/79d03213c53449bb5039efd95bd97057 to your computer and use it in GitHub Desktop.
# Extract the Cost, Facility, Faculty, Programme, Student and Recruiter data
# from The Economist.
# Ranking, Application and Accreditation details are not required for our
# analysis, so tabs 0, 2 and 8 are skipped below.
root_web = 'http://www.economist.com/'

# Parsed pages, appended school by school: six consecutive entries per school,
# one for each of the tabs 1, 3, 4, 5, 6 and 7 (in that order).
university_school_details = []
for j in range(100):
    for k in range(9):
        if k in (0, 2, 8):
            continue
        school_full_url = root_web + univ_school_url[j] + '?tab=' + str(k)
        response = opener.open(school_full_url)
        try:
            page = response.read()
        finally:
            # Release the connection even if the read fails.
            # NOTE(review): assumes the object returned by opener.open() has
            # close(), as urllib/urllib2 responses do -- confirm for `opener`.
            response.close()
        # Parse once and store that same tree; the original parsed every page
        # twice and discarded the first result.
        soup = BeautifulSoup(page)
        university_school_details.append(soup)
# One dictionary per profile tab. Each maps the school name (UTF-8 encoded)
# to the list of <tr class="odd"> / <tr class="even"> rows found on that
# school's page for the tab.
univ_facilities_dict = {}
univ_cost_dict = {}
univ_faculty_dict = {}
univ_programme_dict = {}
univ_students_dict = {}
univ_recruiters_dict = {}

# university_school_details holds six consecutive pages per school; position
# (i % 6) within each group selects the dictionary that receives the rows.
tab_dicts = [
    univ_cost_dict,        # i % 6 == 0
    univ_facilities_dict,  # i % 6 == 1
    univ_faculty_dict,     # i % 6 == 2
    univ_programme_dict,   # i % 6 == 3
    univ_students_dict,    # i % 6 == 4
    univ_recruiters_dict,  # i % 6 == 5
]

# Iterate over what was actually downloaded instead of a hard-coded 600, so a
# shorter (or longer) list of pages no longer needs the loop bound edited.
for i, school_page in enumerate(university_school_details):
    # The text of the second <h2> on the page is used as the school name.
    school_name = school_page.find_all('h2')
    school_name = school_name[1].get_text().encode('utf-8')
    j = i % 6
    tab_dicts[j][school_name] = school_page.find_all('tr', {'class': ['odd', 'even']})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment