Skip to content

Instantly share code, notes, and snippets.

@edison12a
Last active February 3, 2019 17:40
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save edison12a/7dccdb78a9979807d13a6c661a868332 to your computer and use it in GitHub Desktop.
Save edison12a/7dccdb78a9979807d13a6c661a868332 to your computer and use it in GitHub Desktop.
# if you want to pull data from an API, requests is your friend
import requests
# BeautifulSoup helps us extract data from html, xml, ..
from bs4 import BeautifulSoup
# this is the type of strings extracted from html
from bs4.element import NavigableString
# Scrape UNEB results by brute-forcing index numbers of the form
# "u<school>/<student>" and collecting the subject grades of every index
# number for which the server returns a results block.

# one {subject: grade} dict is appended here per index number that had results
ug_results = []

# set these to a big number like 1000, 10000, any number that makes sense as an index number
# NOTE: both are used as exclusive upper bounds of range(1, n), so the
# numbers actually tried are 1 .. n - 1.
schs = 10  # school numbers to loop over
stds = 100  # assumed student numbers to loop over, per school

# seconds to wait for the server before giving up on a single request;
# without a timeout one stalled connection would hang the whole scrape
REQUEST_TIMEOUT = 10

url = "https://ereg.uneb.ac.ug/ajax_calls/results_status"
# headers copied from the curl request of the real results page
headers = {
    "accept": "*/*",
    "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
    "x-requested-with": "XMLHttpRequest",
    'user-agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1',
}

# loop over possible school index numbers
for sch in range(1, schs):
    # loop over possible student index numbers from that school
    for std in range(1, stds):
        # zero-pad to at least 4 / 3 digits. Unlike slicing the last N
        # characters, this never truncates a larger number into a different
        # (wrong) index number, and it leaves the loop variables as ints.
        sch_no = f"{sch:04d}"
        std_no = f"{std:03d}"
        # body in the same pre-encoded form the curl request used (%2F is "/")
        data = f"index_no=u{sch_no}%2F{std_no}"

        # make the post request; only network-level failures are expected
        # here, so only those are caught and reported
        try:
            response = requests.post(
                url,
                headers=headers,
                data=data,
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as e:
            print(str(e))
            continue

        # parse the html part of the response using BS4
        soup = BeautifulSoup(response.text, 'html.parser')
        # the div that contains results; find() returns None when it is
        # absent, which is the normal outcome for an invalid index number
        results = soup.find(class_='col-md-10')
        if results is None:
            print(f"u{sch_no}/{std_no}: no results found")
            continue

        # subject -> grade for this student
        student_results = {}
        # the grades are bare strings separated by <br>s, so walk the direct
        # children of the div and keep only the text nodes
        for result_str in results.children:
            # exact type check on purpose: subclasses of NavigableString
            # (e.g. html comments) must not be treated as result text
            if type(result_str) is not NavigableString:
                continue
            # strip it of spaces
            result_str = str(result_str).strip()
            # strings with a subject and grade, like "ENG : 2", have a length of 7
            if len(result_str) != 7:
                continue
            # split a string like "ENG : 2" into two parts
            split_result = result_str.split(' : ')
            if len(split_result) < 2:
                # 7 characters long but not "<subject> : <grade>"; skip the
                # line instead of discarding the whole student
                continue
            student_results[split_result[0]] = split_result[1]

        print(student_results)  # {'ENG': '4', 'LIT': '7', 'HIS': '4', 'GEO': '5', 'MAT': '6', 'PHY': '6', 'CHE': '7', 'BIO': '6', 'COM': '6', 'CST': '7'}
        # add results to list
        ug_results.append(student_results)

# do what you want with your results, Have fun!
print(ug_results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment