Skip to content

Instantly share code, notes, and snippets.

@hitecherik
Created March 30, 2018 14:22
Show Gist options
  • Save hitecherik/ce02afa4eba40f1929bb62e47bf94d05 to your computer and use it in GitHub Desktop.
Save hitecherik/ce02afa4eba40f1929bb62e47bf94d05 to your computer and use it in GitHub Desktop.
Fetches Computing exams from Imperial
from login import USERNAME, PASSWORD
from datetime import datetime
import os, re, requests, shutil
# Prefix that both the href and the link text of an index-page anchor must
# start with for main() to download that file.
pdf_prefix = "C1"
def main():
    """Download every linked paper whose name starts with ``pdf_prefix``.

    Recreates an empty ``papers`` directory in the current working directory,
    then walks the year codes ``12-13``, ``13-14``, ... up to the one ending
    in the current two-digit year. For each year the index page is fetched
    with HTTP basic auth (``USERNAME`` / ``PASSWORD``); every anchor whose
    href and text both start with ``pdf_prefix`` is saved to
    ``papers/<year_code>/<name>.pdf``.

    Index pages or files that do not answer with HTTP 200 are skipped
    silently. Network errors raised by ``requests`` are not caught.
    """
    # Start from a clean slate; failing to remove (e.g. the directory does
    # not exist yet) is not an error.
    try:
        shutil.rmtree("papers")
    except OSError:
        pass
    os.mkdir("papers")

    # Loop invariants: build the patterns and the credential tuple once.
    name_cleaner = re.compile(r"(=.*?):")
    link_pattern = re.compile("<a href='({0}.*?)'>({0}.*?)</a>".format(pdf_prefix))
    credentials = (USERNAME, PASSWORD)

    for year in range(12, datetime.now().year % 1000):
        year_code = "{}-{}".format(year, year + 1)
        base_url = "https://exams.doc.ic.ac.uk/pastpapers/papers.{}/".format(year_code)

        index = requests.get(base_url, auth=credentials)
        if index.status_code != 200:
            continue

        os.mkdir("papers/" + year_code)

        # Each href is downloaded at most once. Matches are visited in
        # document order, so when the same href appears with different link
        # texts the first one on the page names the file.
        seen_hrefs = set()
        for href, link_text in link_pattern.findall(index.text):
            if href in seen_hrefs:
                continue
            seen_hrefs.add(href)

            # base_url already ends in "/", so this URL contains "//"; kept
            # as in the original request to avoid changing what is fetched.
            paper = requests.get(
                "{}/{}".format(base_url, href), auth=credentials, stream=True
            )
            try:
                if paper.status_code != 200:
                    continue

                # Drop any "=...:" segment from the link text, then strip
                # the remaining colons from the whole path.
                target = "papers/{}/{}.pdf".format(
                    year_code, name_cleaner.sub("", link_text)
                ).replace(":", "")

                with open(target, "wb") as out:
                    for chunk in paper.iter_content(chunk_size=1024):
                        if chunk:
                            out.write(chunk)
            finally:
                # A streamed response holds its connection until closed.
                paper.close()
# Run the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
@hitecherik
Copy link
Author

Needs a login.py file in the same directory that looks like this:

USERNAME = "imperial-username"
PASSWORD = "imperial-password"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment