Last active
December 4, 2023 11:37
-
-
Save lancylot2004/5ab7acda68e127cee9aec1643b99ca63 to your computer and use it in GitHub Desktop.
Charterhouse Fireflycloud Calendar Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Script to generate iCal files for term time | |
hashes for Charterhouse School. | |
Author: Lancelot Liu | |
License: MIT | |
Email: lancylot2004@proton.me | |
""" | |
# requirements
# icalendar python-dateutil requests selenium tqdm
from datetime import datetime, timedelta | |
import os | |
import json | |
from getpass import getpass | |
from re import findall | |
from time import sleep | |
from tqdm import tqdm | |
from typing import List | |
import requests | |
from dateutil.parser import parse | |
from icalendar import Calendar, Event | |
from selenium import webdriver | |
from selenium.common.exceptions import NoSuchElementException | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
from selenium.webdriver.common.by import By | |
# Date handed to Person.getData() as the first week to scrape.
START_DATE = datetime(2023, 4, 18)
# Used as the 'until' bound of the recurrence rule on every exported event.
END_DATE = datetime(2023, 5, 27)
# Captures whatever is assigned to PLANNER_INITIAL_STATUS inside the planner
# page's inline <script>; the capture is later fed to json.loads().
EXPR = r"<script>var PLANNER_INITIAL_STATUS = (.*?)<\/script>"
# Width of the banner printed at start-up. os.get_terminal_size() raises
# OSError when stdout is not attached to a terminal (piped output, cron, test
# runners), so fall back to a conventional 80 columns instead of crashing at
# import time. A reported width of 0 is treated the same way.
try:
    WIDTH = os.get_terminal_size().columns or 80
except OSError:
    WIDTH = 80
# Seconds; used both as the requests timeout and as the Selenium wait limit.
TIMEOUT = 20
ENTRY_POINT = "https://charterhouse.fireflycloud.net/login/login.aspx?prelogin=https%3a%2f%2fcharterhouse.fireflycloud.net%2f&kr=MSAADKeyRing"

# Explicit check rather than `assert`, which is stripped when running with -O.
if START_DATE >= END_DATE:
    raise ValueError("START_DATE must be earlier than END_DATE")
## Class Definitions | |
class Hash:
    """
    Class to represent an event. Contains
    all the necessary information to
    generate an iCal event.

    Two events compare (and hash) equal when teacher, set, start, end,
    location and subject all match. The uid is deliberately NOT part of the
    comparison, and therefore must not be part of the hash either.
    """

    def __init__(self, eventJSON):
        """
        Build an event from one entry of the planner's event list.

        Args:
            eventJSON (dict): Mapping providing the keys 'guid',
                'chairperson', 'description', 'isostartdate', 'isoenddate',
                'location' and 'subject'.
        """
        self.uid = eventJSON['guid']
        self.teacher = eventJSON['chairperson']
        self.set = eventJSON['description']
        # The '.000000' suffix is appended to the timestamp before parsing,
        # exactly as the script has always done.
        self.isostartdate = parse(eventJSON['isostartdate'] + '.000000')
        self.isoenddate = parse(eventJSON['isoenddate'] + '.000000')
        self.location = eventJSON['location']
        self.subject = eventJSON['subject']

    def _key(self):
        """
        Fields that define event identity; shared by __eq__ and __hash__
        so the two can never disagree.
        """
        return (self.teacher, self.set, self.isostartdate,
                self.isoenddate, self.location, self.subject)

    def __eq__(self, other):
        if not isinstance(other, Hash):
            # Let Python try the reflected comparison / fall back to identity
            # instead of raising AttributeError on foreign types.
            return NotImplemented
        return self._key() == other._key()

    def __hash__(self):
        # Must be derived from the same fields as __eq__: objects that
        # compare equal are required to have equal hashes, otherwise set and
        # dict operations on events silently misbehave.
        return hash(self._key())

    def __lt__(self, other):
        """
        Overloaded to compare based on **start** only.
        """
        return self.isostartdate < other.isostartdate

    def __rshift__(self, other):
        """
        Overloaded to see if one event leads to the other.

        True when `other` is the same lesson (teacher, set, location,
        subject) on the same calendar day and starts later than `self`.
        """
        return (self.teacher == other.teacher
                and self.set == other.set
                and self.isostartdate.date() == other.isostartdate.date()
                and self.isostartdate < other.isostartdate
                and self.location == other.location
                and self.subject == other.subject)

    # __str__ and __repr__ for debug only.
    def __str__(self):
        return f"{self.subject} @ {self.location} w/ {self.teacher} || {self.isostartdate} --> {self.isoenddate}"

    def __repr__(self):
        return f"{type(self).__name__}({self.uid!r}, {str(self)!r})"
class Person:
    """
    Class to represent a person. Contains
    necessary cookies to access Greyhound
    (the cookies are sent with every planner request to
    charterhouse.fireflycloud.net).
    Also stores event data once acquired.
    """

    def __init__(self, personName: str, sessionA: str = "", sessionB: str = "", ASPID: str = ""):
        """
        Args:
            personName (str): Display name; also used for the calendar name
                and output file name by the main programme.
            sessionA (str, optional): Value of the SessionSecureA cookie.
            sessionB (str, optional): Value of the SessionSecureB cookie.
            ASPID (str, optional): Value of the ASP.NET_SessionId cookie.
        """
        self.name = personName
        self.cookies = {
            "SessionSecureA": sessionA,
            "SessionSecureB": sessionB,
            "ASP.NET_SessionId": ASPID,
            "Prelogin": "https://charterhouse.fireflycloud.net/"
        }
        self.events = []

    @staticmethod
    def _extractEvents(page: str, pattern: str, label: str = "page") -> List[dict]:
        """
        Pull the 'events' list out of the planner JSON embedded in a page.

        Args:
            page (str): Decoded HTML of a planner page.
            pattern (str): Regex whose first group captures the JSON blob.
            label (str, optional): Name of the page, used in the warning only.
        Returns:
            List[dict]: The raw event entries, or an empty list when the
            pattern does not match anywhere in the page.
        """
        matches = findall(pattern, page)
        if not matches:
            print(f"No Data Extracted from {label}")
            # Previously an empty list was handed to json.loads(), which
            # raised TypeError; an empty result is what was intended.
            return []
        return json.loads(matches[0])['events']

    def getDataFromPage(self, date: datetime) -> List[dict]:
        """
        Gets event data from the planner week page for a given date.
        Args:
            date (datetime): When to get event data from.
        Returns:
            List[dict]: Raw event entries as found in the page's JSON
            (not yet converted to Hash objects). Empty on timeout or when
            no planner data could be extracted.
        """
        # date.isoformat() yields the zero-padded YYYY-MM-DD the URL expects.
        stamp = date.date().isoformat()
        url = f"https://charterhouse.fireflycloud.net/planner/week/{stamp}"
        try:
            response = requests.get(url, cookies = self.cookies, timeout = TIMEOUT)
        except requests.exceptions.Timeout:
            # requests raises its own Timeout class, which is not a subclass
            # of the builtin TimeoutError, so the builtin never matched.
            print(f"Timeout Error: {url}")
            print(f"While processing {self.name}")
            return []
        return self._extractEvents(
            response.content.decode("utf-8"),
            EXPR,
            f".../planner/week/{stamp}",
        )

    def getData(self, date: datetime) -> List[dict]:
        """
        Gets event data from a given date and the next week.
        Args:
            date (datetime): When to get event data from.
        Returns:
            List[dict]: Raw event entries for the week containing `date`
            followed by those for the week after.
        """
        return self.getDataFromPage(date) + self.getDataFromPage(date + timedelta(weeks=1))
## Function Definitions | |
def initCalendar(calName):
    """
    Create an empty calendar carrying the standard header properties.

    Args:
        calName: Value stored in the X-WR-CALNAME property.
    Returns:
        Calendar: A new calendar with prodid, version, calscale,
        X-WR-TIMEZONE and X-WR-CALNAME set, in that order.
    """
    headerProperties = (
        ('prodid', '-//Lancelot Liu//calendarAbuse 2.0.1//EN'),
        ('version', '2.0'),
        ("calscale", "GREGORIAN"),
        ('X-WR-TIMEZONE', 'Europe/London'),
        ('X-WR-CALNAME', calName),
    )
    newCalendar = Calendar()
    for propName, propValue in headerProperties:
        newCalendar.add(propName, propValue)
    return newCalendar
## Setup
# Everyone whose timetable gets exported; filled in by the login loop below.
PERSONS = []

## Main Programme
if __name__ == "__main__":
    # Title
    os.system('cls' if os.name == 'nt' else 'clear')
    print(f"@{'-' * (WIDTH - 2)}@")
    print(f"|{' ' * (WIDTH - 2)}|")
    print(f"|{'|calendarAbuse v2.0.1|'.center(WIDTH - 2)}|")
    print(f"|{'Lancelot Liu'.center(WIDTH - 2)}|")
    print(f"|{'lancylot2004@proton.me'.center(WIDTH - 2)}|")
    print(f"|{' ' * (WIDTH - 2)}|")
    print(f"@{'-' * (WIDTH - 2)}@")
    print("")
    print("If more than one person is added, the calendar")
    print("will be split into multiple files, including")
    print("a shared file which will contain events common")
    print("to all users.")
    print("")

    # Asks for users
    print("Adding New Login...")
    while True:
        name = str(input("Name: "))
        print("Input the 8-character school login, e.g., liu8lh01")
        email = str(input("School Login: ")) + "@charterhouse.org.uk"
        passwd = getpass()

        # Setup progress bar
        pBar = tqdm(total = 5, leave = True)
        driver = None
        try:
            # Setup driver (S1)
            pBar.set_description("Setting up driver...")
            options = webdriver.FirefoxOptions()
            # The `options.headless` setter is gone from recent Selenium 4
            # releases; the command-line flag works on old and new versions.
            options.add_argument("-headless")
            driver = webdriver.Firefox(options = options)
            wait = WebDriverWait(driver, TIMEOUT)

            # Navigate to Microsoft Login (S2)
            pBar.update(1)
            pBar.set_description("Navigating to login...")
            driver.get(ENTRY_POINT)

            # Input Email and Submit (S3)
            pBar.update(1)
            pBar.set_description("Inputting email...")
            wait.until(EC.visibility_of_element_located((By.XPATH, "//input[@type='email']")))
            driver.find_element(By.XPATH, "//input[@type='email']").send_keys(email)
            driver.find_element(By.XPATH, "//input[@type='submit']").click()
            sleep(4)
            try:
                driver.find_element(By.ID, "usernameError")
            except NoSuchElementException:
                pass
            else:
                # Close the bar before printing so the message is not drawn
                # over it; the `finally` below closes it again harmlessly and
                # shuts the browser down before the next attempt.
                pBar.close()
                print("LMAO, Microsoft says you don't exist, let's try again... \n")
                continue

            # Input Password and Submit (S4)
            pBar.update(1)
            pBar.set_description("Inputting password...")
            wait.until(EC.visibility_of_element_located((By.XPATH, "//input[@type='password']")))
            driver.find_element(By.XPATH, "//input[@type='password']").send_keys(passwd)
            driver.find_element(By.XPATH, "//input[@type='submit']").click()

            # Wait and click Yes for remembering logins. (S5)
            # Note that cookies are deleted automatically, so this is not a security risk.
            pBar.update(1)
            pBar.set_description("Waiting for login...")
            wait.until(EC.visibility_of_element_located((By.XPATH, "//input[@type='submit']")))
            try:
                driver.find_element(By.ID, "passwordError")
            except NoSuchElementException:
                pass
            else:
                pBar.close()
                print("Forgot your password ehh, shame... Let's try again... \n")
                continue
            driver.find_element(By.XPATH, "//input[@type='submit']").click()
            wait.until(EC.url_contains("charterhouse.fireflycloud.net"))

            PERSONS.append(
                Person(
                    name,
                    driver.get_cookie("SessionSecureA")["value"],
                    driver.get_cookie("SessionSecureB")["value"],
                    driver.get_cookie("ASP.NET_SessionId")["value"]
                )
            )
            pBar.update(1)
        finally:
            # Runs on success, on `continue`, and on any exception, so a
            # headless Firefox process is never left behind.
            pBar.close()
            if driver is not None:
                driver.quit()

        continueBool = str(input("Add another person? (Y/N): "))
        # startswith() tolerates an empty answer, where indexing [0] raised
        # IndexError. Anything not beginning with "n" adds another person.
        if continueBool.strip().lower().startswith("n"):
            break

    # Init all persons with their events
    for person in PERSONS:
        person.events = [Hash(event) for event in person.getData(START_DATE)]

    # Dedupe all events in non-primary persons
    if len(PERSONS) > 1:
        PERSONS.insert(0, Person("Shared"))
        # PERSONS[0] is shared, PERSONS[1] is primary
        # Shared = events common to ALL users, as the banner promises. With
        # two people this is the same as before; with more it no longer
        # collects pairwise overlaps (and their duplicates).
        sharedEvents = set(PERSONS[1].events)
        for person in PERSONS[2:]:
            sharedEvents &= set(person.events)
        PERSONS[0].events = sorted(sharedEvents)
        for person in PERSONS[1:]:
            person.events = sorted(
                [event for event in person.events if event not in PERSONS[0].events]
            )

    # Write to individual calendars
    for person in PERSONS:
        calendar = initCalendar(person.name)

        # The merge below only looks at neighbours, so the list must be in
        # start order; a single user's list was previously never sorted.
        person.events.sort()

        # Amalgamate all double hashes (in theory all consecutive hashes)
        # Walk backwards so popping an entry never shifts an unvisited index.
        for index in reversed(range(1, len(person.events))):
            if person.events[index - 1] >> person.events[index]:
                person.events[index - 1].isoenddate = person.events[index].isoenddate
                person.events.pop(index)

        for eventObj in person.events:
            event = Event()
            event.add('summary', eventObj.subject)
            event.add('dtstart', eventObj.isostartdate)
            event.add('dtend', eventObj.isoenddate)
            event['uid'] = eventObj.uid
            event['transp'] = "OPAQUE"
            event.add('location', eventObj.location)
            event.add('description', eventObj.teacher + '\n' + eventObj.set)
            # Two weeks are scraped, so each event repeats fortnightly
            # until END_DATE.
            event.add('rrule', {'freq': 'weekly', 'interval': 2, 'until': END_DATE})
            calendar.add_component(event)

        with open(os.path.expanduser(f"~/Desktop/{person.name}.ics"), 'wb') as file:
            file.write(calendar.to_ical())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment