Skip to content

Instantly share code, notes, and snippets.

@lancylot2004
Last active December 4, 2023 11:37
Show Gist options
  • Save lancylot2004/5ab7acda68e127cee9aec1643b99ca63 to your computer and use it in GitHub Desktop.
Save lancylot2004/5ab7acda68e127cee9aec1643b99ca63 to your computer and use it in GitHub Desktop.
Charterhouse Fireflycloud Calendar Scraper
# -*- coding: utf-8 -*-
"""
Script to generate iCal (.ics) files of term-time
hashes (timetable events) for Charterhouse School.
Author: Lancelot Liu
License: MIT
Email: lancylot2004@proton.me
"""
# requirements
# icalendar python-dateutil requests selenium tqdm
from datetime import datetime, timedelta
import os
import json
from getpass import getpass
from re import findall
from time import sleep
from tqdm import tqdm
from typing import List
import requests
from dateutil.parser import parse
from icalendar import Calendar, Event
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# Date whose planner week (plus the following week) is scraped, and the date
# after which the generated recurring events stop repeating.
START_DATE = datetime(2023, 4, 18)
END_DATE = datetime(2023, 5, 27)
# Captures the JSON blob the planner page embeds in an inline <script> tag.
EXPR = r"<script>var PLANNER_INITIAL_STATUS = (.*?)<\/script>"
# Width used to draw the title banner. os.get_terminal_size() raises OSError
# when stdout is not attached to a terminal (piped output, cron, test runners),
# so fall back to a conventional 80 columns instead of crashing on import.
try:
    WIDTH = os.get_terminal_size().columns
except OSError:
    WIDTH = 80
# Seconds to wait for HTTP responses and for browser elements to appear.
TIMEOUT = 20
ENTRY_POINT = "https://charterhouse.fireflycloud.net/login/login.aspx?prelogin=https%3a%2f%2fcharterhouse.fireflycloud.net%2f&kr=MSAADKeyRing"
# Explicit check rather than `assert`, which is stripped when running with -O.
if START_DATE >= END_DATE:
    raise ValueError("START_DATE must be earlier than END_DATE")
## Class Definitions
class Hash:
    """
    Class to represent an event. Contains
    all the necessary information to
    generate an iCal event.

    Two events are equal when teacher, set, start, end, location and
    subject all match; the uid is deliberately ignored so that the same
    event seen with different guids still compares (and hashes) equal.
    """

    def __init__(self, eventJSON):
        """
        Args:
            eventJSON (dict): One event entry; must provide the keys
                guid, chairperson, description, isostartdate, isoenddate,
                location and subject.
        """
        self.uid = eventJSON['guid']
        self.teacher = eventJSON['chairperson']
        self.set = eventJSON['description']
        self.isostartdate = parse(eventJSON['isostartdate'] + '.000000')
        self.isoenddate = parse(eventJSON['isoenddate'] + '.000000')
        self.location = eventJSON['location']
        self.subject = eventJSON['subject']

    def _identity(self):
        """Return the tuple of fields that define equality and hashing."""
        return (self.teacher, self.set, self.isostartdate,
                self.isoenddate, self.location, self.subject)

    def __eq__(self, other):
        if not isinstance(other, Hash):
            # Let Python fall back to its default comparison for foreign types.
            return NotImplemented
        return self._identity() == other._identity()

    def __hash__(self):
        # Must be built from exactly the fields __eq__ compares: including
        # the uid here would let equal events land in different set buckets.
        return hash(self._identity())

    def __lt__(self, other):
        """
        Overloaded to compare based on **start** only.
        """
        return self.isostartdate < other.isostartdate

    def __rshift__(self, other):
        """
        Overloaded to see if one event leads to the other.

        True when both events share teacher, set, location and subject,
        start on the same calendar day, and this one starts strictly first.
        """
        return (self.teacher == other.teacher
                and self.set == other.set
                and self.isostartdate.date() == other.isostartdate.date()
                and self.isostartdate < other.isostartdate
                and self.location == other.location
                and self.subject == other.subject)

    # __str__ and __repr__ for debug only.
    def __str__(self):
        return f"{self.subject} @ {self.location} w/ {self.teacher} || {self.isostartdate} --> {self.isoenddate}"

    def __repr__(self):
        return f"<Hash {self}>"
class Person:
    """
    Class to represent a person. Contains
    the cookies sent with planner requests to
    charterhouse.fireflycloud.net.
    Also stores event data once acquired.
    """

    def __init__(self, personName: str, sessionA: str = "", sessionB: str = "", ASPID: str = ""):
        """
        Args:
            personName (str): Display name; also used as the calendar name.
            sessionA (str, optional): Value of the SessionSecureA cookie. Defaults to "".
            sessionB (str, optional): Value of the SessionSecureB cookie. Defaults to "".
            ASPID (str, optional): Value of the ASP.NET_SessionId cookie. Defaults to "".
        """
        self.name = personName
        self.cookies = {
            "SessionSecureA": sessionA,
            "SessionSecureB": sessionB,
            "ASP.NET_SessionId": ASPID,
            "Prelogin": "https://charterhouse.fireflycloud.net/"
        }
        self.events = []

    def getDataFromPage(self, date: datetime) -> List[dict]:
        """
        Gets raw event data from the planner week page of a given date.
        Args:
            date (datetime): Day identifying the planner week to request.
        Returns:
            List[dict]: Raw event entries decoded from the page's embedded
                JSON; empty if the request timed out or no data was found.
        """
        isoDay = date.date().isoformat()
        url = f"https://charterhouse.fireflycloud.net/planner/week/{isoDay}"
        try:
            response = requests.get(url, cookies=self.cookies, timeout=TIMEOUT)
        except (requests.exceptions.Timeout, TimeoutError):
            # requests signals timeouts with its own exception class, which
            # is not a subclass of the builtin TimeoutError.
            print(f"Timeout Error: {url}")
            print(f"While processing {self.name}")
            return []
        matches = findall(EXPR, response.content.decode("utf-8"))
        if not matches:
            # Nothing to decode; return early instead of feeding a
            # non-string to json.loads.
            print(f"No Data Extracted from .../planner/week/{isoDay}")
            return []
        return json.loads(matches[0])['events']

    def getData(self, date: datetime) -> List[dict]:
        """
        Gets raw event data from a given date's week and the next week.
        Args:
            date (datetime): Day identifying the first planner week.
        Returns:
            List[dict]: Raw event entries of both weeks, first week first.
        """
        return self.getDataFromPage(date) + self.getDataFromPage(date + timedelta(weeks=1))
## Function Definitions
def initCalendar(calName):
    """
    Creates an empty calendar carrying the standard header properties.
    Args:
        calName: Value stored in the X-WR-CALNAME property.
    Returns:
        Calendar: New calendar with prodid, version, calscale,
            X-WR-TIMEZONE and X-WR-CALNAME set, in that order.
    """
    headerProps = (
        ('prodid', '-//Lancelot Liu//calendarAbuse 2.0.1//EN'),
        ('version', '2.0'),
        ('calscale', 'GREGORIAN'),
        ('X-WR-TIMEZONE', 'Europe/London'),
        ('X-WR-CALNAME', calName),
    )
    newCalendar = Calendar()
    for propName, propValue in headerProps:
        newCalendar.add(propName, propValue)
    return newCalendar
## Setup
PERSONS = []


def _fetchSessionCookies(email, passwd):
    """
    Signs in through the Microsoft login page with headless Firefox and
    collects the session cookies set once the browser reaches Firefly.
    Args:
        email (str): Address typed into the e-mail field.
        passwd (str): Password typed into the password field.
    Returns:
        tuple | None: Values of the SessionSecureA, SessionSecureB and
            ASP.NET_SessionId cookies, in that order, or None when the
            login page reported a username or password error.
    Raises:
        RuntimeError: If the login finished but an expected cookie is missing.
    """
    # Setup progress bar
    pBar = tqdm(total=5, leave=True)
    driver = None
    try:
        # Setup driver (S1)
        pBar.set_description("Setting up driver...")
        options = webdriver.FirefoxOptions()
        # The `headless` property is deprecated/removed in newer Selenium
        # releases; the command-line flag works regardless of version.
        options.add_argument("-headless")
        driver = webdriver.Firefox(options=options)
        wait = WebDriverWait(driver, TIMEOUT)

        # Navigate to Microsoft Login (S2)
        pBar.update(1)
        pBar.set_description("Navigating to login...")
        driver.get(ENTRY_POINT)

        # Input Email and Submit (S3)
        pBar.update(1)
        pBar.set_description("Inputting email...")
        wait.until(EC.visibility_of_element_located((By.XPATH, "//input[@type='email']")))
        driver.find_element("xpath", "//input[@type='email']").send_keys(email)
        driver.find_element("xpath", "//input[@type='submit']").click()
        # Give the page time to show a username error, if there is one.
        sleep(4)
        try:
            driver.find_element(By.ID, "usernameError")
        except NoSuchElementException:
            pass
        else:
            # Close the bar first so the message is not drawn over it.
            pBar.close()
            print("LMAO, Microsoft says you don't exist, let's try again... \n")
            return None

        # Input Password and Submit (S4)
        pBar.update(1)
        pBar.set_description("Inputting password...")
        wait.until(EC.visibility_of_element_located((By.XPATH, "//input[@type='password']")))
        driver.find_element("xpath", "//input[@type='password']").send_keys(passwd)
        driver.find_element("xpath", "//input[@type='submit']").click()

        # Wait and click Yes for remembering logins. (S5)
        # Note that cookies are deleted automatically, so this is not a security risk.
        pBar.update(1)
        pBar.set_description("Waiting for login...")
        wait.until(EC.visibility_of_element_located((By.XPATH, "//input[@type='submit']")))
        try:
            driver.find_element(By.ID, "passwordError")
        except NoSuchElementException:
            pass
        else:
            pBar.close()
            print("Forgot your password ehh, shame... Let's try again... \n")
            return None
        driver.find_element("xpath", "//input[@type='submit']").click()
        wait.until(EC.url_contains("charterhouse.fireflycloud.net"))

        cookieValues = []
        for cookieName in ("SessionSecureA", "SessionSecureB", "ASP.NET_SessionId"):
            cookie = driver.get_cookie(cookieName)
            if cookie is None:
                raise RuntimeError(f"Login finished but cookie {cookieName!r} was not set")
            cookieValues.append(cookie["value"])
        pBar.update(1)
        return tuple(cookieValues)
    finally:
        # Always release the progress bar and the browser, including on
        # rejected credentials and on unexpected errors.
        pBar.close()
        if driver is not None:
            driver.quit()


def _mergeConsecutive(events):
    """
    Sorts events by start and amalgamates double hashes: every event that
    the previously kept event leads into (`>>`) is dropped, and the kept
    event's end is moved to the dropped event's end.
    Args:
        events (list): Hash objects; the kept ones are modified in place.
    Returns:
        list: Remaining events in start order.
    """
    merged = []
    for event in sorted(events):
        if merged and merged[-1] >> event:
            merged[-1].isoenddate = event.isoenddate
        else:
            merged.append(event)
    return merged


def _writeCalendar(person):
    """
    Writes the person's events to ~/Desktop/<name>.ics, each repeating
    every second week until END_DATE.
    Args:
        person (Person): Owner of the events; person.events is replaced
            by the merged, sorted list.
    """
    calendar = initCalendar(person.name)
    person.events = _mergeConsecutive(person.events)
    for eventObj in person.events:
        event = Event()
        event.add('summary', eventObj.subject)
        event.add('dtstart', eventObj.isostartdate)
        event.add('dtend', eventObj.isoenddate)
        event['uid'] = eventObj.uid
        event['transp'] = "OPAQUE"
        event.add('location', eventObj.location)
        event.add('description', eventObj.teacher + '\n' + eventObj.set)
        event.add('rrule', {'freq': 'weekly', 'interval': 2, 'until': END_DATE})
        calendar.add_component(event)
    with open(os.path.expanduser(f"~/Desktop/{person.name}.ics"), 'wb') as file:
        file.write(calendar.to_ical())


## Main Programme
if __name__ == "__main__":
    # Title
    os.system('cls' if os.name == 'nt' else 'clear')
    print(f"@{'-' * (WIDTH - 2)}@")
    print(f"|{' ' * (WIDTH - 2)}|")
    print(f"|{'|calendarAbuse v2.0.1|'.center(WIDTH - 2)}|")
    print(f"|{'Lancelot Liu'.center(WIDTH - 2)}|")
    print(f"|{'lancylot2004@proton.me'.center(WIDTH - 2)}|")
    print(f"|{' ' * (WIDTH - 2)}|")
    print(f"@{'-' * (WIDTH - 2)}@")
    print("")
    print("If more than one person is added, the calendar")
    print("will be split into multiple files, including")
    print("a shared file which will contain events common")
    print("to all users.")
    print("")

    # Asks for users
    print("Adding New Login...")
    while True:
        name = input("Name: ")
        print("Input the 8-character school login, e.g., liu8lh01")
        email = input("School Login: ") + "@charterhouse.org.uk"
        passwd = getpass()

        cookieValues = _fetchSessionCookies(email, passwd)
        if cookieValues is None:
            # Credentials were rejected; ask for the details again.
            continue
        PERSONS.append(Person(name, *cookieValues))

        # An empty answer counts as "yes" instead of crashing on [0].
        continueBool = input("Add another person? (Y/N): ").strip().lower()
        if continueBool.startswith("n"):
            break

    # Init all persons with their events
    for person in PERSONS:
        person.events = [Hash(event) for event in person.getData(START_DATE)]

    # Move events common to all users into a separate "Shared" person
    if len(PERSONS) > 1:
        primary, *others = PERSONS
        shared = Person("Shared")
        # Equality-based membership (not hashing) decides what is shared,
        # and each shared event is recorded only once.
        for event in primary.events:
            if event not in shared.events and all(event in other.events for other in others):
                shared.events.append(event)
        for person in PERSONS:
            person.events = [event for event in person.events if event not in shared.events]
        # PERSONS[0] is shared, PERSONS[1] is primary
        PERSONS.insert(0, shared)

    # Write to individual calendars
    for person in PERSONS:
        _writeCalendar(person)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment