Skip to content

Instantly share code, notes, and snippets.

@GorgeousOne
Created July 15, 2023 15:40
Show Gist options
  • Save GorgeousOne/89fda2203740d231487b92e80fe22949 to your computer and use it in GitHub Desktop.
Save GorgeousOne/89fda2203740d231487b92e80fe22949 to your computer and use it in GitHub Desktop.
Summaery 2023 calendar event scraper
"""Visit the Summaery 2023 event list and scrape all events into one big iCalendar file.

Update the page URL and the year when reusing this script for a future edition.
"""
from datetime import datetime
import pytz
from icalendar import Calendar, Event
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
def get_event_start_end_place(summaery_event):
    """Return the event's start, end and place read from its "cal_maininfo" list.

    The first list item supplies the start, the second the end and an optional
    third one the place. Start and end are returned as datetimes localized to
    Europe/Berlin; the place is None when the list has no third item.
    """
    event_date = get_event_date(summaery_event)
    main_info = summaery_event.find_element(By.CLASS_NAME, "cal_maininfo")
    entries = main_info.find_elements(By.CSS_SELECTOR, "li")

    # The slices cut a fixed-width prefix and the last four characters off each
    # entry so only the date/time text remains (presumably a label in front and
    # a trailing word behind -- verify against the page markup).
    start_text = " ".join((event_date, entries[0].text[8:-4]))
    end_text = entries[1].text[6:-4]
    # A short end text does not contain a date yet, so the event date is prepended.
    if len(end_text) < 10:
        end_text = " ".join((event_date, end_text))

    place = None
    if len(entries) > 2:
        place = entries[2].text[5:]

    berlin = pytz.timezone('Europe/Berlin')
    parsed_start, parsed_end = (
        datetime.strptime(text, "%d.%m.%Y %H.%M")
        for text in (start_text, end_text)
    )
    return berlin.localize(parsed_start), berlin.localize(parsed_end), place
def get_event_date(summaery_event, year=2023):
    """Build the event's date string from the big date element plus a year.

    Args:
        summaery_event: element of one event; its "month" and "day_of_month"
            child elements supply the month and day texts.
        year: year appended to the date, because the elements read here only
            provide day and month. Defaults to 2023 so existing callers keep
            their behavior; pass another value for a future edition.

    Returns:
        Day and month, each zero-padded to two characters, joined with the
        year by dots (e.g. "07.07.2023" for day "7" and month "7").
    """
    month = summaery_event.find_element(By.CLASS_NAME, "month").text
    day = summaery_event.find_element(By.CLASS_NAME, "day_of_month").text
    return f"{day.zfill(2)}.{month.zfill(2)}.{year}"
def get_event_title_url(summaery_event):
    """Return the text and the "href" of the link inside the event's h2 heading."""
    heading = summaery_event.find_element(By.CSS_SELECTOR, "h2")
    link = heading.find_element(By.CSS_SELECTOR, "a")
    return link.text, link.get_attribute("href")
def get_event_teaser(summaery_event):
    """Return the text of the event's "teasertext" element without its last four characters."""
    teaser_elem = summaery_event.find_element(By.CLASS_NAME, "teasertext")
    teaser_text = teaser_elem.text
    # NOTE(review): the trailing four characters are dropped unconditionally;
    # presumably a fixed suffix on the page -- confirm against the markup.
    return teaser_text[:-4]
def add_event_to_cal(cal, start, end, place, title, url, teaser):
    """Create a calendar event with all the given info and add it to the calendar.

    Args:
        cal: calendar component the new event is added to.
        start: start datetime of the event (written as DTSTART).
        end: end datetime of the event (written as DTEND).
        place: location text; the LOCATION property is skipped when falsy.
        title: event title (written as SUMMARY).
        url: link to more information (written as URL).
        teaser: description text (written as DESCRIPTION).
    """
    import uuid  # local import: only needed here to derive the event UID

    event = Event()
    # RFC 5545 requires UID and DTSTAMP on every VEVENT. Deriving the UID from
    # url and start keeps it identical across runs for the same event.
    uid_source = f"{url}#{start.isoformat()}"
    event.add("uid", str(uuid.uuid5(uuid.NAMESPACE_URL, uid_source)))
    event.add("dtstamp", datetime.now(pytz.utc))
    event.add("summary", title)
    event.add("dtstart", start)
    event.add("dtend", end)
    if place:
        event.add("location", place)
    event.add("url", url)
    event.add("description", teaser)
    cal.add_component(event)
if __name__ == "__main__":
    # Scrape every event listed on the Summaery page into one iCalendar file.
    opts = Options()
    opts.add_argument("user-agent=firefox")
    driver = webdriver.Chrome(options=opts)

    cal = Calendar()
    # PRODID and VERSION are required calendar properties in RFC 5545
    cal.add("prodid", "-//Summaery event scraper//uni-weimar.de//")
    cal.add("version", "2.0")

    try:
        page_url = "https://www.uni-weimar.de/de/universitaet/aktuell/jaehrliche-veranstaltungen/jaehrliche-veranstaltungen-2023/summaery/veranstaltungen/"
        driver.get(page_url)
        # find all events listed on the page
        summaery_events = driver.find_elements(By.CLASS_NAME, "summaeryLiveEvent")
        total = len(summaery_events)
        for i, event in enumerate(summaery_events, start=1):
            add_event_to_cal(
                cal,
                *get_event_start_end_place(event),
                *get_event_title_url(event),
                get_event_teaser(event))
            # progress output: current event number / total number of events
            print(i, "/", total)
    finally:
        # always close the browser, even when scraping fails midway
        driver.quit()

    # write them into one big icalendar file
    with open("summaery.ics", "wb") as f:
        f.write(cal.to_ical())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment