Skip to content

Instantly share code, notes, and snippets.

@yoavst
Last active September 20, 2021 19:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yoavst/de2a2d2e9ade70fa452d1ec84170fe12 to your computer and use it in GitHub Desktop.
Save yoavst/de2a2d2e9ade70fa452d1ec84170fe12 to your computer and use it in GitHub Desktop.
Timetable downloader
# -*- coding: utf-8 -*-
import io
import itertools
import json
from pyquery import PyQuery as pq
from selenium import webdriver
from selenium.webdriver.chrome.webdriver import WebDriver
# Maps the semester label read from a results-table cell to the numeric code
# written to the exported timetable. The labels are the Hebrew strings for
# "summer", "A" and "B"; their position in the tuple is the code (0, 1, 2).
SEMESTER = {
    label: code
    for code, label in enumerate((u"קיץ", u"א'", u"ב'"))
}
# region Utils
def exists(s):
    """Return True when *s* is neither ``None`` nor the empty string."""
    if s is None:
        return False
    return s != ""
def group(seq, sep_lambda):
    """Split *seq* into lists, cutting at every element matching *sep_lambda*.

    Separator elements themselves are dropped. A list is yielded for every
    separator encountered plus one final list for the trailing elements, so
    empty lists are produced for leading, trailing or adjacent separators.
    """
    bucket = []
    for item in seq:
        if not sep_lambda(item):
            bucket.append(item)
            continue
        # Separator reached: hand out what was collected and start afresh.
        yield bucket
        bucket = []
    yield bucket
def flatmap(func, *iterable):
    """Map *func* over the given iterable(s) and chain the results together.

    *func* must return an iterable for each call; the returned iterator
    yields the items of those iterables one after another.
    """
    mapped = map(func, *iterable)
    return itertools.chain.from_iterable(mapped)
def clean_str(s):
    """Collapse every run of whitespace in *s* into a single space.

    Leading and trailing whitespace is removed as well. Falsy input
    (``None`` or an empty string) yields ``None``.
    """
    return ' '.join(s.split()) if s else None
# endregion
def extract_courses_from_group(courses_group):
    """Turn one group of result-table rows into a list of lesson dicts.

    :param courses_group: sequence of row elements belonging to one course
        group. Groups with fewer than 5 rows are ignored. Row 1 is read for
        the course id and name, row 2 for the faculty, and rows 4 up to (but
        not including) the last one are treated as lesson rows.
        NOTE(review): this row layout is taken from the indexing below, not
        from a documented page schema -- confirm against the live page.
    :return: a list with one dict per usable lesson row, with the keys
        ``name``, ``id``, ``faculty``, ``semester`` (numeric code from
        ``SEMESTER``), ``start``, ``end`` (the two sides of the ``-``
        separated hours cell), ``day`` (1-based offset of the cell's first
        character from the Hebrew letter alef), ``room`` (spaces removed),
        ``building``, ``type`` and ``lecturer``.
    :raises KeyError: if a row carries a semester label missing from
        ``SEMESTER``.
    """
    if len(courses_group) < 5:
        return []

    header_cells = pq(courses_group[1]).children()
    course_info = pq(header_cells[0]).contents()
    course_id = course_info[0].strip()
    name = header_cells[1].text
    faculty = pq(pq(courses_group[2]).children()[-1]).text()

    group_courses = []
    last_teacher = None
    for item in courses_group[4:-1]:
        children = pq(item).children()
        # Cells 0..6 are read below, so a usable row needs at least 7 cells
        # (the previous "< 6" guard let 6-cell rows through to an IndexError).
        if len(children) < 7:
            continue
        # Skip rows where any of the data cells (type .. semester) is empty.
        if not all(exists(children[i].text) for i in range(1, 7)):
            continue

        # A blank lecturer cell means "same lecturer as the previous row".
        last_teacher = clean_str(pq(children[0]).text().strip() or last_teacher)
        hours = children[5].text.split('-')
        group_courses.append({
            'name': name,
            'id': course_id,
            'faculty': faculty,
            'semester': SEMESTER[children[6].text.strip()],
            'end': hours[1],
            'start': hours[0],
            'day': ord(children[4].text[0]) - ord(u'א') + 1,
            'room': children[3].text.replace(' ', ''),
            'building': children[2].text,
            'type': children[1].text,
            'lecturer': last_teacher
        })
    return group_courses
def parse_results_page(html):
    """Parse the HTML of one search-results page into a list of lesson dicts.

    The rows of the fourth child table of ``#frmgrid`` (minus the first row)
    are split into groups at every row whose text is empty, and each group is
    passed to ``extract_courses_from_group``.
    """
    def is_separator(element):
        # A row without any text marks the boundary between two groups.
        return len(pq(element).text()) == 0

    document = pq(html)
    results_table = document('#frmgrid > table:nth-child(4)')
    rows = results_table('tr')[1:]
    row_groups = list(group(rows, is_separator))
    return list(flatmap(extract_courses_from_group, row_groups))
def load_courses(driver, year, semester, department_values):
    """
    Run one search on the TAU course-search page and collect every result.

    Opens the search form, selects the requested departments, year and
    semester, submits the search, then parses each results page in turn,
    following the element with id ``next`` until it is no longer present.

    :type driver: WebDriver
    :param year: value matched (as a string) against the options of the
        ``lstYear1`` list.
    :param semester: value matched (as a string) against the ``ckSem``
        inputs.
    :param department_values: dict mapping a department list number ``n``
        (the ``lstDep<n>`` element) to the option value to select in it.
    :return: list of lesson dicts as produced by ``parse_results_page``.
        An empty list is returned when a department option cannot be
        selected, so callers can always treat the result as a list.
    """
    driver.get("https://www.ims.tau.ac.il/tal/kr/Search_P.aspx")

    # Choose departments
    for dep_id, dep_value in department_values.items():
        dep_list = driver.find_element_by_id("lstDep" + str(dep_id))
        for dep_opt in dep_list.find_elements_by_tag_name("option"):
            if dep_opt.get_attribute("value") == dep_value:
                dep_opt.click()
                break
        else:
            print("Fail to select departments for {} -> {}, exiting".format(dep_id, dep_value))
            # Return an empty list rather than None to keep the return type
            # consistent with the success path.
            return []

    # Choose year
    for year_opt in driver.find_element_by_id("lstYear1").find_elements_by_tag_name("option"):
        if year_opt.get_attribute("value") == str(year):
            year_opt.click()
            break

    # Choose semester
    for sem_opt in driver.find_elements_by_name("ckSem"):
        if sem_opt.get_attribute("value") == str(semester):
            sem_opt.click()
            break

    # Start searching
    driver.find_element_by_id("search1").click()

    # Go over all the pages
    results = []
    while True:
        html_source_code = driver.execute_script("return document.body.innerHTML;")
        current_results = parse_results_page(html_source_code)
        results.extend(current_results)

        # Move to next page
        next_page_btn = driver.find_elements_by_id("next")
        if next_page_btn:
            next_page_btn[0].click()
        else:
            break
    return results
def main():
    """Scrape the timetable in three searches and write ``timetable.json``.

    The departments are spread over three searches because several of them
    need different values in the same department list (10, 11 and 12).
    The combined result is written as indented UTF-8 JSON.
    """
    driver = webdriver.Chrome()
    try:
        courses1 = load_courses(driver, 2021, 1, {
            1: '08',
            2: '05',
            3: '10',
            4: '04',
            5: '06-16',
            6: '03',
            7: '14',
            8: '12',
            9: '01',
            10: '11',
            11: '2171',
            12: '1880-1882',
            13: '1843',
            14: '2120'
        })
        courses2 = load_courses(driver, 2021, 1, {
            10: '07',
            11: '2172',
            12: '1883'
        })
        courses3 = load_courses(driver, 2021, 1, {
            10: '15',
        })
    finally:
        # Always close the browser, even when a search raises.
        driver.quit()

    # A failed search may yield None; treat it as "no courses".
    courses = (courses1 or []) + (courses2 or []) + (courses3 or [])

    data = json.dumps(courses, indent=4, ensure_ascii=False)
    # io.open in text mode needs a unicode string. On Python 2 json.dumps may
    # hand back a byte string; on Python 3 it is always text already.
    if not isinstance(data, type(u"")):
        data = data.decode('utf-8')
    with io.open('timetable.json', 'w', encoding='utf-8') as f:
        f.write(data)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment