Skip to content

Instantly share code, notes, and snippets.

@yoavst
Last active February 12, 2019 12:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save yoavst/923a4e50638c9af2b802eea7f1161f21 to your computer and use it in GitHub Desktop.
Save yoavst/923a4e50638c9af2b802eea7f1161f21 to your computer and use it in GitHub Desktop.
Download tests from TAU tests storage
"""
Download tests from the tests bank, run with -h (or --help) for parameters
Dependencies:
pip install pyquery
pip install requests
Usage: python3 Downloader.py -username USERNAME -id USER_ID -password PASS_IN_BASE64 -faculty FACULTY -department DEPARTMENT -filters FILTERS_FILE.txt
GUI usage: python3 Downloader.py gui
Use it only for downloading tests for self-usage. Do not use it for piracy.
Do Follow the site's terms of service: https://tinyurl.com/ya3qjbbn
TAU-Downloader Copyright (C) 2018 Yoav Sternberg
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
import json
import string
import os
import requests
import base64
import sys
import argparse
import logging
from pyquery import PyQuery as pq
from collections import namedtuple
import urllib3
# Several requests in this script are sent with verify=False; silence the
# InsecureRequestWarning that urllib3 would otherwise emit for them.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
# Browser-like headers passed along with several of the HTTP requests below.
BASE_HEADERS = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0",
"Accept": "application/json, text/javascript, */*; q=0.01",
}
# Download directory used when -dir is not given on the command line.
DEFAULT_DOWNLOAD_DIR = "Downloads"
# Text printed verbatim by the "-list" command-line option: <option> elements
# whose "value" attribute is the numeric id to pass as -faculty / -department.
# Only the departments of the Exact Science faculty are listed.
LIST_OF_IDS = """
Options for faculty:
<option value="all">בחר</option>
<option value="3294">ביהס פורטר ללימודי סביבה</option>
<option value="2874">רפואה ומקצועות הבריאות</option>
<option value="32">מדעי החברה</option>
<option value="229" >מדעים מדויקים</option>
<option value="62">ביהס לעבודה סוציאלית</option>
<option value="63">היחידה ללימודי שפות</option>
<option value="60">משפטים</option>
<option value="61">ביהס לחינוך</option>
<option value="52">אמנויות</option>
<option value="53">מדעי החיים</option>
<option value="55">מדעי הרוח</option>
<option value="44">ניהול</option>
<option value="42">הנדסה</option>
Options for department for Exact Science:
<option value="all">בחר</option>
<option value="2539">גיאופיזיקה ומדעים פלנטריים</option>
<option value="3307">מתמטיקה וסטטיסטיקה</option>
<option value="3303">פיזיקה</option>
<option value="289">מדעי המחשב</option>
<option value="231">כימיה</option>
<option value="230">פיזיקה ואסטרונומיה</option>
"""
def flatten(listOfLists):
    """Concatenate the items of every inner iterable into one flat list.

    :param listOfLists: an iterable whose elements are themselves iterable.
    :return: a new list holding the inner items in their original order.
    """
    return [item for inner in listOfLists for item in inner]
def parse_command_line_args():
    """Parse ``sys.argv`` and return the argparse namespace for a download run.

    When the only argument is ``-list`` or ``--list`` the table of faculty and
    department ids is printed and the process exits with status 0. This has to
    happen before argparse runs, because the login/search arguments are
    declared as required and argparse would reject a bare ``--list``.

    :return: namespace with ``list``, ``verbose``, ``dir``, ``username``,
        ``id``, ``password``, ``faculty``, ``department`` and ``filters``.
    """
    if len(sys.argv) == 2 and sys.argv[1] in ('-list', '--list'):
        try:
            print(LIST_OF_IDS)
        except UnicodeEncodeError:
            # The console encoding cannot represent the Hebrew names.
            print("Bad encoding, see https://pastebin.com/raw/sGGkZnMe for list")
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive interpreter and is not always available.
        sys.exit(0)
    parser = argparse.ArgumentParser()
    # A flag, not a value-taking option: it is only ever used on its own.
    parser.add_argument("--list", action="store_true",
                        help="Show list of ids for faculties and departments")
    # parser.add_argument("gui", help="Open the experimental GUI")
    parser.add_argument("--verbose", "-v", action="store_true", help="Increase log level for the program")
    parser.add_argument("-dir", help="Directory for storing downloaded files", default=DEFAULT_DOWNLOAD_DIR)
    requiredNamed = parser.add_argument_group('required named arguments')
    requiredNamed.add_argument("-username", help="TAU username for login", type=str, required=True)
    requiredNamed.add_argument("-id", help="TAU user identity number for login", type=str, required=True)
    requiredNamed.add_argument("-password", help="TAU password in BASE64 format for login", type=str, required=True)
    requiredNamed.add_argument("-faculty", help="Faculty id for search", type=int, required=True)
    requiredNamed.add_argument("-department", help="Department id for search", type=int, required=True)
    requiredNamed.add_argument("-filters", help="path for file with filters each in different line", type=str,
                               required=True)
    return parser.parse_args()
def get_filters(args):
    """Read the course-name filters, one per line, from ``args.filters``.

    Line endings are removed. Blank or whitespace-only lines are skipped,
    because an empty filter is a substring of every course title and would
    silently select all courses. The file is decoded as ``utf-8-sig`` so a
    byte-order mark written by some editors does not become part of the
    first filter.

    :param args: object whose ``filters`` attribute is the path of the file.
    :return: list of filter strings in file order.
    """
    with open(args.filters, 'r', encoding='utf-8-sig') as f:
        lines = [line.rstrip('\r\n') for line in f]
    return [line for line in lines if line.strip()]
def connect(username, id_number, password):
    """Log in to the tests store and return the authenticated session.

    :param username: value sent as ``student_username``.
    :param id_number: value sent as ``student_id``.
    :param password: value sent as ``student_password`` (the command line
        documents it as BASE64 encoded; it is forwarded unchanged).
    :return: the ``requests.Session`` when the JSON reply has a truthy
        ``isLogedIn`` field, otherwise ``None``.
    """
    session = requests.Session()
    # Build the merged dict explicitly: dict.update() returns None, so
    # passing its result as ``headers`` would drop every header.
    headers = dict(BASE_HEADERS)
    headers["Content-Type"] = "application/x-www-form-urlencoded"
    request = session.post("https://store.student.co.il/ajax/student-login",
                           data={
                               "student_id": id_number,
                               "student_username": username,
                               "student_password": password
                           }, headers=headers,
                           verify=False)
    logging.info("Login request: " + str(request))
    data = request.json()
    logging.info("Login request content: " + str(data))
    # 'isLogedIn' (sic) is the key name as read from the server reply.
    return session if data['isLogedIn'] else None
def get_build_id(session):
    """Fetch the tests search page and return its ``form_build_id`` value.

    :param session: session object used to issue the GET request.
    :return: the ``value`` attribute of the ``form_build_id`` input found
        inside the ``#ajax_enabled_select_tests`` element.
    """
    response = session.get('https://store.student.co.il/en?dest_form=tests_popup_wrapper',
                           headers=dict(BASE_HEADERS))
    logging.info("Build ID request: " + str(response))
    search_form = pq(response.text)('#ajax_enabled_select_tests')
    build_id_input = search_form('input[name=form_build_id]')
    return build_id_input.attr('value')
def _request_course(session, build_id, faculty, department):
request = session.post('https://store.student.co.il/en/system/ajax', data={
"faculty": faculty if faculty else "all",
"department": department if department else "all",
"course": "all",
"text_type": "tests",
"form_build_id": build_id,
"form_id": "customization_text_popup_search_form_tests",
"_triggering_element_name": "department"
})
return request.json()
def _parse_courses(content):
    """Extract the course options from a search-form reply.

    :param content: decoded reply; the HTML fragment is read from
        ``content[1]["data"]``.
    :return: list of ``(option text, option value)`` tuples, one for each
        ``<option>`` under the element named ``course``.
    """
    fragment = pq(content[1]["data"])
    options = fragment('[name=course]')('option')
    return [(option.text, option.attrib['value']) for option in options]
def get_courses(session, faculty, department):
    """Return the ``(name, id)`` course list for a faculty/department.

    :param session: logged-in session used for all requests.
    :param faculty: faculty id.
    :param department: department id, or a falsy value to list the whole
        faculty.
    :return: whatever ``_parse_courses`` extracts from the last reply.
    """
    build_id = get_build_id(session)
    # A department cannot be requested directly: the faculty has to be
    # requested first, so this request is always sent.
    reply = _request_course(session, build_id, faculty, None)
    if department:
        reply = _request_course(session, build_id, faculty, department)
    return _parse_courses(reply)
def get_tests(courses, faculty, department):
    """Collect the tests of every given course into a single list.

    :param courses: iterable of course ids.
    :param faculty: faculty id forwarded to ``get_tests_for_course``.
    :param department: department id forwarded to ``get_tests_for_course``.
    :return: one flat list with the tests of all courses, in course order.
    """
    all_tests = []
    for course_id in courses:
        all_tests.extend(get_tests_for_course(course_id, faculty, department))
    return all_tests
def test_url(course, faculty, department):
    """Build the URL of the tests listing page of a course.

    :param course: course id placed in the last variable path segment.
    :param faculty: faculty id; a falsy value becomes ``"all"``.
    :param department: department id; a falsy value becomes ``"all"``.
    :return: the listing URL as a string.
    """
    faculty = faculty if faculty else "all"
    department = department if department else "all"
    return "https://store.student.co.il/en/text/tests/%s/%s/%s/all" % (faculty, department, course)


# The name matches pytest's default ``test_*`` pattern; mark it so that a test
# module importing this helper does not have it collected as a test case.
test_url.__test__ = False
# One row of a tests listing table: the text of its first five cells plus the
# href of the link found in the sixth cell.
TestInfo = namedtuple('TestInfo', ['name', 'lecturer', 'year', 'semester', 'term', 'link'])
def get_tests_for_course(course, faculty, department):
    """Download the listing page(s) of a course and return its tests.

    The first page is always parsed. If it contains a ``.pager`` element,
    every link inside its ``.pager-item`` elements is fetched and parsed too.

    :param course: course id.
    :param faculty: faculty id forwarded to ``test_url``.
    :param department: department id forwarded to ``test_url``.
    :return: list of the rows parsed by ``_parse_links_page`` from all pages.
    """
    first_page = requests.get(test_url(course, faculty, department), headers=BASE_HEADERS, verify=False)
    document = pq(first_page.text)
    tests = _parse_links_page(document)
    pager = document('.pager')
    if not len(pager):
        return tests
    for pager_item in pager('.pager-item'):
        for anchor in pq(pager_item)('a'):
            page_url = "https://store.student.co.il" + anchor.attrib['href']
            page = requests.get(page_url, headers=BASE_HEADERS, verify=False)
            tests.extend(_parse_links_page(pq(page.text)))
    return tests
def or_none(text):
    """Return ``text`` stripped of surrounding whitespace, or ``None``.

    :param text: a string, or any falsy value (``None``, ``""``).
    :return: ``None`` when ``text`` is falsy, otherwise ``text.strip()``.
    """
    return text.strip() if text else None
def _parse_links_page(html):
    """Parse one listing page into a list of ``TestInfo`` rows.

    :param html: PyQuery document of the listing page.
    :return: an empty list when the page has no ``.views-table`` element;
        otherwise one ``TestInfo`` per table-body row containing a link.
    """
    table = html('.views-table')
    if not len(table):
        return []
    # Rows without any <a> element are skipped.
    rows_with_link = (row for row in table('tbody')('tr') if len(pq(row)('a')))
    return [
        TestInfo(
            row[0].text.strip(),
            or_none(row[1].text),
            or_none(row[2].text),
            or_none(row[3].text),
            or_none(row[4].text),
            pq(row[5])('a').attr('href'),
        )
        for row in rows_with_link
    ]
def format_filename(proposed_filename):
    """Reduce a proposed file name to a restricted character set.

    Only ASCII letters, digits, the Hebrew letters listed below, space and
    the characters ``-_.()`` are kept; every remaining space is then turned
    into an underscore.

    :param proposed_filename: the text to turn into a file name.
    :return: the filtered name.
    """
    valid_chars = "-_.() %s%s%s" % (string.ascii_letters, string.digits, "פםןוטארקףךלחיעכגדשץתצמנהבסז")
    kept = [char for char in proposed_filename if char in valid_chars]
    return ''.join(kept).replace(' ', '_')
# Experimental
def gui():
    """Show a small Tk form that collects the download parameters.

    Pressing "Download" asks for a target directory, writes the filters text
    to a temporary file inside it, and calls ``main`` with an argument object
    built from the form fields. Returns when the Tk main loop ends.
    """
    import tkinter
    import tkinter.filedialog

    def download():
        """Run one download with the values currently in the form."""
        # global faculty_entry, department_entry, username_entry, id_entry, password_entry, filters_entry
        directory = tkinter.filedialog.askdirectory()
        # A cancelled dialog yields an empty value (an empty string, or an
        # empty tuple on some Tk builds); either way there is nothing to do.
        if not directory:
            return
        temp = os.path.join(directory, '__temp_filters_file.txt')
        with open(temp, 'wb') as f:
            f.write(filters_entry.get("1.0", tkinter.END).encode('utf-8'))
        args = {
            'filters': temp,
            'dir': directory,
            'username': username_entry.get(),
            'id': id_entry.get(),
            # main() receives the password base64-encoded, as on the CLI.
            'password': base64.b64encode(password_entry.get().encode('utf-8')).decode('utf-8'),
            'department': department_entry.get(),
            'faculty': faculty_entry.get(),
            'verbose': False
        }
        try:
            main(namedtuple('Args', args.keys())(*args.values()))
        finally:
            # main() may raise or call sys.exit(); never leave the temporary
            # filters file behind in the user's directory.
            os.remove(temp)

    master = tkinter.Tk()
    master.title('TAU tests bank downloader')
    tkinter.Label(master, text="Faculty").grid(row=0)
    tkinter.Label(master, text="Department").grid(row=1)
    tkinter.Label(master, text="Username").grid(row=2, pady=(20, 0))
    tkinter.Label(master, text="Id").grid(row=3)
    tkinter.Label(master, text="Password").grid(row=4)
    tkinter.Label(master, text="Filters").grid(row=5, pady=(20, 0))
    faculty_entry = tkinter.Entry(master)
    faculty_entry.grid(row=0, column=1)
    department_entry = tkinter.Entry(master)
    department_entry.grid(row=1, column=1)
    username_entry = tkinter.Entry(master)
    username_entry.grid(row=2, column=1, pady=(20, 0))
    id_entry = tkinter.Entry(master)
    id_entry.grid(row=3, column=1)
    password_entry = tkinter.Entry(master, show="*")
    password_entry.grid(row=4, column=1)
    filters_entry = tkinter.Text(width=40, height=5)
    filters_entry.tag_configure('tag-right', justify='right')
    filters_entry.grid(row=5, column=1, pady=(20, 0))
    tkinter.Button(master, text="Download", command=download).grid(row=6)
    tkinter.mainloop()
def main(args=None):
    """Run a full download: log in, pick courses, list tests, fetch files.

    :param args: object with the attributes ``filters``, ``dir``,
        ``username``, ``id``, ``password``, ``faculty``, ``department`` and
        ``verbose``. When omitted, the command line is parsed instead; a
        single ``gui`` argument opens the GUI and returns.

    Side effects: writes ``urls.json`` in the current directory, creates
    ``args.dir`` if needed and stores one ``.pdf`` file per test in it.
    Exits with status 1 when no filters are available or the login fails.
    """
    if not args:
        if len(sys.argv) == 2 and sys.argv[1] == 'gui':
            gui()
            return
        args = parse_command_line_args()
    filters = get_filters(args)
    if not filters:
        print("error: no filters found in the filters file. Abort.")
        sys.exit(1)
    print("Applying for", len(filters), "filters...")
    if args.verbose:
        logging.basicConfig(level=logging.INFO)
    session = connect(args.username, args.id, args.password)
    if not session:
        print("Error: Login info is incorrect")
        sys.exit(1)
    courses = get_courses(session, args.faculty, args.department)
    print("There are", len(courses), "courses on list")
    # Keep the id of every course whose title contains at least one filter.
    # A course without a title (None) cannot match and is skipped.
    filtered_courses = [
        course_id
        for title, course_id in courses
        if title and any(keyword in title for keyword in filters)
    ]
    print("But only", len(filtered_courses), "after the filter")
    tests = get_tests(filtered_courses, args.faculty, args.department)
    print(len(tests), "tests found, starts downloading them...")
    with open('urls.json', 'w') as file:
        # noinspection PyProtectedMember
        json.dump([test._asdict() for test in tests], file)
    # Loop-invariant: make sure the target directory exists once, up front.
    os.makedirs(args.dir, exist_ok=True)
    for i, test in enumerate(tests):
        r = requests.get(test.link, verify=False)
        # Any field except the name may be None when its table cell is
        # empty; treat it as an empty string instead of failing on `+`.
        description = " ".join(
            part or "" for part in (test.name, test.lecturer, test.year, test.semester, test.term))
        # The running index keeps names unique when descriptions collide.
        filename = os.path.join(args.dir, format_filename(description) + "_" + str(i) + '.pdf')
        with open(filename.encode('utf-8'), 'wb') as f:
            f.write(r.content)
    print('Done! have a good night.')
# Run the downloader only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment