Skip to content

Instantly share code, notes, and snippets.

@soachishti
Created October 2, 2016 14:20
Show Gist options
  • Save soachishti/1ffce3388123205ef1adce605680d08c to your computer and use it in GitHub Desktop.
Save soachishti/1ffce3388123205ef1adce605680d08c to your computer and use it in GitHub Desktop.
Script to download all course resources from http://slate.nu.edu.pk
import requests
from lxml import html
import sys, os, errno
import Queue
from threading import Thread
import getpass
# Ask for the portal credentials up front; the password is read via getpass.
user = raw_input("Username: ")
passwd = getpass.getpass()
print("\n\n")

# Root folder for everything that gets saved, placed in the current directory.
data_folder = "/".join((os.getcwd(), "SlateNUFiles"))
# Filled by collect_file_recursive with [url, target sub-folder] entries.
file_to_download = list()
# Single HTTP session shared by the login request and every later request.
session_requests = requests.session()
# Work queue (last-in, first-out) drained by the download threads.
q = Queue.LifoQueue()
#############################################
def make_sure_path_exists(path):
    """Create directory ``path``, including missing parents, if it is absent.

    An already existing directory is accepted silently, so the function is
    safe to call repeatedly (and from several threads) for the same path.

    Args:
        path: Directory to create.

    Raises:
        OSError: If the directory cannot be created, or if ``path`` already
            exists but is not a directory.
    """
    try:
        os.makedirs(path)
    except OSError as exception:
        # EEXIST is also reported when ``path`` is an existing regular file;
        # only swallow the error when a directory really is in place.
        if exception.errno != errno.EEXIST or not os.path.isdir(path):
            raise
########################################################
def collect_file_recursive(key, value, folder_name = None):
    """Walk a course's resource listing and record every file link found.

    Each file is appended to the module-level ``file_to_download`` list as
    ``[href, key]``; each entry titled "Folder" is descended into recursively.

    Args:
        key: Sub-folder path (relative to the download root) under which
            files found at this level will be stored.
        value: ``[resources_url, course_id]`` pair for the course.
        folder_name: Name of the remote folder to navigate into before
            reading the listing, or ``None`` to read the listing as-is.
    """
    if folder_name is not None:
        # Ask the portal to navigate into the folder. The POST response is
        # not used; the listing is fetched again below (presumably the
        # server keeps the navigation state in the session - confirm).
        # NOTE(review): collectionId is built from the immediate folder name
        # only; verify this is sufficient for folders nested more than one
        # level deep.
        payload = {
            "source" : "0",
            "collectionId" : "/group/" + value[1] + "/" + folder_name + "/",
            "navRoot" : "",
            "criteria" : "title",
            "sakai_action" : "doNavigate",
            "rt_action" : "",
            "selectedItemId": ""
        }
        session_requests.post(
            value[0],
            data = payload,
            headers = dict(referer=value[0]))

    result = session_requests.get(value[0])
    tree = html.fromstring(result.content)
    links = tree.findall(".//td[@class='specialLink']/h4/a[last()]")
    for link in links:
        href = link.get("href")
        if link.get("title") != "Folder":
            # Skip placeholder anchors and anchors without a target.
            if not href or href == "#":
                continue
            print("Collected file: " + href)
            file_to_download.append([href, key])
        else:
            child_name = link.text_content().strip()
            # Build the child's key separately: overwriting ``key`` here
            # would file every later sibling under this sub-folder.
            collect_file_recursive(key + "/" + child_name, value, child_name)
####################################################
def file_download_thread():
    """Worker loop: download queued files until the queue is drained.

    Each queue item is a ``[url, folder]`` pair. The file is fetched through
    the shared session and written to ``data_folder/folder/<last URL
    segment>``. A failed download is reported and skipped so the remaining
    items are still processed. Returns when the queue has no more items.
    """
    while True:
        try:
            # Non-blocking get: with several workers, checking empty() and
            # then calling a blocking get() can hang forever when another
            # worker takes the last item in between.
            value = q.get_nowait()
        except Queue.Empty:
            return
        try:
            url = value[0]
            folder = value[1]
            filename = url.split("/")[-1]
            print("+ Downloading " + filename)
            result = session_requests.get(url)
            # Do not save an HTTP error page as if it were the file.
            result.raise_for_status()
            print("- Downloaded " + filename)
            path = data_folder + "/" + folder
            make_sure_path_exists(path)
            # Binary mode: result.content is raw bytes, and text mode would
            # translate line endings on Windows.
            with open(path + "/" + filename, "wb") as f:
                f.write(result.content)
        except (requests.exceptions.RequestException, EnvironmentError) as error:
            print("! Failed " + url + ": " + str(error))
        finally:
            # Always mark the item as handled so q.join() cannot block on a
            # failed download.
            q.task_done()
####################################################
# Main flow: log in, discover the courses, collect file links, download them
# with a pool of threads, then log out.
payload = {
    "eid": user,
    "pw": passwd,
    "submit": "Login"
}
print("Requesting to login")
# NOTE(review): the login URL is plain HTTP, so the credentials are sent
# unencrypted; switch to https if the portal supports it.
login_url = "http://slate.nu.edu.pk/portal/xlogin"
result = session_requests.post(
    login_url,
    data = payload,
    headers = dict(referer="http://slate.nu.edu.pk/portal")
)
# NOTE(review): a 200 status is treated as a successful login; confirm the
# portal does not also answer 200 when the credentials are rejected.
if result.status_code == 200:
    try:
        tree = html.fromstring(result.content)
        elems = tree.findall(".//ul[@id='siteLinkList']/li/a")
        courses = {}
        for course in elems:
            title = course.get('title')
            # Skip Academic and QEC section
            if title is None or "Academic" in title or "QEC" in title:
                continue
            href = course.get('href')
            if not href:
                # An anchor without a target cannot yield a course id.
                continue
            # Resources URL for course
            # NOTE(review): the "1310" suffix appended to the course id is
            # kept as-is; presumably it selects the resources tool - confirm.
            course_id = href.split("/")[-1]
            href = "http://slate.nu.edu.pk/portal/tool/" + course_id + "1310?panel=Main"
            courses[title] = [href, course_id]

        for key, value in courses.items():
            collect_file_recursive(key, value)
        print("\nFile links collected\n\nDownloader Started...\n")

        for value in file_to_download:
            q.put(value)

        workers = []
        for _ in range(8):
            t = Thread(target = file_download_thread)
            t.start()
            workers.append(t)
        # Wait on the threads themselves rather than q.join(): q.join()
        # never returns if a worker dies before calling task_done().
        for t in workers:
            t.join()
    finally:
        # Log out even when crawling or downloading raised.
        print("Logging out session")
        session_requests.get("http://slate.nu.edu.pk/portal/logout")
else:
    print("Login failed")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment