Created
October 2, 2016 14:20
-
-
Save soachishti/1ffce3388123205ef1adce605680d08c to your computer and use it in GitHub Desktop.
Script to download all course resources from http://slate.nu.edu.pk
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from lxml import html | |
import sys, os, errno | |
import Queue | |
from threading import Thread | |
import getpass | |
# --- Interactive credentials -------------------------------------------------
# The username is read from stdin; the password is read through getpass.
user = raw_input("Username: ")
passwd = getpass.getpass()
print("\n\n")

# --- Shared module state -----------------------------------------------------
# Root directory (under the current working directory) for downloaded files.
data_folder = os.getcwd() + "/SlateNUFiles"

# [url, destination sub-folder] pairs gathered while crawling the courses.
file_to_download = []

# Single HTTP session used for the login and every later request.
session_requests = requests.session()

# Work queue consumed by the downloader threads (last-in, first-out).
q = Queue.LifoQueue()
############################################# | |
def make_sure_path_exists(path):
    """Create ``path`` (including missing parents) unless it already exists.

    Args:
        path: Directory path to create.

    Raises:
        OSError: For any ``os.makedirs`` failure other than ``EEXIST``.
    """
    try:
        os.makedirs(path)
    except OSError as err:
        # An already-existing path is the one failure that is tolerated.
        if err.errno == errno.EEXIST:
            return
        raise
######################################################## | |
def collect_file_recursive(key, value, folder_name=None):
    """Walk a course's resources listing and record every file link found.

    Each file is appended to the module-level ``file_to_download`` list as
    ``[href, key]``. Entries whose link title is ``"Folder"`` are entered
    recursively.

    Args:
        key: Destination sub-path for files found at this level. Callers pass
            the course title at the top level; recursion extends it with
            ``"/" + folder name``.
        value: ``[resources_url, course_id]`` for the course being walked.
        folder_name: Folder to navigate into before reading the listing, or
            ``None`` to read whatever the resources page currently returns.
    """
    if folder_name is not None:
        # Navigate the resources tool into the folder. The listing itself is
        # read by the GET below, so the POST response is not needed.
        # NOTE(review): collectionId contains only the immediate folder name,
        # not the chain of parent folders - confirm this is what the server
        # expects for folders nested more than one level deep.
        payload = {
            "source": "0",
            "collectionId": "/group/" + value[1] + "/" + folder_name + "/",
            "navRoot": "",
            "criteria": "title",
            "sakai_action": "doNavigate",
            "rt_action": "",
            "selectedItemId": "",
        }
        session_requests.post(
            value[0],
            data=payload,
            headers=dict(referer=value[0]),
        )

    result = session_requests.get(value[0])
    tree = html.fromstring(result.content)
    links = tree.findall(".//td[@class='specialLink']/h4/a[last()]")

    for link in links:
        if link.get("title") == "Folder":
            child_name = link.text_content().strip()
            # Build the child's key as a fresh value. Rebinding ``key`` here
            # would file every later entry of this level (and every sibling
            # folder) under the first folder encountered.
            collect_file_recursive(key + "/" + child_name, value, child_name)
            continue

        href = link.get("href")
        # Anchors without a usable target are not downloadable files.
        if href is None or href == "#":
            continue

        print("Collected file: " + href)
        file_to_download.append([href, key])
#################################################### | |
def file_download_thread():
    """Worker loop: take ``[url, folder]`` items from ``q`` and save each file.

    Every item is fetched through the shared ``session_requests`` session and
    written to ``data_folder/<folder>/<last URL segment>``. The loop ends when
    the queue has no more items. A failure on one item is reported and the
    worker moves on, so ``q.join()`` in the main thread can always complete.
    """
    while True:
        try:
            # Non-blocking get: checking q.empty() and then calling a blocking
            # q.get() lets another worker take the last item in between,
            # leaving this thread blocked forever.
            value = q.get_nowait()
        except Queue.Empty:
            break

        try:
            url = value[0]
            folder = value[1]
            filename = url.split("/")[-1]

            print("+ Downloading " + filename)
            result = session_requests.get(url)
            print("- Downloaded " + filename)

            path = data_folder + "/" + folder
            make_sure_path_exists(path)
            # result.content is the raw response body, so write it in binary
            # mode; a text-mode handle can alter the bytes on some platforms.
            with open(path + "/" + filename, "wb") as f:
                f.write(result.content)
        except Exception as exc:
            # Worker-thread boundary: report and continue with the next item
            # instead of letting one bad download kill the thread.
            print("! Failed to download " + repr(value) + ": " + str(exc))
        finally:
            # Always mark the item as handled, otherwise q.join() never returns.
            q.task_done()
#################################################### | |
# Script flow: log in, discover the course links, collect every file link,
# download them with a pool of worker threads, then log out.
payload = {
    "eid": user,
    "pw": passwd,
    "submit": "Login",
}

print("Requesting to login")
# NOTE(review): the credentials are posted to a plain http:// URL - confirm
# whether the portal is reachable over https.
login_url = "http://slate.nu.edu.pk/portal/xlogin"
result = session_requests.post(
    login_url,
    data=payload,
    headers=dict(referer="http://slate.nu.edu.pk/portal"),
)

# NOTE(review): success is judged by the HTTP status alone; presumably a
# rejected password could also come back as 200 - verify against the portal.
if result.status_code == 200:
    try:
        tree = html.fromstring(result.content)
        elems = tree.findall(".//ul[@id='siteLinkList']/li/a")

        # Maps course title -> [resources URL, course id].
        courses = {}
        for course in elems:
            title = course.get('title')
            # Skip Academic and QEC section
            if title is None or "Academic" in title or "QEC" in title:
                continue

            href = course.get('href')
            if href is None:
                # Without a link target there is no course id to derive.
                continue

            # Resources URL for course
            # NOTE(review): the literal "1310" suffix presumably selects the
            # resources tool for the site - confirm.
            course_id = href.split("/")[-1]
            href = "http://slate.nu.edu.pk/portal/tool/" + course_id + "1310?panel=Main"
            courses[title] = [href, course_id]

        for key, value in courses.items():
            collect_file_recursive(key, value)

        print("\nFile links collected\n\nDownloader Started...\n")

        for value in file_to_download:
            q.put(value)

        # Eight workers drain the queue concurrently.
        for _ in range(8):
            t = Thread(target=file_download_thread)
            t.start()

        # Block until every queued item has been marked done.
        q.join()
    finally:
        # Log out even when crawling or downloading raised, so the server-side
        # session is not left open.
        print("Logging out session")
        session_requests.get("http://slate.nu.edu.pk/portal/logout")
else:
    print("Login failed")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment