Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save KartikTalwar/2899403 to your computer and use it in GitHub Desktop.
Save KartikTalwar/2899403 to your computer and use it in GitHub Desktop.
Desire 2 Download Fix
## This line should fix the double (sometimes triple) fetching of the courses
## https://github.com/sholiday/desire2download/blob/master/desire2download.py#L79
# Multiple links are detected because the course announcements on the
# homepage also link to the course page, so each course title can appear more than once.
"""
Special Cases :
- PHYS 10 - Spring 2012
- PHYS 260A - Spring 2012
- AMATH 251/PMATH 332 - Spring 2012
"""
def get_course_links(self):
    """Return the unique course links found on the currently loaded page.

    Scans every link exposed by ``self.br.links()`` and keeps those whose
    text starts with something shaped like a course title, for example
    ``PHYS 260A - Spring 2012`` or ``AMATH 251/PMATH 332 - Spring 2012``.
    The homepage can link to the same course several times (course
    announcements point at the course page as well), so only the first
    link seen for each distinct title is kept.

    Returns:
        list: Matching link objects in page order, one per course title.
    """
    # Single-argument parenthesised print behaves the same as the Python 2
    # print statement and is also valid Python 3.
    print('Finding courses...')

    # <DEPT> <number, possibly cross-listed> - <Term> <year>.
    # Raw string so that ``\s`` reaches the regex engine untouched; the
    # department part is letters only (no stray '[' in the character class).
    course_title = re.compile(
        r'[A-Z]+ [0-9A-Z/\s]{2,15} - [A-Z][a-z]+ 20[0-9]{2}')

    links = []
    seen = set()  # titles already collected, for O(1) duplicate checks
    for link in self.br.links():
        text = link.text
        # Skip links with no text: re.match would raise TypeError on None,
        # and an empty string can never match the pattern anyway.
        if not text or text in seen:
            continue
        if course_title.match(text) is not None:
            seen.add(text)
            links.append(link)
    return links
# Fixes the issue where the referenced content is not a downloadable file and/or does not exist
"""
# Bug:
- https://learn.uwaterloo.cahttp://cte.uwaterloo.ca/teaching_resources/tips/online_discussions_tips_for_students.html
- https://learn.uwaterloo.cahttp://phet.colorado.edu/new/simulations/sims.php
- https://learn.uwaterloo.cahttp://paws.kettering.edu/~drussell/Demos/wave-x-t/wave-x-t.html
- URL to a file that does not exist
# After Fix:
- http://cte.uwaterloo.ca/teaching_resources/tips/online_discussions_tips_for_students.html
- 'X File does not exist: Aberration_correction_2010.pdf'
"""
import errno
import urllib2
def download_file(self, title, url, path):
    """Downloads a file to the specified directory.

    Opens the preview page at ``url``, takes the ``src`` of its first
    ``<iframe>`` as the real file location and saves that file under
    ``path`` (created if missing).  Nothing is downloaded when the preview
    page has no usable iframe, when the content is web-only, when the file
    name matches one of ``self.ignore_re``, when the file already exists
    locally, or when the server answers with an HTTP error.

    Args:
        title (str): Name of the file.
        url (str): Address to the file preview page.
        path (str): Relative path of file to make.

    Raises:
        OSError: If ``path`` cannot be created for a reason other than it
            already existing.
    """
    try:
        os.makedirs(path)
    except OSError as e:
        # An already-existing directory is expected; anything else is fatal.
        if e.errno != errno.EEXIST:
            raise

    page = self.br.open(url).read()
    soup = BeautifulSoup.BeautifulSoup(page)
    # find() returns None when the page has no iframe; guard so a preview
    # page without embedded content is skipped instead of raising TypeError.
    iframe = soup.find('iframe')
    url = iframe.get('src') if iframe is not None else None
    if not url:
        print(" X Unable to find downloadable content for %s" % title)
        return

    ## TODO: How should this be handled. These seem to be custom pages
    ##       with content loaded via javascript, at least this one
    ## url = https://learn.uwaterloo.ca/d2l/lor/viewer/view.d2l?ou=16733&loId=0&loIdentId=245
    if '/d2l/common/dialogs/' in url or \
            'https://learn.uwaterloo.ca/d2l/lor/viewer' in url:
        print(" X Unable to download web-only content %s" % title)
        return

    # Drop the query string; what remains is either an absolute URL
    # (externally hosted content) or a path relative to the D2L host.
    url_path = url.split('?')[0]
    if url_path.startswith('http'):
        clean_url = url_path
    else:
        clean_url = 'https://learn.uwaterloo.ca%s' % url_path
    clean_url = clean_url.replace(' ', '%20')

    file_name = os.path.split(url_path)[1]
    for r in self.ignore_re:
        if r.match(file_name) is not None:
            print('Skipping %s because it matches ignore regex "%s"' % (file_name, r.pattern))
            return

    path_and_filename = '%s/%s' % (path, file_name.strip('/'))
    if os.path.isfile(path_and_filename):  ## TODO Can we make this smarter?
        print(' - %s (Already Saved)' % path_and_filename)
        return

    try:
        # open_novisit: fetch the file through the browser object without
        # treating it as a page visit.
        content = self.br.open_novisit(clean_url).read()
    except urllib2.HTTPError as e:
        if e.code == 404:
            print(" X File does not exist: %s" % file_name.strip('/'))
        else:
            # Report other HTTP failures instead of dropping them silently.
            print(" X Unable to download %s (HTTP %s)" % (file_name.strip('/'), e.code))
        return

    print(' + %s (%s)' % (path_and_filename, self.convert_bytes(len(content))))
    # Binary mode: the payload is raw bytes (PDFs, slides, ...) and must not
    # go through newline translation or text encoding.
    with open(path_and_filename, 'wb') as f:
        f.write(content)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment