@simonerni
Created February 26, 2018 16:21
Video Lecture Downloader ETHZ
[computer networks 2018]
url: https://www.video.ethz.ch/lectures/d-infk/2018/spring/252-0064-00L.html
directory: /myAbsolutePath/ComputerNetworks
quiet: True
Original Author: Basil Fürer, just redistributing here for convenience.
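A minimal way to run the downloader with a config like the one above (assuming the script is saved as eth-video.py and the config as lectures.conf; both file names are only examples):

python3 eth-video.py -c lectures.conf

Each section of the config is processed in turn; directory, quiet and quality fall back to the command-line values or the built-in defaults when a section omits them.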
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
#
import argparse
import bs4
import datetime as d
import os
import sys
import urllib.request as ul
import configparser
global DIR, QUALITY, QUIET, UAGENT
DIR = os.path.expanduser('~/downloads')
QUALITY = -1 # 0 for worst & -1 for best
QUIET = False
UAGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'

def pprint(out):
    # print only when not running in quiet mode
    if not QUIET:
        print(out)

def die(e, s):
    # print an error message and exit with status s
    print('{}: error: {}'.format(os.path.basename(__file__), e))
    exit(s)

def pad(s):
    # pad the lecture name to a fixed-width column so the status tags line up
    return (s + ':' + 80 * ' ')[:40 - len(' [failed]')]

def url_open(url):
    # fetch a page with a browser user agent and return the parsed soup
    req = ul.Request(url, headers={'User-Agent': UAGENT})
    res = ul.urlopen(req).read().decode('utf-8')
    soup = bs4.BeautifulSoup(res, 'html.parser')
    return soup

def get_vids(url):
    # collect the per-episode links from the lecture overview page
    soup = url_open(url)
    vids = [x for x in soup.find_all('div', {'class': 'play'})]
    vids = map(lambda s: s.find_all('a')[0]['href'].split('?')[0], vids)
    return vids

def get_mp4(url):
    # resolve an episode page to its mp4 variants, keyed by vertical resolution,
    # and build a file name from the recording date and lecture title
    if url[0] == '/':
        url = 'https://www.video.ethz.ch' + url
    soup = url_open(url)
    hrfs = [x.find_all('a') for x in soup.find_all('li', {'class': 'video'})]
    mp4s = {int(x[0].getText().split('x')[-1]): x[0]['href'] for x in hrfs}
    details = soup.find_all('div', {'class': 'accordionContent'})[0]
    name = details.findNext('h3').getText().replace(' ', '_')
    date = details.find_all('p')[2].getText().strip()
    date = d.datetime.strptime(date, '%d.%m.%Y').strftime('%Y.%m.%d')
    return ('{}-{}.mp4'.format(date, name), mp4s)

def download(fname, url):
    req = ul.Request(url, headers={'User-Agent': UAGENT})
    with open(fname, 'wb') as f:
        f.write(ul.urlopen(req).read())

def fetch_videos(aurl):
    if not os.path.isdir(DIR):
        os.makedirs(DIR, exist_ok=True)
    downloaded = False
    for u in get_vids(aurl):
        name, urls = get_mp4(u)
        # pick the requested quality from the available resolutions
        url = [urls[k] for k in sorted(urls.keys())][QUALITY]
        fname = '{}/{}'.format(DIR, name)
        if not os.path.exists(fname):
            if not downloaded:
                pprint('Downloading:')
                downloaded = True
            if not QUIET:
                sys.stdout.write(pad(' {}:'.format(name.replace('.mp4', ''))))
                sys.stdout.flush()
            try:
                download(fname, url)
                pprint('\t[ok]')
            except:
                # remove the partial file before reporting the failure
                try:
                    os.remove(fname)
                except OSError:
                    pass
                pprint('\t[failed]')
    if not downloaded:
        pprint('Nothing to download..')

if __name__ == '__main__':
    example = 'https://www.video.ethz.ch/lectures/d-infk/2018/spring/252-0064-00L.html'
    desc = ('Download podcasts from ethz.ch\n\n'
            'Example config (can contain multiple entries):\n\n'
            '\t[computer networks]\n'
            '\turl: {}\n'
            '\tdirectory: ~/documents/computer_networks/podcasts\n'
            '\t# quality: 0 # uncomment for worst quality\n'
            '\tquiet: True\n'
            ).format(example)
    parser = argparse.ArgumentParser(description=desc,
                                     formatter_class=argparse.RawTextHelpFormatter)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-u', '--urls', nargs='+',
                       help='url(s) of podcast')
    group.add_argument('-c', '--conf', metavar='CONF', nargs=1,
                       help='specify config file')
    parser.add_argument('-d', '--dir', metavar='DIR', nargs=1,
                        help='specify directory')
    parser.add_argument('-q', '--quiet', action='store_true',
                        help="don't write to stdout")
    parser.add_argument('-s', '--stingy', action='store_true',
                        help='download worst quality')
    args = parser.parse_args()
    if args.dir:
        # argparse stores the --dir value under args.dir (nargs=1 gives a list)
        DIR = args.dir[-1]
    if args.quiet:
        QUIET = True
    if args.stingy:
        QUALITY = 0
    DIR = os.path.abspath(DIR)
    if args.conf:
        for conf in args.conf:
            if not os.path.isfile(conf):
                die("can't read '{}'".format(conf), 3)
            defaults = {'directory': DIR,
                        'quiet': 'False',
                        'quality': '-1'}
            config = configparser.ConfigParser(defaults)
            try:
                config.read(conf)
            except:
                die("can't parse '{}'".format(conf), 4)
            for s in config.sections():
                url = config.get(s, 'url')
                DIR = os.path.expanduser(config.get(s, 'directory'))
                try:
                    QUIET = config.getboolean(s, 'quiet')
                except:
                    die("can't parse field 'quiet' in '{}'".format(s), 5)
                try:
                    QUALITY = config.getint(s, 'quality')
                except:
                    die("can't parse field 'quality' in '{}'".format(s), 5)
                try:
                    fetch_videos(url)
                except ValueError:
                    die("invalid url '{}'".format(url), 1)
                except:
                    die('download aborted', 9)
    else:
        for url in args.urls:
            try:
                fetch_videos(url)
            except ValueError:
                die("invalid url '{}'".format(url), 1)
            except:
                die('download aborted', 9)
usage: eth-video.py [-h] (-u URLS [URLS ...] | -c CONF) [-d DIR] [-q] [-s]
Download podcasts from ethz.ch
Example config (can contain multiple entries):
[computer networks]
url: https://www.video.ethz.ch/lectures/d-infk/2018/spring/252-0064-00L.html
directory: ~/documents/computer_networks/podcasts
# quality: 0 # uncomment for worst quality
quiet: True
optional arguments:
-h, --help show this help message and exit
-u URLS [URLS ...], --urls URLS [URLS ...]
url(s) of podcast
-c CONF, --conf CONF specify config file
-d DIR, --dir DIR specify directory
-q, --quiet don't write to stdout
-s, --stingy download worst quality
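For the direct-URL mode, a usage sketch against the course page listed at the top (the target directory is just an example path):

python3 eth-video.py -u https://www.video.ethz.ch/lectures/d-infk/2018/spring/252-0064-00L.html -d ~/ComputerNetworks -s

Here -s requests the lowest available resolution; omit it to download the best quality, which is the default.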