@simonerni
Created February 26, 2018 16:21
Video Lecture Downloader ETHZ
[computer networks 2018]
url: https://www.video.ethz.ch/lectures/d-infk/2018/spring/252-0064-00L.html
directory: /myAbsolutePath/ComputerNetworks
quiet: True
Original Author: Basil Fürer, just redistributing here for convenience.
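A minimal way to run the downloader with a config like the one above (assuming the script is saved as eth-video.py and the config as lectures.conf; both file names are only examples):

python3 eth-video.py -c lectures.conf

Each section of the config is processed in turn; directory, quiet and quality fall back to the command-line values or the built-in defaults when a section omits them.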
#!/usr/local/bin/python3
# -*- coding: utf-8 -*-
#
import argparse
import bs4
import datetime as d
import os
import sys
import urllib.request as ul
import configparser
global DIR, QUALITY, QUIET, UAGENT
DIR = os.path.expanduser('~/downloads')
QUALITY = -1 # 0 for worst & -1 for best
QUIET = False
UAGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'

def pprint(out):
    # print only when not running in quiet mode
    if not QUIET:
        print(out)

def die(e, s):
    # print an error message and exit with status s
    print('{}: error: {}'.format(os.path.basename(__file__), e))
    exit(s)

def pad(s):
    # pad the lecture name to a fixed-width column so the status tags line up
    return (s + ':' + 80 * ' ')[:40 - len(' [failed]')]

def url_open(url):
    # fetch a page with a browser user agent and return the parsed soup
    req = ul.Request(url, headers={'User-Agent': UAGENT})
    res = ul.urlopen(req).read().decode('utf-8')
    soup = bs4.BeautifulSoup(res, 'html.parser')
    return soup

def get_vids(url):
    # collect the per-episode links from the lecture overview page
    soup = url_open(url)
    vids = [x for x in soup.find_all('div', {'class': 'play'})]
    vids = map(lambda s: s.find_all('a')[0]['href'].split('?')[0], vids)
    return vids

def get_mp4(url):
    # resolve an episode page to its mp4 variants, keyed by vertical resolution,
    # and build a file name from the recording date and lecture title
    if url[0] == '/':
        url = 'https://www.video.ethz.ch' + url
    soup = url_open(url)
    hrfs = [x.find_all('a') for x in soup.find_all('li', {'class': 'video'})]
    mp4s = {int(x[0].getText().split('x')[-1]): x[0]['href'] for x in hrfs}
    details = soup.find_all('div', {'class': 'accordionContent'})[0]
    name = details.findNext('h3').getText().replace(' ', '_')
    date = details.find_all('p')[2].getText().strip()
    date = d.datetime.strptime(date, '%d.%m.%Y').strftime('%Y.%m.%d')
    return ('{}-{}.mp4'.format(date, name), mp4s)

def download(fname, url):
    req = ul.Request(url, headers={'User-Agent': UAGENT})
    with open(fname, 'wb') as f:
        f.write(ul.urlopen(req).read())

def fetch_videos(aurl):
    if not os.path.isdir(DIR):
        os.makedirs(DIR, exist_ok=True)
    downloaded = False
    for u in get_vids(aurl):
        name, urls = get_mp4(u)
        # pick the requested quality from the available resolutions
        url = [urls[k] for k in sorted(urls.keys())][QUALITY]
        fname = '{}/{}'.format(DIR, name)
        if not os.path.exists(fname):
            if not downloaded:
                pprint('Downloading:')
                downloaded = True
            if not QUIET:
                sys.stdout.write(pad(' {}:'.format(name.replace('.mp4', ''))))
                sys.stdout.flush()
            try:
                download(fname, url)
                pprint('\t[ok]')
            except:
                # remove the partial file before reporting the failure
                try:
                    os.remove(fname)
                except OSError:
                    pass
                pprint('\t[failed]')
    if not downloaded:
        pprint('Nothing to download..')

if __name__ == '__main__':
    example = 'https://www.video.ethz.ch/lectures/d-infk/2018/spring/252-0064-00L.html'
    desc = ('Download podcasts from ethz.ch\n\n'
            'Example config (can contain multiple entries):\n\n'
            '\t[computer networks]\n'
            '\turl: {}\n'
            '\tdirectory: ~/documents/computer_networks/podcasts\n'
            '\t# quality: 0 # uncomment for worst quality\n'
            '\tquiet: True\n'
            ).format(example)
    parser = argparse.ArgumentParser(description=desc,
                                     formatter_class=argparse.RawTextHelpFormatter)
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-u', '--urls', nargs='+',
                       help='url(s) of podcast')
    group.add_argument('-c', '--conf', metavar='CONF', nargs=1,
                       help='specify config file')
    parser.add_argument('-d', '--dir', metavar='DIR', nargs=1,
                        help='specify directory')
    parser.add_argument('-q', '--quiet', action='store_true',
                        help="don't write to stdout")
    parser.add_argument('-s', '--stingy', action='store_true',
                        help='download worst quality')
    args = parser.parse_args()
    if args.dir:
        # argparse stores the --dir value under args.dir (nargs=1 gives a list)
        DIR = args.dir[-1]
    if args.quiet:
        QUIET = True
    if args.stingy:
        QUALITY = 0
    DIR = os.path.abspath(DIR)
    if args.conf:
        for conf in args.conf:
            if not os.path.isfile(conf):
                die("can't read '{}'".format(conf), 3)
            defaults = {'directory': DIR,
                        'quiet': 'False',
                        'quality': '-1'}
            config = configparser.ConfigParser(defaults)
            try:
                config.read(conf)
            except:
                die("can't parse '{}'".format(conf), 4)
            for s in config.sections():
                url = config.get(s, 'url')
                DIR = os.path.expanduser(config.get(s, 'directory'))
                try:
                    QUIET = config.getboolean(s, 'quiet')
                except:
                    die("can't parse field 'quiet' in '{}'".format(s), 5)
                try:
                    QUALITY = config.getint(s, 'quality')
                except:
                    die("can't parse field 'quality' in '{}'".format(s), 5)
                try:
                    fetch_videos(url)
                except ValueError:
                    die("invalid url '{}'".format(url), 1)
                except:
                    die('download aborted', 9)
    else:
        for url in args.urls:
            try:
                fetch_videos(url)
            except ValueError:
                die("invalid url '{}'".format(url), 1)
            except:
                die('download aborted', 9)
usage: eth-video.py [-h] (-u URLS [URLS ...] | -c CONF) [-d DIR] [-q] [-s]
Download podcasts from ethz.ch
Example config (can contain multiple entries):
[computer networks]
url: https://www.video.ethz.ch/lectures/d-infk/2018/spring/252-0064-00L.html
directory: ~/documents/computer_networks/podcasts
# quality: 0 # uncomment for worst quality
quiet: True
optional arguments:
-h, --help show this help message and exit
-u URLS [URLS ...], --urls URLS [URLS ...]
url(s) of podcast
-c CONF, --conf CONF specify config file
-d DIR, --dir DIR specify directory
-q, --quiet don't write to stdout
-s, --stingy download worst quality
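For the direct-URL mode, a usage sketch against the course page listed at the top (the target directory is just an example path):

python3 eth-video.py -u https://www.video.ethz.ch/lectures/d-infk/2018/spring/252-0064-00L.html -d ~/ComputerNetworks -s

Here -s requests the lowest available resolution; omit it to download the best quality, which is the default.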