Skip to content

Instantly share code, notes, and snippets.

@rdbuf
Last active October 13, 2019 21:49
Show Gist options
  • Save rdbuf/00ef5a5e0aaa2eed04145d07d1b40159 to your computer and use it in GitHub Desktop.
Save rdbuf/00ef5a5e0aaa2eed04145d07d1b40159 to your computer and use it in GitHub Desktop.
Some dumb lecture downloader for lectoriy.mipt.ru
import argparse
import shutil
import os.path
import sys
import requests
from urllib3.util.retry import Retry
from requests.adapters import HTTPAdapter
from selenium import webdriver
from requests_html import HTML
import logging
# Download every lecture video of one course into "<outdir>/<course title>/".
#
# Usage: script.py URL [--outdir DIR]
# Partially downloaded files are resumed with an HTTP Range request.

parser = argparse.ArgumentParser()
parser.add_argument('url', help='example: https://lectoriy.mipt.ru/course/LinearAlgebra/lectures')
parser.add_argument('--outdir', default='')
args = parser.parse_args()
course_url = args.url
outdir = args.outdir

# The course page is loaded through a real Chrome instance rather than plain
# HTTP (presumably because the lecture list is rendered by JavaScript --
# TODO confirm). try/finally guarantees the browser process is shut down even
# when loading the page fails.
driver = webdriver.Chrome()
try:
    driver.get(course_url)
    content = driver.page_source
finally:
    driver.quit()

# Requests picks the adapter with the longest matching prefix, so an adapter
# mounted on '' is always shadowed by the built-in 'http://' and 'https://'
# adapters and its retry policy is never used. Mount on the real schemes.
s = requests.Session()
retry_adapter = HTTPAdapter(max_retries=Retry(connect=500, read=500, redirect=500, status=500))
for scheme in ('http://', 'https://'):
    s.mount(scheme, retry_adapter)

# Verbose logging: the root logger is set to DEBUG, which also surfaces the
# HTTP library's connection/retry messages.
logging.basicConfig()
logging.getLogger().setLevel(logging.DEBUG)
# NOTE(review): recent urllib3 versions log under "urllib3"; this logger name
# is kept as in the original -- confirm it is still needed.
requests_log = logging.getLogger("requests.packages.urllib3")
requests_log.setLevel(logging.DEBUG)
requests_log.propagate = True

html = HTML(html=content, url=course_url)
course_title = html.find('h1', first=True).text
baseoutdir = os.path.join(outdir, course_title)
# exist_ok avoids the check-then-create race of a separate os.path.exists().
os.makedirs(baseoutdir, exist_ok=True)

for i, block in enumerate(html.find('.lecture-title')):
    lecture_title = block.text
    page_url = list(block.absolute_links)[0]

    # The lecture page embeds the file location in <video><source src=...>.
    lecture_page = HTML(html=s.get(page_url).text)
    source_tag = lecture_page.find('video > source', first=True)
    # Drop literal \" sequences from the attribute value (presumably the src
    # arrives with escaped quotes -- TODO confirm).
    video_url = source_tag.attrs['src'].replace(r'\"', '')

    # A path separator inside the title would make open() fail or write
    # outside the course directory, so neutralise it.
    safe_title = lecture_title.replace(os.sep, '_')
    if os.altsep:
        safe_title = safe_title.replace(os.altsep, '_')
    filename = os.path.join(baseoutdir, f'{i+1:02d}. {safe_title}.mp4')

    # Resume from the size of whatever is already on disk.
    startbyte = os.path.getsize(filename) if os.path.exists(filename) else 0

    print(f'\033[92mdownloading: {filename}\033[0m')
    sys.stdout.flush()

    # The response is used as a context manager so the connection is released
    # back to the pool on every path.
    with s.get(video_url, stream=True, headers={'Range': f'bytes={startbyte}-'}) as r:
        if r.status_code == 416:
            # Range Not Satisfiable: nothing exists past startbyte, i.e. the
            # local file already holds the whole video. Appending the error
            # body would corrupt it.
            print(f'already complete: {filename}')
            continue
        if r.status_code not in (200, 206):
            # Never write an HTTP error page into the .mp4; move on instead.
            print(f'skipping {filename}: unexpected HTTP status {r.status_code}', file=sys.stderr)
            continue

        # 206 = server honoured the Range header, so append to the partial
        # file. 200 = server ignored it and is sending the full video, so
        # start over instead of appending a second copy.
        mode = 'ab' if r.status_code == 206 else 'wb'
        with open(filename, mode) as outfile:
            shutil.copyfileobj(r.raw, outfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment