Skip to content

Instantly share code, notes, and snippets.

@chilcote
Last active April 17, 2018 20:55
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chilcote/84fa2a6619d89eed1d67d47bae18ffc0 to your computer and use it in GitHub Desktop.
Save chilcote/84fa2a6619d89eed1d67d47bae18ffc0 to your computer and use it in GitHub Desktop.
Download MacSysAdmin conference videos (the year is chosen by the script's `year` variable, currently set to 2017)
#!/usr/bin/env python
'''
Download the MacSysAdmin conference videos linked from the docs page.

Requires:
    pip install requests
    pip install beautifulsoup4
'''
import os
import sys
import requests
from bs4 import BeautifulSoup, SoupStrainer
# Conference year; selects both the index page and the download folder.
year = '2017'
# Local directory the videos are saved into.
output_dir = 'macsysadmin%s' % year
# Index page that is scanned for video links.
url = 'http://docs.macsysadmin.se/%s/%sdoc.html' % (year, year)
def get_videos(url):
    '''Return the href of every anchor on the page at `url` containing 'mp4'.

    The page is fetched with requests and parsed with BeautifulSoup's
    "html.parser"; only <a> tags that carry an href attribute are parsed.
    The hrefs are returned exactly as they appear in the page.
    '''
    response = requests.get(url)
    # Limit parsing to anchors that actually have an href attribute.
    anchors_only = SoupStrainer('a', href=True)
    page = BeautifulSoup(response.content, "html.parser", parse_only=anchors_only)
    video_links = []
    for anchor in page.find_all('a'):
        href = anchor.attrs['href']
        if 'mp4' in href:
            video_links.append(href)
    return video_links
def save_videos(skip, urls, dir_name):
    '''Download each URL in `urls` into `dir_name`, skipping known files.

    skip     -- collection of file names (not paths) that must not be
                downloaded again
    urls     -- iterable of video URLs; the text after the last '/' is used
                as the local file name
    dir_name -- destination directory, created if it does not exist

    A URL whose response has an HTTP error status is reported and skipped
    so that an error page is never stored as a video file.
    '''
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    # Build the lookup once so each membership test is O(1).
    skip = set(skip)
    for url in urls:
        file_name = url.split('/')[-1]
        output_file = os.path.join(dir_name, file_name)
        if file_name in skip:
            print('Skipping %s' % output_file)
            continue
        print('Downloading %s' % url)
        # Only open the connection once we know the file is wanted.
        req = requests.get(url, stream=True)
        try:
            if not req.ok:
                print('Failed to download %s (HTTP %s)' % (url, req.status_code))
                continue
            # Video data is binary: 'wb' avoids newline translation and
            # works with the byte chunks yielded by iter_content.
            with open(output_file, 'wb') as f:
                for chunk in req.iter_content(chunk_size=3000):
                    if chunk:  # ignore empty keep-alive chunks
                        f.write(chunk)
        finally:
            # Release the streamed connection back to the pool.
            req.close()
def main():
    '''Download every video linked from the index page that is not already
    present in the output directory.'''
    # Files already in the output directory are passed on as the skip list.
    if os.path.exists(output_dir):
        existing_files = os.listdir(output_dir)
    else:
        existing_files = []
    save_videos(existing_files, get_videos(url), output_dir)
# Start the download only when run as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment