Skip to content

Instantly share code, notes, and snippets.

@chilcote
Last active October 8, 2016 22:02
Show Gist options
  • Save chilcote/56632c776efcf4b98c2d to your computer and use it in GitHub Desktop.
Save chilcote/56632c776efcf4b98c2d to your computer and use it in GitHub Desktop.
Download all the videos from http://docs.macsysadmin.se/2015/2015doc.html
#!/usr/bin/env python
# I got the meat of this script from somewhere but I can't remember where...
# if it was yours, let me know and I'll give credit.
'''
Download the MacSysAdmin conference videos.

Requires:
    pip install requests
    pip install beautifulsoup4
'''
import os
import sys
import requests
from bs4 import BeautifulSoup, SoupStrainer
# Conference year; it appears twice in the index-page URL and once in the
# name of the download directory.
year = '2015'
url = 'http://docs.macsysadmin.se/{0}/{0}doc.html'.format(year)
output_dir = 'macsysadmin' + year
def get_videos(url):
    '''Return the URLs of all mp4 links found on the page at ``url``.

    Args:
        url: Address of the HTML page to scan for links.

    Returns:
        A list of URL strings in document order, one for every
        ``<a href=...>`` whose href contains ``mp4``. Relative hrefs are
        resolved against the URL the page was actually served from.

    Raises:
        requests.exceptions.HTTPError: if the page request returns an
            HTTP error status.
    '''
    # Function-scope import so this works on both Python 2 and Python 3
    # without touching the file-level imports.
    try:
        from urllib.parse import urljoin
    except ImportError:
        from urlparse import urljoin

    req = requests.get(url)
    # Fail loudly instead of parsing an error page and returning [].
    req.raise_for_status()
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed (and warns), and not every parser
    # honours ``parse_only``.
    soup = BeautifulSoup(req.content, 'html.parser',
                         parse_only=SoupStrainer('a', href=True))
    # find_all() yields only Tag objects; iterating the soup directly can
    # also yield non-tag nodes (e.g. a Doctype with some parsers), which
    # have no ``attrs``.
    return [urljoin(req.url, link['href'])
            for link in soup.find_all('a', href=True)
            if 'mp4' in link['href']]
def save_videos(urls, dir_name, chunk_size=64 * 1024):
    '''Download every URL in ``urls`` into the directory ``dir_name``.

    Each file is named after the last ``/``-separated component of its URL.
    The directory is created if it does not exist yet.

    Args:
        urls: Iterable of URL strings to download.
        dir_name: Directory the files are written to.
        chunk_size: Number of bytes requested per streamed chunk.

    Raises:
        requests.exceptions.HTTPError: if a download returns an HTTP error
            status; no file is created for that URL.
    '''
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)
    for url in urls:
        # Parenthesised single-argument form prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('Downloading %s' % url)
        req = requests.get(url, stream=True)
        try:
            # Check the status before opening the file so an error page is
            # never saved under a video's name.
            req.raise_for_status()
            output_file = os.path.join(dir_name, url.split('/')[-1])
            # 'wb': the payload is binary; text mode corrupts it on Windows
            # (Python 2) or rejects bytes outright (Python 3).
            with open(output_file, 'wb') as f:
                for chunk in req.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
        finally:
            # A streamed response holds its connection until closed.
            req.close()
def main():
    '''Download every video linked from the module-level ``url`` into ``output_dir``.'''
    save_videos(get_videos(url), output_dir)
# Start the download only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment