Skip to content

Instantly share code, notes, and snippets.

@smcquay
Forked from travisbhartwell/gist:1257132
Created October 4, 2011 04:51
Show Gist options
  • Save smcquay/1260931 to your computer and use it in GitHub Desktop.
Save smcquay/1260931 to your computer and use it in GitHub Desktop.
Python Script to download October 2011 General Conference Archives
#!/usr/bin/python
"""
This is a handy script to download the media from General Conference for
your own use. Execute this script with the appropriate options to download the
files you wish.
The only dependency outside the Python standard library is BeautifulSoup.
"""
import sys
import urllib
import urlparse
import argparse
from BeautifulSoup import BeautifulSoup
# CSS class names accepted for the positional ``class_`` command-line
# argument; the chosen one is used to select the download links on the page.
download_classes = (
    "video-360p",
    "video-720p",
    "video-1080p",
    "video-wmv",
    "audio-mp3",
    "audio-m4b",
)
def main(download_individual=True,
         download_music=False,
         download_sessions=False,
         download_priesthood=False,
         download_young_women_meeting=False,
         download_relief_society_meeting=False,
         download_class=None,
         dryrun=True,
         conference_url='http://lds.org/general-conference/'
                        'sessions/2011/10?lang=eng'):
    """List, and optionally download, media files linked from a conference page.

    The page at ``conference_url`` is fetched and parsed with BeautifulSoup.
    Every ``<a>`` tag whose ``class`` attribute matches ``download_class`` is
    a candidate; candidates are then dropped according to the flags below,
    based on the ``class``/``id`` attributes of the elements enclosing the
    link. Each remaining link is printed and, unless ``dryrun`` is true,
    saved in the current directory under the last path component of its URL.

    Arguments:
        download_individual: keep links that are not inside a "head-row"
            element (the individual files).
        download_music: keep links inside a "music" element. Requires
            ``download_individual``.
        download_sessions: keep links inside a "head-row" element (the
            full-session files).
        download_priesthood: keep links inside the "priesthood" session.
        download_young_women_meeting: keep links inside the "young-women"
            session.
        download_relief_society_meeting: keep links inside the
            "relief-society" session.
        download_class: value passed unchanged as the ``class`` attribute
            filter to ``findAll``.
            NOTE(review): the default of None is forwarded as-is; callers
            are presumably expected to pass one of ``download_classes``.
        dryrun: when true, only print what would be downloaded.
        conference_url: address of the page to scan; relative links are
            resolved against it.

    Raises:
        SystemExit: if music is requested without individual files.
    """
    if download_music and not download_individual:
        # sys.exit with a string writes it to stderr and exits with status 1.
        sys.exit("To download music, must download individual")

    page = urllib.urlopen(conference_url)
    try:
        # BeautifulSoup reads the whole response while constructing the
        # document, so the connection can be released right afterwards.
        document = BeautifulSoup(page)
    finally:
        page.close()

    for tag in document.findAll("a", attrs={"class": download_class}):
        href = tag.get("href")
        if not href:
            # An anchor without a target has nothing to download.
            continue

        # Don't download Young Women Meeting files if not wanted
        if not download_young_women_meeting and \
                tag.findParents(attrs={"class": "sessions",
                                       "id": "young-women"}):
            continue

        # Don't download Relief Society Meeting files if not wanted
        if not download_relief_society_meeting and \
                tag.findParents(attrs={"class": "sessions",
                                       "id": "relief-society"}):
            continue

        # Don't download Priesthood Session files if not wanted
        if not download_priesthood and \
                tag.findParents(attrs={"class": "sessions",
                                       "id": "priesthood"}):
            continue

        # Links under a "head-row" element are the full-session files;
        # everything else is an individual file.
        in_head_row = bool(tag.findParents(attrs={"class": "head-row"}))

        # Don't download full session files if not wanted
        if in_head_row and not download_sessions:
            continue

        # Don't download individual files if not wanted
        if not in_head_row and not download_individual:
            continue

        # Don't download musical number files if not wanted
        if not download_music and \
                tag.findParents(attrs={"class": "music"}):
            continue

        # Resolve relative links so urlretrieve always gets a full URL;
        # absolute links pass through urljoin unchanged.
        href = urlparse.urljoin(conference_url, href)

        # Get an appropriate filename
        filename = urlparse.urlsplit(href).path.split("/")[-1]
        print("Downloading %s as %s" % (href, filename))
        if not dryrun:
            urllib.urlretrieve(href, filename)
if __name__ == "__main__":
    # Command-line entry point: translate the options into main()'s keyword
    # arguments. Exactly one of --individual / --entire-sessions is required.
    parser = argparse.ArgumentParser(description="LDS Conference Downloader")
    parser.add_argument('--dry-run', action='store_true',
                        help='Do not perform downloads')
    parser.add_argument('--music', action='store_true',
                        help="Download musical numbers")
    parser.add_argument('--priesthood', action='store_true',
                        help="Download Priesthood Session.")
    parser.add_argument('--young-womens', action='store_true',
                        help="Download Young Women's Session.")
    parser.add_argument('--relief-society', action='store_true',
                        help="Download Relief Society Session.")
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--individual', action='store_true',
                       help="Get individual files, not entire-session files.")
    group.add_argument('--entire-sessions', action='store_true',
                       help="Only get files for entire session, "
                            "not individual files.")
    parser.add_argument('class_', type=str, nargs='?',
                        choices=download_classes, default='audio-mp3',
                        help="Class of file to download "
                             "(default: %(default)s).")
    args = parser.parse_args()

    if not args.entire_sessions and args.class_ == 'audio-m4b':
        # sys.exit with a string writes it to stderr and exits with status 1.
        # The two literals are joined with a space after the comma.
        sys.exit("m4b is only available for full sessions, "
                 "must have --entire-sessions selected")

    main(download_individual=args.individual,
         download_music=args.music,
         download_sessions=args.entire_sessions,
         download_priesthood=args.priesthood,
         download_young_women_meeting=args.young_womens,
         download_relief_society_meeting=args.relief_society,
         download_class=args.class_,
         dryrun=args.dry_run)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment