Skip to content

Instantly share code, notes, and snippets.

@travisbhartwell
Forked from smcquay/ldsconf.py
Created April 1, 2012 20:27
Show Gist options
  • Save travisbhartwell/2278448 to your computer and use it in GitHub Desktop.
Save travisbhartwell/2278448 to your computer and use it in GitHub Desktop.
Python Script to download October 2011 General Conference Archives
#!/usr/bin/python
"""
This is a handy script to download the media from General Conference for
your own use. Execute this script with the appropriate options to download the
files you wish.
The only dependency outside the Python standard library is BeautifulSoup.
Example invocation:
./ldsconf.py --individual 2012 4 audio-mp3
This will download mp3 files of the individual talks for the regular
four sessions for April 2012 General Conference.
"""
import sys
import urllib
import urlparse
import argparse
from BeautifulSoup import BeautifulSoup
# Values accepted for the positional ``class_`` command-line argument; the
# chosen value is matched against the ``class`` attribute of the download
# links on the conference page.
download_classes = (
    # video formats
    "video-360p",
    "video-720p",
    "video-1080p",
    "video-wmv",
    # audio formats
    "audio-mp3",
    "audio-m4b",
)
def _is_unwanted(tag, download_individual, download_music, download_sessions,
                 download_priesthood, download_young_women_meeting,
                 download_relief_society_meeting):
    """Return True when ``tag`` falls in a category the caller did not request.

    The category of a link is decided purely from its ancestors' ``class``
    and ``id`` attributes (via ``tag.findParents``).
    NOTE(review): the attribute values below reflect the page markup at the
    time this script was written -- confirm against the current page.
    """
    def has_ancestor(attrs):
        return bool(tag.findParents(attrs=attrs))

    # Don't download Young Women Meeting files if not wanted
    if not download_young_women_meeting and \
            has_ancestor({"class": "sessions", "id": "young-women"}):
        return True

    # Don't download Relief Society Meeting files if not wanted
    if not download_relief_society_meeting and \
            has_ancestor({"class": "sessions", "id": "relief-society"}):
        return True

    # Don't download Priesthood Session files if not wanted
    if not download_priesthood and \
            has_ancestor({"class": "sessions", "id": "priesthood"}):
        return True

    # Links under a "head-row" ancestor are treated as full-session files;
    # every other link is treated as an individual file.
    is_full_session = has_ancestor({"class": "head-row"})
    if is_full_session and not download_sessions:
        return True
    if not is_full_session and not download_individual:
        return True

    # Don't download musical number files if not wanted
    if not download_music and has_ancestor({"class": "music"}):
        return True

    return False


def main(download_individual=True,
         download_music=False,
         download_sessions=False,
         download_priesthood=False,
         download_young_women_meeting=False,
         download_relief_society_meeting=False,
         download_class=None,
         dryrun=True,
         conference_url='http://lds.org/general-conference/'
                        'sessions/2011/10?lang=eng'):
    """Download the media files linked from a conference session page.

    Fetches ``conference_url``, collects every ``<a>`` tag whose ``class``
    attribute matches ``download_class``, drops the links that belong to a
    category that was not requested, and saves each remaining file in the
    current working directory under the last path component of its URL.

    Parameters:
        download_individual: include links outside a "head-row" ancestor.
        download_music: include links under a "music" ancestor; requires
            ``download_individual``.
        download_sessions: include links under a "head-row" ancestor.
        download_priesthood: include the "priesthood" session.
        download_young_women_meeting: include the "young-women" session.
        download_relief_society_meeting: include the "relief-society"
            session.
        download_class: value of the ``class`` attribute to select links by.
        dryrun: when true (the default), only print what would be
            downloaded.
        conference_url: page to scan for download links.

    Exits with status 1 if music is requested without individual files.
    """
    if download_music and not download_individual:
        print("To download music, must download individual")
        sys.exit(1)

    # Close the HTTP handle once the document is parsed, even if parsing fails.
    page = urllib.urlopen(conference_url)
    try:
        document = BeautifulSoup(page)
    finally:
        page.close()

    for tag in document.findAll("a", attrs={"class": download_class}):
        # Tag.get is the public accessor; an anchor without an href has
        # nothing to download, so skip it rather than raising.
        href = tag.get("href")
        if not href:
            continue

        if _is_unwanted(tag,
                        download_individual,
                        download_music,
                        download_sessions,
                        download_priesthood,
                        download_young_women_meeting,
                        download_relief_society_meeting):
            continue

        # Resolve relative links against the page URL; absolute links are
        # returned unchanged by urljoin.
        href = urlparse.urljoin(conference_url, href)

        # Get an appropriate filename
        filename = urlparse.urlsplit(href).path.split("/")[-1]
        print("Downloading %s as %s" % (href, filename))
        if not dryrun:
            urllib.urlretrieve(href, filename)
# Command-line entry point: parse the options, validate them, build the
# conference page URL for the requested year/month and hand off to main().
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="LDS Conference Downloader")
    parser.add_argument('year', type=int,
                        help='Year of general conference sessions to download')
    parser.add_argument('month', type=int,
                        help='Month of general conference sessions to '
                             'download (4 or 10)')
    parser.add_argument('--dry-run', action='store_true',
                        help='Do not perform downloads')
    parser.add_argument('--music', action='store_true',
                        help="Download musical numbers")
    parser.add_argument('--priesthood', action='store_true',
                        help="Download Priesthood Session.")
    parser.add_argument('--young-womens', action='store_true',
                        help="Download Young Women's Session.")
    parser.add_argument('--relief-society', action='store_true',
                        help="Download Relief Society Session.")

    # Exactly one of --individual / --entire-sessions must be given.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument('--individual', action='store_true')
    group.add_argument('--entire-sessions', action='store_true',
                       help="Only get files for entire session, "
                            "not individual files.")

    parser.add_argument('class_', type=str, nargs='?',
                        choices=download_classes, default='audio-mp3',
                        help="Class of file to download "
                             "(default: %(default)s).")
    args = parser.parse_args()

    if args.month not in [4, 10]:
        print("conference is only in April or October")
        sys.exit(1)

    conference_url = 'http://lds.org/general-conference/' \
                     'sessions/%d/%02d?lang=eng' % (args.year, args.month)

    if not args.entire_sessions and args.class_ == 'audio-m4b':
        # The two literals are concatenated, so the space after the comma
        # has to be part of the first one.
        print("m4b is only available for full sessions, "
              "must have --entire-sessions selected")
        sys.exit(1)

    main(download_individual=args.individual,
         download_music=args.music,
         download_sessions=args.entire_sessions,
         download_priesthood=args.priesthood,
         download_young_women_meeting=args.young_womens,
         download_relief_society_meeting=args.relief_society,
         download_class=args.class_,
         dryrun=args.dry_run,
         conference_url=conference_url)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment