Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jtprince/394967 to your computer and use it in GitHub Desktop.
Save jtprince/394967 to your computer and use it in GitHub Desktop.
require 'mechanize'
# Finds General Conference download pages on lds.org and extracts the MP3
# links listed on them.
#
# The table-of-contents page is fetched once, when the object is created, and
# indexed by conference in #conf_hash.
class LDSGeneralConferenceURLFinder
  # Month names that appear in conference link text => month number.
  MONTH_TO_NUM = {
    'April' => 4,
    'October' => 10,
  }
  LDS_ORG = "http://www.lds.org"
  TOC_URL = "http://www.lds.org/conference/display/0,5234,23-1,00.html"
  # Default href patterns that #mp3_links filters out, keyed by a short name
  # for the category of recording being excluded.
  REJECT = {
    complete_session: /-general-session|Complete_GeneralYoungWomen|Complete.*Session|Complete.*Meeting|Complete_.*ReliefSociety|6000-general-young-women-meeting|3000-priesthood-session|[1-6]0_000\.mp3$/,
    highlights: /general-conference-highlights|Complete_ConferenceHighlights/,
    auditing: /auditing/i,
    statistics: /statistical/i,
    sustaining: /sustaining-?of-?church-?officers/i,
  }
  # Matches a conference label such as 'April 2010'; captures the month name
  # and the four-digit year.
  CONFERENCE_LABEL = /\A(#{Regexp.union(MONTH_TO_NUM.keys).source})\s+(\d{4})(?:\s|\z)/

  # a hash in the format [month_num, year] => url, e.g. [4, 2010] => url
  attr_accessor :conf_hash

  def initialize
    @agent = Mechanize.new
    @conf_hash = get_conf_hash
  end

  # Returns a hash of all conference download pages found on the page at
  # +base+. Keys are [month_num, year] integer pairs (e.g. [10, 2009]) and
  # values are full urls.
  #
  # The text after the last ", " in each link is read as the conference label
  # ('April YYYY' or 'October YYYY'). Links whose label does not have that
  # shape, or that carry no href, are skipped rather than aborting the scan.
  # Relative hrefs are prefixed with +lds_org_base+; hrefs that already carry
  # a scheme are kept unchanged.
  def get_conf_hash(base=TOC_URL, lds_org_base=LDS_ORG)
    page = @agent.get(base)
    page.links.each_with_object({}) do |link, hash|
      label = link.text.to_s.strip.split(", ").last # eg 'April 2010'
      match = CONFERENCE_LABEL.match(label.to_s)
      href = link.href
      next unless match && href

      key = [MONTH_TO_NUM[match[1]], match[2].to_i]
      hash[key] = absolute_url(href, lds_org_base)
    end
  end

  # Takes the month number (4 or 10) and the year and returns the list of
  # hrefs ending in '.mp3' on that conference's download page. month_num and
  # year may each be a string or an integer.
  #
  # reject is an array of regexps; any href matching one of them is left out.
  #
  # Raises ArgumentError when #conf_hash has no url for the conference.
  def mp3_links(month_num, year, reject=REJECT.values)
    url = @conf_hash[[Integer(month_num), Integer(year)]]
    raise ArgumentError, "Can't find url for: [#{month_num},#{year}]" if url.nil? || url.empty?

    hrefs = @agent.get(url).links.map { |link| link.href }.compact
    hrefs.select do |href|
      href =~ /\.mp3$/ && reject.none? { |pattern| href =~ pattern }
    end
  end

  private

  # Returns +href+ unchanged when it already starts with a scheme
  # ('http://...'), otherwise +base+ followed by +href+.
  def absolute_url(href, base)
    href =~ %r{\A[a-z][a-z0-9+.-]*://}i ? href : base + href
  end
end
###### Example usage ######
require 'fileutils'
# For every April and October conference from 2006 up to the current date,
# make a 'YYYY-MM' directory and write a links.txt inside it holding one
# "wget '<url>'" line per mp3 link.
finder = LDSGeneralConferenceURLFinder.new
today = Time.now
conferences = (2006..today.year).to_a.product([4, 10])
conferences.each do |year, month|
  # In the current year, skip a conference whose month has not been reached.
  next unless year != today.year || today.month >= month

  folder = format("%d-%0.2d", year, month)
  FileUtils.mkdir(folder) unless File.exist?(folder)
  Dir.chdir(folder) do
    File.open("links.txt", 'w') do |script|
      finder.mp3_links(month, year).each { |url| script.puts "wget '#{url}'" }
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment