Skip to content

Instantly share code, notes, and snippets.

@tzudot
Created February 25, 2014 12:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tzudot/9207753 to your computer and use it in GitHub Desktop.
Save tzudot/9207753 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'nokogiri'
require 'open-uri'
require 'progressbar'
# Fetches the page at +url+ and collects the targets of every <a> element
# whose href ends in one of the given file extensions.
#
# @param url [String] absolute http(s) URL of the page to scan
# @param formats [Array<String>] file extensions to look for, with or
#   without a leading dot (e.g. 'mp3' or '.mp3')
# @return [Array<String>] matching link targets in document order; relative
#   hrefs are resolved against +url+ so that they can be fetched directly
def parse_url(url, formats)
  # Regexp.union escapes every extension; with an empty list it yields a
  # pattern that never matches, so no link is selected.
  extensions = Regexp.union(formats.map { |format| format.to_s.sub(/\A\./, '') })
  # Require a literal dot before the extension and anchor at the real end of
  # the string, so that e.g. "/notanmp3" is not mistaken for an audio file.
  media_link = /\.#{extensions}\z/

  # URI.open is used explicitly: since Ruby 3.0 Kernel#open no longer fetches
  # URLs even when open-uri is loaded.
  html = URI.open(url, 'User-Agent' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1;Trident/5.0)', &:read)
  page = Nokogiri.parse(html)

  urls = []
  page.css('a').each do |anchor|
    href = anchor['href']
    next unless href && media_link.match?(href)

    begin
      urls << URI.join(url, href).to_s
    rescue URI::Error
      # The href is not a parseable URI (e.g. it contains raw spaces);
      # hand it back untouched rather than dropping the link.
      urls << href
    end
  end

  puts %[Got #{urls.length} audio links.]
  urls
end
# Downloads every URL in +urls+ into +output_path+, skipping any whose file
# name already exists there, and prints progress to stdout.
#
# Each download is written to "<name>.part" first and renamed only after it
# completes, so a failed or interrupted transfer never leaves behind a file
# that a later run would skip as already downloaded.
#
# @param output_path [String] existing directory to save the files in
# @param urls [Array<String>] absolute URLs of the files to fetch
# @return [void]
def dowload_url_content(output_path, urls)
  count = 0
  length = urls.length

  urls.each do |uri|
    file_name = uri.split('/').last
    destination = File.join(output_path, file_name)

    if File.file?(destination)
      puts %[#{file_name}: file exists in destination directory.]
      next
    end

    # "http://host/a/b".split('/') => ["http:", "", "host", "a", "b"], so the
    # first three parts joined with "/" give the site root "http://host".
    referer = uri.split('/')[0..2].join('/')
    puts %[Downloading #{uri} (#{count + 1} of #{length})]

    pbar = nil
    options = {
      'User-Agent' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
      'Referer' => referer,
      # Called with the expected size; nil or 0 means it is unknown, in which
      # case no progress bar is shown.
      :content_length_proc => lambda { |total|
        if total && 0 < total
          pbar = ProgressBar.new("Saving...", total)
          pbar.file_transfer_mode
        end
      },
      :progress_proc => lambda { |transferred|
        pbar.set transferred if pbar
      }
    }

    partial = "#{destination}.part"
    begin
      # URI.open is used explicitly: since Ruby 3.0 Kernel#open no longer
      # fetches URLs even when open-uri is loaded.
      URI.open(uri, options) do |response|
        File.open(partial, 'wb') { |file| IO.copy_stream(response, file) }
      end
      File.rename(partial, destination)
    ensure
      # Still present only if the download or the rename failed.
      File.delete(partial) if File.exist?(partial)
    end

    count += 1
    puts %[Saved #{file_name}]
  end

  puts %[Downloaded #{count} file(s).]
end
# Script entry point.
# Usage: <script> PAGE_URL OUTPUT_DIR
# Scans PAGE_URL for links to audio files and saves them into OUTPUT_DIR,
# creating the directory (and any missing parents) when necessary.
if __FILE__ == $0
  require 'fileutils'

  media_file_formats = %w[mp3 ogg aac]
  page_url, output_dir = ARGV

  if page_url.nil? || output_dir.nil?
    # Both arguments are mandatory; a missing output directory would
    # otherwise surface as a TypeError from File.absolute_path(nil).
    puts 'Input url, followed by output directory.'
  elsif page_url !~ %r{\Ahttps?://}
    puts 'Enter a url starting with http or https.'
  else
    begin
      destination = File.absolute_path(output_dir)
      urls = parse_url(page_url, media_file_formats)
      FileUtils.mkdir_p(destination)
      dowload_url_content(destination, urls)
    rescue Interrupt
      # Ctrl-C: stop quietly instead of printing a backtrace.
      puts %[halting...]
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment