Download all images from a web page via img src, using Nokogiri and Mechanize.
require 'nokogiri'
require 'open-uri'
require 'uri'
require 'mechanize'
require 'fileutils'

# inspired by: http://stackoverflow.com/a/7933585/281699
class Downloader
  # Resolve a possibly relative href against the page URL.
  def make_absolute(href, root)
    URI.parse(root).merge(URI.parse(href)).to_s
  end
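
  # For reference, URI#merge performs standard relative-reference resolution,
  # so relative hrefs come back absolute. A quick sketch with hypothetical
  # values:
  #   make_absolute('../img/cat.png', 'http://example.com/gallery/')
  #   # => "http://example.com/img/cat.png"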

  def get_images_from_url(url, destination = '~/Desktop/nest')
    begin
      webpage = Nokogiri::HTML(URI.open(url))
    rescue OpenURI::HTTPError => e
      # A missing page just means there is nothing to scrape;
      # re-raise anything other than a 404.
      raise unless e.message.start_with?('404')
      puts "Not Found"
      return
    end

    # Reuse a single Mechanize agent for all downloads and make sure the
    # destination directory exists before saving into it.
    agent = Mechanize.new
    FileUtils.mkdir_p(File.expand_path(destination))

    webpage.xpath('//img/@src').each do |src|
      uri = make_absolute(src.to_s, url)
      final_destination = File.join(File.expand_path(destination), File.basename(uri))
      begin
        next if File.exist?(final_destination) # skip files we already have
        puts "Getting: #{uri}"
        agent.get(uri).save(final_destination)
      rescue => e
        puts "The request for a page at #{uri} returned an error. #{e.message}"
        next
      end
    end
  end
end

URLS = [
  'http://example.com'
]

URLS.each do |url|
  Downloader.new.get_images_from_url(url)
end
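
# Usage sketch: the second argument overrides the default destination
# (~/Desktop/nest). The URL and path below are hypothetical:
#   Downloader.new.get_images_from_url('http://example.com/gallery', '~/Pictures/scraped')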