# arvind02/img_url.rb

## img_url.rb
require 'net/http'
require 'nokogiri'

# This script parses HTML documents and downloads the image files they reference.
# Fetches +url+ over HTTP or HTTPS and returns the response body.
#
# A redirect is reported on stderr but not followed: the body of the
# redirect response itself is returned.
#
# @param url [String] absolute http:// or https:// URL of the page
# @return [String] body of the 2xx or 3xx response
# @raise whatever Net::HTTPResponse#value raises for any other status
def get_html(url)
  uri = URI(url)
  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    # request_uri keeps the query string and is "/" for a bare host,
    # whereas uri.path drops the query and is "" in that case.
    resp = http.get(uri.request_uri)
    case resp
    when Net::HTTPSuccess
      resp.body
    when Net::HTTPRedirection
      # The redirect target is carried in the Location header.
      warn "redirect to #{resp['location']}"
      resp.body
    else
      resp.value
    end
  end
end

# Extracts the distinct image sources from an HTML document.
#
# @param html [String] HTML markup to scan
# @return [Array<String>] whitespace-stripped +src+ values of every
#   <img> element that has a +src+ attribute, duplicates removed
# @raise [RuntimeError] when the document has no such element
def parse_html(html)
  images = Nokogiri::HTML(html).xpath("//img[@src]")
  raise "No <img .../> tags!" if images.empty?

  images.map { |image| image.attr('src').strip }.uniq
end

# Downloads every entry of +paths+ from the host of +url+ into ./images.
#
# Each path is requested as-is from the page's host over one shared
# connection and saved under the last "/"-separated segment of the path.
# A failed download is reported and the loop moves on to the next path.
#
# @param url [String] page URL; supplies host, port and scheme
# @param paths [Array<String>] image paths as found in the page
def downloader(url, paths)
  host_uri = URI(url)
  # Dir.mkdir raises Errno::EEXIST when the directory is already there,
  # so only create it when it is missing.
  Dir.mkdir('images') unless Dir.exist?('images')
  Net::HTTP.start(host_uri.host, host_uri.port,
                  use_ssl: host_uri.scheme == 'https') do |http|
    paths.each do |path|
      puts "Downloading: " + path
      begin
        resp = http.get(path)
        if resp.is_a?(Net::HTTPSuccess)
          # File.open rather than Kernel#open: the name comes from page content.
          File.open(File.join('images', path.split('/').last), 'wb') do |file|
            file.write(resp.body)
          end
          puts "--100%--"
        else
          # Do not store an error page as if it were the image.
          test_response(resp)
        end
      rescue StandardError => e
        warn "Failed: #{path} (#{e.class}: #{e.message})"
      end
    end
  end
end

# Prints a one-word classification of an HTTP response to stdout.
#
# @param resp [Object] usually a Net::HTTPResponse; anything that matches
#   none of the four response families is reported as UNKNOWN
# @return [nil]
def test_response(resp)
  # Checked in this order; the first matching family wins.
  categories = {
    Net::HTTPServerError => 'HTTPServerError',
    Net::HTTPClientError => 'HTTPClientError',
    Net::HTTPRedirection => 'HTTPRedirection',
    Net::HTTPSuccess     => 'OK'
  }
  match = categories.find { |klass, _label| resp.is_a?(klass) }
  puts(match ? match.last : 'UNKNOWN')
end

## Entry point: fetch the page, collect its image paths, then download them all.
URL = 'https://www.evault.com/support/customer_login.html'
downloader(URL, parse_html(get_html(URL)))
	require 'net/http'
	require 'nokogiri'

	# This script parses HTML documents and downloads the image files they reference.
	# Fetches +url+ over HTTP or HTTPS and returns the response body.
	#
	# A redirect is reported on stderr but not followed: the body of the
	# redirect response itself is returned.
	#
	# @param url [String] absolute http:// or https:// URL of the page
	# @return [String] body of the 2xx or 3xx response
	# @raise whatever Net::HTTPResponse#value raises for any other status
	def get_html(url)
	  uri = URI(url)
	  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
	    # request_uri keeps the query string and is "/" for a bare host,
	    # whereas uri.path drops the query and is "" in that case.
	    resp = http.get(uri.request_uri)
	    case resp
	    when Net::HTTPSuccess
	      resp.body
	    when Net::HTTPRedirection
	      # The redirect target is carried in the Location header.
	      warn "redirect to #{resp['location']}"
	      resp.body
	    else
	      resp.value
	    end
	  end
	end

	# Extracts the distinct image sources from an HTML document.
	#
	# @param html [String] HTML markup to scan
	# @return [Array<String>] whitespace-stripped +src+ values of every
	#   <img> element that has a +src+ attribute, duplicates removed
	# @raise [RuntimeError] when the document has no such element
	def parse_html(html)
	  images = Nokogiri::HTML(html).xpath("//img[@src]")
	  raise "No <img .../> tags!" if images.empty?

	  images.map { |image| image.attr('src').strip }.uniq
	end

	# Downloads every entry of +paths+ from the host of +url+ into ./images.
	#
	# Each path is requested as-is from the page's host over one shared
	# connection and saved under the last "/"-separated segment of the path.
	# A failed download is reported and the loop moves on to the next path.
	#
	# @param url [String] page URL; supplies host, port and scheme
	# @param paths [Array<String>] image paths as found in the page
	def downloader(url, paths)
	  host_uri = URI(url)
	  # Dir.mkdir raises Errno::EEXIST when the directory is already there,
	  # so only create it when it is missing.
	  Dir.mkdir('images') unless Dir.exist?('images')
	  Net::HTTP.start(host_uri.host, host_uri.port,
	                  use_ssl: host_uri.scheme == 'https') do |http|
	    paths.each do |path|
	      puts "Downloading: " + path
	      begin
	        resp = http.get(path)
	        if resp.is_a?(Net::HTTPSuccess)
	          # File.open rather than Kernel#open: the name comes from page content.
	          File.open(File.join('images', path.split('/').last), 'wb') do |file|
	            file.write(resp.body)
	          end
	          puts "--100%--"
	        else
	          # Do not store an error page as if it were the image.
	          test_response(resp)
	        end
	      rescue StandardError => e
	        warn "Failed: #{path} (#{e.class}: #{e.message})"
	      end
	    end
	  end
	end

	# Prints a one-word classification of an HTTP response to stdout.
	#
	# @param resp [Object] usually a Net::HTTPResponse; anything that matches
	#   none of the four response families is reported as UNKNOWN
	# @return [nil]
	def test_response(resp)
	  # Checked in this order; the first matching family wins.
	  categories = {
	    Net::HTTPServerError => 'HTTPServerError',
	    Net::HTTPClientError => 'HTTPClientError',
	    Net::HTTPRedirection => 'HTTPRedirection',
	    Net::HTTPSuccess     => 'OK'
	  }
	  match = categories.find { |klass, _label| resp.is_a?(klass) }
	  puts(match ? match.last : 'UNKNOWN')
	end

	## Entry point: fetch the page, collect its image paths, then download them all.
	URL = 'https://www.evault.com/support/customer_login.html'
	downloader(URL, parse_html(get_html(URL)))