# darkhelmet/download.rb

## download.rb
# For every page in main_links: fetch it, collect the targets of its
# javascript:OpenLink('...') anchors, then fetch each target and hand the src
# of the last <img> on it to download_image.
#
# Uses names defined outside this snippet: main_links, domain, decode_url and
# download_image (their exact contracts are not visible here).
main_links.each do |link|
  dir = link.split('.').first # 'name' or 'name-2'; passed on as the download directory
  begin
    gallery = Hpricot(open("#{domain}/#{link}"))
    hrefs = gallery.search('a').map { |anchor| anchor.attributes['href'].to_s }

    # Keep only real OpenLink anchors. An anchor with no href, or with some
    # other href, produces no capture and is dropped here; previously such
    # anchors passed the filter and reached decode_url with nil.
    candidates = hrefs.select do |href|
      argument = href[/javascript:OpenLink\((.*)\)/, 1]
      # A bare quoted 'word.word' argument is skipped, as before.
      argument && argument !~ /^\'\w+\.\w+\'$/
    end

    # Extract the quoted argument; OpenLink calls without a quoted argument
    # have nothing to decode and are discarded.
    encoded = candidates.map { |href| href[/javascript:OpenLink\(\'(.*)\'\)/, 1] }.compact
    links = encoded.map { |value| decode_url(value) }.uniq

    links.each do |target|
      begin
        detail = Hpricot(open(target))
        image = detail.search('img').last.attributes['src']
        # Scheme and host of the page just fetched ("http://host"). This must be
        # taken from the fetched URL: `link` is a bare "name.htm" with no slashes.
        host = target.split('/').first(3).join('/')
        download_image(dir, host, image)
      rescue Timeout::Error
        print "Timeout on #{link}...\n"
      rescue => e
        # Also reached when the page has no <img> at all (nil.attributes).
        print "Error on #{link}: {#{e}}...\n"
      end
    end
  rescue Timeout::Error
    print "Timeout on #{domain}/#{link}...\n"
  rescue
    print "Failed on #{domain}/#{link}...\n"
  end
end

## links-matching.rb
require 'hpricot'
require 'open-uri'

# Collect the href of every anchor on the page at `url` whose href is exactly
# of the form "name.htm". `url` must be defined before this snippet runs.
main_links = []

Hpricot(open(url)).search('a') do |a|
  # Anchors without an href yield nil; to_s keeps the match from raising
  # NoMethodError on them.
  href = a.attributes['href'].to_s
  # \A and \z anchor the whole string (^ and $ would match per line).
  main_links << href if href =~ /\A\w+\.htm\z/
end

## multiple-pages.rb
require 'socket'

# Report whether the page at +site+ looks available.
#
# Sends a minimal HTTP/1.1 GET over a raw TCP socket and treats the page as
# missing when the response contains the text "404 Message".
#
# site - absolute URL as a String (anything URI() accepts). The URI library
#        must already be loaded (open-uri does this).
#
# Returns true when the marker is absent, false when it is present or when the
# request fails (the failure is printed). Errors raised by URI() itself are not
# rescued.
def page_avail?(site)
  uri = URI(site)
  begin
    # Name the host actually being requested; add the port only when it is not
    # the scheme's default.
    host = uri.port == uri.default_port ? uri.host : "#{uri.host}:#{uri.port}"
    # request_uri yields "/" for an empty path and keeps any query string.
    # "Connection: close" makes the server end the response with EOF, so the
    # read below does not wait on a kept-alive connection.
    request = [
      "GET #{uri.request_uri} HTTP/1.1",
      "Host: #{host}",
      'Connection: close',
      '',
      ''
    ].join("\r\n")

    response = TCPSocket.open(uri.host, uri.port) do |socket|
      socket.write(request)
      socket.read
    end
    !response.include?('404 Message')
  rescue => e
    print "Error on #{site}: {#{e}}...\n"
    false # a predicate should answer false, not nil, when the probe fails
  end
end

# Probe numbered follow-up pages ("name-2.htm" .. "name-20.htm") for each link
# collected so far and append the ones that page_avail? accepts. Iterates over
# a copy because main_links grows while the probing runs.
main_links.dup.each do |base_link|
  name = base_link.split('.').first # "name.htm" -> "name"
  (2..20).each do |page_number|
    numbered_link = "#{name}-#{page_number}.htm"
    url = "#{domain}/#{numbered_link}"
    main_links.push(numbered_link) if page_avail?(url)
    sleep 2 # pause between probes
  end
end
	# For every page in main_links: fetch it, collect the targets of its
	# javascript:OpenLink('...') anchors, then fetch each target and hand the src
	# of the last <img> on it to download_image.
	#
	# Uses names defined outside this snippet: main_links, domain, decode_url and
	# download_image (their exact contracts are not visible here).
	main_links.each do |link|
	  dir = link.split('.').first # 'name' or 'name-2'; passed on as the download directory
	  begin
	    gallery = Hpricot(open("#{domain}/#{link}"))
	    hrefs = gallery.search('a').map { |anchor| anchor.attributes['href'].to_s }

	    # Keep only real OpenLink anchors. An anchor with no href, or with some
	    # other href, produces no capture and is dropped here rather than
	    # reaching decode_url with nil.
	    candidates = hrefs.select do |href|
	      argument = href[/javascript:OpenLink\((.*)\)/, 1]
	      # A bare quoted 'word.word' argument is skipped.
	      argument && argument !~ /^\'\w+\.\w+\'$/
	    end

	    # Extract the quoted argument; OpenLink calls without a quoted argument
	    # have nothing to decode and are discarded.
	    encoded = candidates.map { |href| href[/javascript:OpenLink\(\'(.*)\'\)/, 1] }.compact
	    links = encoded.map { |value| decode_url(value) }.uniq

	    links.each do |target|
	      begin
	        detail = Hpricot(open(target))
	        image = detail.search('img').last.attributes['src']
	        # Scheme and host of the page just fetched ("http://host"). This must be
	        # taken from the fetched URL: `link` is a bare "name.htm" with no slashes.
	        host = target.split('/').first(3).join('/')
	        download_image(dir, host, image)
	      rescue Timeout::Error
	        print "Timeout on #{link}...\n"
	      rescue => e
	        # Also reached when the page has no <img> at all (nil.attributes).
	        print "Error on #{link}: {#{e}}...\n"
	      end
	    end
	  rescue Timeout::Error
	    print "Timeout on #{domain}/#{link}...\n"
	  rescue
	    print "Failed on #{domain}/#{link}...\n"
	  end
	end
	require 'hpricot'
	require 'open-uri'

	# Collect the href of every anchor on the page at `url` whose href is exactly
	# of the form "name.htm". `url` must be defined before this snippet runs.
	main_links = []

	Hpricot(open(url)).search('a') do |a|
	  # Anchors without an href yield nil; to_s keeps the match from raising
	  # NoMethodError on them.
	  href = a.attributes['href'].to_s
	  # \A and \z anchor the whole string (^ and $ would match per line).
	  main_links << href if href =~ /\A\w+\.htm\z/
	end
	require 'socket'

	# Report whether the page at +site+ looks available.
	#
	# Sends a minimal HTTP/1.1 GET over a raw TCP socket and treats the page as
	# missing when the response contains the text "404 Message".
	#
	# site - absolute URL as a String (anything URI() accepts). The URI library
	#        must already be loaded (open-uri does this).
	#
	# Returns true when the marker is absent, false when it is present or when the
	# request fails (the failure is printed). Errors raised by URI() itself are not
	# rescued.
	def page_avail?(site)
	  uri = URI(site)
	  begin
	    # Name the host actually being requested; add the port only when it is not
	    # the scheme's default.
	    host = uri.port == uri.default_port ? uri.host : "#{uri.host}:#{uri.port}"
	    # request_uri yields "/" for an empty path and keeps any query string.
	    # "Connection: close" makes the server end the response with EOF, so the
	    # read below does not wait on a kept-alive connection.
	    request = [
	      "GET #{uri.request_uri} HTTP/1.1",
	      "Host: #{host}",
	      'Connection: close',
	      '',
	      ''
	    ].join("\r\n")

	    response = TCPSocket.open(uri.host, uri.port) do |socket|
	      socket.write(request)
	      socket.read
	    end
	    !response.include?('404 Message')
	  rescue => e
	    print "Error on #{site}: {#{e}}...\n"
	    false # a predicate should answer false, not nil, when the probe fails
	  end
	end

	# Probe numbered follow-up pages ("name-2.htm" .. "name-20.htm") for each link
	# collected so far and append the ones that page_avail? accepts. Iterates over
	# a copy because main_links grows while the probing runs.
	main_links.dup.each do |l|
	  name = l.split('.').first # links are of the form "name.htm", and I want the name part
	  2.upto(20) do |i|
	    url = "#{domain}/#{name}-#{i}.htm"
	    main_links << "#{name}-#{i}.htm" if page_avail?(url)
	    sleep 2 # pause between probes
	  end
	end