Skip to content

Instantly share code, notes, and snippets.

@gr33n7007h
Created November 4, 2015 21:07
Show Gist options
  • Save gr33n7007h/144f1a933338157a41e7 to your computer and use it in GitHub Desktop.
Save gr33n7007h/144f1a933338157a41e7 to your computer and use it in GitHub Desktop.
#!/usr/bin/env ruby
require 'optparse'
require 'ostruct'
require 'uri'
# Typhoeus is the only non-stdlib dependency. When it (or one of its own
# dependencies) cannot be loaded, print an install hint instead of a raw
# LoadError backtrace and exit with a failure status.
begin
  require 'typhoeus'
rescue LoadError => e
  # LoadError messages normally read "cannot load such file -- NAME"; fall
  # back to LoadError#path, then to the gem we asked for, if they do not.
  missing = e.message[/--\s(.+)/, 1] || e.path || 'typhoeus'
  $stderr.puts "You don't seem to have the '#{missing}' gem installed."
  $stderr.puts "Install it using: `gem install #{missing}`"
  # A missing dependency is an error, so report a non-zero status.
  exit(1)
end
# Placeholder for console output handling. RobotWalker creates an instance in
# its constructor, but the class defines no behaviour yet.
class ConsoleLogger # TODO: implement
end
# Command-line option handling for the script.
class Options
  # Parses the recognised switches out of +args+ (consumed switches are
  # removed from +args+ in place).
  #
  # name - program name shown in the usage banner.
  # args - Array of command-line argument strings.
  #
  # Returns an OpenStruct with +url+ and +ua+ (nil when not supplied) and
  # +help+ (the OptionParser, which renders the usage text via #to_s).
  # Prints the usage text and exits on --help; prints the error plus usage
  # text and exits with status 1 on a parse error.
  def self.parse(name, args)
    settings = OpenStruct.new(url: nil, ua: nil)

    parser = OptionParser.new
    parser.banner = "Usage: #{name} [options]\n"
    parser.on("--url URL", "Target URL (www.example.com)") { |value| settings.url = value }
    parser.on("--user-agent UA", "Set a custom User-Agent (wrap user agent in quotes)") { |value| settings.ua = value }
    parser.on_tail("--help", "Show this help message.\n\n") do
      puts parser
      exit
    end

    begin
      parser.parse!(args)
    rescue OptionParser::ParseError => error
      puts "#{error}\n\n#{parser}"
      exit(1)
    end

    settings.help = parser
    settings
  end
end
# Fetches a site's robots.txt, collects every path named in a Disallow rule
# and requests each one, printing a colour-coded status line per path.
class RobotWalker
  # ANSI colour per HTTP status; any other status uses DEFAULT_COLOR.
  STATUS_COLORS = {
    200 => "0;32",
    301 => "0;34",
    302 => "0;33",
    404 => "0;31"
  }.freeze
  DEFAULT_COLOR = "0;36"
  NO_RESPONSE_COLOR = "0;35"

  # One Disallow rule per line: the directive name is case-insensitive, the
  # space after the colon is optional, and the value stops at whitespace or
  # at an inline "#" comment, so a trailing "\r" from CRLF files is never
  # captured. Empty rules ("Disallow:") do not match.
  DISALLOW_RULE = /^[ \t]*Disallow:[ \t]*([^\s#]+)/i

  # target_url - base URL of the site to probe (a trailing "/" is tolerated).
  # user_agent - optional User-Agent string sent with every request.
  def initialize(target_url, user_agent=nil)
    @url = target_url
    @user_agent = user_agent
    @paths = []
    @url_count = 0
    @logger = ConsoleLogger.new
    # NOTE: the Hydra option is spelled `max_concurrency`, e.g.
    # Typhoeus::Hydra.new(max_concurrency: 20); none is set here, so the
    # library default applies.
    @hydra = Typhoeus::Hydra.new
  end

  # Downloads robots.txt, then requests every disallowed path through the
  # hydra and prints a results table followed by a summary line.
  # Exits with status 1 when no Disallow paths could be obtained.
  def run
    # Start from a clean slate so calling #run twice does not double count.
    @paths = fetch_disallowed_paths
    @url_count = 0

    if @paths.empty?
      $stderr.puts "Could not find a robots file on server"
      exit(1)
    end

    puts "%-9s %-8s %s" % ["Time", "Code", "URL"]
    puts "-" * 70
    started = monotonic_now
    @paths.each { |path| @hydra.queue(build_request(target_for(path))) }
    @hydra.run
    puts "-" * 70
    elapsed = monotonic_now - started
    puts "\nFinished: scanned #{@url_count} in #{elapsed.round(2)} seconds."
  end

  private

  # Base URL without trailing slashes, so joins never produce "//".
  def base_url
    @url.sub(%r{/+\z}, "")
  end

  # Request options shared by all requests; adds the User-Agent header only
  # when one was supplied to the constructor.
  def request_options(extra = {})
    return extra unless @user_agent

    { headers: { "User-Agent" => @user_agent } }.merge(extra)
  end

  # Returns the unique Disallow paths from robots.txt, or [] when the file
  # could not be fetched successfully.
  def fetch_disallowed_paths
    response = Typhoeus.get("#{base_url}/robots.txt", request_options(followlocation: true))
    return [] unless response.success?

    response.body.to_s.scan(DISALLOW_RULE).flatten.uniq
  end

  # Absolute URL for a Disallow path, inserting the "/" a sloppy rule omitted.
  def target_for(path)
    path.start_with?("/") ? "#{base_url}#{path}" : "#{base_url}/#{path}"
  end

  # Builds a request whose completion handler counts and reports the result.
  # Redirects are deliberately not followed so 301/302 show up in the table.
  def build_request(target)
    request = Typhoeus::Request.new(target, request_options)
    request.on_complete do |response|
      @url_count += 1
      puts status_line(response, target)
    end
    request
  end

  # Formats one coloured result line; a status code of 0 means no HTTP
  # response was received.
  def status_line(response, target)
    return "\e[#{NO_RESPONSE_COLOR}mNo HTTP response for #{target}\e[0m" if response.code == 0

    color = STATUS_COLORS.fetch(response.code, DEFAULT_COLOR)
    "\e[#{color}m%-9s %-8s %s\e[0m" % [response.connect_time.to_f.round(4), response.code, target]
  end

  # Monotonic clock reading, immune to wall-clock adjustments.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end
end
# Script entry point: parse the command line, require a target URL, then
# walk the target's robots.txt.
options = Options.parse(__FILE__, ARGV)
if options.url.nil?
  puts "Target URL required\n\n"
  puts options.help
  exit(1)
end

# A nil URL has already exited above, so there is a single path from here.
# Only override Typhoeus' global User-Agent when one was actually supplied.
Typhoeus::Config.user_agent = options.ua if options.ua
RobotWalker.new(options.url, options.ua).run
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment