Skip to content

Instantly share code, notes, and snippets.

@lastk
Created September 7, 2010 13:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lastk/568312 to your computer and use it in GitHub Desktop.
Save lastk/568312 to your computer and use it in GitHub Desktop.
require 'open-uri'
require 'nokogiri'
require 'net/http'
# Script tested on 1.9.2-Head Ruby (Linux)
#
# Nokogiri gem install required
# Measures how many bytes of images, scripts and stylesheets a web page
# references, grouped by the host that serves each resource.
#
# Constructing an instance performs the whole run: the page is downloaded and
# parsed, every referenced resource is requested, the per-host totals are
# printed, and an HTML chart is generated from 'chartTemplate.html'.
class RubyGet
  # Page analysed when no URL is supplied.
  DEFAULT_URL = 'http://www.microsoft.com'.freeze

  # User-Agent header sent when downloading the page itself.
  USER_AGENT = 'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.99 Safari/533.4'.freeze

  # Tag name => key of the per-host counter that the tag's bytes are added to.
  SIZE_KEYS = { 'img' => :images, 'script' => :scripts, 'link' => :css }.freeze

  # Upper bound on simultaneous resource requests.
  MAX_CONCURRENT_REQUESTS = 8

  # Axis ticks used when there is nothing to plot.
  DEFAULT_TICKS = [0, 20, 40, 60, 80].freeze

  # Array of hashes shaped like { host:, images:, scripts:, css: }, one per
  # host; the three counters are byte totals taken from Content-Length headers.
  attr_reader :hostsInfo

  # Runs the complete analysis.
  #
  # * url - address of the page to analyse. nil or a blank string falls back
  #   to DEFAULT_URL; a value without a scheme gets "http://" prepended.
  #
  # Raises whatever the page download or the template read raises (for
  # example Errno::ENOENT when 'chartTemplate.html' is missing).
  def initialize(url)
    @url = normalize_url(url)
    puts "#{yellowText('initialized script with')} #{blueText(@url)}"
    @userAgent = USER_AGENT
    @doc = parse_page
    @hostsInfo = []
    @someURLURLs = []
    @scriptURLs = []
    @cssURLs = []
    # Hash order is insertion order, so tags are processed img, script, link.
    { 'img' => @someURLURLs, 'script' => @scriptURLs, 'link' => @cssURLs }.each do |element, urls|
      find_all_elements(element, urls)
      size_of_element(element, urls)
    end
    puts yellowText('Results found:')
    @hostsInfo.each { |info| puts info.inspect }
    puts 'Generating output file'
    generate_output_file
  end

  # Collects the resource URLs of every tag in @doc matching a CSS selector.
  #
  # * element - tag name / CSS selector ('img', 'script' or 'link')
  # * array   - array the URLs are appended to
  #
  # For 'link' the href attribute is used and only stylesheet links are kept;
  # for every other tag the src attribute is used. Missing or blank
  # attributes are skipped.
  def find_all_elements(element, array)
    is_link = element.eql?('link')
    attribute = is_link ? 'href' : 'src'
    @doc.css(element).each do |node|
      next if is_link && !stylesheet_link?(node)

      url = node[attribute]
      array << url unless url.nil? || url.strip.empty?
    end
  end

  # Requests every URL in the array and records the size of each response.
  #
  # * element - tag name the URLs were collected from
  # * array   - resource URLs, absolute or relative to the analysed page
  #
  # Requests run concurrently in batches of MAX_CONCURRENT_REQUESTS. A URL
  # that cannot be parsed or fetched is reported and skipped rather than
  # aborting the run. Returns the array it was given.
  def size_of_element(element, array)
    array.each_slice(MAX_CONCURRENT_REQUESTS) do |batch|
      workers = batch.map do |resource_url|
        Thread.new(resource_url) do |src|
          begin
            probe(src)
          rescue StandardError => e
            e # handed back to the main thread, which reports it
          end
        end
      end
      # Results are recorded on the main thread only, so @hostsInfo is never
      # touched by two threads at once.
      batch.zip(workers).each do |src, worker|
        outcome = worker.value
        if outcome.is_a?(StandardError)
          puts "[!!] Element #{element} #{redText(outcome.class)}#{outcome.message} for #{src}"
        elsif outcome
          host, response = outcome
          store_content_length(element, response, host)
        end
      end
    end
    array
  end

  # Adds one response to the totals of its host.
  #
  # * element  - tag name the resource came from
  # * response - object answering code, message and ['content-length']
  # * host     - host name the response was served by
  #
  # Only responses with code "200" are counted; anything else (including
  # redirects, which are not followed here) is just reported.
  def store_content_length(element, response, host)
    unless response.code == '200'
      puts "[!!] Element #{element} Code =#{redText(response.code)}#{response.message} for #{host}"
      return
    end

    puts "[*] Element #{element} Code =#{greenText(response.code)}#{response.message} for #{host}"
    host_info = @hostsInfo.detect { |info| info[:host].eql?(host) }
    if host_info.nil?
      # First resource seen for this host: start its counters at zero.
      host_info = { host: host, images: 0, scripts: 0, css: 0 }
      @hostsInfo << host_info
    end
    determine_content_type_and_store(host_info, response, element)
  end

  # Writes the chart page by copying the template and adding generated lines
  # after its marker lines.
  #
  # * template - path of the template file (default 'chartTemplate.html')
  # * output   - path of the file to write (default 'chart.html')
  #
  # A template line consisting solely of one of these markers is followed in
  # the output by generated content:
  #   //0  the images / scripts / css series, one value per host
  #   //1  the host names, in the same order as the series values
  #   //2  the opening of the jqplot call
  #   //3  the y-axis ticks
  #
  # Raises Errno::ENOENT when the template does not exist.
  def generate_output_file(template = 'chartTemplate.html', output = 'chart.html')
    rendered = File.readlines(template).flat_map do |line|
      marker = line.chomp
      [marker] + chart_lines_for(marker)
    end
    File.open(output, 'w') do |file|
      rendered.each { |line| file.puts line }
    end
  end

  # Adds the response's Content-Length to the counter matching the tag.
  #
  # * hostInfo - per-host hash with :images, :scripts and :css counters
  # * response - object answering ['content-length']
  # * element  - 'img', 'script' or 'link'; any other value changes nothing
  #
  # A missing Content-Length header counts as 0. Returns hostInfo.
  def determine_content_type_and_store(hostInfo, response, element)
    key = SIZE_KEYS[element]
    hostInfo[key] += response['content-length'].to_i if key
    hostInfo
  end

  # Wraps text in the ANSI escape sequence for green.
  def greenText(text)
    colorize(32, text)
  end

  # Wraps text in the ANSI escape sequence for red.
  def redText(text)
    colorize(31, text)
  end

  # Wraps text in the ANSI escape sequence for yellow.
  def yellowText(text)
    colorize(33, text)
  end

  # Wraps text in the ANSI escape sequence for blue.
  def blueText(text)
    colorize(34, text)
  end

  private

  # Returns the URL to analyse: DEFAULT_URL for nil/blank input, otherwise the
  # input with "http://" prepended when it carries no scheme.
  def normalize_url(url)
    candidate = url.to_s.strip
    return DEFAULT_URL if candidate.empty?

    candidate =~ %r{\A[a-z][a-z0-9+.\-]*://}i ? candidate : "http://#{candidate}"
  end

  # Downloads @url and returns the parsed Nokogiri document.
  def parse_page
    headers = { 'User-Agent' => @userAgent }
    # Kernel#open no longer accepts URLs on Ruby 3.0+; URI.open is the
    # supported entry point where it exists. Older rubies only have the
    # Kernel#open patch installed by open-uri.
    opener = URI.respond_to?(:open) ? URI.method(:open) : method(:open)
    # Block form so the downloaded stream is closed once it has been parsed.
    opener.call(@url, headers) { |io| Nokogiri::HTML(io) }
  end

  # True when a <link> node is a stylesheet, judged by type="text/css" or by a
  # rel attribute containing the token "stylesheet".
  def stylesheet_link?(node)
    return true if node['type'].eql?('text/css')

    node['rel'].to_s.downcase.split.include?('stylesheet')
  end

  # Resolves and fetches one resource. Returns [host, response], or nil when
  # the resource is not an http(s) URL.
  def probe(src)
    uri = resolve_resource(src)
    uri && [uri.host, fetch_response(uri)]
  end

  # Resolves src against the analysed page's URL. Returns nil for anything
  # that is not http or https (for example data: URIs).
  def resolve_resource(src)
    uri = URI.join(@url, src.strip)
    uri if uri.is_a?(URI::HTTP) # URI::HTTPS is a subclass of URI::HTTP
  end

  # Performs a GET for the given URI, honouring its scheme and port, and
  # returns the response.
  def fetch_response(uri)
    Net::HTTP.start(uri.host, uri.port, :use_ssl => uri.scheme == 'https') do |http|
      http.get(uri.request_uri)
    end
  end

  # Returns the lines to emit after a template line, or [] when the line is
  # not a marker.
  def chart_lines_for(marker)
    case marker
    when '//0'
      [:images, :scripts, :css].map { |key| "#{key} = #{series_literal(key)}" }
    when '//1'
      hosts = @hostsInfo.map { |info| "'#{info[:host]}'" }.join(', ')
      ["var tickers = [#{hosts}];"]
    when '//2'
      ["plot = $.jqplot('chart1', [images, scripts, css], {"]
    when '//3'
      ["yaxis:{ticks:#{axis_ticks.inspect}}"]
    else
      []
    end
  end

  # JavaScript array literal holding one counter for every host.
  def series_literal(key)
    "[#{@hostsInfo.map { |info| info[key] }.join(',')}]"
  end

  # Five evenly spaced ticks from 0 up to at least the largest per-host total.
  # NOTE(review): the per-host total (not the largest single series value) is
  # used so the axis is tall enough whether or not the template stacks the
  # series - confirm against chartTemplate.html.
  def axis_ticks
    peak = @hostsInfo.map { |info| info[:images] + info[:scripts] + info[:css] }.max.to_i
    return DEFAULT_TICKS.dup if peak.zero?

    step = (peak / 4.0).ceil
    (0..4).map { |index| index * step }
  end

  # Surrounds text (padded with one space on each side) with the given ANSI
  # colour code and a reset sequence.
  def colorize(code, text)
    "\e[#{code}m #{text} \e[0m"
  end
end
# Run the analysis only when this file is executed directly, so that loading
# it from another script does not start the analysis as a side effect.
# The first command-line argument is passed to RubyGet as the URL.
RubyGet.new(ARGV[0]) if __FILE__ == $PROGRAM_NAME
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment