Skip to content

Instantly share code, notes, and snippets.

@wyldrodney
Created March 27, 2012 07:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wyldrodney/2213785 to your computer and use it in GitHub Desktop.
Save wyldrodney/2213785 to your computer and use it in GitHub Desktop.
module PageStatus
require 'net/http'
require 'net/https'
require 'net/ping/tcp'
require 'digest/sha2'
# Probes a single web address once, at construction time, and records the
# outcome.
#
# The probe first issues an HTTP(S) GET. If that request raises, it falls
# back to a TCP ping of the host so that "web server broken but host up" can
# still be reported as alive. An address that cannot be parsed, or that has
# no host part, is reported as not alive instead of raising from the
# constructor.
class PageProbe
  # User-Agent header sent with every probe request.
  USER_AGENT = "Mozilla/5.0 (compatible; SiteSaver/0.1; +http://sitesaver.ru/)".freeze

  # time     - seconds taken by the GET, rounded to 3 decimals (nil when the
  #            result came from the ping fallback or the address was unusable)
  # code     - HTTP status code as returned by the response object, or nil
  # message  - HTTP status message, or nil
  # raw_body - response body exactly as received, or nil
  # charset  - "charset" parameter of the response content type, or nil
  attr_reader :time, :code, :message, :raw_body, :charset

  # @param address [String] URL of the page to probe
  def initialize(address)
    @address = address
    get_status
  end

  # @return [Boolean] true when an HTTP response was received or the ping
  #   fallback succeeded, false otherwise
  def alive?
    @alive
  end

  private

  # Runs the probe and stores the result through #set_answer.
  def get_status
    @uri = parse_address
    # Without a host there is nothing to connect to or to ping.
    return set_answer(false) unless @uri && @uri.host

    http = Net::HTTP.new(@uri.host, @uri.port)
    http.use_ssl = true if @uri.scheme == 'https'
    begin
      started_at = monotonic_now
      response = http.get(request_target, "User-Agent" => USER_AGENT)
      elapsed = (monotonic_now - started_at).round(3)
      set_answer(response, elapsed)
    rescue StandardError
      # Any failure of the HTTP exchange degrades to a plain reachability check.
      ping(@uri.host)
    end
  end

  # Parses @address, returning nil instead of raising when it is not a URI.
  def parse_address
    URI(@address)
  rescue URI::InvalidURIError, ArgumentError
    nil
  end

  # Path plus query string to request. Falls back to "/" because Net::HTTP
  # rejects an empty request path (e.g. for "http://example.com").
  def request_target
    target = @uri.respond_to?(:request_uri) ? @uri.request_uri : @uri.path
    target.to_s.empty? ? '/' : target
  end

  # Monotonic timestamp in seconds; unaffected by wall-clock adjustments.
  def monotonic_now
    Process.clock_gettime(Process::CLOCK_MONOTONIC)
  end

  # TCP-pings +address+ and records the boolean result.
  # NOTE: service_check is a class-level setting on Net::Ping::TCP, so this
  # changes it for every other user of that class in the process.
  def ping(address)
    Net::Ping::TCP.service_check = true
    set_answer(Net::Ping::TCP.new(address).ping?)
  end

  # Stores the probe outcome.
  #
  # @param response [true, false, #code] a ping result (true/false) or an
  #   HTTP response object responding to code, message, body and type_params
  # @param time_delta [Float, nil] duration of the HTTP request in seconds
  def set_answer(response, time_delta = nil)
    @time = time_delta
    @code = @message = @raw_body = @charset = nil

    case response
    when true, false
      @alive = response
    else
      # Any HTTP response, whatever its status code, means the server answered.
      @alive = true
      @code = response.code
      @message = response.message
      @raw_body = response.body
      @charset = response.type_params["charset"]
    end
  end
end
# Normalises a probed page for comparison: converts the raw body to UTF-8,
# re-flows the markup so that tags start on their own lines, and exposes a
# SHA-256 checksum of the result.
#
# +page+ must respond to +raw_body+ and +charset+. When the page has no body,
# or the body cannot be converted to valid UTF-8, #body and #checksum are nil.
class PageAnalyze
  # Lower-cased charset spellings treated as UTF-8.
  UTF8_ALIASES = %w(utf-8 utf8).freeze
  # Lower-cased charset spellings treated as Windows-1251.
  CP1251_ALIASES = %w(cp1251 cp-1251 windows-1251).freeze

  # @return [String, nil] the UTF-8, line-broken body, or nil
  attr_reader :body

  # @param page [#raw_body, #charset] the probed page to analyse
  def initialize(page)
    @page = page
    @body = nil
    return unless @page.raw_body

    convert_to_utf8
    # Conversion leaves @body nil on failure; there is nothing to re-flow then.
    add_line_breaks if @body
  end

  # @return [String, nil] hex SHA-256 digest of #body, or nil without a body
  def checksum
    @body ? Digest::SHA256.hexdigest(@body) : nil
  end

  private

  # Maps the charset reported by the page to an encoding name for
  # String#encode. Matching is case-insensitive; a missing or blank charset
  # defaults to UTF-8, and unrecognised names are passed through unchanged.
  def detect_charset
    charset = @page.charset
    normalized = charset.to_s.strip.downcase
    return 'utf-8' if normalized.empty?

    case normalized
    when *UTF8_ALIASES then 'utf-8'
    when *CP1251_ALIASES then 'cp1251'
    else charset
    end
  end

  # Transcodes the raw body into @body, or sets @body to nil when the
  # charset is unknown or the bytes are not valid in the resulting encoding.
  def convert_to_utf8
    converted = @page.raw_body.encode('utf-8', detect_charset)
    # UTF-8 to UTF-8 only retags the bytes, so validity is checked explicitly.
    @body = converted.valid_encoding? ? converted : nil
  rescue StandardError
    @body = nil
  end

  # Rewrites @body so that every opening and closing tag starts a new line,
  # long runs are wrapped after 90-120 characters at ">" or whitespace, and
  # blank lines and leading whitespace are removed.
  def add_line_breaks
    buffer = ''.dup
    @body.each_line { |line| buffer << reflow(line) }
    @body = buffer
  end

  # Applies the line-breaking rules to a single line of markup.
  def reflow(line)
    line
      .gsub(/(?<line><[a-zA-Z]+)/, "\n\\k<line>")          # break before opening tags
      .gsub(/(?<line><\/[a-zA-Z]*>)/, "\n\\k<line>")       # break before closing tags
      .gsub(/(?<line>.{90,120}(>|\s|\t))/, "\\k<line>\n")  # wrap long runs
      .gsub(/\n{2,}/, "\n")                                # collapse blank lines
      .gsub(/^\s*/, "")                                    # drop leading whitespace
  end
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment