Skip to content

Instantly share code, notes, and snippets.

@ronan-mch
Last active December 21, 2015 11:17
Show Gist options
  • Save ronan-mch/879ddfb27bb58042cbbd to your computer and use it in GitHub Desktop.
Save ronan-mch/879ddfb27bb58042cbbd to your computer and use it in GitHub Desktop.
require 'nokogiri'
require 'net/http'
require 'colorize'
require 'open-uri'
require 'zlib'
# Walks a sitemap (plus any nested sitemaps it references) and issues an HTTP
# HEAD request for every listed URL. Every response other than 200, and every
# URL that cannot be requested at all, is appended to a log file.
class SitemapCheck
  # XML namespace of the <loc> elements in a sitemap document.
  SITEMAP_NS = { 'sm' => 'http://www.sitemaps.org/schemas/sitemap/0.9' }.freeze

  # @param basemap [String] location of the root sitemap to start from
  def initialize(basemap)
    @root_target = basemap
    @log = 'sitemap.log'
    # Start every run with an empty log file.
    File.write(@log, '')
    @sitemaps = []
  end

  # Checks the root sitemap, then every nested sitemap discovered on the way.
  def run
    check_map(@root_target)
    # check_map appends newly discovered sitemaps to @sitemaps while this loop
    # is running; Array#each picks up elements appended during iteration.
    @sitemaps.each { |map| check_map(map) }
  end

  # Parses one sitemap. <loc> entries that look like nested sitemaps are
  # queued for a later pass; every other entry is checked immediately.
  #
  # @param map [String] local path or http(s) URL of the sitemap
  def check_map(map)
    io = get_file_pointer(map)
    doc = begin
      Nokogiri::XML(io) { |config| config.strict }
    ensure
      # The document is fully parsed at this point; release the file handle.
      io.close
    end

    doc.xpath('//sm:loc', SITEMAP_NS).each do |loc|
      # Pretty-printed sitemaps may pad the URL with whitespace/newlines.
      targ = loc.text.strip
      if targ =~ /sitemap\d+\.xml/
        # Skip sitemaps that are already queued so a self-referencing index
        # cannot make the run loop forever.
        @sitemaps << targ unless @sitemaps.include?(targ)
      else
        # Page URLs are requested with a trailing slash, except targets
        # containing 'gz'. Sitemap URLs are deliberately left unmodified.
        targ << '/' unless targ.end_with?('/') || targ.include?('gz')
        check_target(targ)
      end
    end
  end

  # Sends a HEAD request to +targ+ and logs anything other than a 200.
  # Malformed URLs and network failures are logged as well, so one bad
  # entry does not abort the whole run.
  #
  # @param targ [String] absolute URL to check
  def check_target(targ)
    puts "parsing #{targ}".colorize(:yellow)
    url = URI.parse(targ)
    http = Net::HTTP.new(url.host, url.port)
    # Without this, https URLs would be spoken to in plain HTTP on port 443.
    http.use_ssl = url.scheme == 'https'
    # request_uri keeps the query string and falls back to '/' for an empty path.
    res = http.request_head(url.request_uri)
    return if res.code == '200'

    puts "Error #{res.code} parsing #{targ}".colorize(:red)
    log("#{res.code}: #{targ}\n")
  rescue StandardError => e
    puts "Error #{e.class} parsing #{targ}".colorize(:red)
    log("ERR: #{targ} (#{e.class}: #{e.message})\n")
  end

  # Appends +message+ verbatim to the log file.
  #
  # @param message [String] text to append (the caller supplies the newline)
  def log(message)
    File.open(@log, 'a') { |f| f << message }
  end

  # Opens a sitemap for reading. http(s) locations are first downloaded into
  # the current directory; locations containing '.gz' are decompressed
  # transparently.
  #
  # @param loc [String] local path or http(s) URL
  # @return [File, Zlib::GzipReader] an open reader; the caller must close it
  # @raise [RuntimeError] if a remote sitemap cannot be downloaded
  def get_file_pointer(loc)
    puts "opening #{loc}".colorize(:yellow)
    path = remote?(loc) ? download(loc) : loc
    if path.include?('.gz')
      ::Zlib::GzipReader.open(path)
    else
      File.open(path)
    end
  end

  private

  # True when +loc+ is an http or https URL rather than a local path.
  def remote?(loc)
    loc.start_with?('http://', 'https://')
  end

  # Downloads +url+ with wget into the current directory and returns the
  # name of the file that was written.
  def download(url)
    fname = url.split('/').last
    # Argument-list form of system: the URL originates from sitemap content
    # and must never be interpreted by a shell.
    ok = system('wget', '--no-verbose', url, '-O', fname)
    raise "wget failed to download #{url}" unless ok

    fname
  end
end
# Command-line entry point: check the sitemap named by the first argument,
# then print everything that was written to the error log.
# Guarded so that loading this file as a library does not start a check.
if __FILE__ == $PROGRAM_NAME
  target = ARGV.shift
  # Fail with a usage hint instead of a NoMethodError deep inside the checker.
  abort "Usage: #{$PROGRAM_NAME} SITEMAP_FILE" unless target

  checker = SitemapCheck.new(target)
  checker.run
  puts "Check complete - reading error log:".colorize(:green)
  # File.read opens, reads and closes the file in one call (no leaked handle).
  puts File.read('sitemap.log')
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment