Skip to content

Instantly share code, notes, and snippets.

@daveshah
Last active January 1, 2016 13:09
Show Gist options
  • Save daveshah/8148977 to your computer and use it in GitHub Desktop.
Save daveshah/8148977 to your computer and use it in GitHub Desktop.
Crawling the clinic site for all our CSS, yo'
require 'anemone'
require 'nokogiri'
require 'set'
# Crawl entry point, and the stylesheet path fragment that add_links_from filters out.
@url          = 'http://my.clevelandclinic.org/default.aspx'
@moss_garbage = 'layouts/1033/styles'

# Values already reported, so each one is printed and written only once.
@css_link_set   = Set[]
@flash_link_set = Set[]

# Report files, opened read/write and truncated ("w+") in the current directory.
@css_out_file   = File.open('css_output.txt', 'w+')
@flash_out_file = File.open('flash_output.txt', 'w+')
@snips_out_file = File.open('snips_output.txt', 'w+')
# Collects stylesheet references from the <link href="..."> elements of a page.
#
# An href is reported when it contains ".css" and does not contain the
# @moss_garbage fragment. Each distinct href is printed to stdout and appended
# to @css_out_file the first time it is added to @css_link_set.
#
# @param page [#body] crawled page whose body is parsed as HTML by Nokogiri
# @return the collection of <link> nodes that was iterated
def add_links_from(page)
  link_nodes = Nokogiri::HTML(page.body).xpath('//link[@href]')

  link_nodes.each do |node|
    target = node["href"]
    next unless target.include?(".css")
    next if target.include?(@moss_garbage)
    # Set#add? yields nil when the href has been recorded before.
    next unless @css_link_set.add?(target)

    puts "CSS used = #{target}"
    @css_out_file.puts target
  end
end
# Records the page's URL when its markup contains at least one <object> element.
#
# Any <object> element counts; the code does not inspect its type. A URL is
# printed to stdout and appended to @flash_out_file only the first time it is
# added to @flash_link_set.
#
# @param page [#body, #url] crawled page; body is parsed as HTML by Nokogiri
# @return [void]
def add_pages_with_flash_from(page)
  # The report is per page, so one presence check is enough no matter how
  # many <object> nodes the page holds.
  return if Nokogiri::HTML(page.body).xpath('//object').empty?

  url = page.url
  # Set#add? returns nil when the URL has already been recorded.
  return unless @flash_link_set.add?(url)

  puts "Flash found on #{url}"
  @flash_out_file.puts url
end
# Writes a BEGIN/END delimited section for the page to @snips_out_file,
# containing every HTML comment whose text includes "Snippets". Each matching
# comment is also echoed to stdout together with the page URL.
#
# The delimiters are written for every page, whether or not it has matches.
#
# @param page [#body, #url] crawled page; body is parsed as HTML by Nokogiri
def look_for_snips_on(page)
  @snips_out_file.puts "------ BEGIN #{page.url} ------ "

  all_comments = Nokogiri::HTML(page.body).xpath('//comment()')
  snippet_comments = all_comments.select { |node| node.to_s.include?("Snippets") }
  snippet_comments.each do |node|
    puts "#{node} on page #{page.url}"
    @snips_out_file.puts node
  end

  @snips_out_file.puts "------- END #{page.url} ------ \n"
end
# Crawls +site+ with Anemone and runs the three page scanners, in order, on
# every page the crawler hands back: stylesheet links, <object> detection,
# then "Snippets" comments.
#
# @param site [String] URL at which the crawl starts
def crawl(site)
  scan_page = proc do |page|
    add_links_from(page)
    add_pages_with_flash_from(page)
    look_for_snips_on(page)
  end

  Anemone.crawl(site) do |anemone|
    anemone.on_every_page(&scan_page)
  end
end
# Script entry point: crawl the configured site and report the elapsed
# wall-clock time in seconds.
start_time = Time.now
begin
  crawl(@url)
ensure
  # The report files are opened at load time and never closed elsewhere, so
  # close them here, even if the crawl raises, instead of leaving it to
  # interpreter shutdown.
  [@css_out_file, @flash_out_file, @snips_out_file].each do |report|
    report.close if report && !report.closed?
  end
end
end_time = Time.now
puts "completed in #{end_time - start_time}"
@daveshah
Copy link
Author

daveshah commented Jan 2, 2014

Good gravy this thing is getting ugly...

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment