Skip to content

Instantly share code, notes, and snippets.

@deatheragetr
Created June 21, 2013 04:27
Show Gist options
  • Save deatheragetr/5828797 to your computer and use it in GitHub Desktop.
Save deatheragetr/5828797 to your computer and use it in GitHub Desktop.
crawler part 1
require 'nokogiri'
require 'open-uri'
require 'uri'
# A fetched web page together with the number of times a search term occurs
# in its markup.
#
# Every page that is constructed successfully is appended to a class-level
# registry (WebPage.all) so the pages can be ranked later with WebPage.sort.
class WebPage
  # Number of pages WebPage.sort keeps when no explicit limit is given.
  TOP_LIMIT = 10

  attr_reader :search_frequency, :url, :links, :search_term

  @@all = []
  @@top = []

  # Reorders the registry in place, most hits first, and remembers the
  # leading pages.
  #
  # @param limit [Integer] how many of the best-ranked pages to keep
  # @return [Array<WebPage>] the +limit+ pages with the highest frequency
  def self.sort(limit = TOP_LIMIT)
    # Negating the key gives a descending order, so "top" really means the
    # pages with the most hits.
    @@all.sort_by! { |page| -page.search_frequency }
    @@top = @@all.first(limit)
  end

  # @return [Array<WebPage>] every page created so far
  def self.all
    @@all
  end

  # @return [Array<WebPage>] the result of the most recent WebPage.sort call
  def self.top
    @@top
  end

  # Downloads and parses +url+, collects its links and counts the hits for
  # +search_term+. Errors raised while fetching or parsing propagate to the
  # caller, and the page is then not added to the registry.
  #
  # @param url [String] address of the page to fetch
  # @param search_term [String] text to count occurrences of
  def initialize(url, search_term)
    @url = url
    @search_term = search_term
    # URI.open (not Kernel#open) so a user-supplied string can never be
    # treated as a pipe command or local path; the block form closes the
    # handle once the document has been parsed.
    @webpage = URI.open(url) { |io| Nokogiri::HTML(io) }
    @links = Links.new(@webpage, @url).urls
    frequency_of(search_term)
    @@all << self
  end

  # Counts case-insensitive, non-overlapping occurrences of +search_term+ in
  # the serialized page and stores the result in #search_frequency.
  #
  # @param search_term [String] text to look for; it is not modified
  # @return [Integer] number of occurrences (0 for an empty term)
  def frequency_of(search_term)
    # Work on a lowercase copy: the caller's string may be frozen or reused.
    needle = search_term.to_s.downcase
    @search_frequency = needle.empty? ? 0 : @webpage.to_s.downcase.scan(needle).size
  end
end
# Collects the targets of every anchor (<a>) element of a parsed document as
# absolute URL strings.
class Links
  # @param webpage [#search] parsed document; +search('a')+ must return a
  #   collection of elements that respond to +attr('href')+
  # @param url [String] address the document came from, used as the base
  #   when resolving relative links
  def initialize(webpage, url)
    @doc = webpage
    @url = url
    searcher
    urls
  end

  # Finds and stores all anchor elements of the document.
  #
  # @return the collection returned by the document's +search('a')+
  def searcher
    @raw_urls = @doc.search('a')
  end

  # Absolute URLs of all anchors that carry a usable href.
  #
  # The list is built once and memoized, so repeated calls return the same
  # array instead of appending the links again. Anchors without an href, or
  # with one that cannot be parsed, are left out.
  #
  # @return [Array<String>] resolved URLs (empty when there are no anchors)
  def urls
    @urls ||= @raw_urls.map { |anchor| prepend_domain(anchor.attr('href')) }.compact
  end

  # Resolves +path+ against the page URL.
  #
  # Absolute URLs (http, https or any other scheme) are returned unchanged;
  # relative ones are merged with the base following the usual URI rules.
  #
  # @param path [String, nil] raw href value
  # @return [String, nil] absolute URL, or nil when +path+ is nil or invalid
  def prepend_domain(path)
    return nil if path.nil?

    URI.join(@url, path.to_s.strip).to_s
  rescue URI::Error
    # Malformed href (or unusable base): treat the link as absent.
    nil
  end
end
# Crawls each given address in turn and prints one line per page with the
# number of hits for the search term.
#
# @param urls [Array<String>] addresses to crawl (all arguments but the last)
# @param search_term [String] text to count on every page (last argument)
# @return [Array<String>] the addresses that were passed in
def search_term_frequencies(*urls, search_term)
  urls.each do |address|
    crawled = WebPage.new(address, search_term)
    hits = crawled.search_frequency
    puts "#{crawled.url}: +#{hits} hits"
  end
end
# Greeting printed once, when the script is loaded.
puts "Welcome. Search."
# Interactive prompt: asks for a search term and a space-separated list of
# URLs on standard input, then reports the term's frequency on each page via
# search_term_frequencies.
#
# Input is read from $stdin explicitly (Kernel#gets would read from files
# named on the command line), and end-of-input is treated as an empty answer
# instead of raising NoMethodError on nil.
def user_interface
  print "search term >> "
  search_term = $stdin.gets.to_s.strip
  print "search urls(seperated by spaces)>> "
  links = $stdin.gets.to_s.strip.split
  search_term_frequencies(*links, search_term)
end
# Script entry point: start the interactive prompt as soon as the file runs.
user_interface
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment