Skip to content

Instantly share code, notes, and snippets.

@benjamintanweihao
Last active August 29, 2015 13:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save benjamintanweihao/8913375 to your computer and use it in GitHub Desktop.
Save benjamintanweihao/8913375 to your computer and use it in GitHub Desktop.
# encoding: utf-8
require 'cgi'
require 'mechanize'
require 'open-uri'
require 'pathname'
require 'uri'
# Looks up Amazon product identifiers (ASINs) through a web search and
# resolves ASINs back to product titles via the Amazon product page.
class Scraper
  # Home pages of the search engines a query may be submitted to.
  SEARCH_ENGINES = ['http://www.google.com', 'http://www.bing.com/'].freeze

  # Host a result link must mention to be considered an Amazon link.
  AMAZON_HOST = 'www.amazon.com'

  # Captures the ten-character ASIN that directly follows "/dp/" or
  # "/gp/product/". The look-ahead rejects longer alphanumeric runs so that
  # only an exact ten-character identifier is accepted.
  ASIN_IN_PATH = %r{/(?:dp|gp/product)/([A-Z0-9]{10})(?![A-Za-z0-9])}

  # Matches the contents of the HTML <title> element, even when the title
  # is spread over several lines (hence the +m+ flag).
  TITLE_TAG = %r{<title>(.*?)</title>}m

  def initialize
    @agent = Mechanize.new
  end

  # Searches for the given game and returns the ASIN found in the first
  # result link that points at an Amazon product page.
  #
  # @param game_name_edition_platform [String] free-text description of the game
  # @return [String] the ASIN, or an empty string when no result link qualifies
  def scrape(game_name_edition_platform)
    page = get_page("Amazon.com Video Games #{game_name_edition_platform}")
    page.links.each do |link|
      asin = asin_from(link.href.to_s)
      return asin if asin
    end
    ''
  end

  # Fetches the Amazon product page for +asin+ and returns its title with
  # the "Amazon.com: " prefix and ": Video Games" suffix removed and HTML
  # entities decoded.
  #
  # @param asin [String] Amazon product identifier
  # @return [String] the cleaned title, or an empty string when the page has no <title>
  def product_name(asin)
    # Kernel#open no longer opens URLs on Ruby 3.0+; URI.open (open-uri) does,
    # and the block form closes the handle once the body has been read.
    html = URI.open("http://www.amazon.com/dp/#{asin}", &:read)
    title = html[TITLE_TAG, 1].to_s
    title = title.gsub('Amazon.com: ', '').gsub(': Video Games', '')
    CGI.unescapeHTML(title.strip)
  end

  private

  # Extracts the ASIN from a result link.
  #
  # @param href [String] the link target
  # @return [String, nil] the ASIN, or nil when the link is not an Amazon product link
  def asin_from(href)
    return nil unless href.include?(AMAZON_HOST)

    href[ASIN_IN_PATH, 1]
  end

  # Submits +query+ through the first form on the home page of a randomly
  # chosen search engine and returns the resulting page.
  def get_page(query)
    search_engine = SEARCH_ENGINES.sample
    @agent.get(search_engine).forms.first.tap { |form| form.q = query }.submit
  end
end
# Fetches related-game identifiers for a product from the
# yournextgame.com "getLinkedGames" endpoint.
class Recommender
  # Endpoint queried for linked games; the product id is passed as +isbn+.
  ENDPOINT = 'http://www.yournextgame.com/php/game/getLinkedGames.php'

  # Returns the text of every <isbn> element in the endpoint's XML response.
  #
  # @param asin [String, nil] product identifier to look up
  # @return [Array<String>] identifiers of the linked games; empty when
  #   +asin+ is nil or blank (no request is made in that case)
  def recommend(asin)
    id = asin.to_s.strip
    return [] if id.empty?

    # Escape the identifier so it cannot break out of the query string.
    url = "#{ENDPOINT}?isbn=#{URI.encode_www_form_component(id)}&loc=0"

    # Kernel#open no longer opens URLs on Ruby 3.0+; URI.open (open-uri) does,
    # and the block form closes the handle once the document is parsed.
    document = URI.open(url) { |io| Nokogiri.XML(io) }
    document.xpath('//isbn').map(&:text)
  end
end
# Driver: for every tab-separated line of products.txt
# (product id, product name, platform) look up the product's ASIN and print
# the names of the games recommended for it.
scraper = Scraper.new
recommender = Recommender.new

# File.foreach streams the file line by line and closes it when done,
# instead of leaving an open handle behind.
File.foreach("products.txt") do |line|
  # String#encode is a no-op when the line is already tagged UTF-8, so invalid
  # bytes would survive it and make the regexp split raise; scrub replaces them.
  line = line.encode('UTF-8', :invalid => :replace).scrub
  # chomp keeps the trailing newline out of the platform field.
  _product_id, product_name, platform = line.chomp.split(/\t/)
  # Skip blank or malformed lines that carry no product name.
  next if product_name.nil? || product_name.strip.empty?

  puts product_name
  asin = scraper.scrape("#{product_name} #{platform}")
  puts "Recommendations: "
  recommender.recommend(asin).each do |recommended_asin|
    puts scraper.product_name(recommended_asin)
  end
  puts "-----------------"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment