Skip to content

Instantly share code, notes, and snippets.

@koseki
Created February 22, 2009 19:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save koseki/68571 to your computer and use it in GitHub Desktop.
Save koseki/68571 to your computer and use it in GitHub Desktop.
access Amazon A2S using EventMachine
require 'uri'
require 'rexml/document'
require 'open-uri'

# Thin helper around the Amazon A2S (AWSECommerceService) REST endpoint:
# builds request URIs, fetches them, and extracts a few common fields from
# the XML response.
class A2S
  # Parameters merged into every request. Entries whose value is nil (or
  # otherwise stringifies to "") are left out of the generated URI, so this
  # hash doubles as the list of known parameter names and their order.
  DEFAULT_PARAMS = {
    :AWSAccessKeyId => "XXXXXXXXXXXXXXXXXXXX",
    :AssociateTag => nil,
    :Operation => "ItemSearch",
    :Version => "2008-08-19",
    :ItemPage => 1,
    # http://docs.amazonwebservices.com/AWSECommerceService/2008-08-19/DG/index.html?ItemSearch.html
    :Actor => nil,
    :Artist => nil,
    :AudienceRating => nil,
    :Author => nil,
    :Availability => nil,
    :Brand => nil,
    :BrowseNode => nil,
    :City => nil,
    :Composer => nil,
    :Condition => nil,
    :Conductor => nil,
    :Director => nil,
    :Keywords => nil,
    :Manufacturer => nil,
    :MaximumPrice => nil,
    :MerchantId => nil,
    :MinimumPrice => nil,
    :MusicLabel => nil,
    :Neighborhood => nil,
    :Orchestra => nil,
    :PostalCode => nil,
    :Power => nil,
    :Publisher => nil,
    :RelatedItemsPage => nil,
    :RelationshipType => nil,
    :ResponseGroup => nil,
    :ReviewSort => nil,
    :SearchIndex => nil,
    :Sort => nil,
    :TagPage => nil,
    :TagsPerPage => nil,
    :TagSort => nil,
    :TextStream => nil,
    :Title => nil,
    :VariationPage => nil,
    # http://docs.amazonwebservices.com/AWSECommerceService/2008-08-19/DG/index.html?ItemLookup.html
    :ItemId => nil,
    :IdType => nil,
  }

  URI_BASE = "http://ecs.amazonaws.jp/onca/xml?Service=AWSECommerceService"

  # Builds the request URI for the given parameters.
  #
  # @param params [Hash] overrides/additions for DEFAULT_PARAMS
  # @return [String] URI_BASE followed by one "&name=value" pair for every
  #   parameter whose value is not blank, in DEFAULT_PARAMS order (keys not
  #   present in DEFAULT_PARAMS come last)
  def self.create_uri(params = {})
    present = DEFAULT_PARAMS.merge(params).reject { |_name, value| value.to_s.empty? }
    query = present.map { |name, value| "&#{name}=#{escape_value(value.to_s)}" }
    URI_BASE + query.join
  end

  # Parses a response body.
  #
  # @param src [String, IO] XML source accepted by REXML
  # @return [REXML::Document]
  def self.parse(src)
    REXML::Document.new(src)
  end

  # Performs the request synchronously and returns the parsed response.
  #
  # @param params [Hash] see create_uri
  # @return [REXML::Document]
  def self.get(params)
    # URI.open rather than Kernel#open: open-uri stopped hooking Kernel#open
    # in Ruby 3.0.
    parse(URI.open(create_uri(params), &:read))
  end

  # @param doc [REXML::Document] parsed response
  # @return [Array<REXML::Text>] text nodes of every Items/Item/ASIN element
  def self.asins(doc)
    REXML::XPath.match(doc, "//Items/Item/ASIN/text()")
  end

  # @param doc [REXML::Document] parsed response
  # @return [Array<REXML::Text>] text nodes of every item's
  #   ItemAttributes/Title element
  def self.titles(doc)
    REXML::XPath.match(doc, "//Items/Item/ItemAttributes/Title/text()")
  end

  # @param doc [REXML::Document] parsed response
  # @return [Array<REXML::Text>] text nodes of every Items/Item/DetailPageURL
  #   element
  def self.detail_page_urls(doc)
    REXML::XPath.match(doc, "//Items/Item/DetailPageURL/text()")
  end

  # Percent-encodes one query value. URI.escape (removed in Ruby 3.0) left
  # "&", "=" and "+" untouched, which let a value corrupt the query string.
  # encode_www_form_component encodes a space as "+", so it is rewritten to
  # "%20"; a literal "+" in the input has already become "%2B" at that point.
  def self.escape_value(text)
    URI.encode_www_form_component(text).gsub("+", "%20")
  end
  private_class_method :escape_value
end
# Demo when this file is executed directly: search Books for "Ruby" and
# print the detail page URL of each hit, one per line.
if $0 == __FILE__
  response = A2S.get(:SearchIndex => "Books", :Keywords => "Ruby")
  page_urls = A2S.detail_page_urls(response)
  puts page_urls.join("\n")
end
#! /usr/bin/env ruby
require 'rubygems'
require 'eventmachine'
require 'a2s'
require 'uri'
require 'csv'
# Exactly two arguments are required: the input CSV and the output file.
# A usage error goes to stderr with a non-zero status so that a calling
# shell or script can tell the run did nothing.
unless ARGV.length == 2
  warn "Usage: #{$0} input.csv output.(csv|html)"
  exit 1
end

# Number of A2SClient instances started inside the reactor.
CONCURRENCY = 2

# Zero-based column positions in the input CSV. The script prepends a row
# number to every row before handing it to A2SClient, which is why the
# client reads these columns at index + 1.
PUBLISHER_COLUMN = 0
TITLE_COLUMN = 3
AUTHOR_COLUMN = 2
# Worker that takes rows off a shared array one at a time, searches A2S for
# each row's title (plus publisher/author when present) and appends the
# result to the shared output. Several instances run side by side in one
# EventMachine reactor; the last one to finish stops the reactor.
class A2SClient
  require 'cgi'

  include EM::Protocols # makes HttpClient2 resolvable below

  # Shared by all instances: requests currently in flight, rows with at
  # least one hit, and rows with more than one hit.
  @@running = 0
  @@found = 0
  @@found_multi = 0

  # @param data [Array<Array>] shared work queue; each row is a CSV row with
  #   the row number prepended at index 0
  # @param out [#<<] output sink (a CSV writer or an IO for HTML)
  # @param format [String] "csv" selects CSV output, anything else HTML
  def initialize(data, out, format)
    @data = data
    @out = out
    @format = format
    @conn = nil
  end

  # Issues the request for the next queued row and re-invokes itself once
  # that request completes or fails. When the queue is empty it stops the
  # reactor, but only if no other client still has a request in flight.
  def request
    if @data.empty?
      EM.stop if @@running == 0
      return
    end

    data = @data.shift
    uri = URI.parse(A2S.create_uri(search_params(data)))
    # One connection per client, opened lazily and reused for later rows.
    @conn ||= HttpClient2.connect(uri.host, uri.port)
    req = @conn.get("#{uri.path}?#{uri.query}")
    @@running += 1

    req.callback do
      begin
        save_response(req, data)
      rescue => e
        # Best effort: report the row and keep going with the next one.
        puts "ERROR(#{data[0]}) can't parse. #{e} #{data[TITLE_COLUMN + 1]}"
      end
      finish_request
    end
    req.errback do
      puts "ERROR(#{data[0]}) can't get response."
      finish_request
    end
  end

  # Parses a completed response, writes it in the configured format and
  # updates the hit counters.
  #
  # @param req [#content] completed request; its content is the XML body
  # @param data [Array] the row the request was made for
  def save_response(req, data)
    doc = A2S.parse(req.content)
    uris = A2S.detail_page_urls(doc)
    if @format == "csv"
      # At most six URLs per row, written as decoded text rather than the
      # XML-escaped form REXML::Text#to_s would produce.
      @out << data + uris[0..5].map { |uri| plain_text(uri) }
    else
      save_html_entry(doc, data, uris)
    end
    puts "Saved(#{data[0]}) found:#{uris.length}"
    @@found += 1 if 0 < uris.length
    @@found_multi += 1 if 1 < uris.length
  end

  # Appends one HTML section for a row: a heading, the author/publisher
  # line and a list with one link per hit (or a "no entry" item). All
  # interpolated values are HTML-escaped.
  #
  # @param doc [REXML::Document] parsed response, used to look up titles
  # @param data [Array] the row being reported
  # @param uris [Array<REXML::Text>] detail page URLs found for the row
  def save_html_entry(doc, data, uris)
    line_no = h(data[0])
    @out << "<h2>#{line_no}. #{h(data[TITLE_COLUMN + 1])}</h2>\n"
    @out << "<p>#{h(data[AUTHOR_COLUMN + 1])} / #{h(data[PUBLISHER_COLUMN + 1])}</p><ul>\n"
    if uris.to_a.empty?
      @out << "<li id='line#{line_no}-0'>no entry</li>\n"
    else
      titles = A2S.titles(doc)
      uris.each_with_index do |uri, i|
        @out << "<li id='line#{line_no}-#{i}'><a href='#{h(uri)}'>#{h(titles[i])}</a></li>\n"
      end
    end
    @out << "</ul>\n\n"
  end

  # Prints the hit counters accumulated by all clients.
  def self.puts_stat
    puts "Found: #{@@found} / More than one: #{@@found_multi}"
  end

  private

  # Search parameters for one row; blank publisher/author cells are omitted.
  def search_params(data)
    params = {
      :SearchIndex => "Books",
      :Title => data[TITLE_COLUMN + 1],
    }
    publisher = data[PUBLISHER_COLUMN + 1]
    author = data[AUTHOR_COLUMN + 1]
    params[:Publisher] = publisher unless publisher.to_s.strip.empty?
    params[:Author] = author unless author.to_s.strip.empty?
    params
  end

  # Marks the current request as done and moves on to the next row.
  def finish_request
    @@running -= 1
    request
  end

  # Decoded string for a value: REXML::Text#value for XML text nodes (their
  # #to_s keeps entities such as "&amp;"), #to_s for everything else.
  def plain_text(value)
    value.respond_to?(:value) ? value.value : value.to_s
  end

  # HTML-escapes a value for use in element content or a quoted attribute.
  def h(value)
    CGI.escapeHTML(plain_text(value))
  end
end
require 'cgi'

# Script body: read the input CSV, run CONCURRENCY clients over its rows
# inside the EventMachine reactor, write the results as CSV or HTML
# (chosen by the output file's extension), then print timing and hit
# statistics.

# Monotonic clock so wall-clock adjustments cannot distort the elapsed time.
started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
input_path, output_path = ARGV

rows = CSV.read(input_path)
# Prepend a 1-based row number; A2SClient reports it as data[0].
rows.each_with_index { |row, index| row.unshift(index + 1) }

html = output_path.end_with?(".html")
format = html ? "html" : "csv"
# File.open instead of Kernel#open: the latter treats a path starting with
# "|" as a command to run.
out = html ? File.open(output_path, "w") : CSV.open(output_path, "w")
begin
  out << "<html><body><h1>#{CGI.escapeHTML(input_path)}</h1>\n" if html
  EM.run do
    CONCURRENCY.times do
      A2SClient.new(rows, out, format).request
    end
  end
  out << "</body></html>\n" if html
ensure
  # Close the output even if the reactor raised.
  out.close
end

puts ""
puts "Elapsed #{Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at} sec."
A2SClient.puts_stat
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment