Created
February 22, 2009 19:06
-
-
Save koseki/68571 to your computer and use it in GitHub Desktop.
access Amazon A2S using EventMachine
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'uri'
require 'rexml/document'
require 'open-uri'

# Small helper around the Amazon A2S REST endpoint: builds request URIs,
# fetches them, and pulls a few common fields out of the XML response.
class A2S
  # Parameters sent with every request. Entries whose value is nil (or
  # otherwise stringifies to "") are left out of the generated URI.
  # Intentionally not frozen: the placeholder access key can be replaced
  # at runtime by assigning into this hash.
  DEFAULT_PARAMS = {
    :AWSAccessKeyId => "XXXXXXXXXXXXXXXXXXXX",
    :AssociateTag => nil,
    :Operation => "ItemSearch",
    :Version => "2008-08-19",
    :ItemPage => 1,

    # http://docs.amazonwebservices.com/AWSECommerceService/2008-08-19/DG/index.html?ItemSearch.html
    :Actor => nil,
    :Artist => nil,
    :AudienceRating => nil,
    :Author => nil,
    :Availability => nil,
    :Brand => nil,
    :BrowseNode => nil,
    :City => nil,
    :Composer => nil,
    :Condition => nil,
    :Conductor => nil,
    :Director => nil,
    :Keywords => nil,
    :Manufacturer => nil,
    :MaximumPrice => nil,
    :MerchantId => nil,
    :MinimumPrice => nil,
    :MusicLabel => nil,
    :Neighborhood => nil,
    :Orchestra => nil,
    :PostalCode => nil,
    :Power => nil,
    :Publisher => nil,
    :RelatedItemsPage => nil,
    :RelationshipType => nil,
    :ResponseGroup => nil,
    :ReviewSort => nil,
    :SearchIndex => nil,
    :Sort => nil,
    :TagPage => nil,
    :TagsPerPage => nil,
    :TagSort => nil,
    :TextStream => nil,
    :Title => nil,
    :VariationPage => nil,

    # http://docs.amazonwebservices.com/AWSECommerceService/2008-08-19/DG/index.html?ItemLookup.html
    :ItemId => nil,
    :IdType => nil,
  }

  URI_BASE = "http://ecs.amazonaws.jp/onca/xml?Service=AWSECommerceService"

  # Builds the request URI for the given parameters.
  #
  # @param params [Hash] overrides/additions merged on top of DEFAULT_PARAMS
  # @return [String] URI_BASE followed by one "&key=value" pair per
  #   parameter whose value is non-empty, in hash order
  def self.create_uri(params = {})
    pairs = DEFAULT_PARAMS.merge(params).map do |key, value|
      text = value.to_s
      next if text.empty?

      "&#{key}=#{escape(text)}"
    end
    URI_BASE + pairs.compact.join
  end

  # Percent-encodes a single query value. Unlike the removed URI.escape,
  # this also encodes "&", "=" and "+", so a value can never break out of
  # its own key=value pair. Spaces are emitted as %20 rather than "+".
  def self.escape(value)
    # A literal "+" has already become %2B here, so every remaining "+"
    # stands for a space.
    URI.encode_www_form_component(value).gsub("+", "%20")
  end
  private_class_method :escape

  # Parses an XML response body.
  #
  # @param src [String, IO] XML source
  # @return [REXML::Document]
  def self.parse(src)
    REXML::Document.new(src)
  end

  # Fetches the URI built from +params+ and parses the response.
  # Uses URI.open (open-uri); Kernel#open no longer accepts URLs on
  # Ruby 3.0 and later.
  #
  # @param params [Hash] see create_uri
  # @return [REXML::Document]
  def self.get(params)
    src = URI.open(create_uri(params), &:read)
    parse(src)
  end

  # @param doc [REXML::Document] parsed response
  # @return [Array<REXML::Text>] ASIN text nodes of every Items/Item
  def self.asins(doc)
    REXML::XPath.match(doc, "//Items/Item/ASIN/text()")
  end

  # @param doc [REXML::Document] parsed response
  # @return [Array<REXML::Text>] Title text nodes of every Items/Item
  def self.titles(doc)
    REXML::XPath.match(doc, "//Items/Item/ItemAttributes/Title/text()")
  end

  # @param doc [REXML::Document] parsed response
  # @return [Array<REXML::Text>] DetailPageURL text nodes of every Items/Item
  def self.detail_page_urls(doc)
    REXML::XPath.match(doc, "//Items/Item/DetailPageURL/text()")
  end
end
# When run directly (not required), search Books for "Ruby" and print the
# detail page URL of each hit, one per line.
if $PROGRAM_NAME == __FILE__
  results = A2S.get(:SearchIndex => "Books", :Keywords => "Ruby")
  urls = A2S.detail_page_urls(results)
  puts urls.join("\n")
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env ruby | |
require 'rubygems' | |
require 'eventmachine' | |
require 'a2s' | |
require 'uri' | |
require 'csv' | |
# Exactly two arguments are expected: the input CSV and the output file.
unless ARGV.length == 2
  puts "Usage: #{$0} input.csv output.(csv|html)"
  exit
end

# Number of A2SClient instances started inside the event loop.
CONCURRENCY = 2

# Zero-based column indexes in the input CSV. A2SClient adds 1 when reading
# them because each row gets a line number prepended after loading.
PUBLISHER_COLUMN = 0
TITLE_COLUMN = 3
AUTHOR_COLUMN = 2
require 'cgi' # CGI.escapeHTML for text copied from the CSV into HTML

# One worker in the lookup pool. Each instance repeatedly takes a row off
# the shared +data+ array, searches A2S for it over an EventMachine HTTP
# connection, and appends the result to +out+. Counters are class-level so
# that all workers share them.
class A2SClient
  include EM::Protocols

  @@running = 0     # requests currently in flight, across all instances
  @@found = 0       # rows with at least one hit
  @@found_multi = 0 # rows with more than one hit

  # @param data [Array<Array>] shared work queue; each row carries its line
  #   number at index 0, followed by the CSV fields
  # @param out [#<<] destination the results are appended to
  # @param format [String] "csv" writes CSV rows; anything else writes HTML
  def initialize(data, out, format)
    @data = data
    @out = out
    @format = format
    @conn = nil
  end

  # Sends the search for the next queued row and re-invokes itself from the
  # completion callbacks. Once the queue is empty, stops the event loop as
  # soon as no request from any instance is still in flight.
  def request
    if @data.empty?
      EM.stop if @@running == 0
      return
    end

    data = @data.shift
    params = {
      :SearchIndex => "Books",
      :Title => data[TITLE_COLUMN + 1],
    }
    publisher = data[PUBLISHER_COLUMN + 1]
    author = data[AUTHOR_COLUMN + 1]
    params[:Publisher] = publisher unless publisher.to_s.strip.empty?
    params[:Author] = author unless author.to_s.strip.empty?

    uri = URI.parse(A2S.create_uri(params))
    # The connection is opened on first use and reused by this instance.
    @conn ||= HttpClient2.connect(uri.host, uri.port)
    req = @conn.get("#{uri.path}?#{uri.query}")
    @@running += 1

    req.callback do
      begin
        save_response(req, data)
      rescue => e
        # A bad response must not stall the queue: report it and move on.
        puts "ERROR(#{data[0]}) can't parse. #{e} #{data[TITLE_COLUMN + 1]}"
      end
      @@running -= 1
      request
    end

    req.errback do
      puts "ERROR(#{data[0]}) can't get response."
      @@running -= 1
      request
    end
  end

  # Parses the response body, writes the row's result to the output and
  # updates the hit counters.
  #
  # @param req [#content] completed request holding the response body
  # @param data [Array] the row this request was made for
  def save_response(req, data)
    doc = A2S.parse(req.content)
    uris = A2S.detail_page_urls(doc)

    if @format == "csv"
      # REXML text nodes stringify in XML-escaped form ("&amp;"); #value is
      # the unescaped text, which is what belongs in a CSV cell.
      @out << data + uris.first(6).map(&:value)
    else
      save_html_entry(doc, data, uris)
    end

    puts "Saved(#{data[0]}) found:#{uris.length}"
    @@found += 1 if 0 < uris.length
    @@found_multi += 1 if 1 < uris.length
  end

  # Appends one HTML section for a row: a heading, an author/publisher line
  # and a list linking to every hit (or a "no entry" item).
  #
  # @param doc [REXML::Document] parsed response
  # @param data [Array] the row being reported
  # @param uris [Array<REXML::Text>] detail page URLs found for the row
  def save_html_entry(doc, data, uris)
    row = data[0]
    # These come straight from the CSV, so escape them for HTML.
    title = CGI.escapeHTML(data[TITLE_COLUMN + 1].to_s)
    author = CGI.escapeHTML(data[AUTHOR_COLUMN + 1].to_s)
    publisher = CGI.escapeHTML(data[PUBLISHER_COLUMN + 1].to_s)

    @out << "<h2>#{row}. #{title}</h2>\n"
    @out << "<p>#{author} / #{publisher}</p><ul>\n"
    if uris.to_a.empty?
      @out << "<li id='line#{row}-0'>no entry</li>\n"
    else
      titles = A2S.titles(doc)
      # uri and titles[i] are REXML text nodes, which interpolate in their
      # already-escaped form, so they are not escaped a second time.
      uris.each_with_index do |uri, i|
        @out << "<li id='line#{row}-#{i}'><a href='#{uri}'>#{titles[i]}</a></li>\n"
      end
    end
    @out << "</ul>\n\n"
  end

  # Prints the hit counters accumulated by all instances.
  def self.puts_stat
    puts "Found: #{@@found} / More than one: #{@@found_multi}"
  end
end
start = Time.now

# Load the input and prepend a 1-based line number to every row; A2SClient
# reads the CSV columns at an offset of one because of this extra field.
data = CSV.read(ARGV[0])
data.each_with_index { |row, i| row.unshift(i + 1) }

# The output format follows the output file's extension.
html = ARGV[1].end_with?(".html")

# File.open rather than Kernel#open: the path comes from the command line,
# and Kernel#open would run a command if it started with "|".
out = html ? File.open(ARGV[1], "w") : CSV.open(ARGV[1], "w")

begin
  out << "<html><body><h1>#{ARGV[0]}</h1>\n" if html

  # All clients share the same row array and output; the loop runs until a
  # client calls EM.stop.
  EM.run do
    CONCURRENCY.times do
      A2SClient.new(data, out, html ? "html" : "csv").request
    end
  end

  out << "</body></html>\n" if html
ensure
  # Close (and flush) the output even if the event loop raised.
  out.close
end

puts ""
puts "Elapsed #{Time.now - start} sec."
A2SClient.puts_stat
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment