Created
September 1, 2014 11:58
-
-
Save xbin999/6d03649cf7fed5432dd0 to your computer and use it in GitHub Desktop.
从豆瓣购书单中挑出有亚马逊促销的书籍
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# encoding:utf-8 | |
require 'net/http' | |
require 'uri' | |
# Performs an HTTP GET on +uri_str+ and returns the first successful response.
#
# Redirects are followed and a 503 (Service Unavailable) answer is retried
# immediately; each of those consumes one unit of +limit+.
#
# @param uri_str [String] absolute http:// or https:// URL to request
# @param limit [Integer] how many more requests may be issued before giving up
# @return [Net::HTTPResponse] the 2xx response
# @raise [ArgumentError] when +limit+ is exhausted
# @raise whatever Net::HTTPResponse#error! raises for any other status
def fetch(uri_str, limit = 10)
  # You should choose better exception.
  # `<=` rather than `==` so a negative limit cannot recurse without bound.
  raise ArgumentError, 'HTTP redirect too deep' if limit <= 0

  url = URI.parse(uri_str)
  req = Net::HTTP::Get.new(url.request_uri)
  # Enable TLS for https URLs; without it an https target (or a redirect to
  # one) would be spoken to in plain HTTP and fail.
  response = Net::HTTP.start(url.host, url.port, use_ssl: url.scheme == 'https') do |http|
    http.request(req)
  end

  case response
  when Net::HTTPSuccess
    response
  when Net::HTTPRedirection
    # Location may be relative; resolve it against the URL just requested.
    fetch(URI.join(uri_str, response['location']).to_s, limit - 1)
  when Net::HTTPServiceUnavailable
    fetch(uri_str, limit - 1)
  else
    response.error!
  end
end
# Matches one book entry of the saved cart page and captures its id, title,
# sid and path fields. The leading space before "id:" keeps the pattern from
# matching the tail of "sid:". /m lets ".*?" span line breaks between fields.
cart_regexp = /
  [ ]id:'(?<id>\d{8})'  .*?
  title:'(?<title>.*?)' .*?
  sid:'(?<sid>\d*?)'    .*?
  path:'(?<path>.*?)'
/mx

# Matches the douban.com "link2" redirect link whose vendor is "joyo" and
# captures the value of its url= parameter (everything up to the next ';').
# NOTE(review): if the page separates parameters with "&amp;", the captured
# value will end in "&amp" -- confirm against a real page.
db2zlink_regexp = %r{href="http://www\.douban\.com/link2\S*?vendor=joyo\S*?;\S*?url=(?<url>\S*?);}

# promotion code of amazon.cn
prom_regexp = /6HREB5FXXK/

# save http://book.douban.com/cart into cart.html
# Read as UTF-8 explicitly so matching does not depend on the locale encoding
# (assumes the saved page is UTF-8, as the original UTF-8-only pattern did).
carts = File.read("cart.html", encoding: "UTF-8")
books = {}

# scan all books in cart
carts.scan(cart_regexp).each do |_id, title, _sid, path|
  begin
    # fetch only ever returns a 2xx response (it raises otherwise), so no
    # further status or reason-phrase check is needed here.
    resp = fetch(path)
    puts "=== get #{title} Code = #{resp.code}"

    link = db2zlink_regexp.match(resp.body)
    if link.nil?
      puts "#{title} not match."
      next
    end

    # get the amazon.cn's link for the book.
    # URI.unescape was removed in Ruby 3.0; the value is a percent-encoded
    # query parameter, which decode_www_form_component handles.
    zurl = URI.decode_www_form_component(link[:url])
    resp2 = fetch(zurl)
    puts "=== fetch ok"

    # test if the book is in promotion
    if prom_regexp =~ resp2.body
      puts "#{title} is in promotion."
      books[title] = path
    else
      puts "#{title} is not in promotion."
    end
  rescue StandardError => e
    # One unreachable or malformed book page must not abort the whole run
    # and discard the results gathered so far.
    warn "#{title} skipped: #{e.class}: #{e.message}"
  ensure
    # Pause between books so the remote sites are not hammered.
    sleep(3)
  end
end

puts "=== books in promotion ==="
books.each do |title, path|
  puts "#{title}, #{path}"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment