Skip to content

Instantly share code, notes, and snippets.

@mimosz
Created December 28, 2012 10:46
Show Gist options
  • Save mimosz/4396767 to your computer and use it in GitHub Desktop.
Save mimosz/4396767 to your computer and use it in GitHub Desktop.
# Fetch the purchase-record list of an item.
#
# Configures the crawler for the buyer-list endpoint, restricted to the time
# window spanning yesterday (epoch milliseconds), executes the request, then
# gunzips the response, transcodes it from GB18030 to UTF-8 and parses it
# with Nokogiri.
#
# @param crawler   object exposing `url=`, `params=`, `headers=` and `request.execute`
# @param item_id   item identifier (also used to build the Referer URL)
# @param seller_id seller identifier
# @param item_sbn  SBN token sent as the `sbn` parameter
# @param page      result page number, sent as `bid_page` (defaults to 1)
# @return the parsed document, or nil when the request yields no body
def get_trades(crawler, item_id, seller_id, item_sbn, page=1)
  window_start_ms = Date.yesterday.beginning_of_day.to_i * 1000
  window_end_ms = Date.yesterday.end_of_day.to_i * 1000
  referer = "http://detail.tmall.com/auction/item_detail.htm?item_num_id=#{item_id}&show_review=1&tbpm=1"

  # Purchase list endpoint
  crawler.url = 'http://tbskip.taobao.com/json/show_buyer_list.htm'
  crawler.params = {
    bid_page: page,
    ends: window_end_ms,
    starts: window_start_ms,
    item_id: item_id,
    seller_num_id: seller_id,
    sbn: item_sbn
  }
  crawler.headers = { 'Host' => 'tbskip.taobao.com', 'Referer' => referer }

  raw = crawler.request.execute
  return nil unless raw

  # The payload is treated as gzip-compressed GB18030 text.
  html = Zlib::GzipReader.new(StringIO.new(raw)).read
  Nokogiri::HTML(html.force_encoding('GB18030').encode('UTF-8'))
end
# Item SBN token, needed to request the purchase records.
#
# Looks up the `button#J_listBuyerOnView` node, reads its Taobao-specific
# `detail:params` attribute and extracts the text between `sbn=` and
# `,showBuyerList`.
#
# A warning is logged on every failure path (node missing, attribute missing,
# or pattern not matching), so a nil result is never silent.
#
# @param page_dom parsed item page responding to `at(selector)`
# @return [String, nil] the SBN token, or nil when it cannot be extracted
def parse_sbn(page_dom)
  sbn_dom = page_dom.at('button#J_listBuyerOnView') # locate the node
  sbn_params = sbn_dom && sbn_dom['detail:params']  # Taobao custom attribute
  matched = sbn_params && sbn_params.match(/sbn=(.+)\,showBuyerList/)

  logger.warn "无法解析,宝贝购买记录" unless matched
  matched && matched[1]
end
# Split a trade property string into a hash.
#
# The input is a ';'-separated list of `name:value` pairs. Each pair is split
# on its FIRST ':' only, so values that themselves contain ':' are kept whole.
# Empty segments (e.g. from ";;" or a trailing ';') are ignored, and a nil or
# empty input yields an empty hash.
#
# @param str [String, nil] e.g. "color:red;size:XL"
# @return [Hash{String => String, nil}] name => value; the value is nil when
#   the pair has no ':' or nothing after it
def props_split(str)
  props = {}
  str.to_s.split(';').each do |pair|
    next if pair.empty?

    name, value = pair.split(':', 2)
    # Keep "name:" equivalent to "name": both map to a nil value.
    value = nil if value && value.empty?
    props[name] = value
  end
  props
end
# Driver script: loads one Tmall item page through the Taobao jump URL, pulls
# the item's basic fields out of the inline "itemDO" JSON, then prints every
# completed trade found on the first page of yesterday's purchase records.
url = "http://jump.taobao.com/jump"
crawler = Crawler.new(url)
crawler.params = { target: 'http://detail.tmall.com/auction/item_detail.htm?item_num_id=20485336962&show_review=1&tbpm=1' }
page_dom = crawler.get_dom
if page_dom
  detail_dom = page_dom.at('div#detail>script')
  if detail_dom
    # Drop all whitespace so the pattern below can match across line breaks.
    detail_dom = detail_dom.text.gsub(/\s+/, '')
    detail_dom = detail_dom.match(/\"itemDO\"\:(.*)\,\"detail/)
    if detail_dom
      # Single quotes are swapped for double quotes before decoding
      # (presumably the page emits JS-style literals -- TODO confirm).
      json = detail_dom[1].gsub("'", "\"")
      item_info = ActiveSupport::JSON.decode(json)

      online = item_info['isOnline'] == true
      quantity = item_info['quantity'].to_i
      # NOTE(review): the original tested `isOnline == true` twice, so the
      # sold-out label could never be assigned. Treating "online with zero
      # quantity" as sold out is an assumption -- confirm against the itemDO schema.
      status = if !online
                 '下架'
               elsif quantity > 0
                 '在售'
               else
                 '售罄'
               end

      item = {}
      item[:id] = item_info['itemId']
      item[:type] = item_info['auctionType']
      item[:status] = status
      item[:category_id] = item_info['categoryId'].to_i
      item[:seller_id] = item_info['userId'].to_i
      item[:spu_id] = item_info['spuId'].to_i
      item[:tag_price] = item_info['reservePrice'].to_f
      item[:brand_id] = item_info['brandId'].to_i
      item[:brand] = item_info['brand']
      item[:quantity] = quantity
      item[:item_sbn] = parse_sbn(page_dom)

      trades_dom = get_trades(crawler, item[:id], item[:seller_id], item[:item_sbn])
      if trades_dom
        # TODO: only the first page is processed. A missing `span.page-end`
        # node presumably means more pages exist; fetching them through
        # get_trades' `page` argument is not implemented yet.
        trades_dom.css('table>tr').each do |trade_dom|
          fields = trade_dom.text.split(' ')
          # Keep completed trades only, and skip rows too short to carry
          # every field read below.
          next unless fields.last == '成交' && fields.size >= 7

          trade = {
            buyer: fields[0],
            props: props_split(fields[2]),
            price: fields[3].to_f,
            num: fields[4].to_i,
            date: Time.parse("#{fields[5]} #{fields[6]}")
          }
          puts trade
        end
      else
        # get_trades returns nil when the request produced no body.
        logger.warn "无法解析,宝贝购买记录"
      end
    else
      logger.warn "无法解析,宝贝基础信息"
    end
  else
    logger.warn "无法解析,宝贝详情"
  end
else
  logger.error "没什么好说的了~~"
end
# Parse the per-SKU table out of the inline "skuMap" JSON and print one record
# per SKU (id, raw tag, stock quantity, resolved properties).
#
# Relies on two names supplied by surrounding code:
#   str   - text containing the "skuMap" JSON (presumably the item page's
#           inline script with whitespace removed -- TODO confirm)
#   props - lookup table: SKU property tag => { type:, name: }
sku_match = str.match(/\"skuMap\"\:(.*)\}\,\"isSevenDaysRefundment/)
if sku_match
  json = sku_match[1].gsub("'","\"")
  item_sku = ActiveSupport::JSON.decode(json)
  item_sku.each do |tag, sku|
    # A tag is a ';'-separated list of property tags; blank pieces are ignored.
    sku_props = {}
    tag.split(';').each do |prop_tag|
      next if prop_tag.empty?
      next unless props.key?(prop_tag)

      prop = props[prop_tag]
      sku_props[prop[:type]] = prop[:name]
    end
    s = { id: sku['skuId'], tag: tag, quantity: sku['stock'].to_i, props: sku_props }
    puts s
  end
else
  # No skuMap in the text: report it instead of crashing on a nil match.
  logger.warn "无法解析,宝贝SKU"
end
# Item SKU properties. Delisted items do not have the `div.tb-sku` node, in
# which case the lookup table simply stays empty.
#
# Builds `props`: the `data-value` of each option => { type:, name: }, where
# type is the text of the group's <dt> and name is the text of the option's
# `a>span`.
sku_dom = page_dom.at('div.tb-sku')
props_dom = sku_dom ? sku_dom.css('dl.tb-prop') : []
props = {}
props_dom.each do |prop_dom|
  type_dom = prop_dom.at('dt')
  next unless type_dom # group without a heading: nothing to label its options with

  prop_type = type_dom.text
  prop_dom.css('dd>ul>li').each do |el|
    name_dom = el.at('a>span')
    next unless name_dom # option without a label node

    props[el['data-value']] = { type: prop_type, name: name_dom.text }
  end
end
# Parse the product attributes of an item page (needed when creating an item).
#
# Each `div#attributes>div.attributes-list>ul>li` entry is read as text:
# * with more than two spaces it is treated as "name: v1 v2 v3 ..." -- the
#   first token (minus ':') is the name and the remaining tokens are joined
#   with ','
# * otherwise it is split on the FIRST ':' into name and value, and spaces are
#   removed from the value; entries without a ':' are skipped.
#
# A warning is logged when the page has no attribute entries at all.
#
# @param page_dom parsed item page responding to `css(selector)`
# @return [Hash{String => String}] attribute name => value(s)
def parse_attributes(page_dom)
  attributes_dom = page_dom.css('div#attributes>div.attributes-list>ul>li')
  attributes = {}

  # An empty result must be tested explicitly: an empty collection is truthy.
  if attributes_dom.nil? || attributes_dom.empty?
    logger.warn "无法解析,产品属性"
    return attributes
  end

  attributes_dom.each do |attribute|
    str = attribute.text.strip
    if str.count(" ") > 2
      arr = str.split(' ')
      attr_name = arr[0].gsub(':', '')
      # drop(1) removes only the leading name token; later tokens that happen
      # to equal it are kept.
      attributes[attr_name] = arr.drop(1).join(',')
    else
      attr_name, attr_value = str.split(':', 2)
      next if attr_value.nil? # no ':' separator, so there is no value to record

      attributes[attr_name] = attr_value.gsub(" ", '')
    end
  end
  attributes
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment