Skip to content

Instantly share code, notes, and snippets.

@re4lfl0w
Last active August 29, 2015 14:27
Show Gist options
  • Save re4lfl0w/840b05a534edb678eb51 to your computer and use it in GitHub Desktop.
Save re4lfl0w/840b05a534edb678eb51 to your computer and use it in GitHub Desktop.
자바스크립트로 동작하는 버튼이 있을 때 다음 페이지로 가는 방식을 구현한 scrapy spider, 네이버 지식쇼핑 다음페이지 숫자 링크 클릭할 때 동작하는 스크립트
# Source: https://ide.c9.io/redapple/so_18810850
from scrapy.spider import BaseSpider
from scrapy.http import Request, FormRequest
from scrapy.selector import HtmlXPathSelector
from scrapy.contrib.spiders import Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor
class MySpider(BaseSpider):
    """Spider for a site whose "next page" button is JavaScript-driven.

    Strategy: follow every numeric link on the page to ``parse_items``,
    then, if a "Next" anchor is present, submit the page's ``frm`` form
    with ``move_indicator=next`` to request the following page.
    """
    name = "commu"
    allowed_domains = ["saintbarnabas.hodesiq.com"]
    start_urls = ["http://saintbarnabas.hodesiq.com/joblist.asp?user_id=",
                  ]
    # Any link whose URL contains digits, searched over the whole document.
    extractor = SgmlLinkExtractor(allow=('\d+'), restrict_xpaths=('*'))

    def parse(self, response):
        # Keep the try narrow: only link extraction is best-effort.  The
        # original bare ``except: pass`` also swallowed errors raised while
        # building/yielding Requests, hiding real bugs.
        try:
            links = self.extractor.extract_links(response)
        except Exception:
            links = []
        for link in links:
            yield Request(url=link.url, callback=self.parse_items)

        # Emulate the JS "Next" button: when a Next anchor exists, POST the
        # ``frm`` form with the move indicator the site's script would set.
        hxs = HtmlXPathSelector(response)
        next_links = hxs.select('//td[@align="right"]/a[contains(., "Next")]')
        if next_links:
            freq = FormRequest.from_response(response, formname="frm",
                                             formdata={'move_indicator': 'next'})
            if freq:
                yield freq

    def parse_items(self, response):
        # Placeholder item callback; works on both Python 2 and 3.
        print("parse_items")
// Pagination handler captured with dev tools from Naver shopping's pages.
this.goPage = function (pageNo) {
    // Clamp the requested page number to a minimum of 1, then reload via AJAX.
    this.options.pagingIndex = pageNo < 1 ? 1 : pageNo;
    this.options.frm = "NVSHPAG";
    this._ajaxLoad(false);
    // Bring the refreshed content area back into view.
    jQuery("#_content_area").get(0).scrollIntoView();
};
import scrapy
from scrapy.http import Request, FormRequest
from wise_shop.items import WiseShopItem
class WiseSpider(scrapy.Spider):
    """Scrape laptop listings from Naver shopping search results.

    Pagination is JavaScript-driven on the site, so instead of following an
    href we re-submit the page's ``frm`` form with the next page number in
    the ``e`` field — mirroring what the site's ``goPage`` script does.
    """
    name = "wise"
    allowed_domains = ["shopping.naver.com"]
    start_urls = [
        "http://shopping.naver.com/search/all_search.nhn?where=all&frm=NVSCTAB&query=%EB%85%B8%ED%8A%B8%EB%B6%81"
    ]
    # Current page number; incremented on the class each time a "next" sibling
    # of the highlighted page marker exists.
    # NOTE: the per-page xpath for the next index kept returning 2, so a
    # class-level counter is used instead of reading it from the page.
    e = 1

    def parse(self, response):
        for sel in response.xpath('//ul[@class="goods_list"]/li'):
            name = sel.xpath('div[@class="info"]/a/@title').extract()
            price = sel.xpath('div[@class="info"]/span[@class="price"]/em/span/text()').extract()
            if not name or not price:
                # Listings missing a title or price (e.g. ad tiles) used to
                # raise IndexError on ``extract()[0]``; skip them instead.
                continue
            item = WiseShopItem()
            item['product_name'] = name[0].encode('utf-8')
            item['min_price'] = price[0].encode('utf-8')
            yield item

        # A sibling after the <strong> page marker means a next page exists.
        # The original ``extract()[0]`` crashed with IndexError on the last
        # page; an empty list now simply ends the crawl.
        next_page = response.xpath('//div[@class="co_paginate"]/strong/following-sibling::*').extract()
        if next_page:
            WiseSpider.e += 1
            yield FormRequest.from_response(response, formname="frm",
                                            formdata={'e': str(WiseSpider.e)})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment