Skip to content

Instantly share code, notes, and snippets.

@luanfonceca
Last active July 25, 2017 15:28
Show Gist options
  • Save luanfonceca/c49f5d8bce161689df3e3db5c822309a to your computer and use it in GitHub Desktop.
Save luanfonceca/c49f5d8bce161689df3e3db5c822309a to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import scrapy
class CarsSpider(scrapy.Spider):
    """Spider that walks the OLX car listing page and visits each ad.

    ``parse`` handles the listing page in ``start_urls`` and schedules one
    request per ad; ``parse_detail`` handles each ad page and yields one
    item per page.
    """

    name = 'cars'
    start_urls = ['http://pe.olx.com.br/veiculos/carros']

    def parse(self, response):
        """Yield a request for every ad linked from the listing page.

        Selects the ``<li>`` children of ``ul#main-ad-list`` whose class
        does not contain ``list_native`` (presumably sponsored/native
        entries -- confirm against the live markup) and follows the
        ``href`` of each item's direct ``<a>`` child.
        """
        items = response.xpath(
            '//ul[@id="main-ad-list"]/li[not(contains(@class, "list_native"))]'
        )
        for item in items:
            href = item.xpath('./a/@href').extract_first()
            if not href:
                # extract_first() returns None when the <li> has no direct
                # <a href>; Request(url=None) would raise and abort the
                # whole generator, so skip such entries instead.
                continue
            yield scrapy.Request(
                # urljoin leaves absolute URLs untouched and resolves
                # relative ones against the listing page URL.
                url=response.urljoin(href),
                callback=self.parse_detail,
            )

    def parse_detail(self, response):
        """Yield a single-field item built from the ad page's ``<title>``."""
        title = response.xpath('//title/text()').extract_first()
        # NOTE(review): the value stored under 'url' is the page title text,
        # not the page address (response.url). Kept as-is so the emitted
        # item does not change -- confirm which one is intended.
        yield {
            'url': title,
        }
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://docs.scrapy.org/en/latest/topics/item-pipeline.html
class OlxPipeline(object):
    """Pass-through item pipeline: returns every item unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` unmodified.

        A leftover ``pdb.set_trace()`` breakpoint used to sit here; it
        stopped the crawl at an interactive prompt for every item, so it
        has been removed.

        :param item: the item passed in by the caller.
        :param spider: accepted for the pipeline interface; not used here.
        :returns: the same ``item`` object.
        """
        return item
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment