luanfonceca/cars.py

## cars.py
# -*- coding: utf-8 -*-
import scrapy


class CarsSpider(scrapy.Spider):
    """Spider that walks the car listing page and visits every listed ad.

    Crawling starts at the single URL in ``start_urls``. Each ad link found
    on the listing page is requested and handed to ``parse_detail``.
    """

    name = 'cars'
    start_urls = ['http://pe.olx.com.br/veiculos/carros']

    def parse(self, response):
        """Yield one request per ad linked from the listing page.

        Selects the ``<li>`` children of the ``main-ad-list`` list whose
        class does not contain ``list_native`` (presumably sponsored/native
        entries -- confirm against the live markup) and follows the first
        ``href`` of each entry's direct ``<a>`` child.

        :param response: the downloaded listing page.
        :returns: generator of ``scrapy.Request`` objects whose callback is
            ``parse_detail``.
        """
        items = response.xpath(
            '//ul[@id="main-ad-list"]/li[not(contains(@class, "list_native"))]'
        )
        for item in items:
            href = item.xpath('./a/@href').extract_first()
            if not href:
                # An entry without a link has nothing to follow, and
                # scrapy.Request refuses a missing URL, which would abort
                # the whole listing page.
                continue
            yield scrapy.Request(
                # Resolve against the page URL so relative links are valid.
                url=response.urljoin(href),
                callback=self.parse_detail
            )

    def parse_detail(self, response):
        """Yield a single item built from an ad detail page.

        NOTE(review): the value stored under the ``'url'`` key is the text
        of the page ``<title>``, not the page address. The key is kept as is
        so downstream consumers see the same item shape; confirm whether
        ``response.url`` was intended.

        :param response: the downloaded ad detail page.
        :returns: generator yielding one ``dict`` with the key ``'url'``.
        """
        title = response.xpath('//title/text()').extract_first()
        yield {
            'url': title,
        }

## pipelines.py
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html


class OlxPipeline(object):
    """Item pipeline that currently forwards every item unchanged."""

    def process_item(self, item, spider):
        """Return ``item`` as received so it continues down the pipeline.

        The previous implementation dropped into ``pdb`` on every item,
        which blocks a crawl waiting for interactive input; the breakpoint
        has been removed.

        :param item: the scraped item handed over by the spider.
        :param spider: the spider that produced the item (unused here).
        :returns: the same ``item`` object.
        """
        return item
	# -*- coding: utf-8 -*-
	import scrapy


	class CarsSpider(scrapy.Spider):
	    """Spider that walks the car listing page and visits every listed ad.

	    Crawling starts at the single URL in ``start_urls``. Each ad link
	    found on the listing page is requested and handed to
	    ``parse_detail``.
	    """

	    name = 'cars'
	    start_urls = ['http://pe.olx.com.br/veiculos/carros']

	    def parse(self, response):
	        """Yield one request per ad linked from the listing page.

	        Selects the ``<li>`` children of the ``main-ad-list`` list whose
	        class does not contain ``list_native`` (presumably
	        sponsored/native entries -- confirm against the live markup) and
	        follows the first ``href`` of each entry's direct ``<a>`` child.

	        :param response: the downloaded listing page.
	        :returns: generator of ``scrapy.Request`` objects whose callback
	            is ``parse_detail``.
	        """
	        items = response.xpath(
	            '//ul[@id="main-ad-list"]/li[not(contains(@class, "list_native"))]'
	        )
	        for item in items:
	            href = item.xpath('./a/@href').extract_first()
	            if not href:
	                # An entry without a link has nothing to follow, and
	                # scrapy.Request refuses a missing URL.
	                continue
	            yield scrapy.Request(
	                # Resolve against the page URL so relative links work.
	                url=response.urljoin(href),
	                callback=self.parse_detail
	            )

	    def parse_detail(self, response):
	        """Yield a single item built from an ad detail page.

	        NOTE(review): the value stored under the ``'url'`` key is the
	        text of the page ``<title>``, not the page address. The key is
	        kept as is; confirm whether ``response.url`` was intended.

	        :param response: the downloaded ad detail page.
	        :returns: generator yielding one ``dict`` with the key ``'url'``.
	        """
	        title = response.xpath('//title/text()').extract_first()
	        yield {
	            'url': title,
	        }
	# -*- coding: utf-8 -*-

	# Define your item pipelines here
	#
	# Don't forget to add your pipeline to the ITEM_PIPELINES setting
	# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html


	class OlxPipeline(object):
	    """Item pipeline that currently forwards every item unchanged."""

	    def process_item(self, item, spider):
	        """Return ``item`` as received so it continues down the pipeline.

	        The previous implementation dropped into ``pdb`` on every item,
	        which blocks a crawl waiting for interactive input; the
	        breakpoint has been removed.

	        :param item: the scraped item handed over by the spider.
	        :param spider: the spider that produced the item (unused here).
	        :returns: the same ``item`` object.
	        """
	        return item