Skip to content

Instantly share code, notes, and snippets.

@gileno
Last active August 29, 2015 14:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save gileno/b9013b0aa9b2d518fe6c to your computer and use it in GitHub Desktop.
Save gileno/b9013b0aa9b2d518fe6c to your computer and use it in GitHub Desktop.
Scrapy blog post IV
# -*- coding: utf-8 -*-
import scrapy
class OlxSpider(scrapy.Spider):
    """Spider that walks the OLX rental listing page and visits each ad.

    Starting from the single URL in ``start_urls``, ``parse`` collects the
    link of every listing entry and schedules a request for it;
    ``parse_detail`` handles the response for each of those requests.
    """

    name = "olx"
    allowed_domains = ["pe.olx.com.br"]
    start_urls = (
        'http://pe.olx.com.br/imoveis/aluguel',
    )

    def parse(self, response):
        """Yield one request per listing entry found in ``response``.

        Entries are the ``li`` elements whose class contains ``item`` inside
        the ``div`` whose class contains ``section_OLXad-list``. For each
        entry, the ``href`` of the first ``OLXad-list-link`` anchor is
        followed, with ``parse_detail`` as the callback.
        """
        items = response.xpath(
            '//div[contains(@class,"section_OLXad-list")]//li[contains'
            '(@class,"item")]'
        )
        for item in items:
            url = item.xpath(
                ".//a[contains(@class,'OLXad-list-link')]/@href"
            ).extract_first()
            if not url:
                # extract_first() returns None when the entry has no matching
                # anchor; building a Request from that would raise and abort
                # the rest of the page, so skip the entry instead.
                continue
            # urljoin leaves absolute URLs untouched and resolves relative
            # hrefs against the page URL, so Request always gets a full URL.
            yield scrapy.Request(
                url=response.urljoin(url),
                callback=self.parse_detail,
            )

    def parse_detail(self, response):
        """Log the URL of the ad detail page that was fetched."""
        self.log(u'Imóvel URL: {0}'.format(response.url))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment