Skip to content

Instantly share code, notes, and snippets.

@rafikahmed
Created May 28, 2020 07:37
Show Gist options
  • Save rafikahmed/630fb016971e72c3fcd91ef4471127f3 to your computer and use it in GitHub Desktop.
Save rafikahmed/630fb016971e72c3fcd91ef4471127f3 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import scrapy
class MetrocuadradoSpider(scrapy.Spider):
    """Collect listing URLs from the metrocuadrado.com AJAX search endpoint.

    The spider POSTs to the paginated search URL, yields one item per
    listing found in each response, and follows the pagination up to and
    including ``max_page``.
    """

    name = 'metroCuadrado'
    allowed_domains = ['metrocuadrado.com']

    # Single source of truth for the search endpoint; only ``{page}`` varies
    # between requests. The total*Count values are hard-coded as captured.
    # NOTE(review): presumably a snapshot of the site's counters at capture
    # time -- confirm whether the endpoint actually requires them.
    search_url_template = (
        'https://www.metrocuadrado.com/search/list/ajax?'
        '&mnrogarajes=&mnrobanos=&mnrocuartos=&mtiempoconstruido=&marea='
        '&mvalorarriendo=&mvalorventa=&mciudad=&mubicacion=&mtiponegocio=venta'
        '&mtipoinmueble=edificio-de-oficinas;edificio-de-apartamentos;casa;'
        'apartamento;local;oficina;bodega;finca;lote;consultorio'
        '&mzona=&msector=&mbarrio=&selectedLocationCategory='
        '&selectedLocationFilter=&mestadoinmueble=&madicionales=&orderBy='
        '&sortType=&companyType=&companyName=&midempresa=&mgrupo=&mgrupoid='
        '&mbasico=&msemillero=&currentPage={page}'
        '&totalPropertiesCount=204473&totalUsedPropertiesCount=203689'
        '&totalNewPropertiesCount=784&sfh=1'
    )

    # Last page number that will be requested (pages 1..max_page inclusive).
    # The default of 6 matches the original ``current_page <= 5`` check.
    max_page = 6

    def _page_request(self, page):
        """Build the POST request for result page ``page``.

        The page number is also stored in ``meta['currentPage']`` so that
        ``parse`` can tell which page a response belongs to.
        """
        return scrapy.Request(
            url=self.search_url_template.format(page=page),
            method='POST',
            callback=self.parse,
            meta={
                'currentPage': page
            }
        )

    def start_requests(self):
        """Yield the request for the first page of search results."""
        yield self._page_request(1)

    def parse(self, response):
        """Yield one ``{'url': ...}`` item per listing, then the next page.

        ``url`` is the ``href`` of the listing's ``a[@itemprop='url']``
        element, or ``None`` when the listing has no such link.
        """
        # The class attribute is matched exactly, trailing space included.
        listings = response.xpath("//div[@class='m_rs_list_item ']")
        for listing in listings:
            yield {
                'url': listing.xpath(".//a[@itemprop='url']/@href").get()
            }

        # pagination
        current_page = response.meta['currentPage']
        # int() keeps the comparison valid if max_page was overridden with a
        # string value (e.g. supplied as a spider argument).
        if current_page < int(self.max_page):
            yield self._page_request(current_page + 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment