Skip to content

Instantly share code, notes, and snippets.

@dangra
Created August 19, 2014 23:01
Show Gist options
  • Save dangra/77ecd47d0352935849db to your computer and use it in GitHub Desktop.
Save dangra/77ecd47d0352935849db to your computer and use it in GitHub Desktop.
import scrapy
class Spider(scrapy.Spider):
    """Example spider sketching link-following with ``response.follow``.

    ``parse`` issues one request per link found on the start page and routes
    the responses to ``_parse2``, which follows product links and one fixed
    relative path.
    """

    name = 'loremipsum'
    start_urls = ('https://www.lipsum.com',)

    def parse(self, response):
        """Yield a request for every link on the page, handled by ``_parse2``.

        NOTE(review): ``response.links()`` is not defined in this file; the
        code assumes it yields mappings with a ``'url'`` key -- confirm that
        the Scrapy version in use actually provides it.
        """
        for lnk in response.links():
            # The whole link mapping is forwarded as the request's meta.
            yield scrapy.Request(lnk['url'], callback=self._parse2, meta=lnk)
            # Alternative spellings of the same request via response.follow:
            #yield response.follow(lnk['url'], callback=self._parse2, meta=lnk)
            #yield response.follow(lnk, callback=self._parse2, meta=lnk)

    def _parse2(self, response):
        """Follow each product link, then one fixed relative path.

        No callback is passed to ``response.follow`` here, so whatever
        callback Scrapy uses by default handles the resulting responses.
        """
        # ``@class`` tests the attribute; a bare ``class`` would instead look
        # for a child element named <class> and never match an anchor.
        for href in response.xpath('//a[@class="product"]/@href').extract():
            yield response.follow(href, cookies={'foo': 'bar'})

        # Requested once per response (not once per product link);
        # dont_filter=True is passed so the duplicate filter is bypassed.
        yield response.follow('relative/path.html', dont_filter=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment