Skip to content

Instantly share code, notes, and snippets.

@kmike
Forked from dangra/idealspider.py
Last active August 29, 2015 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kmike/e915738ac97b992b9328 to your computer and use it in GitHub Desktop.
Save kmike/e915738ac97b992b9328 to your computer and use it in GitHub Desktop.
import scrapy
from scrapy.http import safeurl
class Spider(scrapy.Spider):
    """Sketch of a spider exercising several ways of building requests.

    ``parse`` yields one request per entry returned by ``response.links()``
    plus three requests built from a URL containing a NUL character;
    ``_parse2`` follows product links selected with XPath.

    NOTE(review): ``response.links()`` and ``scrapy.http.safeurl`` are not
    defined in this file -- confirm they exist in the Scrapy version in use.
    """

    name = 'loremipsum'
    start_urls = ('https://www.lipsum.com',)

    def parse(self, response):
        """Yield a request for every link of *response*, then URL-encoding probes.

        Each link request is routed to ``_parse2`` and carries the link
        mapping itself as ``meta``.
        """
        for lnk in response.links():
            yield scrapy.Request(lnk['url'], callback=self._parse2, meta=lnk)
            # Alternative spellings under consideration for the line above:
            #yield response.follow(lnk['url'], callback=self._parse2, meta=lnk)
            #yield response.follow(lnk, callback=self._parse2, meta=lnk)
            #yield scrapy.Request(lnk, self._parse2, meta={'link': lnk})
            #yield lnk.click(callback=self._parse2, meta={'link': lnk})
            #yield lnk.click(callback=self._parse2) # clicked_link is added to meta

        # The three requests below pass the same NUL-containing URL as a plain
        # str, wrapped in safeurl(), and as raw bytes. The comment after each
        # one is the author's stated expectation, not something verified here.
        yield scrapy.Request('XSS?\0=badass')
        # Request.url.encode('ascii') == b'XSS%00badass'
        yield scrapy.Request(safeurl('XSS?\0=badass'))
        # Request.url.encode('ascii') == b'XSS\x00badass'
        yield scrapy.Request(b'XSS\x00badass')
        # Request._url_bytes == b'XSS\x00badass'

    def _parse2(self, response):
        """Follow product links found in *response*.

        Yields one request per ``<a class="product">`` href (with a cookie
        attached), one request for a fixed relative path with duplicate
        filtering disabled, and finally everything ``follow_all`` produces
        for the same XPath.
        """
        # ``@class`` selects the attribute; a bare ``class`` would test for a
        # child element named <class> and never match ordinary anchors.
        for href in response.xpath('//a[@class="product"]/@href').extract():
            yield response.follow(href, cookies={'foo': 'bar'})
        yield response.follow('relative/path.html', dont_filter=True)
        yield from response.follow_all(xpath='//a[@class="product"]/@href')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment