Skip to content

Instantly share code, notes, and snippets.

@dangra
Created August 19, 2014 23:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dangra/a8a9bf4a82b4931d6578 to your computer and use it in GitHub Desktop.
Save dangra/a8a9bf4a82b4931d6578 to your computer and use it in GitHub Desktop.
import scrapy
from scrapy.http import safeurl
class Spider(scrapy.Spider):
    """Sketch spider that starts at lipsum.com and exercises request creation.

    ``parse`` schedules one request per link reported by the response and then
    two requests for a URL containing a NUL character (with and without
    ``safeurl``); ``_parse2`` follows product links and one relative path.

    NOTE(review): ``response.links()`` and ``scrapy.http.safeurl`` look like
    proposed APIs rather than released Scrapy features -- confirm before
    running this against a real Scrapy install.
    """

    name = 'loremipsum'
    start_urls = ('https://www.lipsum.com',)

    def parse(self, response):
        """Yield a request per link in ``response``, then two NUL-URL requests.

        Each link object is indexed by ``'url'`` and is also passed whole as
        the request ``meta``; the resulting responses go to ``_parse2``.
        """
        # NOTE(review): the source lost its indentation; the statements after
        # this loop are assumed to sit at method level, not inside the loop.
        for link in response.links():
            yield scrapy.Request(link['url'], callback=self._parse2, meta=link)
            # Alternative spellings considered for the line above:
            #   yield response.follow(link['url'], callback=self._parse2, meta=link)
            #   yield response.follow(link, callback=self._parse2, meta=link)

        # Plain string URL containing a NUL character.
        yield scrapy.Request('XSS?\0=badass')
        # Request.url.encode('ascii') == b'XSS%00badass'

        # Same URL wrapped in ``safeurl``.
        yield scrapy.Request(safeurl('XSS?\0=badass'))
        # Request.url.encode('ascii') == b'XSS\x00badass'

    def _parse2(self, response):
        """Follow every ``<a class="product">`` href, then a fixed relative path."""
        # ``@class`` selects the attribute; a bare ``class`` would test for a
        # child element named <class>, which never matches ordinary anchors.
        product_hrefs = response.xpath('//a[@class="product"]/@href').extract()
        for href in product_hrefs:
            yield response.follow(href, cookies={'foo': 'bar'})

        yield response.follow('relative/path.html', dont_filter=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment