Skip to content

Instantly share code, notes, and snippets.

@rmax
Created May 24, 2010 22:26
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save rmax/412506 to your computer and use it in GitHub Desktop.
Save rmax/412506 to your computer and use it in GitHub Desktop.
from scrapy.spider import BaseSpider
from scrapy.http import FormRequest, Request
from scrapy.selector import HtmlXPathSelector
class DjangoSpider(BaseSpider):
    """Spider that logs in to a local Django admin site.

    ``parse`` submits the login form found in the response for the start
    URL using hard-coded credentials; ``after_login`` then inspects the
    resulting page to decide whether the login succeeded.
    """

    domain_name = "django.local"
    start_urls = ["http://localhost:8000/admin/"]
    extra_domain_names = ["localhost"]

    def parse(self, response):
        """Build the login request from the form contained in ``response``.

        Returns the request produced by ``FormRequest.from_response``, with
        ``after_login`` registered as its callback.
        """
        # Log in to Django's admin interface.
        data = {'username': 'admin', 'password': 'admin'}
        return FormRequest.from_response(
            response,
            formdata=data,
            callback=self.after_login,
            # After logging in we get redirected to the same URL, which
            # could otherwise be filtered out by the dupes filter.
            dont_filter=True,
        )

    def after_login(self, response):
        """Check the post-login page and continue the crawl from there.

        If the login form marker is present in the response body, the login
        failed and a message is logged. Otherwise the response is treated as
        the admin dashboard and a selector is built for further extraction.
        """
        if 'id="form_login"' in response.body:
            # The login form is displayed again: we were not able to log in.
            self.log("Username or password incorrect")
        else:
            # The response is Django's dashboard.
            hxs = HtmlXPathSelector(response)
            # Do stuff and/or return new requests.
# Module-level instance of the DjangoSpider class.
# NOTE(review): presumably this is how the targeted (old) Scrapy version
# discovers the spider in this module — confirm against that release.
SPIDER = DjangoSpider()
Copy link

ghost commented Apr 11, 2013

Why can't I find the `dont_filter` parameter in the docs? See: https://scrapy.readthedocs.org/en/latest/topics/request-response.html

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment