Skip to content

Instantly share code, notes, and snippets.

@hugo53
Forked from rmax/djangolocal.py
Created September 28, 2011 09:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hugo53/1247487 to your computer and use it in GitHub Desktop.
Save hugo53/1247487 to your computer and use it in GitHub Desktop.
from scrapy.spider import BaseSpider
from scrapy.http import FormRequest, Request
from scrapy.selector import HtmlXPathSelector
class DjangoSpider(BaseSpider):
    """Spider that logs in to a Django admin site running on localhost.

    ``parse`` submits the login form found in the response it is given, and
    ``after_login`` handles the response that comes back from that
    submission.
    """

    domain_name = "django.local"
    start_urls = ["http://localhost:8000/admin/"]
    extra_domain_names = ["localhost"]

    def parse(self, response):
        """Build the login request from the form contained in *response*.

        Returns a ``FormRequest`` created with ``FormRequest.from_response``,
        carrying the hard-coded admin credentials and using ``after_login``
        as its callback.
        """
        credentials = {'username': 'admin', 'password': 'admin'}
        # dont_filter=True: after logging in we get redirected to the same
        # URL, which the dupesfilter could otherwise drop as a duplicate.
        login_request = FormRequest.from_response(
            response,
            formdata=credentials,
            callback=self.after_login,
            dont_filter=True,
        )
        return login_request

    def after_login(self, response):
        """Handle the response received after submitting the login form.

        Logs a message and stops when the login form marker is found in the
        response body again; otherwise builds an ``HtmlXPathSelector`` over
        the response as the starting point for further scraping.
        """
        # NOTE(review): this is a str-in-body membership test; if
        # response.body is bytes (Python 3) it would raise TypeError --
        # confirm the target interpreter / Scrapy version.
        login_form_shown = 'id="form_login"' in response.body
        if login_form_shown:
            # The login form was rendered again, so the login did not succeed.
            self.log("Username or password incorrect")
            return

        # Presumably the response is now Django's dashboard.
        hxs = HtmlXPathSelector(response)
        # Do stuff and/or return new requests from here.
# Module-level spider instance.
# NOTE(review): presumably the hook this (old) Scrapy version uses to
# discover the spider in the module -- confirm against the Scrapy release
# in use.
SPIDER = DjangoSpider()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment