Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save chrisdietr/b6e8f86adaebd5820ed0766cd44ec37b to your computer and use it in GitHub Desktop.
Save chrisdietr/b6e8f86adaebd5820ed0766cd44ec37b to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import FormRequest, Request
class QuotesSpider(scrapy.Spider):
    """Log in to quotes.toscrape.com, then crawl the linked "(about)" pages.

    Request flow:
      1. ``parse`` submits the login form found on the start URL.
      2. ``go_to_directory`` requests a quote listing page.
      3. ``after_login`` follows every "(about)" link on the listing and
         the "next" pagination link (re-entering itself).
      4. ``parse_items`` yields one item per followed "(about)" page.
    """

    name = 'quoteslogin'
    allowed_domains = ['quotes.toscrape.com']
    start_urls = [
        'http://quotes.toscrape.com/login',
    ]
    # NOTE(review): this attribute is not referenced anywhere in the class;
    # go_to_directory requests page 3 directly. Confirm which listing page
    # the crawl is meant to start from.
    directory_url = 'http://quotes.toscrape.com/page/1/'

    def parse(self, response):
        """Submit the login form on ``response`` with fixed credentials.

        The CSRF token is read from the form's ``csrf_token`` input and sent
        back together with the username and password. The response to the
        form submission is handled by ``go_to_directory``.
        """
        csrf_token = response.xpath("//input[@name='csrf_token']/@value").get()
        yield FormRequest.from_response(
            response,
            formxpath="//form",
            formdata={
                'csrf_token': csrf_token,
                'username': 'admin',
                'password': 'admin',
            },
            callback=self.go_to_directory,
        )

    def go_to_directory(self, response):
        """Request the quote listing page once the login form was submitted.

        The login response itself is not inspected; the listing page is
        handled by ``after_login``.
        """
        return Request(
            url='http://quotes.toscrape.com/page/3/',
            callback=self.after_login,
        )

    def after_login(self, response):
        """Follow every "(about)" link and the pagination link of a listing.

        Yields one request per "(about)" link (handled by ``parse_items``)
        and, when a "next" link exists, a request for the following listing
        page that is handled by this same method.
        """
        for href in response.xpath('//a[contains(.,"(about)")]/@href'):
            yield response.follow(href, callback=self.parse_items)

        next_page = response.xpath("//li[@class='next']/a/@href").get()
        if next_page:
            yield response.follow(url=next_page, callback=self.after_login)

    def parse_items(self, response):
        """Yield the text of the ``author-born-date`` span as ``'year'``.

        The value is ``None`` when the page has no such span.
        """
        yield {
            # Select the text node: calling .get() on the element itself
            # would return the serialized <span> markup, not its content.
            'year': response.xpath(
                '//span[@class="author-born-date"]/text()'
            ).get(),
        }
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment