cargan/saras_spider.py

## saras_spider.py
# -*- coding: utf-8 -*-

# 1. create virtual environment: virtualenv venv
# 2. install scrapy: ./venv/bin/pip install scrapy
# 3. run spider: ./venv/bin/scrapy runspider saras_spider.py -o saras_delfi.json


import scrapy


class SarasDelfiSpider(scrapy.Spider):
    """Spider that collects headline titles and excerpts from a delfi.lt topic page.

    Crawling starts at the single URL in ``start_urls`` and continues through
    the pagination links found by ``parse``.
    """

    name = "saras"
    start_urls = [
        'https://www.delfi.lt/temos/sarunas-jasikevicius',
    ]

    def parse(self, response):
        """Yield one item per headline on the page, then follow the next page.

        Each item is a dict with two keys:

        * ``'title'``   -- first text node of an ``<a>`` child of
          ``h3.headline-title``.
        * ``'excerpt'`` -- first text node directly under ``p.headline-lead``.

        Either value is ``None`` when the corresponding node is not found.
        """
        headlines = response.css('div.headline')
        for headline in headlines:
            title_text = headline.css('h3.headline-title').xpath('a/text()')
            lead_text = headline.css('p.headline-lead').xpath('text()')
            yield {
                'title': title_text.extract_first(),
                'excerpt': lead_text.extract_first(),
            }

        # Only "next" links whose class attribute does not start with
        # "next hidden" are considered (presumably the site hides the link
        # on the last page -- confirm against the live markup).
        next_links = response.css('a.next:not([class^="next hidden"])::attr("href")')
        next_page = next_links.extract_first()
        if next_page is None:
            # No usable pagination link: this page is the last one.
            return

        # Schedule the next page and parse it with this same callback.
        yield response.follow(next_page, self.parse)
	# -*- coding: utf-8 -*-

	# 1. create virtual environment: virtualenv venv
	# 2. install scrapy: ./venv/bin/pip install scrapy
	# 3. run spider: ./venv/bin/scrapy runspider saras_spider.py -o saras_delfi.json


	import scrapy


	class SarasDelfiSpider(scrapy.Spider):
	    """Spider that collects headline titles and excerpts from a delfi.lt topic page.

	    Crawling starts at the single URL in ``start_urls`` and continues
	    through the pagination links found by ``parse``.
	    """
	    # NOTE(review): a class with this same name is already defined earlier
	    # in this file; this copy looks like an accidental duplicate paste.
	    # Its nested indentation had been stripped and is restored here --
	    # confirm whether the duplicate should be kept at all.

	    name = "saras"
	    start_urls = [
	        'https://www.delfi.lt/temos/sarunas-jasikevicius',
	    ]

	    def parse(self, response):
	        """Yield one item per headline on the page, then follow the next page.

	        Each item is a dict with a ``'title'`` (first text node of an
	        ``<a>`` child of ``h3.headline-title``) and an ``'excerpt'`` (first
	        text node directly under ``p.headline-lead``). Either value is
	        ``None`` when the node is not found.
	        """
	        for quote in response.css('div.headline'):
	            yield {
	                'title': quote.css('h3.headline-title').xpath('a/text()').extract_first(),
	                'excerpt': quote.css('p.headline-lead').xpath('text()').extract_first()
	            }

	        # Skip "next" links whose class attribute starts with "next hidden".
	        next_page = response.css('a.next:not([class^="next hidden"])::attr("href")').extract_first()
	        if next_page is not None:
	            # Schedule the next page and parse it with this same callback.
	            yield response.follow(next_page, self.parse)