# jluczak/items.py

## items.py
import scrapy


class CrosswebItem(scrapy.Item):
    """Scrapy item declaring the fields collected for one scraped page."""

    # Values extracted from the page markup.
    name = scrapy.Field()
    city = scrapy.Field()
    description = scrapy.Field()
    topics = scrapy.Field()

    # File-download bookkeeping. The names follow Scrapy's files-pipeline
    # convention (`file_urls` in, `files` out).
    # NOTE(review): nothing visible here writes `files` -- presumably the
    # configured item pipeline fills it in; confirm.
    file_urls = scrapy.Field()
    files = scrapy.Field()


## settings.py
# -*- coding: utf-8 -*-

# Name of this Scrapy project / bot.
BOT_NAME = 'crossweb'

# Package searched for spiders; newly generated spiders are created in the
# same package.
NEWSPIDER_MODULE = 'crossweb.spiders'
SPIDER_MODULES = [NEWSPIDER_MODULE]

# Respect the robots.txt rules of the crawled site.
ROBOTSTXT_OBEY = True

# Enabled item pipelines, mapped to their execution order.
ITEM_PIPELINES = {'crossweb.pipelines.CrosswebPipeline': 1}

# Storage location for files downloaded through a files pipeline.
FILES_STORE = 'crossweb_photos'


## spider_crossweb.py


import scrapy
from crossweb.items import CrosswebItem

class ResearchSpider(scrapy.Spider):
    """Crawl the crossweb.pl job listing and emit one item per linked page.

    ``parse`` handles the listing pages in ``start_urls`` and follows every
    ``.company a`` link; ``parse_book`` turns each page reached that way
    into a ``CrosswebItem``.
    """

    name = 'crossweb'
    start_urls = [
        'https://crossweb.pl/job/',
        'https://crossweb.pl/job/?page=2',
    ]

    def parse(self, response):
        """Yield one request per link found inside a ``.company`` element.

        Args:
            response: Scrapy response for a listing page.

        Yields:
            scrapy.Request: request for the linked page, handled by
            ``parse_book``.
        """
        for link in response.css('.company a'):
            href = link.css('a::attr(href)').extract_first()
            if not href:
                # Joining an empty/missing href onto the page URL resolves to
                # the listing page itself, so there is nothing new to fetch.
                continue
            yield scrapy.Request(response.urljoin(href), callback=self.parse_book)

    def parse_book(self, response):
        """Extract a ``CrosswebItem`` from a page linked from the listing.

        Args:
            response: Scrapy response for the linked page.

        Yields:
            CrosswebItem: ``name``, ``city`` and ``description`` are the first
            matching text node (``None`` when nothing matches), ``topics`` is
            a list of every matching text node, and ``file_urls`` is a list
            holding the absolute image URL (empty when no image matches).
        """
        name = response.css('#container > h1::text').extract_first()
        city = response.css('#content > section:nth-child(3) > div.param > div:nth-child(1) > span::text').extract_first()
        description = response.css('#eventText > p::text').extract_first()
        topics = response.css('#content > section:nth-child(5) > div.param > div:nth-child(1) > span::text').extract()
        image_src = response.css('#container > div.company-photo > img:nth-child(2)::attr(src)').extract_first()

        # Scrapy's files pipeline reads `file_urls` as a list of absolute
        # URLs; a bare string would be iterated character by character and a
        # relative `src` could not be requested.
        # NOTE(review): assumes CrosswebPipeline follows that convention --
        # confirm against crossweb/pipelines.py.
        file_urls = [response.urljoin(image_src)] if image_src else []

        yield CrosswebItem(
            name=name,
            city=city,
            description=description,
            topics=topics,
            file_urls=file_urls,
        )
	import scrapy


	class CrosswebItem(scrapy.Item):
	    """Scrapy item declaring the fields collected for one scraped page."""

	    # Values extracted from the page markup.
	    name = scrapy.Field()
	    city = scrapy.Field()
	    topics = scrapy.Field()
	    description = scrapy.Field()

	    # File-download bookkeeping. The names follow Scrapy's files-pipeline
	    # convention (`file_urls` in, `files` out).
	    # NOTE(review): nothing visible here writes `files` -- presumably the
	    # configured item pipeline fills it in; confirm.
	    file_urls = scrapy.Field()
	    files = scrapy.Field()
	# -*- coding: utf-8 -*-

	# Name of this Scrapy project / bot.
	BOT_NAME = 'crossweb'

	# Package searched for spiders; newly generated spiders are created in the
	# same package.
	NEWSPIDER_MODULE = 'crossweb.spiders'
	SPIDER_MODULES = [NEWSPIDER_MODULE]

	# Respect the robots.txt rules of the crawled site.
	ROBOTSTXT_OBEY = True

	# Enabled item pipelines, mapped to their execution order.
	ITEM_PIPELINES = {'crossweb.pipelines.CrosswebPipeline': 1}

	# Storage location for files downloaded through a files pipeline.
	FILES_STORE = 'crossweb_photos'


	import scrapy
	from crossweb.items import CrosswebItem

	class ResearchSpider(scrapy.Spider):
	    """Crawl the crossweb.pl job listing and emit one item per linked page.

	    ``parse`` handles the listing pages in ``start_urls`` and follows every
	    ``.company a`` link; ``parse_book`` turns each page reached that way
	    into a ``CrosswebItem``.
	    """

	    name = 'crossweb'
	    start_urls = [
	        'https://crossweb.pl/job/',
	        'https://crossweb.pl/job/?page=2',
	    ]

	    def parse(self, response):
	        """Yield one request per link found inside a ``.company`` element.

	        Args:
	            response: Scrapy response for a listing page.

	        Yields:
	            scrapy.Request: request for the linked page, handled by
	            ``parse_book``.
	        """
	        for link in response.css('.company a'):
	            href = link.css('a::attr(href)').extract_first()
	            if not href:
	                # Joining an empty/missing href onto the page URL resolves
	                # to the listing page itself, so there is nothing to fetch.
	                continue
	            yield scrapy.Request(response.urljoin(href), callback=self.parse_book)

	    def parse_book(self, response):
	        """Extract a ``CrosswebItem`` from a page linked from the listing.

	        Args:
	            response: Scrapy response for the linked page.

	        Yields:
	            CrosswebItem: ``name``, ``city`` and ``description`` are the
	            first matching text node (``None`` when nothing matches),
	            ``topics`` is a list of every matching text node, and
	            ``file_urls`` is a list holding the absolute image URL (empty
	            when no image matches).
	        """
	        name = response.css('#container > h1::text').extract_first()
	        city = response.css('#content > section:nth-child(3) > div.param > div:nth-child(1) > span::text').extract_first()
	        description = response.css('#eventText > p::text').extract_first()
	        topics = response.css('#content > section:nth-child(5) > div.param > div:nth-child(1) > span::text').extract()
	        image_src = response.css('#container > div.company-photo > img:nth-child(2)::attr(src)').extract_first()

	        # Scrapy's files pipeline reads `file_urls` as a list of absolute
	        # URLs; a bare string would be iterated character by character and
	        # a relative `src` could not be requested.
	        # NOTE(review): assumes CrosswebPipeline follows that convention --
	        # confirm against crossweb/pipelines.py.
	        file_urls = [response.urljoin(image_src)] if image_src else []

	        yield CrosswebItem(
	            name=name,
	            city=city,
	            description=description,
	            topics=topics,
	            file_urls=file_urls,
	        )