simple scrapy example
# -*- coding: utf-8 -*-
import csv
import os

import scrapy
from scrapy.http import FormRequest

from my_scraper.models import TableRow

END_DATE = '01/01/2019'
BASE_PATH = os.path.dirname(__file__)
ID_FILE = os.path.join(BASE_PATH, 'ids.csv')
SEARCH_URL = 'https://xxx'


class RemoteTableSpider(scrapy.Spider):
    """
    Run with `scrapy crawl remote_table`.
    """
    name = 'remote_table'
    start_urls = ['https://xxx']

    def parse(self, response):
        """
        Scrapy fetches the start_urls defined above and hands the
        response here. This step scrapes no data; it only establishes
        the session (cookies) that the search POSTs below rely on.
        """
        with open(ID_FILE) as f:
            reader = csv.DictReader(f)
            rows = list(reader)

        # Issue one search POST per ID in the CSV.
        for row in rows:
            formdata = {
                'Id': row['id'],
                'startDate': row['start_date'],
                'endDate': END_DATE,
                'Submit': 'Param*',
                'Sort': '1',
            }
            request = FormRequest(SEARCH_URL, formdata=formdata,
                                  callback=self.extract_table)
            yield request

    def extract_table(self, response):
        """
        Extract table rows from the response to the search POST.
        """
        # Note: 'tr' also matches any header row; skip it here if the
        # TableRow model doesn't account for one.
        table_rows = response.css('table#results tr')
        for row in table_rows:
            rowobj = TableRow(row)
            yield rowobj.serialize()
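
The spider expects an ids.csv file next to the spider module, with at least the id and start_date columns that parse() reads. A minimal example with made-up values, assuming the date format matches END_DATE's MM/DD/YYYY:

id,start_date
12345,03/01/2015
67890,06/15/2016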
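
TableRow lives in my_scraper.models and is not included in this gist. Here is a minimal sketch of what it might look like, assuming it wraps a single <tr> Selector and that serialize() returns a plain dict for Scrapy to collect; the field handling is hypothetical:

# Hypothetical stand-in for my_scraper.models.TableRow -- the real
# model is not part of this gist.
class TableRow:
    def __init__(self, row):
        # `row` is a scrapy Selector for one <tr> element.
        self.cells = row.css('td::text').getall()

    def serialize(self):
        # Return a plain dict so Scrapy treats it as a scraped item.
        return {'cells': [cell.strip() for cell in self.cells]}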