Skip to content

Instantly share code, notes, and snippets.

@ryonlife
Last active May 19, 2020 19:29
Show Gist options
  • Save ryonlife/e9131d8ac2c9ce1d089a98eb346905e7 to your computer and use it in GitHub Desktop.
scrapy-autointegration
# -*- coding: utf-8 -*-
import os
import shutil
import time
import yaml
from pegbot.utils import get_project_root
class AutointegrationPipeline:
    """
    Pipeline for generating autointegration test and fixture files.

    Piggybacks on the scrapy-autounit ``AUTOUNIT_ENABLED`` setting so that
    integration tests and fixtures are generated during the same crawl.
    """

    def open_spider(self, spider):  # pylint: disable=no-self-use
        """
        Create a fresh directory and test files for the spider being run.

        Any previously generated directory for this spider is removed first
        so stale fixtures do not leak into the new run. No-op when
        ``AUTOUNIT_ENABLED`` is off.
        """
        if not spider.settings.getbool('AUTOUNIT_ENABLED', default=False):
            # Piggyback off autounit settings so integration tests are
            # generated on the same crawl.
            return

        # Start from a clean per-spider directory. makedirs (vs. mkdir)
        # also creates missing parent directories on a fresh checkout.
        path = f'{get_project_root()}/autointegration/tests/{spider.name}'
        if os.path.exists(path):
            shutil.rmtree(path)
        os.makedirs(path)

        # Empty __init__.py so the directory is importable as a package.
        with open(f'{path}/__init__.py', 'w'):
            pass

        # Generated pytest module that replays the recorded fixtures.
        # The template text (including the tab indent) is emitted verbatim.
        test = (
            "# -*- coding: utf-8 -*-\n"
            "from autointegration.generate_test import generate_test\n"
            f"def test_{spider.name}():\n"
            f"\tgenerate_test('{spider.name}')()\n"
        )
        with open(f'{path}/test_{spider.name}.py', 'w') as file:
            file.write(test)

    def process_item(self, product, spider):  # pylint: disable=no-self-use
        """
        Write a fixture file for each scraped product.

        Returns the product unchanged so downstream pipelines keep working.
        No fixture is written when ``AUTOUNIT_ENABLED`` is off.
        """
        if not spider.settings.getbool('AUTOUNIT_ENABLED', default=False):
            # Piggyback off autounit settings so integration tests are
            # generated on the same crawl.
            return product

        path = f'{get_project_root()}/autointegration/tests/{spider.name}'
        # Timestamped filename keeps fixtures from one crawl unique.
        with open(f'{path}/fixture_{int(time.time())}.yaml', 'w') as file:
            file.write(yaml.dump({
                'url': product['url'],
                'product': {
                    'category': product['category'],
                    'name': product['name'],
                },
            }))
        return product
# -*- coding: utf-8 -*-
import os
import yaml
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings
from w3lib.url import canonicalize_url
def generate_test(spider_name):
    """Generate an integration test of a VendorSpider subclass.

    Returns a zero-argument callable that crawls the recorded fixture URLs
    and asserts the scraped products match the recorded fixtures.
    """
    def _test():
        """Performs an integration test of a VendorSpider subclass."""
        # Configure the crawler; disable autounit so replaying the crawl
        # does not regenerate fixtures on top of itself.
        settings = get_project_settings()
        settings['AUTOUNIT_ENABLED'] = False
        process = CrawlerProcess(settings)
        crawler = process.create_crawler(spider_name)

        # Load fixtures from .yaml files, keyed by canonicalized URL so
        # lookups tolerate query-param order / escaping differences.
        fixtures = {}
        path = f'{os.path.dirname(os.path.abspath(__file__))}/tests/{spider_name}'
        for filename in os.listdir(path):
            if not filename.endswith('.yaml'):
                continue
            with open(os.path.join(path, filename)) as file:
                fixture = yaml.load(file, Loader=yaml.FullLoader)
            fixtures[canonicalize_url(fixture['url'])] = fixture['product']

        def _test_parse_product(item):
            """Tests for correct scraping of product info."""
            # BUG FIX: `fixtures` is keyed by canonicalized URLs, so the
            # scraped item's URL must be canonicalized before the membership
            # test and lookup too (previously only the error message was).
            url = canonicalize_url(item['url'])
            if url not in fixtures:
                raise AssertionError(f"Product URL mismatch: {url}")
            for key, val in fixtures[url].items():
                assert item[key] == val

        # Attach test handlers to various event signals
        # https://docs.scrapy.org/en/latest/topics/signals.html#topics-signals-ref
        crawler.signals.connect(_test_parse_product, signal=signals.item_scraped)

        # Run the crawler, seeded with the fixture URLs only.
        process.crawl(crawler, seed_urls=list(fixtures), crawl_patterns=[])
        process.start()

        # Integration test fails if errors have been counted in the
        # crawler's stats.
        if crawler.stats.get_value('log_count/ERROR'):
            raise AssertionError
    return _test
# -*- coding: utf-8 -*-
from autointegration.generate_test import generate_test
def test_name_of_spider():
    """Example of a generated test module for a spider named 'name_of_spider'.

    BUG FIX: the generator template emits ``generate_test('{spider.name}')``
    inside ``def test_{spider.name}()`` — the ``test_`` prefix belongs to the
    function name only, not to the spider name passed to ``generate_test``.
    """
    generate_test('name_of_spider')()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment