Skip to content

Instantly share code, notes, and snippets.

@miroesli
Created July 19, 2017 19:08
Show Gist options
  • Save miroesli/eccd96cb51535e8a65bf403412448a3e to your computer and use it in GitHub Desktop.
SEng299 Assignment 4 - Unit tests for ShillBot
import unittest
import codecs
import os
import settings
from workers.basic_worker import BasicUserParseWorker
class TestWorkerBasic(unittest.TestCase):
    """Unit tests for BasicUserParseWorker (ShillBot crawler worker).

    Preconditions: the mothership server is NOT running, so any attempt by
    a worker to report results is expected to raise ConnectionRefusedError.
    """

    # Reddit user page used as the crawl target throughout these tests.
    TEST_USER_URL = "https://www.reddit.com/user/Chrikelnel"

    def _load_sample_html(self):
        """Load the canned GET-response fixture and normalize line endings.

        Returns the fixture's text as a single string with CRLF pairs
        stripped, matching the form the worker's parser expects.
        """
        file_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            'test_resources/sample_GET_response.html',
        )
        with codecs.open(file_path, encoding='utf-8') as f:
            text = f.read()
        return str(text).strip().replace('\r\n', '')

    def test_basic_worker_connection(self):
        """
        Purpose: Test regular running of worker
        Expectation: startup system, hit the reddit user and parse the data, fail to send to mothership (exception)

        :precondition: Mothership server not running
        :return:
        """
        worker = BasicUserParseWorker(self.TEST_USER_URL)
        # Can't connect to mother, so should raise ConnectionRefusedError, but should run everything else
        self.assertRaises(ConnectionRefusedError, worker.run)

    def test_worker_parsing(self):
        """
        Purpose: Test regular parsing mechanisms of worker
        Expectation: Load html file, send it to worker to parse, should return list of results

        :return:
        """
        worker = BasicUserParseWorker(self.TEST_USER_URL)
        results, next_page = worker.parse_text(self._load_sample_html())

        self.assertGreater(len(results), 0)     # Check that results are returned
        self.assertEqual(len(results[0]), 3)    # Check that results are in triplets (check formatting)

    def test_worker_add_links_max_limit(self):
        """
        Purpose: Test if crawler does not add links past max limit
        Expectation: Load html file, send it to worker to parse, should not add to crawler list

        :return:
        """
        worker = BasicUserParseWorker(self.TEST_USER_URL)
        worker.max_links = 0  # Force the limit so any add must be rejected

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links("test.com")
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before)

    def test_worker_parsing_different_links(self):
        """
        Purpose: Test if mechanisms of worker parsing different titles
        Expectation: Load html file, send it to worker to parse, should return list of results

        :return:
        """
        worker = BasicUserParseWorker(self.TEST_USER_URL)
        results, next_page = worker.parse_text(self._load_sample_html())

        self.assertGreater(len(results[0]), 0)
        self.assertGreater(len(results[1]), 0)
        self.assertNotEqual(results[0], results[1])  # Check that results are different

    def test_worker_add_links_below_max_limit(self):
        """
        Purpose: Test regular parsing and adding to crawler below max limit
        Expectation: Load html file, send it to worker to parse, add to crawler list with new link

        :return:
        """
        worker = BasicUserParseWorker(self.TEST_USER_URL)
        worker.max_links = 10

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links("test.com")
        len_to_crawl_after = len(worker.to_crawl)

        self.assertNotEqual(len_to_crawl_after, len_to_crawl_before)

    def test_buffer_size_zero(self):
        """
        Purpose: Test running worker with buffer size 0
        Expectation: should refuse the connection to the link

        :return:
        """
        worker = BasicUserParseWorker(self.TEST_USER_URL)
        worker.max_links = 10

        # Save and restore the module-global buffer size so this test does
        # not leak a zero buffer into every test that runs after it.
        original_buffer_size = settings.BUFFER_SIZE
        self.addCleanup(setattr, settings, 'BUFFER_SIZE', original_buffer_size)
        settings.BUFFER_SIZE = 0

        self.assertRaises(ConnectionRefusedError, worker.run)

    def test_worker_add_links_in_crawled(self):
        """
        Purpose: Test adding links which were already crawled
        Expectation: does not add links which were already crawled

        :return:
        """
        worker = BasicUserParseWorker(self.TEST_USER_URL)

        len_to_crawl_before = len(worker.to_crawl)
        worker.add_links([self.TEST_USER_URL])
        len_to_crawl_after = len(worker.to_crawl)

        self.assertEqual(len_to_crawl_after, len_to_crawl_before)

    # not working unit test
    # def test_worker_equals_none_connection(self):
    #     worker = BasicUserParseWorker(None)
    #
    #     worker.max_links = 10
    #     len_to_crawl_before = len(worker.to_crawl)
    #     worker.add_links("test.com")
    #     len_to_crawl_after = len(worker.to_crawl)
    #
    #     self.assertRaises(ConnectionRefusedError, worker.run)
    #     self.assertGreater(len_to_crawl_before, 0)
    #     self.assertGreater(len_to_crawl_after, 0)
    #     self.assertNotEqual(len_to_crawl_after, len_to_crawl_before)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment