# mkaranasou/test_feature_a_to_b_ratio.py

## test_feature_a_to_b_ratio.py
from pyspark.sql.utils import AnalysisException
from pyspark_unittesting import SparkSQLTestCase


class TestFeatureAToBRatio(SparkSQLTestCase):
    """
    Tests for ``FeatureAToBRatio.calculate``, run against single-row Spark
    dataframes built from plain dicts.

    NOTE(review): ``FeatureAToBRatio`` is not imported in the visible part of
    this file - confirm where it is expected to come from.
    """

    def setUp(self):
        """
        Runs the parent set up and creates a fresh feature instance for each
        test
        :return: None
        """
        super(TestFeatureAToBRatio, self).setUp()
        self.feature = FeatureAToBRatio()

    def helper_compare_actual_with_expected_df(
            self, initial_data, expected_data
    ):
        """
        Creates two single-row dataframes, one from the initial data and one
        from the expected data, runs the feature calculation on the first and
        asserts that the result equals the second
        :param dict[str, T] initial_data: column name -> value for the input
        row
        :param dict[str, T] expected_data: column name -> value for the row
        expected after the calculation
        :return: None
        """
        df = self.session.createDataFrame([initial_data])
        expected_df = self.session.createDataFrame([expected_data])
        result_df = self.feature.calculate(df)

        self.assertDataFrameEqual(result_df, expected_df)

    def test_calculate_simple_case(self):
        """
        a=1 and b=4 are expected to give a feature value of 0.25, with the
        input columns left as they were
        :return: None
        """
        data = {
            "a": 1,
            "b": 4,
            "c": 155,
        }
        expected_data = {
            "a": 1,
            "b": 4,
            "c": 155,
            FeatureAToBRatio.feature_name: 0.25
        }
        self.helper_compare_actual_with_expected_df(data, expected_data)

    def test_calculate_missing_column(self):
        """
        The input has a column "f" where "b" would be, so the calculation is
        expected to raise an AnalysisException that names the unresolved
        column `b`
        :return: None
        """
        data = {
            "a": 1,
            "f": 4,
            "c": 155,
        }
        df = self.session.createDataFrame([data])

        with self.assertRaises(AnalysisException) as t_err:
            self.feature.calculate(df)

        # assertIn shows both the expected fragment and the actual message
        # when it fails, unlike assertTrue(x in y).
        # NOTE(review): the exact wording presumably depends on the Spark
        # version in use - confirm.
        self.assertIn(
            'cannot resolve \'`b`\' given input columns',
            str(t_err.exception)
        )

    def test_calculate_zero_denominator(self):
        """
        With b=0 the feature column is expected to hold
        ``FeatureAToBRatio.default_value``
        :return: None
        """
        data = {
            "a": 1,
            "b": 0,
            "c": 155,
        }
        expected_data = {
            "a": 1,
            "b": 0,
            "c": 155,
            FeatureAToBRatio.feature_name: FeatureAToBRatio.default_value
        }
        self.helper_compare_actual_with_expected_df(data, expected_data)
	from pyspark.sql.utils import AnalysisException
	from pyspark_unittesting import SparkSQLTestCase


	class TestFeatureAToBRatio(SparkSQLTestCase):
		"""
		Tests for ``FeatureAToBRatio.calculate``, run against single-row Spark
		dataframes built from plain dicts.

		NOTE(review): this class repeats a definition found earlier in this
		file, and ``FeatureAToBRatio`` is not imported in the visible part of
		the file - confirm both.
		"""

		def setUp(self):
			"""
			Runs the parent set up and creates a fresh feature instance for
			each test
			:return: None
			"""
			super(TestFeatureAToBRatio, self).setUp()
			self.feature = FeatureAToBRatio()

		def helper_compare_actual_with_expected_df(
				self, initial_data, expected_data
		):
			"""
			Creates two single-row dataframes, one from the initial data and
			one from the expected data, runs the feature calculation on the
			first and asserts that the result equals the second
			:param dict[str, T] initial_data: column name -> value for the
			input row
			:param dict[str, T] expected_data: column name -> value for the
			row expected after the calculation
			:return: None
			"""
			df = self.session.createDataFrame([initial_data])
			expected_df = self.session.createDataFrame([expected_data])
			result_df = self.feature.calculate(df)

			self.assertDataFrameEqual(result_df, expected_df)

		def test_calculate_simple_case(self):
			"""
			a=1 and b=4 are expected to give a feature value of 0.25, with
			the input columns left as they were
			:return: None
			"""
			data = {
				"a": 1,
				"b": 4,
				"c": 155,
			}
			expected_data = {
				"a": 1,
				"b": 4,
				"c": 155,
				FeatureAToBRatio.feature_name: 0.25
			}
			self.helper_compare_actual_with_expected_df(data, expected_data)

		def test_calculate_missing_column(self):
			"""
			The input has a column "f" where "b" would be, so the calculation
			is expected to raise an AnalysisException that names the
			unresolved column `b`
			:return: None
			"""
			data = {
				"a": 1,
				"f": 4,
				"c": 155,
			}
			df = self.session.createDataFrame([data])

			with self.assertRaises(AnalysisException) as t_err:
				self.feature.calculate(df)

			# assertIn shows both the expected fragment and the actual
			# message when it fails, unlike assertTrue(x in y).
			# NOTE(review): the exact wording presumably depends on the Spark
			# version in use - confirm.
			self.assertIn(
				'cannot resolve \'`b`\' given input columns',
				str(t_err.exception)
			)

		def test_calculate_zero_denominator(self):
			"""
			With b=0 the feature column is expected to hold
			``FeatureAToBRatio.default_value``
			:return: None
			"""
			data = {
				"a": 1,
				"b": 0,
				"c": 155,
			}
			expected_data = {
				"a": 1,
				"b": 0,
				"c": 155,
				FeatureAToBRatio.feature_name: FeatureAToBRatio.default_value
			}
			self.helper_compare_actual_with_expected_df(data, expected_data)