Last active
October 13, 2019 13:07
-
-
Save mkaranasou/9eb4d3bea3672784c5f1d242a3e4dc76 to your computer and use it in GitHub Desktop.
Example of a PySpark unittest test case for the a-to-b ratio feature (FeatureAToBRatio)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pyspark.sql.utils import AnalysisException | |
from pyspark_unittesting import SparkSQLTestCase | |
class TestFeatureAToBRatio(SparkSQLTestCase):
    """Unit tests for ``FeatureAToBRatio.calculate``.

    Each test builds a single-row dataframe from a plain dict, runs the
    feature on it and checks either the resulting dataframe or the raised
    exception.

    NOTE(review): ``FeatureAToBRatio`` is not imported in the visible part
    of this module -- confirm it is brought into scope elsewhere.
    NOTE(review): ``self.session`` and ``self.assertDataFrameEqual`` are
    presumably provided by ``SparkSQLTestCase`` -- confirm against that
    base class.
    """

    def setUp(self):
        """Run the base-class set-up and create a fresh feature instance."""
        super(TestFeatureAToBRatio, self).setUp()
        self.feature = FeatureAToBRatio()

    def helper_compare_actual_with_expected_df(
            self, initial_data, expected_data
    ):
        """
        Creates two dataframes, one from the initial data and one from the
        expected data, runs feature calculation and compares the expected with
        the actual dataframe

        :param dict[str, T] initial_data: column name -> value for the single
            input row
        :param dict[str, T] expected_data: column name -> value for the single
            row expected after the feature calculation
        :return: None
        """
        # Each dict is wrapped in a list so that it becomes a one-row
        # dataframe.
        df = self.session.createDataFrame([initial_data])
        expected_df = self.session.createDataFrame([expected_data])

        result_df = self.feature.calculate(df)

        self.assertDataFrameEqual(result_df, expected_df)

    def test_calculate_simple_case(self):
        """a=1, b=4 must yield a ratio of 0.25 with the inputs unchanged."""
        data = {
            "a": 1,
            "b": 4,
            "c": 155,
        }
        expected_data = {
            "a": 1,
            "b": 4,
            "c": 155,
            FeatureAToBRatio.feature_name: 0.25,
        }
        self.helper_compare_actual_with_expected_df(data, expected_data)

    def test_calculate_missing_column(self):
        """Input without a ``b`` column must raise ``AnalysisException``."""
        data = {
            "a": 1,
            "f": 4,  # deliberately "f" instead of the required "b"
            "c": 155,
        }
        df = self.session.createDataFrame([data])

        with self.assertRaises(AnalysisException) as t_err:
            self.feature.calculate(df)

        # The message check must live outside the ``with`` body: anything
        # placed after the raising call inside the context never executes.
        # assertIn reports both operands on failure, unlike
        # assertTrue(x in y), which only says "False is not true".
        self.assertIn(
            'cannot resolve \'`b`\' given input columns',
            str(t_err.exception),
        )

    def test_calculate_zero_denominator(self):
        """b=0 must produce ``FeatureAToBRatio.default_value``."""
        data = {
            "a": 1,
            "b": 0,
            "c": 155,
        }
        expected_data = {
            "a": 1,
            "b": 0,
            "c": 155,
            FeatureAToBRatio.feature_name: FeatureAToBRatio.default_value,
        }
        self.helper_compare_actual_with_expected_df(data, expected_data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment