Skip to content

Instantly share code, notes, and snippets.

@mkaranasou
Last active October 13, 2019 13:07
Show Gist options
  • Save mkaranasou/9eb4d3bea3672784c5f1d242a3e4dc76 to your computer and use it in GitHub Desktop.
Save mkaranasou/9eb4d3bea3672784c5f1d242a3e4dc76 to your computer and use it in GitHub Desktop.
Example of a PySpark unittest test case for the "a to b ratio" feature
from pyspark.sql.utils import AnalysisException
from pyspark_unittesting import SparkSQLTestCase
class TestFeatureAToBRatio(SparkSQLTestCase):
    """
    Test cases for ``FeatureAToBRatio.calculate``.

    Every test builds a single-row dataframe through ``self.session``
    (presumably provided by ``SparkSQLTestCase`` -- verify against that
    base class) and checks either the calculated dataframe or the
    exception that is raised.
    """

    def setUp(self):
        """
        Runs the base class set-up and creates a fresh feature instance, so
        that each test works with its own ``FeatureAToBRatio`` object
        :return: None
        """
        super(TestFeatureAToBRatio, self).setUp()
        self.feature = FeatureAToBRatio()

    def helper_compare_actual_with_expected_df(
            self, initial_data, expected_data
    ):
        """
        Creates two dataframes, one from the initial data and one from the
        expected data, runs feature calculation and compares the expected with
        the actual dataframe
        :param dict[str, T] initial_data: column name -> value for the single
        input row
        :param dict[str, T] expected_data: column name -> value for the single
        row expected after the calculation
        :return: None
        """
        # Each dict is wrapped in a list, so both dataframes have one row.
        df = self.session.createDataFrame([initial_data])
        expected_df = self.session.createDataFrame([expected_data])

        result_df = self.feature.calculate(df)

        self.assertDataFrameEqual(result_df, expected_df)

    def test_calculate_simple_case(self):
        """
        a=1 and b=4 are expected to produce 0.25 in the feature column while
        the input columns (including the unrelated "c") stay as they were
        """
        data = {
            "a": 1,
            "b": 4,
            "c": 155,
        }
        expected_data = {
            "a": 1,
            "b": 4,
            "c": 155,
            FeatureAToBRatio.feature_name: 0.25,
        }

        self.helper_compare_actual_with_expected_df(data, expected_data)

    def test_calculate_missing_column(self):
        """
        When the input has no "b" column ("f" is supplied instead), the
        calculation is expected to raise an AnalysisException that names
        the unresolved column
        """
        data = {
            "a": 1,
            "f": 4,
            "c": 155,
        }
        df = self.session.createDataFrame([data])

        with self.assertRaises(AnalysisException) as t_err:
            self.feature.calculate(df)

        # The message check has to live outside the ``with`` block: anything
        # placed after the raising call inside the block would never run.
        # assertIn reports both strings on failure, unlike assertTrue(x in y).
        # NOTE(review): the message text is produced by Spark and may differ
        # between Spark versions -- confirm against the version in use.
        self.assertIn(
            'cannot resolve \'`b`\' given input columns',
            str(t_err.exception)
        )

    def test_calculate_zero_denominator(self):
        """
        b=0 is expected to produce ``FeatureAToBRatio.default_value`` in the
        feature column, with the input columns left as they were
        """
        data = {
            "a": 1,
            "b": 0,
            "c": 155,
        }
        expected_data = {
            "a": 1,
            "b": 0,
            "c": 155,
            FeatureAToBRatio.feature_name: FeatureAToBRatio.default_value,
        }

        self.helper_compare_actual_with_expected_df(data, expected_data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment