Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# test_new_york_taxi_feature_eng.py
import pytest
import os
from poc.ny_taxi import new_york_taxi_feature_eng
from poc.ny_taxi.constants import TAXI_PARQUET_FILE
# The `ny_taxi/data/` directory one level above the directory containing this
# file. The path is absolute but not normalized, so it still contains the
# parent-directory component.
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(_THIS_DIR, os.pardir, 'ny_taxi/data/')
def test_feature_eng(
    spark,
    tmp_path_factory
):
    """Run the feature-engineering job and check that it produced rows.

    Calls ``new_york_taxi_feature_eng.run`` with ``DATA_PATH`` as the source
    bucket and a ``release/`` directory under pytest's base temp directory
    as the release bucket, then reads ``TAXI_PARQUET_FILE`` back from that
    release location and asserts the resulting DataFrame is non-empty.

    Args:
        spark: Fixture exposing ``read.parquet`` -- presumably a
            SparkSession supplied by the project's conftest; TODO confirm.
        tmp_path_factory: pytest's temp-directory factory; only
            ``getbasetemp()`` is used here.
    """
    # Build the release URI exactly once so the location handed to the job
    # and the location read back below are the same string, with no stray
    # path separators between them.
    release_bucket = f'file://{tmp_path_factory.getbasetemp()}/release/'

    new_york_taxi_feature_eng.run(
        source_bucket=f'file://{DATA_PATH}',
        release_bucket=release_bucket,
    )

    parquet_df = spark.read.parquet(f'{release_bucket}{TAXI_PARQUET_FILE}')
    assert parquet_df.count() > 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment