Skip to content

Instantly share code, notes, and snippets.

@priyanlc
Created May 10, 2020 16:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save priyanlc/a7705e7d3d646066b75eb38d64af2809 to your computer and use it in GitHub Desktop.
Save priyanlc/a7705e7d3d646066b75eb38d64af2809 to your computer and use it in GitHub Desktop.
# test_new_york_taxi_feature_eng.py
import pytest
import os
from poc.ny_taxi import new_york_taxi_feature_eng
from poc.ny_taxi.constants import TAXI_PARQUET_FILE
# Sample input data location, expressed relative to this test module:
# <directory of this file>/<parent>/ny_taxi/data/ (trailing slash kept as-is).
_THIS_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_PATH = os.path.join(_THIS_DIR, os.pardir, 'ny_taxi/data/')
def test_feature_eng(
        spark,
        tmp_path_factory
):
    """Run the feature-engineering job and check its parquet output is non-empty.

    Calls ``new_york_taxi_feature_eng.run`` with ``DATA_PATH`` as the source
    bucket and a ``release/`` directory under pytest's base temp directory as
    the release bucket, then reads ``TAXI_PARQUET_FILE`` from that same
    directory and asserts it contains at least one row.

    Args:
        spark: Spark session fixture used to read the parquet output
            (defined outside this file; presumably in ``conftest.py``).
        tmp_path_factory: pytest's built-in temporary-directory factory.
    """
    # Build the output URL once so the job and the read-back below use the
    # exact same location (no duplicated path construction, no stray '//').
    release_bucket = f'file://{tmp_path_factory.getbasetemp()}/release/'

    new_york_taxi_feature_eng.run(
        source_bucket=f'file://{DATA_PATH}',
        release_bucket=release_bucket,
    )

    parquet_df = spark.read.parquet(f'{release_bucket}{TAXI_PARQUET_FILE}')
    assert parquet_df.count() > 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment