Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
# Load the train / validation / test samples from their parquet folders
# into pandas DataFrames.
train_pd_df = read_parquet_folder_as_pandas('/dbfs/mnt/blogs_pl/taxi_fare_feature_eng_train_sample1')
validate_pd_df = read_parquet_folder_as_pandas('/dbfs/mnt/blogs_pl/taxi_fare_feature_eng_validate_sample1')
test_pd_df = read_parquet_folder_as_pandas('/dbfs/mnt/blogs_pl/taxi_fare_feature_eng_test_sample1')

# Labels: the 'fare_amount' column as NumPy arrays.
train_labels = train_pd_df['fare_amount'].values
validation_labels = validate_pd_df['fare_amount'].values

# Feature frames: every column except the label and the 'key' column.
train_pandas = train_pd_df.drop(['fare_amount', 'key'], axis=1)
validation_pandas = validate_pd_df.drop(['fare_amount', 'key'], axis=1)

# The test features must be exactly the training feature columns, in the
# same order; otherwise the scaler fitted on the training frame cannot be
# applied to them. Selecting by the training columns guarantees this and
# fails loudly (KeyError) if the test sample lacks a training feature.
# NOTE(review): the previous code dropped ['passenger_count', 'key'] here,
# which did not mirror the train/validation frames -- confirm the test
# sample's schema against the feature-engineering step.
test_pandas = test_pd_df[train_pandas.columns]

# Fit the min-max scaler on the training features ONLY, then reuse the
# fitted scaler for validation and test. Re-fitting on the test set (as the
# previous `fit_transform` call did) would scale test features with a
# different min/max than the data the model is trained on.
scaler = preprocessing.MinMaxScaler()
train_df_scaled = scaler.fit_transform(train_pandas).astype(np.float32)
validation_df_scaled = scaler.transform(validation_pandas).astype(np.float32)
test_scaled = scaler.transform(test_pandas).astype(np.float32)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment