Skip to content

Instantly share code, notes, and snippets.

@cereniyim
Created April 29, 2020 14:15
Show Gist options
  • Save cereniyim/165b4ed180418bc852fb691e7148056c to your computer and use it in GitHub Desktop.
Save cereniyim/165b4ed180418bc852fb691e7148056c to your computer and use it in GitHub Desktop.
impute missing values function
def ImputeMissingValues(train_df, test_df):
    """Build imputed feature frames and target frames for train and test.

    Calls the three imputation helpers (constant, median, most-frequent) on
    the train/test pair, joins their outputs together with the pass-through
    ``is_`` columns, and extracts the ``points`` column as the target.

    Args:
        train_df: Training DataFrame. Must contain a ``points`` column.
        test_df: Test DataFrame. Must contain a ``points`` column and every
            column of ``train_df`` whose name contains ``"is_"``.

    Returns:
        A 4-tuple ``(train_features, train_target, test_features,
        test_target)``. The targets are single-column DataFrames built
        from ``points``.
    """
    # Columns whose name contains "is_" are carried over without imputation
    # (the original author's note treats them as having no missing values).
    flag_columns = [name for name in train_df.columns if "is_" in name]
    flags_train = train_df[flag_columns]
    flags_test = test_df[flag_columns]

    # Each helper is defined elsewhere and is indexed as (train, test).
    # Per the original notes: the constant step covers taster_name
    # (NA -> "Unknown"); confirm against ImputeWithConstant.
    constant_result = ImputeWithConstant(train_df, test_df)
    constant_train, constant_test = constant_result[0], constant_result[1]

    # Per the original notes: median imputation covers year and price.
    median_result = ImputeWithMedian(train_df, test_df)
    median_train, median_test = median_result[0], median_result[1]

    # Per the original notes: most-frequent imputation covers country,
    # province, region_1 and variety.
    frequent_result = ImputeWithMostFrequent(train_df, test_df)
    frequent_train, frequent_test = frequent_result[0], frequent_result[1]

    # Join order matters for column order: most-frequent, median,
    # constant, then the pass-through flag columns.
    train_features = frequent_train
    for part in (median_train, constant_train, flags_train):
        train_features = train_features.join(part)
    train_target = pd.DataFrame(train_df["points"])

    test_features = frequent_test
    for part in (median_test, constant_test, flags_test):
        test_features = test_features.join(part)
    test_target = pd.DataFrame(test_df["points"])

    return train_features, train_target, test_features, test_target
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment