Skip to content

Instantly share code, notes, and snippets.

@dekromp
Last active May 31, 2018 08:49
Show Gist options
  • Save dekromp/a9628dc707687ce86e62aff89397af41 to your computer and use it in GitHub Desktop.
Save dekromp/a9628dc707687ce86e62aff89397af41 to your computer and use it in GitHub Desktop.
def encode_categorical(train_data, test_data, feature_name):
    """Replace a categorical column by integer category indices, in place.

    The categories are the distinct non-missing values of
    ``train_data[feature_name]``, numbered from 0 in order of first
    appearance. The same value-to-index mapping is applied to both frames,
    so a given category gets the same index in the training and test data.

    Args:
        train_data: DataFrame whose column defines the set of categories.
        test_data: DataFrame encoded with the mapping derived from
            ``train_data``.
        feature_name: Label of the column to encode in both frames.

    Returns:
        None. The column is overwritten in both frames with integer values.

    Raises:
        ValueError: If either frame holds a missing value, or a value that
            does not occur in the training column, since such an entry has
            no category index.
    """
    # Get the unique elements from the training set.
    unique_elements = train_data[feature_name].dropna().unique()
    encoding = {
        element: index for index, element in enumerate(unique_elements)
    }
    for data in (train_data, test_data):
        # Build the encoded column from the untouched original values, so a
        # freshly assigned index can never be mistaken for a raw value
        # (relevant for numeric categories such as 1, 2, 3).
        encoded = data[feature_name].map(encoding)
        if encoded.isna().any():
            # Fail loudly rather than letting an unencoded value pass through
            # and collide with a category index.
            raise ValueError(
                'Column {!r} contains missing values or values that do not '
                'occur in the training data.'.format(feature_name))
        # Use the builtin int: the np.int alias was removed in NumPy 1.24.
        data[feature_name] = encoded.astype(int)
# Turn every categorical column into integer category indices, using the
# categories found in the training data for both frames.
for _categorical_column in ('Pclass', 'Embarked', 'Sex'):
    encode_categorical(train_data, test_data, _categorical_column)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment