Skip to content

Instantly share code, notes, and snippets.

@jurand71
Created June 16, 2022 10:27
Embed
What would you like to do?
# Ordinal-encode the 'Neighborhood' column of the house-price dataset:
# neighbourhoods are ranked by their mean 'SalePrice' (ascending) and each
# label is replaced by its position in that ranking.

# Import libraries
import numpy as np
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder as OE

# Display all columns
pd.set_option('display.max_columns', None)

# Import data from GitHub
df = pd.read_csv('https://github.com/jurand71/datasets/raw/master/HouseSalePriceCompetition/houseprice.csv')

# Calculate mean price in neighborhood, cheapest first
ordered_neighborhood = df.groupby(by=['Neighborhood'])['SalePrice'].mean().sort_values(ascending=True)
# Bare expression: shows the Series when run as the last line of a notebook
# cell; it has no effect when the file is run as a plain script.
ordered_neighborhood

# Generate an ordered list with the labels (the index of the sorted Series)
ordered_neighborhood = ordered_neighborhood.index

# OrdinalEncoder class requires a matrix as an input parameter in fit.
# Use -1 so the row count is inferred from the data instead of hard-coding
# the number of distinct neighbourhoods (previously fixed at 25, which
# raised ValueError for any dataset with a different number of categories).
ordered_neighborhood_array = np.array(ordered_neighborhood).reshape(-1, 1)

# Passing the labels via `categories` makes the encoder follow the
# price-based ordering rather than its default ordering of the labels.
enc = OE(categories=[ordered_neighborhood])
integer_coding = enc.fit_transform(ordered_neighborhood_array)

# Concatenate labels and their codes column-wise: one (label, code) row each
coding_assigment = np.concatenate((ordered_neighborhood_array, integer_coding), axis=1)

# Create dictionary for map function: label -> ordinal code
ordinal_mapping = {label: code for label, code in coding_assigment}

# Apply coding for variable; labels missing from the mapping become NaN
df['Neighborhood'] = df['Neighborhood'].map(ordinal_mapping)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment