Skip to content

Instantly share code, notes, and snippets.

@jiobu1
Created April 28, 2021 19:10
Show Gist options
  • Save jiobu1/cb306053d1e08d2f1cbe8c9cc21b734c to your computer and use it in GitHub Desktop.
Save jiobu1/cb306053d1e08d2f1cbe8c9cc21b734c to your computer and use it in GitHub Desktop.
updated nn model
# Nearest Neighbor
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
# Feature matrix for the neighbor search: every numeric column of df except
# the 'lat' and 'lon' columns, which are dropped before fitting.
# Data points added in this revision:
# new = [Transit Score, Bike Score, Total_Schools, Private, Public district, Public Charter,
# % Private, % Public, % Charter, Above Average or Better, % Performing Above Average or Better]
numeric = df.select_dtypes(include=['number']).drop(columns=['lat', 'lon'])
# Standardize the features because their raw magnitudes differ widely
# (e.g. rent and population are in the thousands, other columns are percentages).
scaler = StandardScaler()
standard_df = pd.DataFrame(scaler.fit_transform(numeric), columns=numeric.columns)
# Fit a kd-tree nearest-neighbors index on the scaled features (6 neighbors per query).
nn = NearestNeighbors(n_neighbors=6, algorithm='kd_tree', n_jobs=8).fit(standard_df)
# function to join list of nearest neighbors by id
def nearest(idx):
    """Return the nearest neighbors of row ``idx`` as a comma-separated string.

    Args:
        idx: Integer row position in ``standard_df`` (the scaled feature
            frame that ``nn`` was fitted on).

    Returns:
        A string such as ``"12,7,40,3,19"`` holding the row positions of the
        closest rows, nearest first, with the queried row itself left out.
        It contains one id fewer than the number of neighbors ``nn`` returns.
    """
    # A one-row DataFrame (double brackets) keeps the column names, so the
    # query has the same feature names the model was fitted with.
    query = standard_df.iloc[[idx]]
    hits = nn.kneighbors(query, return_distance=False)[0].tolist()
    # Drop the queried row by id instead of assuming it is the first hit:
    # when other rows sit at distance zero (duplicates) the row itself is
    # not guaranteed to come back in first place.
    others = [hit for hit in hits if hit != idx][:len(hits) - 1]
    return ','.join(map(str, others))
# Store each row's index label in an 'Index' column, then add a 'Nearest'
# column holding that row's nearest neighbors as a comma-separated id string.
df['Index'] = df.index
# Query every row in one batched call instead of one kneighbors call per row.
# With no query points given, scikit-learn returns the neighbors of each
# fitted row and does not count a row as its own neighbor, so request one
# fewer than the model's n_neighbors to get the same number of ids per row.
# The ids are row positions in standard_df, which has the same row order as df.
neighbor_ids = nn.kneighbors(n_neighbors=nn.n_neighbors - 1, return_distance=False)
df['Nearest'] = [','.join(map(str, row.tolist())) for row in neighbor_ids]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment