Created
April 28, 2021 19:10
-
-
Save jiobu1/cb306053d1e08d2f1cbe8c9cc21b734c to your computer and use it in GitHub Desktop.
updated nn model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Nearest Neighbor | |
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Select the columns that will be used to calculate nearest neighbors.
# Added data points:
# new = [Transit Score, Bike Score, Total_Schools, Private, Public district, Public Charter,
#        % Private, % Public, % Charter, Above Average or Better, % Performing Above Average or Better]
numeric = df.select_dtypes(['number'])
# 'lat' and 'lon' are excluded from the feature set (presumably coordinates
# rather than attributes to compare on -- confirm with the data owner).
numeric = numeric.drop(columns=['lat', 'lon'])
# This script later stores the row index in a numeric 'Index' column on df.
# When the script is re-run in the same session that column would otherwise be
# picked up as a feature and silently change the neighbors, so drop it if present.
numeric = numeric.drop(columns=['Index'], errors='ignore')

# Scale the data since the values are not on the same scale, i.e. 1000s for
# rent and population versus numbers shown as percentages.
scaler = StandardScaler()
standard_df = pd.DataFrame(scaler.fit_transform(numeric), columns=numeric.columns)

# Instantiate the nearest neighbors algorithm and fit it on the scaled frame.
# n_neighbors=6 because each row's query is expected to return the row itself
# plus five other rows.
nn = NearestNeighbors(n_neighbors=6, algorithm='kd_tree', n_jobs=8)
nn.fit(standard_df)
# Function to join the ids of a row's nearest neighbors into one string
def nearest(idx):
    """Return the nearest neighbors of row ``idx`` as a comma-separated string.

    ``idx`` is a row position in the module-level ``standard_df``. The fitted
    module-level ``nn`` model is queried with that row, the row itself is
    removed from the result, and the remaining row positions are joined with
    commas in the order the model returned them.

    The result holds one id fewer than the model's ``n_neighbors``.
    """
    # Normalise a negative position so it can be compared with the
    # non-negative positions that kneighbors returns.
    row_pos = idx + len(standard_df) if idx < 0 else idx
    # iloc[[...]] yields a one-row DataFrame, so the query keeps the column
    # names the model was fitted with (a bare list of values loses them and
    # makes scikit-learn warn about missing feature names on every call).
    query = standard_df.iloc[[row_pos]]
    neighbor_ids = nn.kneighbors(query, return_distance=False)[0].tolist()
    # Exclude the query row by position instead of assuming it is the first
    # hit: rows with identical features tie at distance 0 and may be returned
    # ahead of it.
    others = [i for i in neighbor_ids if i != row_pos]
    # If the row was not returned at all (more exact duplicates than
    # n_neighbors), still return the usual number of ids.
    return ','.join(map(str, others[:len(neighbor_ids) - 1]))
# Record each row's index in an 'Index' column, then look up the neighbors of
# every row and store the comma-separated result in a 'Nearest' column.
# NOTE(review): nearest() treats these index values as row positions, which
# only lines up when df has a default 0..n-1 index -- confirm upstream.
row_ids = df.index
df['Index'] = row_ids
df['Nearest'] = df['Index'].map(nearest)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment