Last active
April 26, 2021 00:43
-
-
Save jiobu1/719a1016f6e6c6fcd994d04115dd80be to your computer and use it in GitHub Desktop.
Nearest Neighbor algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Nearest Neighbor
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Feature columns used to calculate nearest neighbors.
# (Group labels below are inferred from the column names.)
feature_columns = [
    # population / demographics
    'TotalPop', 'Men', 'Women', 'Hispanic', 'White',
    'Black', 'Native', 'Asian', 'Pacific', 'Diversity Index',
    # income / poverty
    'Income', 'IncomeErr', 'IncomePerCap', 'IncomePerCapErr',
    'Poverty', 'ChildPoverty',
    # employment
    'Employed', 'Unemployment',
    'PrivateWork', 'PublicWork', 'SelfEmployed', 'FamilyWork',
    'Professional', 'Service', 'Office', 'Construction', 'Production',
    # commuting
    'Drive', 'Carpool', 'Transit', 'Walk', 'OtherTransp', 'WorkAtHome', 'MeanCommute',
    # crime
    'Violent crime', 'Murder and nonnegligent manslaughter', 'Rape',
    'Robbery', 'Aggravated assault', 'Property crime', 'Burglary',
    'Larceny- theft', 'Motor vehicle theft', 'Arson', 'Crime Rate per 1000',
    # housing
    'Rent',
    # air quality
    'Days with AQI', 'Good Days', 'Moderate Days',
    'Unhealthy for Sensitive Groups Days',
    'Unhealthy Days', 'Very Unhealthy Days', 'Hazardous Days',
    'Max AQI', '90th Percentile AQI', 'Median AQI', 'Days CO',
    'Days NO2', 'Days Ozone', 'Days SO2', 'Days PM2.5', 'Days PM10',
]

# Select those columns from the merged dataframe, in the order listed above.
X = merged[feature_columns]
# Standardize the features because the columns are on very different scales,
# e.g. rent and population are in the 1000s while other columns are percentages.
scaler = StandardScaler().fit(X)

# Wrap the scaled array in a DataFrame again so the original column names are kept.
standard_df = pd.DataFrame(scaler.transform(X), columns=X.columns)
# Create the nearest-neighbors estimator (kd-tree search, 6 neighbors per query,
# 8 parallel jobs) and fit it on the scaled dataframe; fit() returns the estimator.
nn = NearestNeighbors(algorithm='kd_tree', n_neighbors=6, n_jobs=8).fit(standard_df)
def nearest(idx):
    """Return the nearest neighbors of one row as a comma-separated string.

    Args:
        idx: Integer position of the query row in ``standard_df``.

    Returns:
        The neighbor row positions reported by ``nn.kneighbors`` for that row,
        closest first and with the query row itself removed, joined by ``','``.
    """
    # Query with a one-row DataFrame (not a list holding a Series) so the
    # column names match the ones the estimator was fitted with.
    query = standard_df.iloc[[idx]]
    neighbor_ids = nn.kneighbors(query, return_distance=False)[0].tolist()

    # Remove the query row itself. It is normally the first hit, but rows with
    # identical features can be returned ahead of it, so look for it explicitly.
    if idx in neighbor_ids:
        neighbor_ids.remove(idx)
    else:
        # Query row not among the hits (e.g. pushed out by duplicates, or a
        # negative position was given): drop the closest hit, as before.
        neighbor_ids = neighbor_ids[1:]

    return ','.join(map(str, neighbor_ids))
# Keep each row's index label as a regular column.
merged['Index'] = merged.index

# Find every row's neighbors with a single batched query instead of one
# kneighbors() call per row. Calling kneighbors() without query points returns
# the neighbors of each fitted row and never counts a row as its own neighbor,
# so one fewer neighbor is requested than the estimator was configured with.
neighbor_ids = nn.kneighbors(n_neighbors=nn.n_neighbors - 1, return_distance=False)

# Create the Nearest column: comma-separated neighbor row positions, closest first.
# The result is assigned by position, so it does not depend on merged's index
# labels being 0..n-1 (the fitted data has the same row order as merged).
merged['Nearest'] = [','.join(map(str, row.tolist())) for row in neighbor_ids]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment