# jiobu1/updated_nn.py

## updated_nn.py
# Nearest Neighbor

import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Select columns that will be used to calculate nearest neighbors
# Added data points
# new = [Transit Score, Bike Score, Total_Schools, Private, Public district, Public Charter,
#        % Private, % Public, % Charter, Above Average or Better, % Performing Above Average or Better]

# Feature matrix: every numeric column of df except the raw coordinates.
numeric = df.select_dtypes(include='number').drop(columns=['lat', 'lon'])
# 'Index' is a row-id helper column this script adds to df further down. If the
# script is re-run on the same df it would otherwise be picked up here as a
# numeric feature and distort the distances, so drop it when present.
numeric = numeric.drop(columns=['Index'], errors='ignore')

# Scale the features because they are not on comparable ranges, e.g. rent and
# population are in the thousands while other columns are percentages.
scaler = StandardScaler()
standard_df = pd.DataFrame(scaler.fit_transform(numeric), columns=numeric.columns)

# Instantiate the nearest-neighbors model and fit it on the scaled frame.
# n_neighbors=6: the lookups in this file discard the first hit of each query
# (the `[1:]` slice), which leaves five neighbors per row.
nn = NearestNeighbors(n_neighbors=6, algorithm='kd_tree', n_jobs=8)
nn.fit(standard_df)

# function to join list of nearest neighbors by id
def nearest(idx):
    """Return the neighbors of one row as a comma-separated string.

    Args:
        idx: Integer position of the query row in ``standard_df``
            (it is used with ``.iloc``, so it is a position, not a label).

    Returns:
        The positions (within ``standard_df``) of the neighbors found by
        ``nn``, in the order the model returns them, joined with commas.
        The first hit is skipped; for a row the model was fitted on that
        is normally the row itself.
    """
    # Double brackets keep the query as a one-row DataFrame, so it carries the
    # same column names the model was fitted with. Passing a list holding a
    # Series drops them, which recent scikit-learn versions warn about on
    # every call.
    query = standard_df.iloc[[idx]]
    # Only the indices are needed, so skip building the distance array.
    hits = nn.kneighbors(query, return_distance=False)[0]
    return ','.join(str(pos) for pos in hits[1:].tolist())

# Keep each row's index label in its own column.
df['Index'] = df.index

# Query the neighbors of every row in one batched call instead of one model
# call per row. The rows of standard_df are in the same order as the rows of
# df, so the result is assigned positionally; this also avoids treating df's
# index labels as row positions, which is only valid for a 0..n-1 index.
neighbor_positions = nn.kneighbors(standard_df, return_distance=False)

# Skip the first hit of each row (normally the row itself) and store the rest
# as a comma-separated string of row positions.
df['Nearest'] = [
    ','.join(map(str, row[1:])) for row in neighbor_positions.tolist()
]
	# Nearest Neighbor

	import numpy as np
	from sklearn.neighbors import NearestNeighbors
	from sklearn.preprocessing import StandardScaler
	from sklearn.pipeline import make_pipeline

	# Select columns that will be used to calculate nearest neighbors
	# Added data points
	# new = [Transit Score, Bike Score, Total_Schools, Private, Public district, Public Charter,
	# % Private, % Public, % Charter, Above Average or Better, % Performing Above Average or Better]

	numeric = df.select_dtypes(['number'])
	numeric = numeric.drop(columns=['lat', 'lon'])

	# scaling data since the values are not the same, i.e 1000s for rent and populutaion versus numbers shown as percentages
	scaler = StandardScaler()
	standard_df = scaler.fit_transform(numeric)
	standard_df = pd.DataFrame(standard_df, columns = numeric.columns)

	# instantiate nearest neighbors algorithm and fit on scaled df
	nn = NearestNeighbors(n_neighbors=6, algorithm='kd_tree', n_jobs=8)
	nn.fit(standard_df)

	# function to join list of nearest neighbors by id
	def nearest(idx):
	return ','.join(map(str, nn.kneighbors([standard_df.iloc[idx]])[1][0][1:].tolist()))

	# Apply function to each row of merged dataframe and create a Nearest column
	df['Index'] = df.index
	df['Nearest'] = df['Index'].apply(nearest)