Last active
December 14, 2018 20:19
-
-
Save ianlcassidy/04b884d6664acac89f1425cb169bcb1f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sklearn.metrics import euclidean_distances | |
from sklearn.preprocessing import StandardScaler | |
def get_hotel_recommendations(df: pd.DataFrame, anchor_id: int) -> pd.DataFrame:
    """Rank the hotels in ``df`` by feature-space distance to an anchor hotel.

    The rows are reordered so the anchor comes first, the feature columns are
    passed through ``normalize_features``, and the Euclidean distance from
    every row to the anchor row is stored in a new ``similarity_distance``
    column.

    Args:
        df: Hotel table. Must contain a ``hotel_id`` column and the feature
            columns ``lat``, ``lng``, ``avg_rate``, ``star_rating`` and
            ``user_rating``. It is not modified.
        anchor_id: ``hotel_id`` of the hotel to compare every other row to.

    Returns:
        A new DataFrame with the rows of ``df`` plus ``similarity_distance``,
        sorted by ascending distance with a fresh 0..n-1 index.

    Raises:
        ValueError: If no row of ``df`` has ``hotel_id == anchor_id``.
    """
    # features used to compute the similarity
    features = ['lat', 'lng', 'avg_rate', 'star_rating', 'user_rating']

    is_anchor = df['hotel_id'] == anchor_id
    if not is_anchor.any():
        # Without this check row 0 would be an arbitrary hotel and the
        # distances would silently be measured against the wrong anchor.
        raise ValueError(
            "anchor_id {!r} not found in df['hotel_id']".format(anchor_id)
        )

    # make the anchor be the first row; concat already returns a new frame,
    # so the caller's df is left untouched
    df_sorted = pd.concat([df[is_anchor], df[~is_anchor]])

    df_features = normalize_features(df_sorted[features].copy())

    # compute the distance from every row to the anchor (row 0)
    X = df_features.values
    Y = X[0].reshape(1, -1)
    # euclidean_distances returns an (n, 1) matrix; flatten it to one column
    df_sorted['similarity_distance'] = euclidean_distances(X, Y).ravel()

    # a stable sort keeps the anchor ahead of any rows tied with it
    ranked = df_sorted.sort_values('similarity_distance', kind='mergesort')
    return ranked.reset_index(drop=True)
def normalize_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with every column imputed and standardized.

    Each column is handled independently: missing values are replaced with
    that column's mean, then the column is rescaled with a freshly fitted
    scikit-learn ``StandardScaler``. The input frame is not modified.
    """
    scaled = df.copy()
    for name in scaled.columns:
        column = scaled[name]
        # fill any NaN's with the mean
        imputed = column.fillna(column.mean())
        # StandardScaler expects a 2-D array, so present the column as (n, 1)
        as_matrix = imputed.values.reshape(-1, 1)
        scaled[name] = StandardScaler().fit_transform(as_matrix)
    return scaled
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment