Skip to content

Instantly share code, notes, and snippets.

@ianlcassidy
Last active December 14, 2018 20:19
Show Gist options
  • Save ianlcassidy/04b884d6664acac89f1425cb169bcb1f to your computer and use it in GitHub Desktop.
Save ianlcassidy/04b884d6664acac89f1425cb169bcb1f to your computer and use it in GitHub Desktop.
import pandas as pd
from sklearn.metrics import euclidean_distances
from sklearn.preprocessing import StandardScaler
def get_hotel_recommendations(df: pd.DataFrame, anchor_id: int) -> pd.DataFrame:
    """Rank every hotel in ``df`` by similarity to the anchor hotel.

    Similarity is the Euclidean distance between normalized feature vectors
    (see ``normalize_features``); smaller means more similar.

    Args:
        df: Hotel table. Must contain a ``hotel_id`` column and the feature
            columns ``lat``, ``lng``, ``avg_rate``, ``star_rating`` and
            ``user_rating``. It is not modified.
        anchor_id: ``hotel_id`` of the hotel to compare all others against.

    Returns:
        A new DataFrame with the same rows as ``df`` plus a
        ``similarity_distance`` column, sorted ascending by that column and
        re-indexed from 0.

    Raises:
        ValueError: If no row of ``df`` has ``hotel_id == anchor_id``.
    """
    # features used to compute the similarity
    features = ['lat', 'lng', 'avg_rate', 'star_rating', 'user_rating']

    is_anchor = df['hotel_id'] == anchor_id
    if not is_anchor.any():
        # Without this guard an arbitrary hotel would end up in row 0 and be
        # used as the reference point, silently returning wrong rankings.
        raise ValueError(
            'anchor_id {!r} not found in column hotel_id'.format(anchor_id)
        )

    # Put the anchor first so that row 0 of the feature matrix is the
    # reference point. pd.concat builds a new frame, so ``df`` is untouched.
    df_sorted = pd.concat([df[is_anchor], df[~is_anchor]])
    df_features = normalize_features(df_sorted[features])

    # compute the distance from every hotel to the anchor (row 0)
    X = df_features.values
    Y = X[0].reshape(1, -1)
    # euclidean_distances returns shape (n_rows, 1); flatten it to a plain
    # 1-D vector before storing it as a column.
    df_sorted['similarity_distance'] = euclidean_distances(X, Y).ravel()

    # A stable sort keeps the anchor ahead of any rows that tie with it
    # (e.g. hotels with identical feature values).
    return (
        df_sorted
        .sort_values('similarity_distance', kind='mergesort')
        .reset_index(drop=True)
    )
def normalize_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a standardized copy of ``df``, scaling each column on its own.

    Missing values in a column are replaced by that column's mean, then the
    column is passed through a freshly fitted ``StandardScaler``. The input
    frame is left unmodified.
    """
    normalized = df.copy()
    for name in normalized.columns:
        column = normalized[name]
        # impute missing entries with the column mean before scaling
        filled = column.fillna(column.mean())
        normalized[name] = filled
        # StandardScaler expects a 2-D (n_samples, 1) array
        as_matrix = normalized[name].values.reshape(-1, 1)
        normalized[name] = StandardScaler().fit_transform(as_matrix)
    return normalized
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment