# ianlcassidy/hotel_recommendations1.py

## hotel_recommendations1.py
import pandas as pd
from sklearn.metrics import euclidean_distances
from sklearn.preprocessing import StandardScaler


def get_hotel_recommendations(df: pd.DataFrame, anchor_id: int) -> pd.DataFrame:
    """Rank the hotels in ``df`` by similarity to the anchor hotel.

    Similarity is the Euclidean distance between normalized feature vectors
    built from ``lat``, ``lng``, ``avg_rate``, ``star_rating`` and
    ``user_rating``; a smaller distance means a more similar hotel.

    Args:
        df: Hotel table. Must contain a ``hotel_id`` column and every
            feature column listed above. It is not modified.
        anchor_id: ``hotel_id`` of the hotel the others are compared to.
            If several rows share this id, the first one is the reference.

    Returns:
        A new frame holding all rows of ``df`` plus a
        ``similarity_distance`` column, sorted ascending by that column and
        re-indexed from 0. The anchor row itself is part of the result
        (its distance to itself is zero up to floating-point rounding).

    Raises:
        ValueError: If no row of ``df`` has ``hotel_id == anchor_id``.
    """
    # features used to compute the similarity
    features = ['lat', 'lng', 'avg_rate', 'star_rating', 'user_rating']

    # Without this check a missing anchor would silently make whatever hotel
    # happens to be first in ``df`` the reference point.
    is_anchor = df['hotel_id'] == anchor_id
    if not is_anchor.any():
        raise ValueError(f"anchor_id {anchor_id!r} not found in df['hotel_id']")

    # Put the anchor first so that row 0 of the feature matrix is the
    # reference vector; pd.concat returns a new frame, so ``df`` is untouched.
    df_sorted = pd.concat([df[is_anchor], df[~is_anchor]])
    df_features = normalize_features(df_sorted[features])

    # Distance from every hotel to the anchor (row 0).
    X = df_features.values
    Y = X[0].reshape(1, -1)
    distances = euclidean_distances(X, Y)

    # euclidean_distances returns shape (n, 1); flatten it to a plain column.
    df_sorted['similarity_distance'] = distances.ravel()
    return df_sorted.sort_values('similarity_distance').reset_index(drop=True)


def normalize_features(df: pd.DataFrame) -> pd.DataFrame:
    """Return a standardized copy of ``df``; the input is left untouched.

    Every column is processed on its own: missing values are replaced by
    the column mean, then the column is rescaled with a freshly fitted
    ``StandardScaler``.
    """
    result = df.copy()
    for name in result.columns:
        column = result[name]
        # impute first so the scaler is fitted on a gap-free column
        imputed = column.fillna(column.mean())
        # StandardScaler expects a 2-D array, hence the (n, 1) reshape
        as_matrix = imputed.values.reshape(-1, 1)
        result[name] = StandardScaler().fit_transform(as_matrix)
    return result
	import pandas as pd
	from sklearn.metrics import euclidean_distances
	from sklearn.preprocessing import StandardScaler


	def get_hotel_recommendations(df: pd.DataFrame, anchor_id: int) -> pd.DataFrame:
	    """Rank the hotels in ``df`` by similarity to the anchor hotel.

	    Similarity is the Euclidean distance between normalized feature vectors
	    built from ``lat``, ``lng``, ``avg_rate``, ``star_rating`` and
	    ``user_rating``; a smaller distance means a more similar hotel.

	    Args:
	        df: Hotel table. Must contain a ``hotel_id`` column and every
	            feature column listed above. It is not modified.
	        anchor_id: ``hotel_id`` of the hotel the others are compared to.
	            If several rows share this id, the first one is the reference.

	    Returns:
	        A new frame holding all rows of ``df`` plus a
	        ``similarity_distance`` column, sorted ascending by that column and
	        re-indexed from 0.

	    Raises:
	        ValueError: If no row of ``df`` has ``hotel_id == anchor_id``.
	    """
	    # features used to compute the similarity
	    features = ['lat', 'lng', 'avg_rate', 'star_rating', 'user_rating']

	    # Without this check a missing anchor would silently make whatever hotel
	    # happens to be first in ``df`` the reference point.
	    is_anchor = df['hotel_id'] == anchor_id
	    if not is_anchor.any():
	        raise ValueError(f"anchor_id {anchor_id!r} not found in df['hotel_id']")

	    # Put the anchor first so that row 0 of the feature matrix is the
	    # reference vector; pd.concat returns a new frame, so ``df`` is untouched.
	    df_sorted = pd.concat([df[is_anchor], df[~is_anchor]])
	    df_features = normalize_features(df_sorted[features])

	    # Distance from every hotel to the anchor (row 0).
	    X = df_features.values
	    Y = X[0].reshape(1, -1)
	    distances = euclidean_distances(X, Y)

	    # euclidean_distances returns shape (n, 1); flatten it to a plain column.
	    df_sorted['similarity_distance'] = distances.ravel()
	    return df_sorted.sort_values('similarity_distance').reset_index(drop=True)


	def normalize_features(df: pd.DataFrame) -> pd.DataFrame:
	    """Return a standardized copy of ``df``; the input is left untouched.

	    Every column is processed on its own: missing values are replaced by
	    the column mean, then the column is rescaled with a freshly fitted
	    ``StandardScaler``.
	    """
	    df_norm = df.copy()
	    for col in df_norm.columns:
	        # fill any NaN's with the mean
	        df_norm[col] = df_norm[col].fillna(df_norm[col].mean())
	        # StandardScaler expects a 2-D array, hence the (n, 1) reshape
	        df_norm[col] = StandardScaler().fit_transform(df_norm[col].values.reshape(-1, 1))
	    return df_norm