Skip to content

Instantly share code, notes, and snippets.

@tatocaster
Last active August 15, 2023 09:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tatocaster/dd9ac131123a742a690d41768ac6bcab to your computer and use it in GitHub Desktop.
Save tatocaster/dd9ac131123a742a690d41768ac6bcab to your computer and use it in GitHub Desktop.
Based on Nintendo game data, cluster games by their features and recommend similar games.
import pandas as pd
from sklearn.cluster import KMeans
# Load the Nintendo games dataset (JSON) into a DataFrame.
data = pd.read_json('../data/nintendo-games.json')

# Columns used to judge how similar two games are.
features = data[['meta_score', 'esrb_rating', 'genres']]

# One-hot encode the selected columns so KMeans receives a purely numeric matrix.
# NOTE(review): meta_score is one-hot encoded as well, so every distinct value
# becomes its own indicator column instead of contributing a numeric distance —
# confirm this is intended.
features = pd.get_dummies(features, columns=['meta_score', 'esrb_rating', 'genres'])

# Group the games into 20 clusters. A fixed random_state makes the centroid
# initialisation reproducible, so the same dataset always yields the same
# cluster labels (and therefore the same recommendations) across runs.
km = KMeans(n_clusters=20, n_init=10, random_state=42)
km.fit(features)

# Attach each game's cluster label to the DataFrame.
data['cluster'] = km.labels_
def recommend_games(favorite_games, num_recommendations=5, games=None):
    """
    Recommend games that share a cluster with the given favorite games.

    For every cluster that contains at least one favorite game, the other games
    in that cluster are ranked by 'meta_score' and then 'msrp' (both descending)
    and the top ``num_recommendations`` titles are collected.

    Parameters:
    favorite_games (list | str): Favorite game titles, matched exactly against
        the 'title' column. A single title may be passed as a plain string.
    num_recommendations (int): Maximum number of titles taken from each
        favorite's cluster. Default is 5.
    games (pandas.DataFrame | None): Table to search; must provide the columns
        'title', 'cluster', 'meta_score' and 'msrp'. Defaults to the
        module-level ``data`` DataFrame.

    Returns:
    set: Titles of the recommended games. Favorite games are never included.

    Raises:
    ValueError: If none of the favorite games is present in the table.
    """
    catalog = data if games is None else games

    # A bare string would otherwise be treated as a collection of characters.
    if isinstance(favorite_games, str):
        favorite_games = [favorite_games]
    favorite_titles = set(favorite_games)

    is_favorite = catalog['title'].isin(favorite_titles)
    if not is_favorite.any():
        raise ValueError("No data found for the favorite games.")

    # Drop every favorite *before* ranking: otherwise favorites that share a
    # cluster occupy top-N slots and the result comes up short once they are
    # filtered out afterwards.
    candidates = catalog[~is_favorite]

    recommended_games_set = set()
    # Favorites in the same cluster produce the same ranking, so each cluster
    # only needs to be processed once.
    for cluster_id in catalog.loc[is_favorite, 'cluster'].unique():
        cluster_candidates = candidates[candidates['cluster'] == cluster_id]
        ranked = cluster_candidates.sort_values(['meta_score', 'msrp'], ascending=False)
        recommended_games_set.update(ranked.head(num_recommendations)['title'].tolist())

    return recommended_games_set
# Example usage: list titles exactly as they appear in the dataset's 'title' column.
favorite_games = [
]
if favorite_games:
    recommended_games = recommend_games(favorite_games)
else:
    # recommend_games raises ValueError when no favorite matches a row, which an
    # empty list always triggers; skip the call so the placeholder still runs.
    recommended_games = set()
    print("Add at least one game title to favorite_games to get recommendations.")
print(recommended_games)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment