Skip to content

Instantly share code, notes, and snippets.

@onelharrison
Last active May 31, 2023 02:41
Show Gist options
  • Save onelharrison/15b25f0b4ece968be938ddaf8a5a64ad to your computer and use it in GitHub Desktop.
Save onelharrison/15b25f0b4ece968be938ddaf8a5a64ad to your computer and use it in GitHub Desktop.
import csv

from knn_from_scratch import knn, euclidean_distance
def recommend_movies(movie_query, k_recommendations,
                     data_path='movies_recommendation_data.csv'):
    """Return the data rows of the movies nearest to ``movie_query``.

    Args:
        movie_query: Numeric feature vector handed to ``knn`` unchanged; it
            is compared against the feature columns (index 2 onward) of
            every row in the data file.
        k_recommendations: Number of neighbours requested from ``knn``.
        data_path: CSV file to read. Its first line is treated as a header
            and skipped. Defaults to the file the original script used.

    Returns:
        A list of raw CSV rows (lists of strings), one per neighbour, in
        the order ``knn`` reports them.

    Raises:
        OSError: If ``data_path`` cannot be opened.
        ValueError: If a feature column cannot be converted to ``float``.
    """
    # newline='' lets the csv module handle line endings itself.
    with open(data_path, 'r', newline='') as md:
        reader = csv.reader(md)
        # Discard the header row; the default keeps an empty file from
        # raising StopIteration.
        next(reader, None)
        # csv.reader (unlike str.split) keeps quoted fields that contain
        # commas intact. Blank lines come back as empty rows and are dropped
        # so that indices into this list match the feature list below.
        raw_movies_data = [row for row in reader if row]

    # Keep only the feature columns and convert them to numbers, since the
    # CSV reader yields every field as a string.
    movies_recommendation_data = [
        [float(value) for value in row[2:]] for row in raw_movies_data
    ]

    # Only the neighbour list is used; knn's second return value is
    # discarded, so choice_fn is a stub that returns None.
    recommendation_indices, _ = knn(
        movies_recommendation_data, movie_query, k=k_recommendations,
        distance_fn=euclidean_distance, choice_fn=lambda x: None
    )

    # Each neighbour entry is a pair whose second element is the row index
    # (the first element is ignored here -- presumably the distance).
    return [raw_movies_data[index] for _, index in recommendation_indices]
if __name__ == '__main__':
    # Feature vector for The Post, used as the query.
    the_post = [7.2, 1, 1, 0, 0, 0, 0, 1, 0]

    # Ask for the five closest movies.
    recommended_movies = recommend_movies(
        movie_query=the_post,
        k_recommendations=5,
    )

    # Column 1 of each returned row is printed as the movie title.
    for movie_row in recommended_movies:
        title = movie_row[1]
        print(title)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment