Skip to content

Instantly share code, notes, and snippets.

@jfkirk
Created January 18, 2019 01:42
Show Gist options
  • Save jfkirk/90928701b75ea1ddf6f1ec64fd4bd969 to your computer and use it in GitHub Desktop.
Save jfkirk/90928701b75ea1ddf6f1ec64fd4bd969 to your computer and use it in GitHub Desktop.
# Open and read in the ratings file.
print('Loading ratings')
# newline='' is what the csv module asks for, so that the reader handles line
# endings (including newlines embedded in quoted fields) itself.
with open('ratings.csv', 'r', newline='') as ratings_file:
    ratings_file_reader = csv.reader(ratings_file)
    # Take the header row directly from the reader instead of popping index 0
    # off the fully materialised list, which would shift every remaining row.
    raw_ratings_header = next(ratings_file_reader, None)
    if raw_ratings_header is None:
        raise ValueError('ratings.csv is empty: expected a header row')
    raw_ratings = list(ratings_file_reader)

# Iterate through the input to map MovieLens IDs to new internal IDs.
# The new internal IDs are created by the defaultdict on first lookup: the
# factory returns the current size of the mapping, so IDs are handed out as
# 0, 1, 2, ... in order of first appearance.
movielens_to_internal_user_ids = defaultdict(lambda: len(movielens_to_internal_user_ids))
movielens_to_internal_item_ids = defaultdict(lambda: len(movielens_to_internal_item_ids))
for row in raw_ratings:
    # Rows are rewritten in place: column 0 and column 1 become internal
    # integer IDs and column 2 becomes a float. Any further columns are left
    # as the strings the csv reader produced.
    row[0] = movielens_to_internal_user_ids[int(row[0])]
    row[1] = movielens_to_internal_item_ids[int(row[1])]
    row[2] = float(row[2])

# Number of distinct IDs seen in columns 0 and 1 respectively.
n_users = len(movielens_to_internal_user_ids)
n_items = len(movielens_to_internal_item_ids)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment