singhrahuldps / recsys.py
An implementation of a basic Recommendation System built using Embedding Matrices in a Neural Net
# required libraries - numpy, pandas, pytorch
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
# loading the ratings table as a pandas dataframe
ratings = pd.read_csv('ratings.csv')
# getting the three column names from a pandas dataframe
user_col, item_col, rating_col = ratings.columns
# this function returns a python dictionary
# which maps each id to a corresponding index value
def list_2_dict(id_list: list):
    d = {}
    for index, id in enumerate(id_list):
        d[id] = index
    return d
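# example (illustration only, not part of the original gist):
# list_2_dict([101, 205, 309]) -> {101: 0, 205: 1, 309: 2}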
# splits ratings dataframe into training and validation dataframes
def get_data(ratings, valid_pct: float = 0.2):
    # shuffle the indexes
    ln = random.sample(range(0, len(ratings)), len(ratings))
    # split based on the given validation set percentage
    part = int(len(ln) * valid_pct)
    valid_index = ln[0:part]
    train_index = ln[part:]
    # return the two dataframes as a list -> [train, valid]
    return [ratings.iloc[train_index].reset_index(drop=True),
            ratings.iloc[valid_index].reset_index(drop=True)]
# get a batch -> (user, item and rating arrays) from the dataframe
def get_batch(ratings, start: int, end: int):
    return (ratings[user_col][start:end].values,
            ratings[item_col][start:end].values,
            ratings[rating_col][start:end].values)
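# example (illustration only, not part of the original gist):
# get_batch(ratings, 0, 64) returns the first 64 user ids, item ids and
# ratings as three numpy arrays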
# get list of unique user ids
users = sorted(list(set(ratings[user_col].values)))
# get list of unique item ids
items = sorted(list(set(ratings[item_col].values)))
# generate dict of corresponding indexes for the user ids
user2idx = list_2_dict(users)
# generate dict of corresponding indexes for the item ids
item2idx = list_2_dict(items)
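# illustrative helper (a sketch, not part of the original gist): nn.Embedding
# layers expect integer index tensors, while get_batch returns raw ids as
# numpy arrays, so a mapping step along these lines is assumed to happen
# before each forward pass; the name ids_to_tensors is hypothetical
def ids_to_tensors(user_ids, item_ids, rating_vals):
    u = torch.tensor([user2idx[u_] for u_ in user_ids], dtype=torch.long).cuda()
    it = torch.tensor([item2idx[i_] for i_ in item_ids], dtype=torch.long).cuda()
    r = torch.tensor(rating_vals, dtype=torch.float).cuda()
    return u, it, r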
# neural net based on Embedding matrices
# model reference -> https://github.com/fastai/fastai/
class EmbeddingModel(nn.Module):
    def __init__(self, n_factors, n_users, n_items, y_range, initialise=0.01):
        super().__init__()
        self.y_range = y_range
        self.u_weight = nn.Embedding(n_users, n_factors)
        self.i_weight = nn.Embedding(n_items, n_factors)
        self.u_bias = nn.Embedding(n_users, 1)
        self.i_bias = nn.Embedding(n_items, 1)
        # small uniform init (assumed use of the initialise argument,
        # following the fastai reference above)
        for emb in (self.u_weight, self.i_weight, self.u_bias, self.i_bias):
            emb.weight.data.uniform_(-initialise, initialise)
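    # forward pass: a sketch reconstructed from the fastai reference above and
    # the y_range/sigmoid comment below; the exact method is an assumption here
    # score = dot(user factors, item factors) + both bias terms, then squashed
    # into y_range with a sigmoid
    def forward(self, users, items):
        dot = (self.u_weight(users) * self.i_weight(items)).sum(dim=1)
        res = dot + self.u_bias(users).squeeze() + self.i_bias(items).squeeze()
        return torch.sigmoid(res) * (self.y_range[1] - self.y_range[0]) + self.y_range[0]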
# create a model object
# y_range is extended to (0-11), wider than the required (1-10), so that the
# target values lie in the near-linear region of the sigmoid function
model = EmbeddingModel(10, len(users), len(items), [0, 11], initialise=0.01).cuda()
# split the data, returns a list [train, valid]
data = get_data(ratings, 0.1)
# loss = mean((target_rating - predicted_rating)**2)
loss_function = nn.MSELoss()
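# worked example (illustration only, not part of the original gist): for
# predictions [7., 7.] and targets [8., 6.], MSE = ((7-8)**2 + (7-6)**2) / 2 = 1.0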
def train(epochs = 10, bs = 64):
    for epoch in range(epochs):
        # training the model
        i = 0
        total_loss = 0.0
        ct = 0
        while i < len(data[0]):
            x1, x2, y = get_batch(data[0], i, i + bs)
            i += bs