singhrahuldps / recsys.py
An implementation of a basic Recommendation System built using Embedding Matrices in a Neural Net
# required libraries - numpy, pandas, pytorch
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import random
# loading the ratings table as a pandas dataframe
ratings = pd.read_csv('ratings.csv')
# getting the three column names from a pandas dataframe
user_col, item_col, rating_col = ratings.columns
# this function returns a python dictionary
# which maps each id to a corresponding index value
def list_2_dict(id_list: list):
    d = {}
    for index, id in enumerate(id_list):
        d[id] = index
    return d
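# example (illustration only, not part of the original gist):
# list_2_dict([101, 205, 309]) -> {101: 0, 205: 1, 309: 2}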
# splits ratings dataframe into training and validation dataframes
def get_data(ratings, valid_pct: float = 0.2):
    # shuffle the indexes
    ln = random.sample(range(0, len(ratings)), len(ratings))
    # split based on the given validation set percentage
    part = int(len(ln) * valid_pct)
    valid_index = ln[0:part]
    train_index = ln[part:]
    # return the two dataframes as a list -> [train, valid]
    return [ratings.iloc[train_index].reset_index(drop=True),
            ratings.iloc[valid_index].reset_index(drop=True)]
# get a batch -> (user, item and rating arrays) from the dataframe
def get_batch(ratings, start: int, end: int):
    return (ratings[user_col][start:end].values,
            ratings[item_col][start:end].values,
            ratings[rating_col][start:end].values)
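# example (illustration only, not part of the original gist):
# get_batch(ratings, 0, 64) returns the first 64 user ids, item ids and
# ratings as three numpy arrays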
# get list of unique user ids
users = sorted(list(set(ratings[user_col].values)))
# get list of unique item ids
items = sorted(list(set(ratings[item_col].values)))
# generate dict of corresponding indexes for the user ids
user2idx = list_2_dict(users)
# generate dict of corresponding indexes for the item ids
item2idx = list_2_dict(items)
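# illustrative helper (a sketch, not part of the original gist): nn.Embedding
# layers expect integer index tensors, while get_batch returns raw ids as
# numpy arrays, so a mapping step along these lines is assumed to happen
# before each forward pass; the name ids_to_tensors is hypothetical
def ids_to_tensors(user_ids, item_ids, rating_vals):
    u = torch.tensor([user2idx[u_] for u_ in user_ids], dtype=torch.long).cuda()
    it = torch.tensor([item2idx[i_] for i_ in item_ids], dtype=torch.long).cuda()
    r = torch.tensor(rating_vals, dtype=torch.float).cuda()
    return u, it, r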
# neural net based on Embedding matrices
# model reference -> https://github.com/fastai/fastai/
class EmbeddingModel(nn.Module):
    def __init__(self, n_factors, n_users, n_items, y_range, initialise=0.01):
        super().__init__()
        self.y_range = y_range
        self.u_weight = nn.Embedding(n_users, n_factors)
        self.i_weight = nn.Embedding(n_items, n_factors)
        self.u_bias = nn.Embedding(n_users, 1)
        self.i_bias = nn.Embedding(n_items, 1)
        # small uniform init (assumed use of the initialise argument,
        # following the fastai reference above)
        for emb in (self.u_weight, self.i_weight, self.u_bias, self.i_bias):
            emb.weight.data.uniform_(-initialise, initialise)
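    # forward pass: a sketch reconstructed from the fastai reference above and
    # the y_range/sigmoid comment below; the exact method is an assumption here
    # score = dot(user factors, item factors) + both bias terms, then squashed
    # into y_range with a sigmoid
    def forward(self, users, items):
        dot = (self.u_weight(users) * self.i_weight(items)).sum(dim=1)
        res = dot + self.u_bias(users).squeeze() + self.i_bias(items).squeeze()
        return torch.sigmoid(res) * (self.y_range[1] - self.y_range[0]) + self.y_range[0]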
# create a model object
# y_range is extended to (0-11), wider than the required (1-10), so that the
# target values lie in the near-linear region of the sigmoid function
model = EmbeddingModel(10, len(users), len(items), [0, 11], initialise=0.01).cuda()
# split the data, returns a list [train, valid]
data = get_data(ratings, 0.1)
# loss = mean((target_rating - predicted_rating)**2)
loss_function = nn.MSELoss()
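# worked example (illustration only, not part of the original gist): for
# predictions [7., 7.] and targets [8., 6.], MSE = ((7-8)**2 + (7-6)**2) / 2 = 1.0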
def train(epochs = 10, bs = 64):
    for epoch in range(epochs):
        # training the model
        i = 0
        total_loss = 0.0
        ct = 0
        while i < len(data[0]):
            x1, x2, y = get_batch(data[0], i, i + bs)
            i += bs