Skip to content

Instantly share code, notes, and snippets.

@badalnabizade
Last active August 11, 2019 14:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save badalnabizade/d190e8016b2ee5d7f773cc7cefd84ceb to your computer and use it in GitHub Desktop.
Save badalnabizade/d190e8016b2ee5d7f773cc7cefd84ceb to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.layers import Input, add, dot, Flatten, Embedding,Dropout, concatenate
from tensorflow.keras.regularizers import l2
from tensorflow.keras import optimizers
# Load the raw ratings and movie metadata.
ratings = pd.read_csv('./data/ratings.csv')
# `columns=` instead of a positional axis: pandas 2.0 made every argument of
# DataFrame.drop except `labels` keyword-only.
ratings.drop(columns='timestamp', inplace=True)
movies = pd.read_csv('./data/movies.csv')

# Movies listed in movies.csv that never appear in ratings.csv.
# `isin` does a single hashed membership pass instead of rebuilding
# ratings.movieId.unique() and scanning it once per movie.
movies_not_in_ratings = movies.loc[~movies.movieId.isin(ratings.movieId), 'movieId'].tolist()

# Drop those movies so every remaining movieId also occurs in `ratings`.
movies_by_id = movies.set_index('movieId')
indcs = movies_by_id.loc[movies_not_in_ratings].index
movies = movies_by_id.drop(index=indcs).reset_index()
def proc_data_for_model(ratings_data, movie_col, user_col, movies_data=None):
    """Replace raw user and movie ids with contiguous 0-based indices, in place.

    Ids are numbered in order of first appearance in ``ratings_data``.

    Args:
        ratings_data: Ratings DataFrame; its ``user_col`` and ``movie_col``
            columns are overwritten with the new indices.
        movie_col: Name of the movie id column (present in both frames).
        user_col: Name of the user id column in ``ratings_data``.
        movies_data: Movies DataFrame whose ``movie_col`` column is overwritten
            with the same movie indices. Defaults to the module-level
            ``movies`` frame when omitted.

    Returns:
        Tuple ``(n_users, n_movies)``: the number of unique users and the
        number of unique movies found in ``ratings_data``.

    Raises:
        KeyError: If ``movies_data`` contains a movie id that does not occur
            in ``ratings_data``.
    """
    if movies_data is None:
        # Backward-compatible fallback for callers that rely on the global frame.
        movies_data = movies

    unique_users = ratings_data[user_col].unique()
    user_to_indx = {v: i for i, v in enumerate(unique_users)}
    ratings_data[user_col] = ratings_data[user_col].map(user_to_indx)
    n_users = len(unique_users)

    unique_movies = ratings_data[movie_col].unique()
    movie_to_indx = {o: i for i, o in enumerate(unique_movies)}
    # Look up through __getitem__ (not a plain dict map) so an unrated movie
    # raises KeyError instead of silently becoming NaN.
    movies_data[movie_col] = movies_data[movie_col].map(movie_to_indx.__getitem__)
    ratings_data[movie_col] = ratings_data[movie_col].map(movie_to_indx)
    n_movies = len(unique_movies)

    return n_users, n_movies
# Number of latent factors, i.e. the width of each user/movie embedding vector.
n_factors = 50
# Re-index user and movie ids in place and record how many of each exist.
n_users, n_movies = proc_data_for_model(
    ratings_data=ratings,
    movie_col='movieId',
    user_col='userId',
)
def get_input(n_inputs, shape=(1,)):
    """Build ``n_inputs`` Keras ``Input`` tensors that all share ``shape``.

    Args:
        n_inputs: How many input tensors to create.
        shape: Shape passed to every ``Input``; defaults to ``(1,)``.

    Returns:
        List of the created input tensors, in creation order.
    """
    tensors = []
    for _ in range(n_inputs):
        tensors.append(Input(shape=shape))
    return tensors
def get_emb(input_, input_dim, output_dim, inp_length, reg_func, name):
    """Create a named, regularized ``Embedding`` layer and apply it to ``input_``.

    Args:
        input_: Tensor of indices to embed.
        input_dim: Vocabulary size handed to the ``Embedding`` layer.
        output_dim: Size of each embedding vector.
        inp_length: Forwarded as the layer's ``input_length``.
        reg_func: Regularizer applied to the embedding weights.
        name: Name given to the layer.

    Returns:
        The output tensor of the embedding layer.
    """
    embedding_layer = Embedding(
        input_dim,
        output_dim,
        input_length=inp_length,
        embeddings_regularizer=reg_func,
        name=name,
    )
    return embedding_layer(input_)
def get_bias_emb(input_, input_dim):
    """Look up a one-dimensional embedding for ``input_`` and flatten it.

    Used by this script as a learned per-user / per-movie bias term.

    Args:
        input_: Tensor of indices to look up.
        input_dim: Vocabulary size handed to the ``Embedding`` layer.

    Returns:
        The flattened output of an ``Embedding`` layer with ``output_dim=1``.
    """
    bias_layer = Embedding(input_dim=input_dim, output_dim=1, input_length=1)
    per_index_bias = bias_layer(input_)
    return Flatten()(per_index_bias)
# `train` and `val` were referenced by model.fit but never defined in this
# file. Hold out roughly 20% of the ratings for validation; the fixed seed
# keeps the split reproducible between runs.
split_rng = np.random.RandomState(42)
train_mask = split_rng.rand(len(ratings)) < 0.8
train, val = ratings[train_mask], ratings[~train_mask]

# One input for the user index and one for the movie index.
u_inp, m_inp = get_input(2)
u_emb = get_emb(u_inp, n_users, n_factors, 1, l2(1e-4), 'user_embedding')
m_emb = get_emb(m_inp, n_movies, n_factors, 1, l2(1e-4), 'movie_embedding')
u_b, m_b = get_bias_emb(u_inp, n_users), get_bias_emb(m_inp, n_movies)

# lr(step) = 0.1 / (1 + 2e-4 * step). This is the schedule the legacy
# `decay=2e-4` optimizer argument produced; current Keras optimizers reject
# `decay`, so it is expressed as an explicit learning-rate schedule.
lr_schedule = optimizers.schedules.InverseTimeDecay(1e-1, decay_steps=1, decay_rate=2e-4)
opt = optimizers.SGD(lr_schedule, momentum=0.9)

# Prediction = dot(user factors, movie factors) + user bias + movie bias.
x = dot([u_emb, m_emb], axes=2)
x = Flatten()(x)
x = add([x, u_b])
x = add([x, m_b])

model = models.Model([u_inp, m_inp], x)
model.compile(opt, loss='mse')
model.fit(
    [train.userId, train.movieId],
    train.rating,
    batch_size=512,
    epochs=15,
    validation_data=([val.userId, val.movieId], val.rating),
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment