Skip to content

Instantly share code, notes, and snippets.

@maheshakya
Last active August 29, 2015 14:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save maheshakya/05e22a38c03cbda18f2b to your computer and use it in GitHub Desktop.
Save maheshakya/05e22a38c03cbda18f2b to your computer and use it in GitHub Desktop.
from scipy import sparse as sp
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
import pickle
# Loads the MovieLens ratings file, builds a sparse user x movie rating matrix,
# and stores both the matrix and its truncated SVD factors as pickle files.
# After extracting the compressed MovieLens archive you will get a ratings.dat
# file; the MovieLens site (http://grouplens.org/datasets/movielens/) has all
# the information you need to read it. The 10M data set is used here.


def load_ratings(path=r'ratings.dat'):
    """Read a MovieLens ``ratings.dat`` file into a DataFrame.

    The file has no header row, so the returned columns are labelled 0..3.
    This script uses column 0 as the user id, column 1 as the movie id and
    column 2 as the rating; column 3 is read but never used.
    """
    # A multi-character separator is only supported by the python parsing
    # engine; requesting it explicitly avoids the ParserWarning fallback.
    return pd.read_table(path, sep='::', header=None, engine='python')


def build_rating_matrix(data_file):
    """Build the sparse user x movie rating matrix from a ratings table.

    Parameters
    ----------
    data_file : pandas.DataFrame
        Table whose columns 0, 1 and 2 hold user id, movie id and rating.

    Returns
    -------
    scipy.sparse.lil_matrix
        Matrix of shape (number of distinct users, number of distinct movies).
        Row/column ``i`` corresponds to the ``i``-th smallest user/movie id.
        Ratings are stored as floats, so half-star ratings (e.g. 3.5) are
        kept instead of being truncated to integers.
    """
    # If the same (user, movie) pair occurs more than once, keep the last
    # occurrence. This matches item-by-item assignment into the matrix and
    # prevents the COO -> CSR conversion below from summing duplicates.
    deduplicated = data_file.drop_duplicates(subset=[0, 1], keep='last')

    # np.unique returns the sorted distinct ids, and with return_inverse=True
    # also the position of every original id within that sorted array, which
    # is exactly the row/column index needed.
    users, user_rows = np.unique(deduplicated[0].values, return_inverse=True)
    movies, movie_columns = np.unique(deduplicated[1].values,
                                      return_inverse=True)
    ratings = deduplicated[2].values.astype(float)

    # Building the matrix in one vectorised call replaces millions of
    # Python-level single-element assignments.
    matrix = sp.coo_matrix((ratings, (user_rows, movie_columns)),
                           shape=(len(users), len(movies))).tocsr()
    # Do not store explicit zeros, so only non-zero ratings are kept.
    matrix.eliminate_zeros()
    # Return LIL so the pickled matrix has the same type as before.
    return matrix.tolil()


def _save_pickle(obj, path):
    """Pickle ``obj`` into the file at ``path``."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


def main():
    """Run the pipeline: load ratings, save the matrix, save its SVD."""
    data_file = load_ratings()
    V = build_rating_matrix(data_file)

    # As these operations consume a lot of time, it's better to save the
    # processed data.
    _save_pickle(V, 'movielens_10M.pickle')

    # Get the SVD components of the rating matrix. svds only needs
    # matrix-vector products, which are slow on LIL, so convert to CSR once.
    u, s, vt = svds(V.tocsr(), k=100)
    _save_pickle(u, 'movielens_10M_svd_u.pickle')
    _save_pickle(s, 'movielens_10M_svd_s.pickle')
    _save_pickle(vt, 'movielens_10M_svd_vt.pickle')


if __name__ == '__main__':
    main()
@maheshakya
Copy link
Author

Shape of V = (69878, 10677)

Shapes of u, s and vt are as follows:

u : (69878, 100)
s : (100, )
vt: (100, 10677)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment