Skip to content

Instantly share code, notes, and snippets.

@Ricket
Created May 5, 2011 05:49
Show Gist options
  • Save Ricket/956598 to your computer and use it in GitHub Desktop.
Save Ricket/956598 to your computer and use it in GitHub Desktop.
MA 493 Assignment 7
% Load the training matrix and the three '@'-delimited data files
% (movies, ratings, users) into per-column workspace variables.
clear all;
% load the "MLtraining" matrix
% NOTE(review): the file loaded is Examtraining.mat - confirm the name of
% the variable it actually provides.
load('Examtraining.mat');

% movies.dat: replace :: with @ (the file must be preprocessed this way
% before running, because textscan below is told to split on '@')
tic;
movies_dat = fopen('movies.dat');
if movies_dat == -1
    % fopen reports failure by returning -1; stop with a clear message
    % instead of letting textscan fail on an invalid file identifier.
    error('Could not open movies.dat');
end
movies = textscan(movies_dat, '%d %s %s', 'delimiter', '@');
% Close only the file we opened, as soon as we are done with it.
fclose(movies_dat);
[m_ids, m_names, m_genres] = movies{:};
m_num = length(movies{1});
disp(['Loaded movies.dat in ',num2str(toc),' sec']);
% movies: {id name genres}

% ratings.dat: replace :: with @ (it will take a while!)
tic;
ratings_dat = fopen('ratings.dat');
if ratings_dat == -1
    error('Could not open ratings.dat');
end
ratings = textscan(ratings_dat, '%d %d %d %d', 'delimiter', '@');
fclose(ratings_dat);
[r_userids, r_movieids, r_ratings, r_timestamps] = ratings{:};
r_num = length(ratings{1});
disp(['Loaded ratings.dat in ',num2str(toc),' sec']);
% ratings: {userid movieid rating timestamp}

% users.dat: replace :: with @, then replace - with nothing
tic;
users_dat = fopen('users.dat');
if users_dat == -1
    error('Could not open users.dat');
end
users = textscan(users_dat, '%d %c %d %d %s', 'delimiter', '@');
fclose(users_dat);
[u_ids, u_genders, u_ages, u_occupations, u_zipcodes] = users{:};
u_num = length(users{1});
disp(['Loaded users.dat in ',num2str(toc),' sec']);
% users: {id gender age occupation zipcode}
% Each file was closed right after it was read, so no blanket
% fclose('all') is needed (it would also close unrelated open files).
% Now begin the training process
% Build a cell array of all genres
% Also simultaneously generate the movies-genres matrix:
%   mat_movies_genres(i, j) is 1 when movie i lists genres{j}, else 0.
tic;
genres = {};
% Rows are known up front (one per movie); columns are appended as new
% genres are discovered, which happens only once per distinct genre.
mat_movies_genres = zeros(m_num, 0);
for movidx = 1:m_num
    % Split the genres of this movie into a cell array of individual genres
    this_genres = regexp(m_genres{movidx}, '[|]', 'split');
    this_genres_num = length(this_genres);
    for m_genre_idx = 1:this_genres_num
        % Look the genre up once; an empty result means "not seen yet".
        genre_idx = find(ismember(genres, this_genres(m_genre_idx)), 1);
        if isempty(genre_idx)
            % Genre hadn't yet been found; add it to genres
            % To concatenate: genres = [genres 'Blah'];
            genres = [genres this_genres(m_genre_idx)];
            genre_idx = length(genres);
        end
        % Assigning past the current last column grows the matrix and
        % zero-fills the new column for all other movies.
        mat_movies_genres(movidx, genre_idx) = 1;
    end
end
genres_num = length(genres);
disp(['Generated movies-genres matrix in ',num2str(toc),' sec']);
% Allocate the genres-[user attributes] accumulator matrices.
% Zipcodes are deliberately left out (mostly irrelevant).
% Along the third dimension, page 1 holds the running sum and page 2 the
% count (num); sum/num then gives the average, which is what we want.
tic;
mat_genres_genders = zeros([genres_num, 2, 2]);
mat_genres_ages = zeros([genres_num, 1, 2]);
mat_genres_occupations = zeros([genres_num, 1, 2]);
% TODO: the loop
disp(['Generated genres-userattribs matrices in ',num2str(toc),' sec']);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment