# katipogluMustafa/load_netflix_movies.py

## load_netflix_movies.py
class NetflixDataset(Dataset):
    """Loader for the movie-title metadata of the Netflix Prize dataset."""

    @staticmethod
    def load_movies(
        movies_path,
        movies_col_names=('item_id', 'year', 'title'),
        max_movies=4499,
    ):
        """Read the header-less movie titles CSV into a DataFrame.

        Args:
            movies_path: Location of the CSV file, passed straight to
                ``pd.read_csv``.
            movies_col_names: Names assigned to the CSV columns. The code
                below relies on ``'item_id'``, ``'year'`` and ``'title'``
                being among them.
            max_movies: Number of leading rows to keep. The default of 4499
                reproduces the original behaviour, which only used the first
                part of the Netflix Prize data. Pass ``None`` to keep every
                row when the full data set is loaded.

        Returns:
            A DataFrame indexed by ``item_id`` with the columns
            ``['title', 'year']``. ``year`` is an integer column in which
            missing or infinite values are stored as 0.
        """
        movies = pd.read_csv(
            movies_path,
            encoding='ISO-8859-1',
            header=None,
            names=movies_col_names,
        ).set_index('item_id')

        # Assign the cleaned column back instead of calling
        # replace(..., inplace=True) on movies['year']: that chained in-place
        # call acts on a column selection, which under pandas Copy-on-Write no
        # longer updates `movies` (pandas 2.x already warns about it) and would
        # leave NaN values for astype(int) to fail on.
        cleaned_year = movies['year'].replace([np.inf, -np.inf, np.nan], 0)
        movies['year'] = cleaned_year.astype(int)
        movies = movies.reindex(columns=['title', 'year'])

        # Keep only the first `max_movies` rows (4499 by default).
        if max_movies is not None:
            movies = movies.iloc[:max_movies]

        return movies
	class NetflixDataset(Dataset):
		"""Loader for the movie-title metadata of the Netflix Prize dataset."""

		@staticmethod
		def load_movies(movies_path, movies_col_names=('item_id', 'year', 'title'), max_movies=4499):
			"""Load the header-less movie titles CSV, indexed by ``item_id``.

			Args:
				movies_path: Location of the CSV file, handed to ``pd.read_csv``.
				movies_col_names: Names given to the CSV columns; ``'item_id'``,
					``'year'`` and ``'title'`` are the ones used below.
				max_movies: How many leading rows to keep (4499 by default, as in
					the original code). ``None`` keeps all rows, for use when the
					whole Netflix data set is loaded.

			Returns:
				DataFrame indexed by ``item_id`` with columns ``['title', 'year']``;
				``year`` is cast to int after missing/infinite values are set to 0.
			"""
			movies = pd.read_csv(movies_path, encoding='ISO-8859-1', header=None, names=movies_col_names)
			movies = movies.set_index('item_id')

			# Re-assign the column rather than using replace(..., inplace=True) on
			# movies['year']: the chained in-place form is not guaranteed to write
			# through to `movies` (it does not under pandas Copy-on-Write), which
			# would leave NaN in place and break the int cast.
			movies['year'] = movies['year'].replace([np.inf, -np.inf, np.nan], 0).astype(int)
			movies = movies.reindex(columns=['title', 'year'])

			# Only the first part of the Netflix Prize data is used by default, so
			# the table is cut to the first `max_movies` rows (4499 by default).
			if max_movies is not None:
				movies = movies.iloc[:max_movies]

			return movies