Last active
November 15, 2020 22:11
-
-
Save katipogluMustafa/45879c6314798a5270fb8d881f4f767f to your computer and use it in GitHub Desktop.
Loads Netflix Prize Dataset Movies
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NetflixDataset(Dataset):
    """Loader for the movies file of the Netflix Prize dataset.

    The file is read as a header-less CSV whose columns are, in order,
    ``item_id``, ``year`` and ``title``.
    """

    # Number of leading rows kept by ``load_movies``; the helper that applies
    # it describes this as the first quarter of all movies.
    _FIRST_QUARTER_ROW_COUNT = 4499

    def __init__(self, movies_file_path):
        """Store the path of the movies file; nothing is read at this point.

        Args:
            movies_file_path: Location of the movies CSV, passed unchanged to
                ``pandas.read_csv`` when ``load_movies`` is called.
        """
        self.movies_file_path = movies_file_path
        self.movies_column_names = ('item_id', 'year', 'title')

    def load_movies(self):
        """Read, clean and truncate the movies table.

        Returns:
            A DataFrame indexed by ``item_id`` with the columns ``title`` and
            ``year`` (in that order), where ``year`` is an integer and missing
            or infinite years are stored as 0. Only the leading
            ``_FIRST_QUARTER_ROW_COUNT`` rows are returned.
        """
        movies = self.__read_movies_data_from_file()
        movies = NetflixDataset.__replace_invalid_years_with_zero(movies)
        movies = NetflixDataset.__convert_movies_year_format_to_int(movies)
        movies = NetflixDataset.__interchange_movies_title_and_year(movies)
        return NetflixDataset.__get_only_first_quarter_of_all_movies_for_performance(movies)

    def __read_movies_data_from_file(self):
        """Parse the movies file into a DataFrame indexed by ``item_id``."""
        return pd.read_csv(
            self.movies_file_path,
            encoding='ISO-8859-1',
            header=None,
            names=self.movies_column_names,
        ).set_index('item_id')

    @staticmethod
    def __replace_invalid_years_with_zero(movies):
        """Replace NaN and +/-inf entries of the ``year`` column with 0.

        The cleaned column is assigned back to the frame instead of calling
        ``replace(..., inplace=True)`` on the selected column: with pandas
        Copy-on-Write the in-place call only updates a temporary object and
        leaves ``movies`` untouched.
        """
        movies['year'] = movies['year'].replace([np.inf, -np.inf, np.nan], 0)
        return movies

    @staticmethod
    def __convert_movies_year_format_to_int(movies):
        """Cast the ``year`` column to ``int``.

        Invalid years must already have been replaced, because NaN cannot be
        converted to an integer.
        """
        movies['year'] = movies['year'].astype(int)
        return movies

    @staticmethod
    def __interchange_movies_title_and_year(movies):
        """Return the frame with its columns ordered as ``title``, ``year``."""
        return movies.reindex(columns=['title', 'year'])

    @staticmethod
    def __get_only_first_quarter_of_all_movies_for_performance(movies):
        """Keep only the leading ``_FIRST_QUARTER_ROW_COUNT`` rows."""
        # ``iloc`` makes the positional (not label-based) slice explicit.
        return movies.iloc[:NetflixDataset._FIRST_QUARTER_ROW_COUNT]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment