This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NetflixDataset(Dataset): | |
def load_movies(movies_path, movies_col_names=('item_id', 'year', 'title')): | |
movies = pd.read_csv(movies_path, encoding='ISO-8859-1', header=None, names=movies_col_names).set_index('item_id') | |
movies['year'].replace([np.inf, -np.inf, np.nan], 0, inplace=True) | |
movies['year'] = movies['year'].astype(int) | |
movies = movies.reindex(columns=['title', 'year']) | |
# From the Netflix Prize dataset, only the first part is used, which contains 4499 unique movies. | |
# That is why the remaining movies are truncated below; if you load all of the Netflix data, remove the slicing line. | |
movies = movies[:4499] # Keep only the first 4499 movies of the dataset |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NetflixDataset(Dataset):
    """Dataset wrapper that loads movie data from a movies file.

    The private ``__*`` helpers called by :meth:`load_movies` are not part of
    this excerpt; they are expected to be defined elsewhere on this class.
    """

    def __init__(self, movies_file_path):
        """Store the location of the movies file.

        :param movies_file_path: path of the movies data file; kept as given.
        """
        self.movies_file_path = movies_file_path

    def load_movies(self):
        """Read the movies and pass them through the clean-up steps in order.

        Each step is delegated to a private helper and the result of one step
        is the input of the next: read from file, replace invalid years,
        convert the year to int, swap the title/year ordering and finally keep
        only part of the movies (per the helper names -- their bodies are not
        visible here).

        :return: the value produced by the final truncation helper.
        """
        movies = self.__read_movies_data_from_file()
        movies = NetflixDataset.__replace_invalid_years_with_zero(movies)
        movies = NetflixDataset.__convert_movies_year_format_to_int(movies)
        movies = NetflixDataset.__interchange_movies_title_and_year(movies)
        return NetflixDataset.__get_only_first_quarter_of_all_movies_for_performance(movies)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class TimeConstraint: | |
""" | |
TimeConstraint is a constraint on the timestamp of the movie ratings. | |
We classify a TimeConstraint as either max_time_constraint or time_bin_constraint. | |
max_time_constraint is used to simulate real life in which we do not know the future but all the data up until one point in time. | |
time_bin_constraint is used to grab a portion of a time interval where starting and ending points are strictly defined and data is well known. | |
""" | |
def __init__(self, end_dt, start_dt=None): | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from constraints.interval import Interval, TimebinInterval, MaxLimitInterval | |
class TimeConstraint: | |
def is_valid_timebin(interval: Interval): | |
return TimeConstraint.__is_expected_class(interval, TimebinInterval) and interval.is_valid() | |
def is_valid_max_limit(interval: Interval): | |
return TimeConstraint.__is_expected_class(interval, MaxLimitInterval) and interval.is_valid() | |
def __is_expected_class(interval: Interval, target_class: type): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class Interval:
    """A pair of optional beginning/end datetimes.

    This class never reports itself as valid: :meth:`is_valid` always returns
    ``False`` here.
    """

    def __init__(self, interval_beginning_datetime=None, interval_end_datetime=None):
        """Store both end points exactly as given.

        :param interval_beginning_datetime: start of the interval, or ``None``.
        :param interval_end_datetime: end of the interval, or ``None``.
        """
        # Double-underscore names are mangled to _Interval__..., keeping the
        # end points private to this class.
        self.__interval_beginning = interval_beginning_datetime
        self.__interval_end = interval_end_datetime

    def get_interval(self):
        """Return the end points as a ``(beginning, end)`` tuple."""
        return self.__interval_beginning, self.__interval_end

    def is_valid(self):
        """Return ``False`` unconditionally."""
        return False
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NetflixDataset(Dataset): | |
def load_ratings(ratings_path, ratings_col_names): | |
if not os.path.isfile(ratings_path) or not ratings_col_names: | |
return None | |
ratings_raw = pd.read_csv(ratings_path, header=None, names=['user_id', 'rating', 'timestamp'], usecols=[0, 1, 2]) | |
ratings_raw['rating'] = ratings_raw['rating'].astype(float) | |
# Find empty rows to slice dataframe for each movie | |
temp_movies = ratings_raw[ratings_raw['rating'].isna()]['user_id'].reset_index() | |
movie_indexes = [[index, int(movie[:-1])] for index, movie in temp_movies.values] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
class NetflixDataset(Dataset):
    """Dataset wrapper that loads rating data from a ratings file.

    ``self.ratings_file_path`` and the private ``__*`` helpers used below are
    not defined in this excerpt; they are expected to exist elsewhere on the
    class.
    """

    def load_ratings(self):
        """Read, structure, shrink and sort the ratings.

        :return: the ratings object produced by the private helpers, after it
            has been passed to ``Dataset.sort_ratings_by_timestamp``.
        :raises InvalidDatasetInputFilePath: when
            ``Dataset.is_valid_input_file`` rejects ``self.ratings_file_path``.
        """
        # Fail fast on a bad path instead of returning None to the caller.
        if not Dataset.is_valid_input_file(self.ratings_file_path):
            raise InvalidDatasetInputFilePath

        unstructured_ratings = self.__read_ratings_data_from_file()
        ratings = NetflixDataset.__structure_ratings_dataframe(unstructured_ratings)
        ratings = NetflixDataset.__reduce_dataset_size_by_removing_low_active_user_data(ratings)
        # The return value is ignored, so the sort is presumably in place --
        # TODO confirm against Dataset.sort_ratings_by_timestamp.
        Dataset.sort_ratings_by_timestamp(ratings)
        return ratings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Source: https://github.com/NicolasHug/Surprise/blob/master/surprise/prediction_algorithms/matrix_factorization.pyx | |
""" | |
the :mod:`matrix_factorization` module includes some algorithms using matrix | |
factorization. | |
""" | |
from __future__ import (absolute_import, division, print_function, | |
unicode_literals) | |
cimport numpy as np # noqa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="utf-8"?> | |
<manifest xmlns:android="http://schemas.android.com/apk/res/android" | |
package="com.teknoarktik.android.greenhouse"> | |
<!-- Add the following two uses-permission statements to allow a remote DB connection --> | |
<uses-permission android:name="android.permission.INTERNET" /> | |
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" /> | |
<application |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.sql.Connection; | |
import java.sql.DriverManager; | |
public class Database { | |
private Connection connection; | |
// For Amazon Postgresql | |
// private final String host = "ssprojectinstance.csv2nbvvgbcb.us-east-2.rds.amazonaws.com" | |
OlderNewer