Skip to content

Instantly share code, notes, and snippets.

View katipogluMustafa's full-sized avatar
🎯
Focusing

Mustafa Katipoğlu katipogluMustafa

🎯
Focusing
View GitHub Profile
@katipogluMustafa
katipogluMustafa / load_netflix_movies.py
Last active November 15, 2020 22:10
Loads Netflix Prize Dataset Movies
class NetflixDataset(Dataset):
def load_movies(movies_path, movies_col_names=('item_id', 'year', 'title')):
movies = pd.read_csv(movies_path, encoding='ISO-8859-1', header=None, names=movies_col_names).set_index('item_id')
movies['year'].replace([np.inf, -np.inf, np.nan], 0, inplace=True)
movies['year'] = movies['year'].astype(int)
movies = movies.reindex(columns=['title', 'year'])
# From the Netflix Prize dataset, I only use the first part, which contains 4499 unique movies.
# That is why the remaining movies are truncated; if you load all of the Netflix data, remove the line below.
movies = movies[:4499] # Keep only the first 4499 movies of the dataset
@katipogluMustafa
katipogluMustafa / load_netflix_movies2.py
Last active November 15, 2020 22:11
Loads Netflix Prize Dataset Movies
class NetflixDataset(Dataset):
def load_movies(self):
    """Read the movies data and pass it through the class's private helpers.

    The value returned by ``__read_movies_data_from_file`` is handed, in
    order, to the invalid-year replacement, year-to-int conversion,
    title/year interchange and first-quarter selection helpers. What each
    helper does is taken from its name only; their bodies are not visible here.

    Returns:
        Whatever the final (first-quarter selection) helper returns.
    """
    raw_movies = self.__read_movies_data_from_file()
    years_replaced = NetflixDataset.__replace_invalid_years_with_zero(raw_movies)
    years_as_int = NetflixDataset.__convert_movies_year_format_to_int(years_replaced)
    reordered = NetflixDataset.__interchange_movies_title_and_year(years_as_int)
    return NetflixDataset.__get_only_first_quarter_of_all_movies_for_performance(reordered)
def __init__(self, movies_file_path):
self.movies_file_path = movies_file_path
class TimeConstraint:
"""
TimeConstraint is a constraint on the timestamp of the movie ratings.
We classify a TimeConstraint as either max_time_constraint or time_bin_constraint.
max_time_constraint simulates real life, where the future is unknown but all the data up to one point in time is available.
time_bin_constraint grabs a portion of a time interval whose starting and ending points are strictly defined and whose data is fully known.
"""
def __init__(self, end_dt, start_dt=None):
"""
from constraints.interval import Interval, TimebinInterval, MaxLimitInterval
class TimeConstraint:
def is_valid_timebin(interval: Interval):
return TimeConstraint.__is_expected_class(interval, TimebinInterval) and interval.is_valid()
def is_valid_max_limit(interval: Interval):
return TimeConstraint.__is_expected_class(interval, MaxLimitInterval) and interval.is_valid()
def __is_expected_class(interval: Interval, target_class: type):
class Interval:
    """Hold the two endpoints of an interval.

    As written, ``is_valid`` always reports ``False``.
    """

    def __init__(self, interval_beginning_datetime=None, interval_end_datetime=None):
        """Store both endpoints; each defaults to ``None`` when omitted."""
        self.__interval_beginning, self.__interval_end = (
            interval_beginning_datetime,
            interval_end_datetime,
        )

    def get_interval(self):
        """Return the stored endpoints as a ``(beginning, end)`` tuple."""
        endpoints = (self.__interval_beginning, self.__interval_end)
        return endpoints

    def is_valid(self):
        """Return ``False`` unconditionally."""
        return False
class NetflixDataset(Dataset):
def load_ratings(ratings_path, ratings_col_names):
if not os.path.isfile(ratings_path) or not ratings_col_names:
return None
ratings_raw = pd.read_csv(ratings_path, header=None, names=['user_id', 'rating', 'timestamp'], usecols=[0, 1, 2])
ratings_raw['rating'] = ratings_raw['rating'].astype(float)
# Find empty rows to slice dataframe for each movie
temp_movies = ratings_raw[ratings_raw['rating'].isna()]['user_id'].reset_index()
movie_indexes = [[index, int(movie[:-1])] for index, movie in temp_movies.values]
class NetflixDataset(Dataset):
def load_ratings(self):
    """Read the ratings, structure and reduce them, then sort by timestamp.

    Raises:
        InvalidDatasetInputFilePath: when ``Dataset.is_valid_input_file``
            rejects ``self.ratings_file_path``.

    Returns:
        The object produced by the low-activity-user reduction helper,
        after it has been passed to ``Dataset.sort_ratings_by_timestamp``.
    """
    path_accepted = Dataset.is_valid_input_file(self.ratings_file_path)
    if not path_accepted:
        raise InvalidDatasetInputFilePath

    raw_ratings = self.__read_ratings_data_from_file()
    structured = NetflixDataset.__structure_ratings_dataframe(raw_ratings)
    ratings = NetflixDataset.__reduce_dataset_size_by_removing_low_active_user_data(structured)

    # The sorter's return value is discarded, so it presumably sorts in
    # place -- TODO confirm against Dataset.sort_ratings_by_timestamp.
    Dataset.sort_ratings_by_timestamp(ratings)
    return ratings
# Source: https://github.com/NicolasHug/Surprise/blob/master/surprise/prediction_algorithms/matrix_factorization.pyx
"""
the :mod:`matrix_factorization` module includes some algorithms using matrix
factorization.
"""
from __future__ import (absolute_import, division, print_function,
unicode_literals)
cimport numpy as np # noqa
@katipogluMustafa
katipogluMustafa / AndroidManifest.xml
Created November 30, 2020 19:17
android_jdbc_connection_1
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
package="com.teknoarktik.android.greenhouse">
<!-- Add the following two uses-permission statements for the remote DB connection -->
<uses-permission android:name="android.permission.INTERNET" />
<uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
<application
@katipogluMustafa
katipogluMustafa / Database.java
Last active January 22, 2022 09:10
android_jdbc_connection_2
import java.sql.Connection;
import java.sql.DriverManager;
public class Database {
private Connection connection;
// For Amazon Postgresql
// private final String host = "ssprojectinstance.csv2nbvvgbcb.us-east-2.rds.amazonaws.com"