This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from metaflow import FlowSpec, step | |
class CalculateMean(FlowSpec): | |
@step | |
def start(self): | |
""" | |
Initializes a random dataset. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def check_prime(x):
    """
    Convenient function that checks if a number is prime.

    Parameters
    ----------
    x : int
        The number to test.

    Returns
    -------
    bool
        True when ``x`` is prime, False otherwise. Numbers below 2
        (0, 1 and negatives) are never prime.
    """
    # Primes start at 2; return an explicit False instead of falling
    # through to an implicit None.
    if x < 2:
        return False
    # 2 and 3 are prime; handling them here lets the loop skip even numbers.
    if x < 4:
        return True
    if x % 2 == 0:
        return False
    # A composite number always has a divisor no larger than its square
    # root, so testing odd candidates up to sqrt(x) is sufficient.
    divisor = 3
    while divisor * divisor <= x:
        if x % divisor == 0:
            return False
        divisor += 2
    return True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from metaflow import FlowSpec, Parameter, step | |
class CheckNumbers(FlowSpec): | |
cores = Parameter('cores', | |
help="Parallelize the operation in that many CPU cores.", | |
default=4) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from metaflow import FlowSpec, step | |
class CheckNumbers(FlowSpec): | |
@step | |
def start(self): | |
""" | |
Initializes a random dataset. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Read the raw ratings file; fields are separated by "::" and the file has
# no header row, so the column names are supplied explicitly.
col_names = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings_df = pd.read_csv(
    '/tmp/ratings.dat',
    delimiter='::',
    names=col_names,
    engine='python',
)

# Treat the user and movie identifiers as categorical features.
for id_column in ('user_id', 'movie_id'):
    ratings_df[id_column] = ratings_df[id_column].astype('category')

# The integer category codes stand in for a separately built vocabulary.
ratings_df['user_code'] = ratings_df['user_id'].cat.codes.astype(int)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Largest code plus one for each id column; this equals the number of distinct
# ids assuming the codes are zero-based category codes (TODO confirm for
# 'movie_code', which is created outside this snippet).
n_users, n_movies = (
    ratings_df[code_column].max() + 1
    for code_column in ('user_code', 'movie_code')
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Order the ratings by ascending timestamp; sort_values returns a new frame,
# so ratings_df itself is left untouched.
data_df = ratings_df.sort_values('timestamp')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Binarize the ratings: strictly greater than 4 becomes 1, everything else 0.
is_positive = data_df['rating'] > 4
data_df['preference'] = np.where(is_positive, 1, 0)

# Keep only the positive (preference == 1) rows and drop the rest.
positive_rows = data_df['preference'] == 1
data_df_cleaned = data_df[positive_rows]
data_df_cleaned.head()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def objective(pred, target):
    """Return the raw error ``target - pred``; passed to ``Step`` as the objective."""
    return target - pred


# NOTE(review): SimpleCF and Step are defined outside this snippet; `factors`
# is presumably the embedding size and `mean`/`std` are presumably forwarded
# to `init` -- confirm against their definitions.
net = SimpleCF(n_users, n_movies, factors=128, init=torch.nn.init.normal_, mean=0., std=.1)
optimizer = SGD(net.parameters(), lr=6e-2)
# Use the GPU when one is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Step(net, objective, optimizer, device=device)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Columns used as model inputs and as the label.
features = ['user_code', 'movie_code', 'rating']
target = ['preference']

# Bootstrap on the first 20% of the rows of the cleaned frame.
pct = int(data_df_cleaned.shape[0] * .2)
bootstrapping_data = data_df_cleaned[:pct]

# Wrap inputs and labels as tensors and feed them in their existing order
# (shuffle=False).
feature_tensor = torch.tensor(bootstrapping_data[features].values)
target_tensor = torch.tensor(bootstrapping_data[target].values)
data_set = TensorDataset(feature_tensor, target_tensor)
data_loader = DataLoader(data_set, batch_size=BATCH_SIZE, shuffle=False)
model.batch_fit(data_loader)
OlderNewer