This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import numpy as np | |
from functools import reduce | |
import pyspark.sql.functions as F | |
from pyspark.sql import Row | |
from pyspark.sql.functions import rand,col,when,concat,substring,lit,udf,lower,sum as ps_sum,count as ps_count,row_number | |
from pyspark.sql.window import * | |
from pyspark.sql import DataFrame | |
from pyspark.ml.feature import VectorAssembler,BucketedRandomProjectionLSH,VectorSlicer | |
from pyspark.sql.window import Window |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pandas to spark df, loading data | |
def equivalent_type(f):
    '''
    Return the Spark SQL type instance matching a pandas dtype name.

    input:
    * f: dtype name such as 'int64'. Anything that compares equal (==) to
      one of the names handled below is accepted, e.g. an object that
      defines equality with its string name.
    output:
    * a new pyspark.sql.types instance; names that are not recognised
      fall back to StringType().

    add more spark sql types like bigint ...
    '''
    # Imported locally so the helper does not depend on a module-level
    # `pyspark.sql.types` import being present elsewhere in the file.
    from pyspark.sql.types import (
        DateType,
        DoubleType,
        IntegerType,
        LongType,
        StringType,
    )

    # Plain == comparisons are kept on purpose: a dict lookup would hash `f`
    # and stop matching objects that are only *equal* to the dtype name.
    if f == 'datetime64[ns]':
        # NOTE(review): DateType carries no time-of-day component; confirm
        # callers only hold calendar dates, otherwise TimestampType is the
        # lossless choice.
        return DateType()
    if f == 'int64':
        return LongType()
    if f == 'int32':
        return IntegerType()
    if f == 'float64':
        # float64 is a 64-bit value. Spark's FloatType is 32-bit single
        # precision and would silently lose precision, so map to DoubleType.
        return DoubleType()
    return StringType()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
autoencoder_network = EN.Encoder([nb_movies,20,10],'sigmoid',0.1) | |
criterion = nn.MSELoss() | |
optimizer = optim.RMSprop(autoencoder_network.parameters(), lr = 0.01, weight_decay = 0.5) | |
nb_epoch = 10 | |
for epoch in range(1, nb_epoch + 1): | |
train_loss = 0 | |
s = 0. | |
# s is the number of users who rated at least one movie
for id_user in range(nb_users): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
def create_index_mapping(L): | |
''' | |
return reindexed dict on user and items | |
encoded indices starts from 1 | |
input: | |
* L: list of str | |
outputs: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import torch | |
import torch.nn as nn | |
import torch.nn.parallel | |
import torch.optim as optim | |
import torch.utils.data | |
from torch.autograd import Variable | |
import torch.nn.init as weight_init |