Skip to content

Instantly share code, notes, and snippets.

View hwang018's full-sized avatar
🎯
Focusing

hwang hwang018

🎯
Focusing
View GitHub Profile
import random
import numpy as np
from functools import reduce
import pyspark.sql.functions as F
from pyspark.sql import Row
from pyspark.sql.functions import rand,col,when,concat,substring,lit,udf,lower,sum as ps_sum,count as ps_count,row_number
from pyspark.sql.window import *
from pyspark.sql import DataFrame
from pyspark.ml.feature import VectorAssembler,BucketedRandomProjectionLSH,VectorSlicer
from pyspark.sql.window import Window
# pandas to spark df, loading data
def equivalent_type(f):
    '''
    return the spark sql type matching a pandas dtype name
    input:
    * f: dtype name such as 'int64' (anything that compares equal to
      one of the names below with == is accepted)
    outputs:
    * a new spark sql type instance; unrecognised names fall back to
      StringType()

    add more spark sql types like bigint ...
    '''
    # Local import so the function brings every type it names into scope itself.
    from pyspark.sql.types import (
        DateType,
        DoubleType,
        FloatType,
        IntegerType,
        LongType,
        StringType,
    )

    # Kept as == comparisons rather than a dict lookup: == also matches
    # objects that only compare equal to the name (presumably numpy dtype
    # objects taken from DataFrame.dtypes -- confirm against the caller).
    if f == 'datetime64[ns]':
        # NOTE(review): DateType has no time-of-day component; if callers
        # need the time part, TimestampType is the closer match -- confirm
        # before changing, since it alters the resulting schema.
        return DateType()
    if f == 'int64':
        return LongType()
    if f == 'int32':
        return IntegerType()
    if f == 'float64':
        # float64 is 8 bytes wide; FloatType is Spark's 4-byte single
        # precision type, so DoubleType is the mapping that loses nothing.
        return DoubleType()
    if f == 'float32':
        return FloatType()
    return StringType()
autoencoder_network = EN.Encoder([nb_movies,20,10],'sigmoid',0.1)
criterion = nn.MSELoss()
optimizer = optim.RMSprop(autoencoder_network.parameters(), lr = 0.01, weight_decay = 0.5)
nb_epoch = 10
for epoch in range(1, nb_epoch + 1):
train_loss = 0
s = 0.
# s is the number of users who rated at least one movie
for id_user in range(nb_users):
@hwang018
hwang018 / load_movielens.py
Created July 21, 2020 02:08
Snippet to load movielens data and preprocess
import pandas as pd
def create_index_mapping(L):
'''
return reindexed dicts for users and items
encoded indices start from 1
input:
* L: list of str
outputs:
@hwang018
hwang018 / autoencoder_class.py
Last active July 24, 2020 03:28
Autoencoder
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
import torch.nn.init as weight_init