Wann-Jiun Ma (Wann-Jiun): public gists
# Creating matrices for sklearn: convert the munged DataFrames to NumPy arrays
# and set up K-fold cross-validation for out-of-fold predictions.
import numpy as np
from sklearn.cross_validation import KFold  # legacy API matching the KFold(n, n_folds=...) signature below

x_train = np.array(train_df_munged)
x_test = np.array(test_df_munged)
y_train = label_df.values
ntrain = x_train.shape[0]
ntest = x_test.shape[0]

# NFOLDS and SEED are constants defined elsewhere in the notebook
kf = KFold(ntrain, n_folds=NFOLDS, shuffle=True, random_state=SEED)
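In a stacking workflow these arrays and folds typically feed an out-of-fold prediction helper. Below is a minimal sketch of that pattern, assuming clf is any sklearn-style estimator with fit/predict; the name get_oof and the fold-averaging of test predictions are illustrative, not taken from the original gist.

def get_oof(clf, x_train, y_train, x_test):
    # Out-of-fold predictions for one base model (sketch)
    oof_train = np.zeros((ntrain,))
    oof_test = np.zeros((ntest,))
    oof_test_skf = np.empty((NFOLDS, ntest))

    for i, (train_index, test_index) in enumerate(kf):
        x_tr, y_tr = x_train[train_index], y_train[train_index]
        x_te = x_train[test_index]

        clf.fit(x_tr, y_tr)

        # Predict the held-out fold and the full test set
        oof_train[test_index] = clf.predict(x_te)
        oof_test_skf[i, :] = clf.predict(x_test)

    # Average the test-set predictions across folds
    oof_test[:] = oof_test_skf.mean(axis=0)
    return oof_train.reshape(-1, 1), oof_test.reshape(-1, 1)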
# Evaluate the user-based collaborative-filtering prediction against the held-out
# ratings (MSE is computed only on entries that are actually rated in the test matrix).
from sklearn.metrics import mean_squared_error

# Predict each user's ratings as a similarity-weighted average of all users' ratings
prediction = similarity_user.dot(train_matrix) / np.array([np.abs(similarity_user).sum(axis=1)]).T
prediction = prediction[test_matrix.nonzero()].flatten()
test_vector = test_matrix[test_matrix.nonzero()].flatten()
mse = mean_squared_error(prediction, test_vector)
print('MSE = ' + str(mse))
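The same evaluation can be wrapped in a helper and reused for the item-based prediction. This is a sketch: get_mse is an illustrative name, and similarity_movie is the movie-movie similarity matrix computed in a later snippet on this page.

def get_mse(pred, actual):
    # Score only the entries that are actually rated in the held-out matrix
    pred = pred[actual.nonzero()].flatten()
    actual = actual[actual.nonzero()].flatten()
    return mean_squared_error(pred, actual)

# Item-based counterpart: weight each user's own ratings by movie-movie similarity
item_prediction = train_matrix.dot(similarity_movie) / np.array([np.abs(similarity_movie).sum(axis=1)])
print('Item-based MSE = ' + str(get_mse(item_prediction, test_matrix)))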
Wann-Jiun / xgboost_wind.ipynb (last active April 18, 2017): XGBoost wind power prediction (notebook preview unavailable).
Wann-Jiun / time_series.ipynb (created April 18, 2017): time series models (notebook preview unavailable).
# Ordinal encoding of quality/condition grades: Po/Fa/TA/Gd/Ex map to 1-5 and a
# missing grade maps to 0. df is the raw frame being munged into train_df.
quality_dict = {None: 0, "Po": 1, "Fa": 2, "TA": 3, "Gd": 4, "Ex": 5}
train_df["ExterQual"] = df["ExterQual"].map(quality_dict).astype(int)
train_df["ExterCond"] = df["ExterCond"].map(quality_dict).astype(int)
train_df["BsmtQual"] = df["BsmtQual"].map(quality_dict).astype(int)
train_df["BsmtCond"] = df["BsmtCond"].map(quality_dict).astype(int)
train_df["HeatingQC"] = df["HeatingQC"].map(quality_dict).astype(int)
train_df["KitchenQual"] = df["KitchenQual"].map(quality_dict).astype(int)
train_df["FireplaceQu"] = df["FireplaceQu"].map(quality_dict).astype(int)
train_df["GarageQual"] = df["GarageQual"].map(quality_dict).astype(int)
# Utilities for fetching and displaying movie posters inline in the notebook
import requests
import json
from IPython.display import Image
from IPython.display import display
from IPython.display import HTML

# Map 0-based matrix index -> external movie id (df_id is the links.csv frame
# loaded in the snippet further down)
idx_to_movie = {}
for row in df_id.itertuples():
    idx_to_movie[row[1] - 1] = row[2]
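The requests/json/IPython-display imports are typically used to pull poster art for the movies in idx_to_movie and render it inline. The sketch below assumes the TMDB v3 API and the MovieLens links.csv layout (imdbId stored without the 'tt' prefix or leading zeros); the API key placeholder, the exact endpoints, and the get_poster helper are illustrative assumptions, not part of the original gist.

headers = {'Accept': 'application/json'}
payload = {'api_key': 'YOUR_TMDB_API_KEY'}  # assumption: a TMDB API key is available

# Ask TMDB for the image base URL once
response = requests.get('http://api.themoviedb.org/3/configuration',
                        params=payload, headers=headers)
base_url = json.loads(response.text)['images']['base_url'] + 'w185'

def get_poster(imdb_id, base_url):
    # Restore the zero-padded IMDB id that links.csv drops when read as an int
    movie_url = 'http://api.themoviedb.org/3/movie/tt{}/images'.format(str(imdb_id).zfill(7))
    response = requests.get(movie_url, params=payload, headers=headers)
    posters = json.loads(response.text).get('posters', [])
    return base_url + posters[0]['file_path'] if posters else None

# Show the poster for the first movie in the index map
display(Image(url=get_poster(idx_to_movie[0], base_url)))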
# Cosine similarity between users (rows) and between movies (columns).
# The small epsilon keeps the normalisation well-defined for all-zero rows/columns.
similarity_user = train_matrix.dot(train_matrix.T) + 1e-9
norms = np.array([np.sqrt(np.diagonal(similarity_user))])
similarity_user = similarity_user / (norms * norms.T)

similarity_movie = train_matrix.T.dot(train_matrix) + 1e-9
norms = np.array([np.sqrt(np.diagonal(similarity_movie))])
similarity_movie = similarity_movie / (norms * norms.T)
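One common use of similarity_movie is to list the nearest neighbours of a given title. A minimal sketch, assuming the matrix columns line up with the idx_to_movie mapping built from links.csv; top_k_movies is an illustrative helper, not part of the original gist.

def top_k_movies(similarity, mapper, movie_idx, k=6):
    # Sort by descending similarity; position 0 is the query movie itself
    neighbour_idx = np.argsort(similarity[movie_idx, :])[::-1][:k]
    return [mapper[i] for i in neighbour_idx]

print(top_k_movies(similarity_movie, idx_to_movie, movie_idx=0))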
# Load the MovieLens links file and map 0-based matrix index -> external movie id
import pandas as pd

df_id = pd.read_csv('links.csv', sep=',')
idx_to_movie = {}
for row in df_id.itertuples():
    idx_to_movie[row[1] - 1] = row[2]

total_movies = 9000
movies = [0] * total_movies
for i in range(len(movies)):
    pass  # (loop body omitted in the original gist)

# Fraction of the user-movie matrix that actually contains ratings, in percent
sparsity = float(len(ratings.nonzero()[0]))
sparsity /= (ratings.shape[0] * ratings.shape[1])
sparsity *= 100
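The ratings matrix used above is not built in this excerpt. Below is a minimal sketch of the usual construction from the MovieLens ratings.csv file, assuming the standard userId/movieId/rating column layout with 1-based user ids; the movie_to_idx remapping mirrors the idx_to_movie indexing above and is an illustrative choice.

import pandas as pd
import numpy as np

df_ratings = pd.read_csv('ratings.csv', sep=',')

# Remap movieId to a dense 0..n_movies-1 column index
movie_to_idx = {m: i for i, m in enumerate(sorted(df_ratings['movieId'].unique()))}
n_users = df_ratings['userId'].max()

# Dense user x movie matrix; 0 means "not rated"
ratings = np.zeros((n_users, len(movie_to_idx)))
for row in df_ratings.itertuples():
    # row: (Index, userId, movieId, rating, timestamp)
    ratings[row[1] - 1, movie_to_idx[row[2]]] = row[3]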
# Hold out 10 ratings per user for testing: zero them in the training copy and
# keep them in the (otherwise empty) test matrix.
train_matrix = rating_matrix.copy()
test_matrix = np.zeros(rating_matrix.shape)
for i in range(rating_matrix.shape[0]):
    rating_idx = np.random.choice(
        rating_matrix[i, :].nonzero()[0],
        size=10,
        replace=False)  # sample without replacement so 10 distinct ratings are held out
    train_matrix[i, rating_idx] = 0.0
    test_matrix[i, rating_idx] = rating_matrix[i, rating_idx]
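A quick sanity check on the split: every rating should end up in exactly one of the two matrices. The assertion below is an addition for illustration, not part of the original gist.

# Training and test matrices must never both be non-zero in the same cell
assert np.all(train_matrix * test_matrix == 0)
print('Held-out ratings: ' + str(int((test_matrix > 0).sum())))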