Last active
December 10, 2017 05:06
-
-
Save imironhead/7031fb20bf6f8d0d4abca95fe5246ee2 to your computer and use it in GitHub Desktop.
code to get best weights for linear regression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def linear():
    """Fit a linear regression on weekly activity flags and save predictions.

    Loads ``train_eigens`` and ``issue_eigens`` from
    ``../dataset/v0_eigens.npz``. Naming convention used throughout:
    train = training set, issue = testing set, eigens = features.

    Each row is treated as one user and the columns as consecutive weeks of
    28 values each (the original author describes them as 28 booleans per
    week). The last week of the training rows is used as the label; the last
    week of both sets is dropped from the features, and the first 32 weeks of
    what remains are used as inputs.

    The model solved is ``x W = y`` where, for N training users:
        x: (N * 28) rows by 32 columns (one column per week)
        W: 32 rows by 1 column
        y: (N * 28) rows by 1 column (the label week)

    The predictions for the issue set are handed to ``save_labels``.

    Raises:
        ValueError: from the reshape, if either array has fewer than
            32 * 28 feature columns once the last week is removed.
    """
    slots_per_week = 28
    feature_weeks = 32

    # NpzFile keeps the archive open and only reads a member when it is
    # indexed, so pull both arrays out inside the ``with`` block and let it
    # close the file handle afterwards.
    with np.load('../dataset/v0_eigens.npz') as eigens:
        train_eigens = eigens['train_eigens']
        issue_eigens = eigens['issue_eigens']

    # Labels are the last week of the training rows; that trailing week is
    # removed from the features of both sets.
    train_labels = train_eigens[:, -slots_per_week:]
    train_eigens = train_eigens[:, :-slots_per_week]
    issue_eigens = issue_eigens[:, :-slots_per_week]

    # Flatten the label week of all users into a single column.
    train_labels = train_labels.reshape(-1, 1)

    def slots_as_rows(features):
        # Turn (users, weeks * slots) into (users * slots, weeks) so that
        # result[u * slots + s, w] == features[u, w * slots + s]; this is the
        # same layout as slicing each week, flattening it to a column and
        # concatenating the columns side by side.
        users = features.shape[0]
        used = features[:, :feature_weeks * slots_per_week]
        cube = used.reshape(users, feature_weeks, slots_per_week)
        return cube.transpose(0, 2, 1).reshape(
            users * slots_per_week, feature_weeks)

    train_eigens = slots_as_rows(train_eigens)
    issue_eigens = slots_as_rows(issue_eigens)

    model = sklearn.linear_model.LinearRegression()
    model.fit(train_eigens, train_labels)

    guess = model.predict(issue_eigens)

    # save_labels is defined outside this block; it receives the raw
    # regression output for the issue set.
    save_labels(guess)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment