Skip to content

Instantly share code, notes, and snippets.

@imironhead
Last active December 10, 2017 05:06
Show Gist options
  • Save imironhead/7031fb20bf6f8d0d4abca95fe5246ee2 to your computer and use it in GitHub Desktop.
Save imironhead/7031fb20bf6f8d0d4abca95fe5246ee2 to your computer and use it in GitHub Desktop.
code to get best weights for linear regression
def _weeks_to_columns(eigens, n_weeks, week_len):
    """Rearrange per-user week blocks into one column per week.

    Input row ``u`` holds ``n_weeks`` consecutive blocks of ``week_len``
    values.  The result has one row per (user, slot-in-week) pair and one
    column per week, i.e. ``out[u * week_len + d, w] == eigens[u, w * week_len + d]``.
    Columns beyond ``n_weeks * week_len`` are ignored.

    Raises:
        ValueError: if ``eigens`` has fewer than ``n_weeks * week_len`` columns.
    """
    n_users = eigens.shape[0]
    # (users, weeks, slots) -> (users, slots, weeks) -> (users * slots, weeks)
    weeks = eigens[:, :n_weeks * week_len].reshape(n_users, n_weeks, week_len)
    return weeks.transpose(0, 2, 1).reshape(n_users * week_len, n_weeks)


def linear():
    """Fit a linear regression over weekly features and save its predictions.

    Loads ``train_eigens`` and ``issue_eigens`` from
    ``../dataset/v0_eigens.npz``.  The last 28 columns of the training array
    are used as labels; the preceding columns are regrouped so that every
    (user, slot-in-week) pair becomes one sample with 32 features (one per
    week).  A ``sklearn.linear_model.LinearRegression`` is fitted on the
    training samples, applied to the identically-regrouped issue samples, and
    the predictions are handed to ``save_labels``.

    Naming convention (from the original author): train = training,
    issue = testing, eigens = features.
    """
    # Per the original author's note: 28 booleans per week.
    week_len = 28
    # Number of feature weeks fed to the model.
    n_weeks = 32

    # NpzFile keeps the archive open until closed, so pull both arrays out
    # inside a context manager to avoid leaking the file handle.
    with np.load('../dataset/v0_eigens.npz') as eigens:
        train_eigens = eigens['train_eigens']
        issue_eigens = eigens['issue_eigens']

    # Labels are the last week; everything before it is feature data.
    train_labels = train_eigens[:, -week_len:]
    train_eigens = train_eigens[:, :-week_len]
    # NOTE(review): the issue set also drops its last 28 columns, exactly as
    # the original code did -- confirm this matches the dataset layout.
    issue_eigens = issue_eigens[:, :-week_len]

    # Flatten the last week (of all users) into a single label column.
    train_labels = np.reshape(train_labels, (train_labels.size, 1))

    # Solve xW = y, with N users in the training set:
    #   x: (N * 28) rows x (32 weeks) columns
    #   W: (32) rows x (1) column
    #   y: (N * 28) rows x (1) column
    train_eigens = _weeks_to_columns(train_eigens, n_weeks, week_len)
    issue_eigens = _weeks_to_columns(issue_eigens, n_weeks, week_len)

    model = sklearn.linear_model.LinearRegression()
    model.fit(train_eigens, train_labels)

    guess = model.predict(issue_eigens)

    save_labels(guess)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment