Last active
December 1, 2016 12:56
-
-
Save jscrane/4591a1d11a655a20d53f5d3263d6cbd2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
# Predicts the label based on the sum of the observations with tolerance around 0 | |
# | |
# ('Training accuracy for prediction +1:', 0.64809384164222872) | |
# ('Training accuracy for prediction -1:', 0.37745098039215685) | |
# ('Training accuracy for prediction 0:', 0.3524945770065076) | |
# | |
# Grader Score: 44.29% | |
def get_labeled_windowed_data(observations, window_size=7):
    """
    Slice the observation series into overlapping windows, each paired with
    a label vector describing the price move *immediately after* the window.

    A classifier trained on this data is given a window of recent percent
    changes across all markets and must predict, per market, the direction
    of the next change: +1 (up), 0 (unchanged), or -1 (down).

    Inputs
    ------
    - observations: 2D array; column j is the percent-change time series for
      market j
    - window_size: number of consecutive time points per window

    Outputs
    -------
    - windows: 3D array; `windows[i]` is a 2D slice of `observations`
      spanning exactly `window_size` time points
    - window_labels: 2D array; `window_labels[i][j]` is the sign (+1/0/-1)
      of market j's percent change at the time point right after
      `windows[i]`

    *WARNING*: consecutive windows overlap heavily (e.g. `windows[0]` and
    `windows[1]` share all but one row), so the samples are not i.i.d.
    """
    num_time_points, _ = observations.shape
    # Each window ends just before index `stop`; row `stop` supplies the label.
    stops = range(window_size, num_time_points)
    windows = np.array([observations[stop - window_size:stop] for stop in stops])
    # Integer sign of the post-window change: +1, 0, or -1 per market.
    window_labels = np.array([(observations[stop] > 0).astype(int)
                              - (observations[stop] < 0).astype(int)
                              for stop in stops])
    return windows, window_labels
# global variables to be saved for the trained classifier
# `guess` is populated by train(): a dict mapping -1 -> largest negative
# window-sum seen in training and 1 -> smallest positive window-sum; used
# by forecast() as dead-zone thresholds around 0.
guess = None
def label_of(x):
    """Map a numeric change to a direction label: +1 up, -1 down, 0 flat."""
    if x == 0:
        return 0
    # Non-zero: the sign decides the label.
    return 1 if x > 0 else -1
def train(windows, window_labels):
    """
    Train the classifier by learning per-direction thresholds on window sums.

    Stores its result in the module-level global `guess` (required by the
    autograder, which saves declared globals for use at prediction time).
    `train` is called exactly once on the training data; the mystery test
    data is never seen here.

    Inputs
    ------
    - windows, window_labels: see the documentation for the output of
      `get_labeled_windowed_data`
    """
    # -------------------------------------------------------------------------
    # YOUR CODE HERE
    #
    # The autograder wants you to explicitly state which variables are global
    # and are supposed to thus be saved after training for use with prediction.
    global guess
    # Start with sentinels well outside the data range; after training,
    # guess[-1] holds the largest (closest to 0) negative column sum seen,
    # and guess[1] the smallest positive column sum seen.
    guess = {-1: -10, 1: 10}
    for window, labels in zip(windows, window_labels):
        for market in range(len(labels)):
            total = sum(window[:, market])
            if total < 0 and total > guess[-1]:
                guess[-1] = total
            elif total > 0 and total < guess[1]:
                guess[1] = total
    print(guess)
    #
    # END OF YOUR CODE
    # -------------------------------------------------------------------------
def forecast(window):
    """
    Predict the next price direction for each market in `window`.

    Assumes `train` has already run, so the global `guess` thresholds are
    available: a market whose mean percent change over the window is
    positive but smaller than guess[1] (or negative but larger than
    guess[-1]) is treated as too weak a signal and predicted as 0.

    Input
    -----
    - window: 2D array; each column is one window's worth of percent
      changes in price for a specific market (7 days in this challenge,
      but any window length works)

    Output
    ------
    1D array; the i-th entry is a prediction for whether the percentage
    return will go up (+1), stay the same (0), or go down (-1) for the i-th
    market
    """
    # -------------------------------------------------------------------------
    # YOUR CODE HERE
    #
    # Generalized: the original hard-coded the window length as 7; deriving
    # it from the input keeps behavior identical for 7-day windows while
    # supporting any window_size. (The unused market-index parameter of the
    # inner helper was also dropped.)
    num_days = window.shape[0]

    def predict(w):
        s = sum(w) / num_days
        if s > 0:
            # Below the smallest positive training sum: too weak, call it flat.
            return 1 if s >= guess[1] else 0
        if s < 0:
            # Above the largest negative training sum: too weak, call it flat.
            return -1 if s <= guess[-1] else 0
        return 0

    predicted_labels = np.array([predict(window[:, idx])
                                 for idx in range(window.shape[1])])
    #
    # END OF YOUR CODE
    # -------------------------------------------------------------------------
    return predicted_labels
def main():
    """Train on the coconut oil challenge data and report training accuracy
    separately for each true label (+1, -1, 0)."""
    # Load percent-change series: fields 1-4 of each well-formed 5-field row.
    rows = []
    with open('coconut_challenge.csv', 'r') as f:
        for line in f:
            fields = line.split(',')
            if len(fields) == 5:
                rows.append([float(v) for v in fields[1:5]])
    observations = np.array(rows)

    train_windows, train_window_labels = \
        get_labeled_windowed_data(observations, window_size=7)
    train(train_windows, train_window_labels)

    # Predict on the training windows and measure accuracy per true label.
    train_predictions = np.array([forecast(w) for w in train_windows])

    up_mask = train_window_labels == 1
    train_prediction_accuracy_plus1 = \
        np.mean(train_predictions[up_mask] == train_window_labels[up_mask])
    down_mask = train_window_labels == -1
    train_prediction_accuracy_minus1 = \
        np.mean(train_predictions[down_mask] == train_window_labels[down_mask])
    flat_mask = train_window_labels == 0
    train_prediction_accuracy_0 = \
        np.mean(train_predictions[flat_mask] == train_window_labels[flat_mask])

    print('Training accuracy for prediction +1:',
          train_prediction_accuracy_plus1)
    print('Training accuracy for prediction -1:',
          train_prediction_accuracy_minus1)
    print('Training accuracy for prediction 0:',
          train_prediction_accuracy_0)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment