# Code adapted from http://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html
# originally due to Gaël Varoquaux
# Modified by Brad Deutsch
# License: BSD 3 clause

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.image as mpimg
from sklearn import linear_model, datasets, svm, metrics

%matplotlib inline

# Load the training CSV and shuffle its rows.
data = pd.read_csv("../input/train.csv")
# Sampling as many rows as the frame holds (without replacement, the pandas
# default) yields a random permutation of the rows; .values then exposes the
# result as a plain NumPy array.
data_shuffled = data.sample(n=data.shape[0]).values

# Partition the shuffled rows into validation, test and training sets.

# Fraction of all samples assigned to the validation and test sets;
# every remaining row ends up in the training set.
val_prop, test_prop = 0.15, 0.15

# Turn the fractions into row boundaries (int() truncates to whole rows).
val_index = int(val_prop * len(data))
test_index = val_index + int(test_prop * len(data))

# Cut along the row axis at the two boundaries:
#   rows [0, val_index)          -> validation
#   rows [val_index, test_index) -> test
#   rows [test_index, end)       -> training
val, test, train = np.split(data_shuffled, [val_index, test_index])

# Separate every set into its label column (y, the digit label stored in
# column 0) and the remaining columns (x).
train_y, train_x = train[:, 0], train[:, 1:]
val_y, val_x = val[:, 0], val[:, 1:]
test_y, test_x = test[:, 0], test[:, 1:]