# Code adapted from http://scikit-learn.org/stable/auto_examples/linear_model/plot_iris_logistic.html
# originally due to Gaël Varoquaux
# Modified by Brad Deutsch
# License: BSD 3 clause

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import matplotlib.image as mpimg
from sklearn import linear_model, datasets, svm, metrics

# Equivalent of the notebook magic ``%matplotlib inline``, spelled as a plain
# function call so this file is valid Python syntax. ``get_ipython`` only
# exists inside an IPython/Jupyter session; when the script is run with a
# plain interpreter the NameError is ignored and the default backend is used.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Import data
data = pd.read_csv("../input/train.csv")
n_samples = len(data.index)

# Shuffle every row (frac=1 draws all rows without replacement) and drop down
# to a plain NumPy array. No random_state is passed, so the resulting split
# differs from run to run unless NumPy's global RNG is seeded elsewhere.
data_shuffled = data.sample(frac=1).values

# create training, validation, test sets
# decide what proportion of samples we want in validation and test sets.
val_prop = 0.15
test_prop = 0.15

# convert to integer locations: rows [0, val_index) form the validation set,
# rows [val_index, test_index) the test set, and everything after test_index
# the training set.
val_index = int(val_prop * n_samples)
test_index = val_index + int(test_prop * n_samples)

# select the three sets by indexing the data.
val = data_shuffled[:val_index, :]
test = data_shuffled[val_index:test_index, :]
train = data_shuffled[test_index:, :]

# split each set into x and y, where y is the digit label (column 0) and x is
# every remaining column.
train_x = train[:, 1:]
train_y = train[:, 0]
val_x = val[:, 1:]
val_y = val[:, 0]
test_x = test[:, 1:]
test_y = test[:, 0]