Nishank Sharma gitshanks

View GitHub Profile
pip install numpy
pip install pandas
pip install scikit-learn
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn import tree
dataset_url = 'http://mlr.cs.umass.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
data = pd.read_csv(dataset_url, sep=';')
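The file behind that URL separates fields with semicolons rather than commas, which is why `sep=';'` is passed to `read_csv`. A minimal sketch of the same call, using a tiny inline sample (the values here are placeholders, not the real dataset) instead of the remote file:

```python
import io

import pandas as pd

# Two hypothetical rows in the same semicolon-separated layout as
# winequality-red.csv; sep=';' is what splits the header into columns.
sample_csv = (
    '"fixed acidity";"volatile acidity";"citric acid";"quality"\n'
    "7.4;0.70;0.00;5\n"
    "7.8;0.88;0.00;5\n"
)
data = pd.read_csv(io.StringIO(sample_csv), sep=";")
print(data.shape)           # (2, 4)
print(list(data.columns))   # ['fixed acidity', 'volatile acidity', 'citric acid', 'quality']
```

Without `sep=';'`, pandas would treat each line as a single comma-free field and produce one wide column.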
@gitshanks
gitshanks / gist:cd76d9a88a1ee58cbe115c7f7511e312
Created May 16, 2017 06:59
head() information of wine dataset
   fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076
1            7.8              0.88         0.00             2.6      0.098
2            7.8              0.76         0.04             2.3      0.092
3           11.2              0.28         0.56             1.9      0.075
4            7.4              0.70         0.00             1.9      0.076

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56
1                 25.0                  67.0   0.9968  3.20       0.68
@gitshanks
gitshanks / gist:ba81623b6a482403e37da36604526118
Created May 16, 2017 07:07
Separating label and features
y = data.quality
X = data.drop('quality', axis=1)
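`data.quality` selects the label column, and `drop('quality', axis=1)` returns a copy of the frame with every remaining column as a feature (the original `data` is unchanged). A quick sketch on a hypothetical two-column frame:

```python
import pandas as pd

# Minimal stand-in for the wine data: one feature column plus the label.
data = pd.DataFrame({"fixed acidity": [7.4, 7.8], "quality": [5, 5]})

y = data.quality                   # the label (target) column
X = data.drop("quality", axis=1)   # all remaining columns as features

print(list(X.columns))  # ['fixed acidity'] — 'quality' is gone from X
print(y.tolist())       # [5, 5]
```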
@gitshanks
gitshanks / gist:a07d4a76f9b07a888d61e1c777188925
Created May 16, 2017 08:06
Splitting the data into train and test data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
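With `test_size=0.2`, 20% of the rows are held out for testing. As written, each run produces a different split; passing `random_state` makes it reproducible. A sketch on synthetic data (the array shapes and values here are placeholders):

```python
import numpy as np
from sklearn.model_selection import train_test_split

# Synthetic stand-in for the wine features/labels: 100 rows, 3 feature columns.
X = np.random.rand(100, 3)
y = np.random.randint(3, 8, size=100)

# random_state fixes the shuffle so the same split is produced every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print(X_train.shape, X_test.shape)  # (80, 3) (20, 3)
```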
X_train:
      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
591             6.6              0.39         0.49             1.7      0.070
1196            7.9              0.58         0.23             2.3      0.076
1128           10.0              0.43         0.33             2.7      0.095
640             9.9              0.54         0.45             2.3      0.071
389             9.6              0.38         0.31             2.5      0.096

      free sulfur dioxide  total sulfur dioxide  density  pH  sulphates  \
@gitshanks
gitshanks / gist:c61c2071cf95fa9616541bed5e804355
Last active May 16, 2017 12:21
Train data preprocessing
X_train_scaled = preprocessing.scale(X_train)
print(X_train_scaled)
After preprocessing:
[[-0.53815281  0.63268848 -0.51940197 ..., -0.19976907 -0.92134183
  -0.87130124]
 [-1.05705689  0.46528334 -1.39710129 ...,  1.94678214  0.23712868
   1.00404675]
 [-1.92189703 -0.59494924 -0.15799637 ...,  2.66229921  0.46882278
   3.34823175]
 ...,
 [-0.36518478  2.41834335 -0.36451386 ...,  0.32060698 -1.26888298
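`preprocessing.scale` standardizes each column to zero mean and unit variance, but it computes those statistics from whatever array it is handed, so the test set would need to be scaled separately and inconsistently. A common alternative (not what the gist above does) is `StandardScaler`: fit on the training data only, then apply the same mean and variance to the test data. A sketch with small synthetic matrices:

```python
import numpy as np
from sklearn.preprocessing import StandardScaler

# Tiny synthetic feature matrices standing in for the wine data.
X_train = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
X_test = np.array([[2.0, 25.0]])

# Fit the scaler on the training data only, then reuse the same
# mean/variance for the test data — no test statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Scaled training columns have mean 0 and standard deviation 1.
print(X_train_scaled.mean(axis=0))  # [0. 0.]
print(X_train_scaled.std(axis=0))   # [1. 1.]
```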
clf = tree.DecisionTreeClassifier()
clf.fit(X_train, y_train)
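The gists above stop at fitting the tree. A typical next step is to predict on the held-out set and score the result with `accuracy_score`; a sketch on synthetic data (the dataset here is a made-up stand-in, chosen so the labels are learnable from one feature):

```python
import numpy as np
from sklearn import tree
from sklearn.metrics import accuracy_score

# Tiny synthetic dataset standing in for the wine features/labels.
rng = np.random.default_rng(0)
X_train = rng.random((80, 3))
y_train = (X_train[:, 0] > 0.5).astype(int)  # label depends on feature 0 only
X_test = rng.random((20, 3))
y_test = (X_test[:, 0] > 0.5).astype(int)

clf = tree.DecisionTreeClassifier(random_state=0)
clf.fit(X_train, y_train)

# Score predictions on the held-out set: fraction of exact label matches.
y_pred = clf.predict(X_test)
print(accuracy_score(y_test, y_pred))
```

On the real wine data, exact-match accuracy on the 3–8 quality scale is a fairly strict metric; it treats a prediction one quality point off the same as one three points off.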