# justmarkham/iris_pandas.py Secret

## iris_pandas.py
import pandas as pd

# Load the iris data from the UCI repository into a DataFrame. Passing
# `names=` supplies the column labels, so pandas treats every line of the
# file as data rather than as a header.
col_names = [
    'sepal_length',
    'sepal_width',
    'petal_length',
    'petal_width',
    'species',
]
iris = pd.read_csv(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    names=col_names,
)

# Approach 1: turn each species label into an integer code using an explicit
# lookup table.
iris['species_num'] = iris['species'].map({
    'Iris-setosa': 0,
    'Iris-versicolor': 1,
    'Iris-virginica': 2,
})

# Approach 2: have scikit-learn's LabelEncoder produce the codes instead.
# This overwrites the column written by approach 1.
from sklearn.preprocessing import LabelEncoder
labelenc = LabelEncoder()
iris['species_num'] = labelenc.fit_transform(iris['species'])

# Build X (the feature matrix) three different ways; each assignment simply
# replaces the previous one.
X = iris[col_names[:4]]                        # list of column labels
X = iris.loc[:, 'sepal_length':'petal_width']  # label-based slice
X = iris.iloc[:, :4]                           # position-based slice

# Build y (the response vector) from the encoded species column.
y = iris['species_num']

# Inspect the shapes. These are bare expressions, so they only display a
# value when the lines are run interactively.
X.shape  # 150 by 4
y.shape  # 150 (must match the first dimension of X)
	import pandas as pd

	# Load the iris data from the UCI repository into a DataFrame. Passing
	# `names=` supplies the column labels, so pandas treats every line of the
	# file as data rather than as a header.
	col_names = [
		'sepal_length',
		'sepal_width',
		'petal_length',
		'petal_width',
		'species',
	]
	iris = pd.read_csv(
		'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
		names=col_names,
	)

	# Approach 1: turn each species label into an integer code using an explicit
	# lookup table.
	iris['species_num'] = iris['species'].map({
		'Iris-setosa': 0,
		'Iris-versicolor': 1,
		'Iris-virginica': 2,
	})

	# Approach 2: have scikit-learn's LabelEncoder produce the codes instead.
	# This overwrites the column written by approach 1.
	from sklearn.preprocessing import LabelEncoder
	labelenc = LabelEncoder()
	iris['species_num'] = labelenc.fit_transform(iris['species'])

	# Build X (the feature matrix) three different ways; each assignment simply
	# replaces the previous one.
	X = iris[col_names[:4]]                        # list of column labels
	X = iris.loc[:, 'sepal_length':'petal_width']  # label-based slice
	X = iris.iloc[:, :4]                           # position-based slice

	# Build y (the response vector) from the encoded species column.
	y = iris['species_num']

	# Inspect the shapes. These are bare expressions, so they only display a
	# value when the lines are run interactively.
	X.shape  # 150 by 4
	y.shape  # 150 (must match the first dimension of X)