Created
October 12, 2015 07:18
-
-
Save willard-yuan/c827db48b9c414716b90 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>class</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>petal_width</th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Iris-virginica</td>\n", | |
" <td>5.5</td>\n", | |
" <td>1.8</td>\n", | |
" <td>6.4</td>\n", | |
" <td>3.1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Iris-virginica</td>\n", | |
" <td>5.9</td>\n", | |
" <td>2.3</td>\n", | |
" <td>6.8</td>\n", | |
" <td>3.2</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Iris-virginica</td>\n", | |
" <td>5.4</td>\n", | |
" <td>2.3</td>\n", | |
" <td>6.2</td>\n", | |
" <td>3.4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Iris-virginica</td>\n", | |
" <td>4.8</td>\n", | |
" <td>1.8</td>\n", | |
" <td>6.0</td>\n", | |
" <td>3.0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Iris-virginica</td>\n", | |
" <td>5.1</td>\n", | |
" <td>2.3</td>\n", | |
" <td>6.9</td>\n", | |
" <td>3.1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" class petal_length petal_width sepal_length sepal_width\n", | |
"0 Iris-virginica 5.5 1.8 6.4 3.1\n", | |
"1 Iris-virginica 5.9 2.3 6.8 3.2\n", | |
"2 Iris-virginica 5.4 2.3 6.2 3.4\n", | |
"3 Iris-virginica 4.8 1.8 6.0 3.0\n", | |
"4 Iris-virginica 5.1 2.3 6.9 3.1" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# First let's import the dataset, using Pandas.\n", | |
"import pandas as pd\n", | |
"\n", | |
"train = pd.read_csv(\"train.csv\") # make sure you're in the right directory if using iPython!\n", | |
"test = pd.read_csv(\"test.csv\") \n", | |
"\n", | |
"train.head() # ignore the first column, it's how I split the data." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/Users/alexwoods/Downloads/ipython-3.2.0/IPython/kernel/__main__.py:16: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n", | |
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n", | |
" min_samples_leaf=1, min_samples_split=2,\n", | |
" min_weight_fraction_leaf=0.0, n_estimators=100, n_jobs=1,\n", | |
" oob_score=False, random_state=None, verbose=0,\n", | |
" warm_start=False)" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"from sklearn.ensemble import RandomForestClassifier\n", | |
"\n", | |
"\n", | |
"\n", | |
"# however, are data has to be in a numpy array in order for the random forest algorithm to except it!\n", | |
"cols = ['petal_length', 'petal_width', 'sepal_length', 'sepal_width']\n", | |
"colsRes = ['class']\n", | |
"trainArr = train.as_matrix(cols) # training array\n", | |
"trainRes = train.as_matrix(colsRes) # training results\n", | |
"\n", | |
"\n", | |
"\n", | |
"## Training!\n", | |
"\n", | |
"rf = RandomForestClassifier(n_estimators=100) # 100 decision trees is a good enough number\n", | |
"rf.fit(trainArr, trainRes) # finally, we fit the data to the algorithm!!! :)\n", | |
"\n", | |
"# note - you might get an warning saying you entered a 2 column vector..ignore it." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"collapsed": false | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>class</th>\n", | |
" <th>petal_length</th>\n", | |
" <th>petal_width</th>\n", | |
" <th>sepal_length</th>\n", | |
" <th>sepal_width</th>\n", | |
" <th>predictions</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Iris-virginica</td>\n", | |
" <td>6.6</td>\n", | |
" <td>2.1</td>\n", | |
" <td>7.6</td>\n", | |
" <td>3.0</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Iris-virginica</td>\n", | |
" <td>6.3</td>\n", | |
" <td>1.8</td>\n", | |
" <td>7.3</td>\n", | |
" <td>2.9</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Iris-virginica</td>\n", | |
" <td>5.5</td>\n", | |
" <td>2.1</td>\n", | |
" <td>6.8</td>\n", | |
" <td>3.0</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Iris-virginica</td>\n", | |
" <td>5.1</td>\n", | |
" <td>2.4</td>\n", | |
" <td>5.8</td>\n", | |
" <td>2.8</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Iris-virginica</td>\n", | |
" <td>5.3</td>\n", | |
" <td>2.3</td>\n", | |
" <td>6.4</td>\n", | |
" <td>3.2</td>\n", | |
" <td>Iris-virginica</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" class petal_length petal_width sepal_length sepal_width \\\n", | |
"0 Iris-virginica 6.6 2.1 7.6 3.0 \n", | |
"1 Iris-virginica 6.3 1.8 7.3 2.9 \n", | |
"2 Iris-virginica 5.5 2.1 6.8 3.0 \n", | |
"3 Iris-virginica 5.1 2.4 5.8 2.8 \n", | |
"4 Iris-virginica 5.3 2.3 6.4 3.2 \n", | |
"\n", | |
" predictions \n", | |
"0 Iris-virginica \n", | |
"1 Iris-virginica \n", | |
"2 Iris-virginica \n", | |
"3 Iris-virginica \n", | |
"4 Iris-virginica " | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"## Testing!\n", | |
"\n", | |
"# put the test results in the same format!\n", | |
"testArr = test.as_matrix(cols)\n", | |
"\n", | |
"results = rf.predict(testArr)\n", | |
"\n", | |
"# something I like to do is to add it back to the dataframe, so I can compare side-by-side\n", | |
"test['predictions'] = results\n", | |
"test.head()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.4.1" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 0 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment