h5li/Housing_Prices_Solution.ipynb

## Housing_Prices_Solution.ipynb
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.ensemble import RandomForestRegressor"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Read in the testing and training data into two dataframes\n",
    "train_data = pd.read_csv('train.csv')\n",
    "test_data = pd.read_csv('test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Id</th>\n",
       "      <th>MSSubClass</th>\n",
       "      <th>MSZoning</th>\n",
       "      <th>LotFrontage</th>\n",
       "      <th>LotArea</th>\n",
       "      <th>Street</th>\n",
       "      <th>Alley</th>\n",
       "      <th>LotShape</th>\n",
       "      <th>LandContour</th>\n",
       "      <th>Utilities</th>\n",
       "      <th>...</th>\n",
       "      <th>ScreenPorch</th>\n",
       "      <th>PoolArea</th>\n",
       "      <th>PoolQC</th>\n",
       "      <th>Fence</th>\n",
       "      <th>MiscFeature</th>\n",
       "      <th>MiscVal</th>\n",
       "      <th>MoSold</th>\n",
       "      <th>YrSold</th>\n",
       "      <th>SaleType</th>\n",
       "      <th>SaleCondition</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1461</td>\n",
       "      <td>20</td>\n",
       "      <td>RH</td>\n",
       "      <td>80.0</td>\n",
       "      <td>11622</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Reg</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>120</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1462</td>\n",
       "      <td>20</td>\n",
       "      <td>RL</td>\n",
       "      <td>81.0</td>\n",
       "      <td>14267</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Gar2</td>\n",
       "      <td>12500</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1463</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>74.0</td>\n",
       "      <td>13830</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>MnPrv</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>3</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1464</td>\n",
       "      <td>60</td>\n",
       "      <td>RL</td>\n",
       "      <td>78.0</td>\n",
       "      <td>9978</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>Lvl</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1465</td>\n",
       "      <td>120</td>\n",
       "      <td>RL</td>\n",
       "      <td>43.0</td>\n",
       "      <td>5005</td>\n",
       "      <td>Pave</td>\n",
       "      <td>NaN</td>\n",
       "      <td>IR1</td>\n",
       "      <td>HLS</td>\n",
       "      <td>AllPub</td>\n",
       "      <td>...</td>\n",
       "      <td>144</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>2010</td>\n",
       "      <td>WD</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 80 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
       "0  1461          20       RH         80.0    11622   Pave   NaN      Reg   \n",
       "1  1462          20       RL         81.0    14267   Pave   NaN      IR1   \n",
       "2  1463          60       RL         74.0    13830   Pave   NaN      IR1   \n",
       "3  1464          60       RL         78.0     9978   Pave   NaN      IR1   \n",
       "4  1465         120       RL         43.0     5005   Pave   NaN      IR1   \n",
       "\n",
       "  LandContour Utilities      ...       ScreenPorch PoolArea PoolQC  Fence  \\\n",
       "0         Lvl    AllPub      ...               120        0    NaN  MnPrv   \n",
       "1         Lvl    AllPub      ...                 0        0    NaN    NaN   \n",
       "2         Lvl    AllPub      ...                 0        0    NaN  MnPrv   \n",
       "3         Lvl    AllPub      ...                 0        0    NaN    NaN   \n",
       "4         HLS    AllPub      ...               144        0    NaN    NaN   \n",
       "\n",
       "  MiscFeature MiscVal MoSold  YrSold  SaleType  SaleCondition  \n",
       "0         NaN       0      6    2010        WD         Normal  \n",
       "1        Gar2   12500      6    2010        WD         Normal  \n",
       "2         NaN       0      3    2010        WD         Normal  \n",
       "3         NaN       0      6    2010        WD         Normal  \n",
       "4         NaN       0      1    2010        WD         Normal  \n",
       "\n",
       "[5 rows x 80 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Look at the data we will be working with. Dang. Look at all those missing values and String values\n",
    "test_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Train data processing\n",
    "train_data.dropna(axis=1,inplace=True) # Drops all columns with NaN values in them\n",
    "train_data = train_data.select_dtypes(exclude=['object']) # Gets rid of all columns that don't have int data in them"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 122,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Test data processing\n",
    "test_data.dropna(axis=1,inplace=True)\n",
    "test_data = test_data.select_dtypes(exclude=['object'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 123,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index([u'Id', u'MSSubClass', u'LotArea', u'OverallQual', u'OverallCond',\n",
       "       u'YearBuilt', u'YearRemodAdd', u'BsmtFinSF1', u'BsmtFinSF2',\n",
       "       u'BsmtUnfSF', u'TotalBsmtSF', u'1stFlrSF', u'2ndFlrSF', u'LowQualFinSF',\n",
       "       u'GrLivArea', u'BsmtFullBath', u'BsmtHalfBath', u'FullBath',\n",
       "       u'HalfBath', u'BedroomAbvGr', u'KitchenAbvGr', u'TotRmsAbvGrd',\n",
       "       u'Fireplaces', u'GarageCars', u'GarageArea', u'WoodDeckSF',\n",
       "       u'OpenPorchSF', u'EnclosedPorch', u'3SsnPorch', u'ScreenPorch',\n",
       "       u'PoolArea', u'MiscVal', u'MoSold', u'YrSold', u'SalePrice'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 123,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Training dataframe has more columns than test dataframe, need to keep them both the same size\n",
    "train_data.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 124,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Drop all columns in the training dataframe that aren't in the test dataframe\n",
    "train_data = train_data.drop(['BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'BsmtFullBath', 'BsmtHalfBath', 'GarageCars', 'GarageArea'],axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 133,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Separate test and train into feature and label dataframes\n",
    "columns_minus_saleprice = train_data.columns[1:-1]\n",
    "X_train = train_data[columns_minus_saleprice] # All columns minus saleprice will make up the X_train dataframe\n",
    "y_train = train_data['SalePrice'] # The column with saleprice will make up the y_train dataframe\n",
    "X_test = test_data[test_data.columns[1:]] # All columns except the id column will be inputted into the machine learning model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 135,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Initialize a classifier\n",
    "regr = RandomForestRegressor() "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 136,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
       "           max_features='auto', max_leaf_nodes=None,\n",
       "           min_impurity_decrease=0.0, min_impurity_split=None,\n",
       "           min_samples_leaf=1, min_samples_split=2,\n",
       "           min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n",
       "           oob_score=False, random_state=None, verbose=0, warm_start=False)"
      ]
     },
     "execution_count": 136,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Fit the classifier with X and y data\n",
    "regr.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 138,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Use fitted classifier to predict saleprice of test data\n",
    "predictions = regr.predict(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 140,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Format the output so that it can be submitted to Kaggle as a csv\n",
    "list_of_house_ids = test_data[\"Id\"]\n",
    "submission = zip(list_of_house_ids, predictions.astype(int)) # Zip together predicted house prices and ids\n",
    "submission = pd.DataFrame(submission, columns=[\"Id\", \"Saleprice\"]) # Turn into a dataframe and add column titles\n",
    "submission.to_csv('submission1.csv', header=True, index=False) # Output to csv and remove index column"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"import pandas as pd\n",
	"import numpy as np\n",
	"from sklearn.ensemble import RandomForestRegressor"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Read in the testing and training data into two dataframes\n",
	"train_data = pd.read_csv('train.csv')\n",
	"test_data = pd.read_csv('test.csv')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/html": [
	"<div>\n",
	"<style>\n",
	" .dataframe thead tr:only-child th {\n",
	" text-align: right;\n",
	" }\n",
	"\n",
	" .dataframe thead th {\n",
	" text-align: left;\n",
	" }\n",
	"\n",
	" .dataframe tbody tr th {\n",
	" vertical-align: top;\n",
	" }\n",
	"</style>\n",
	"<table border=\"1\" class=\"dataframe\">\n",
	" <thead>\n",
	" <tr style=\"text-align: right;\">\n",
	" <th></th>\n",
	" <th>Id</th>\n",
	" <th>MSSubClass</th>\n",
	" <th>MSZoning</th>\n",
	" <th>LotFrontage</th>\n",
	" <th>LotArea</th>\n",
	" <th>Street</th>\n",
	" <th>Alley</th>\n",
	" <th>LotShape</th>\n",
	" <th>LandContour</th>\n",
	" <th>Utilities</th>\n",
	" <th>...</th>\n",
	" <th>ScreenPorch</th>\n",
	" <th>PoolArea</th>\n",
	" <th>PoolQC</th>\n",
	" <th>Fence</th>\n",
	" <th>MiscFeature</th>\n",
	" <th>MiscVal</th>\n",
	" <th>MoSold</th>\n",
	" <th>YrSold</th>\n",
	" <th>SaleType</th>\n",
	" <th>SaleCondition</th>\n",
	" </tr>\n",
	" </thead>\n",
	" <tbody>\n",
	" <tr>\n",
	" <th>0</th>\n",
	" <td>1461</td>\n",
	" <td>20</td>\n",
	" <td>RH</td>\n",
	" <td>80.0</td>\n",
	" <td>11622</td>\n",
	" <td>Pave</td>\n",
	" <td>NaN</td>\n",
	" <td>Reg</td>\n",
	" <td>Lvl</td>\n",
	" <td>AllPub</td>\n",
	" <td>...</td>\n",
	" <td>120</td>\n",
	" <td>0</td>\n",
	" <td>NaN</td>\n",
	" <td>MnPrv</td>\n",
	" <td>NaN</td>\n",
	" <td>0</td>\n",
	" <td>6</td>\n",
	" <td>2010</td>\n",
	" <td>WD</td>\n",
	" <td>Normal</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>1</th>\n",
	" <td>1462</td>\n",
	" <td>20</td>\n",
	" <td>RL</td>\n",
	" <td>81.0</td>\n",
	" <td>14267</td>\n",
	" <td>Pave</td>\n",
	" <td>NaN</td>\n",
	" <td>IR1</td>\n",
	" <td>Lvl</td>\n",
	" <td>AllPub</td>\n",
	" <td>...</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>Gar2</td>\n",
	" <td>12500</td>\n",
	" <td>6</td>\n",
	" <td>2010</td>\n",
	" <td>WD</td>\n",
	" <td>Normal</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>2</th>\n",
	" <td>1463</td>\n",
	" <td>60</td>\n",
	" <td>RL</td>\n",
	" <td>74.0</td>\n",
	" <td>13830</td>\n",
	" <td>Pave</td>\n",
	" <td>NaN</td>\n",
	" <td>IR1</td>\n",
	" <td>Lvl</td>\n",
	" <td>AllPub</td>\n",
	" <td>...</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>NaN</td>\n",
	" <td>MnPrv</td>\n",
	" <td>NaN</td>\n",
	" <td>0</td>\n",
	" <td>3</td>\n",
	" <td>2010</td>\n",
	" <td>WD</td>\n",
	" <td>Normal</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>3</th>\n",
	" <td>1464</td>\n",
	" <td>60</td>\n",
	" <td>RL</td>\n",
	" <td>78.0</td>\n",
	" <td>9978</td>\n",
	" <td>Pave</td>\n",
	" <td>NaN</td>\n",
	" <td>IR1</td>\n",
	" <td>Lvl</td>\n",
	" <td>AllPub</td>\n",
	" <td>...</td>\n",
	" <td>0</td>\n",
	" <td>0</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>0</td>\n",
	" <td>6</td>\n",
	" <td>2010</td>\n",
	" <td>WD</td>\n",
	" <td>Normal</td>\n",
	" </tr>\n",
	" <tr>\n",
	" <th>4</th>\n",
	" <td>1465</td>\n",
	" <td>120</td>\n",
	" <td>RL</td>\n",
	" <td>43.0</td>\n",
	" <td>5005</td>\n",
	" <td>Pave</td>\n",
	" <td>NaN</td>\n",
	" <td>IR1</td>\n",
	" <td>HLS</td>\n",
	" <td>AllPub</td>\n",
	" <td>...</td>\n",
	" <td>144</td>\n",
	" <td>0</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>NaN</td>\n",
	" <td>0</td>\n",
	" <td>1</td>\n",
	" <td>2010</td>\n",
	" <td>WD</td>\n",
	" <td>Normal</td>\n",
	" </tr>\n",
	" </tbody>\n",
	"</table>\n",
	"<p>5 rows × 80 columns</p>\n",
	"</div>"
	],
	"text/plain": [
	" Id MSSubClass MSZoning LotFrontage LotArea Street Alley LotShape \\\n",
	"0 1461 20 RH 80.0 11622 Pave NaN Reg \n",
	"1 1462 20 RL 81.0 14267 Pave NaN IR1 \n",
	"2 1463 60 RL 74.0 13830 Pave NaN IR1 \n",
	"3 1464 60 RL 78.0 9978 Pave NaN IR1 \n",
	"4 1465 120 RL 43.0 5005 Pave NaN IR1 \n",
	"\n",
	" LandContour Utilities ... ScreenPorch PoolArea PoolQC Fence \\\n",
	"0 Lvl AllPub ... 120 0 NaN MnPrv \n",
	"1 Lvl AllPub ... 0 0 NaN NaN \n",
	"2 Lvl AllPub ... 0 0 NaN MnPrv \n",
	"3 Lvl AllPub ... 0 0 NaN NaN \n",
	"4 HLS AllPub ... 144 0 NaN NaN \n",
	"\n",
	" MiscFeature MiscVal MoSold YrSold SaleType SaleCondition \n",
	"0 NaN 0 6 2010 WD Normal \n",
	"1 Gar2 12500 6 2010 WD Normal \n",
	"2 NaN 0 3 2010 WD Normal \n",
	"3 NaN 0 6 2010 WD Normal \n",
	"4 NaN 0 1 2010 WD Normal \n",
	"\n",
	"[5 rows x 80 columns]"
	]
	},
	"execution_count": 4,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Look at the data we will be working with. Dang. Look at all those missing values and String values\n",
	"test_data.head()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Train data processing\n",
	"train_data.dropna(axis=1,inplace=True) # Drops all columns with NaN values in them\n",
	"train_data = train_data.select_dtypes(exclude=['object']) # Gets rid of all columns that don't have int data in them"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 122,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Test data processing\n",
	"test_data.dropna(axis=1,inplace=True)\n",
	"test_data = test_data.select_dtypes(exclude=['object'])"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 123,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"Index([u'Id', u'MSSubClass', u'LotArea', u'OverallQual', u'OverallCond',\n",
	" u'YearBuilt', u'YearRemodAdd', u'BsmtFinSF1', u'BsmtFinSF2',\n",
	" u'BsmtUnfSF', u'TotalBsmtSF', u'1stFlrSF', u'2ndFlrSF', u'LowQualFinSF',\n",
	" u'GrLivArea', u'BsmtFullBath', u'BsmtHalfBath', u'FullBath',\n",
	" u'HalfBath', u'BedroomAbvGr', u'KitchenAbvGr', u'TotRmsAbvGrd',\n",
	" u'Fireplaces', u'GarageCars', u'GarageArea', u'WoodDeckSF',\n",
	" u'OpenPorchSF', u'EnclosedPorch', u'3SsnPorch', u'ScreenPorch',\n",
	" u'PoolArea', u'MiscVal', u'MoSold', u'YrSold', u'SalePrice'],\n",
	" dtype='object')"
	]
	},
	"execution_count": 123,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Training dataframe has more columns than test dataframe, need to keep them both the same size\n",
	"train_data.columns"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 124,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Drop all columns in the training dataframe that aren't in the test dataframe\n",
	"train_data = train_data.drop(['BsmtFinSF1', 'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'BsmtFullBath', 'BsmtHalfBath', 'GarageCars', 'GarageArea'],axis=1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 133,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Separate test and train into feature and label dataframes\n",
	"columns_minus_saleprice = train_data.columns[1:-1]\n",
	"X_train = train_data[columns_minus_saleprice] # All columns minus saleprice will make up the X_train dataframe\n",
	"y_train = train_data['SalePrice'] # The column with saleprice will make up the y_train dataframe\n",
	"X_test = test_data[test_data.columns[1:]] # All columns except the id column will be inputted into the machine learning model"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 135,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Initialize a classifier\n",
	"regr = RandomForestRegressor() "
	]
	},
	{
	"cell_type": "code",
	"execution_count": 136,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
	" max_features='auto', max_leaf_nodes=None,\n",
	" min_impurity_decrease=0.0, min_impurity_split=None,\n",
	" min_samples_leaf=1, min_samples_split=2,\n",
	" min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n",
	" oob_score=False, random_state=None, verbose=0, warm_start=False)"
	]
	},
	"execution_count": 136,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"# Fit the classifier with X and y data\n",
	"regr.fit(X_train, y_train)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 138,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Use fitted classifier to predict saleprice of test data\n",
	"predictions = regr.predict(X_test)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 140,
	"metadata": {
	"collapsed": true
	},
	"outputs": [],
	"source": [
	"# Format the output so that it can be submitted to Kaggle as a csv\n",
	"list_of_house_ids = test_data[\"Id\"]\n",
	"submission = zip(list_of_house_ids, predictions.astype(int)) # Zip together predicted house prices and ids\n",
	"submission = pd.DataFrame(submission, columns=[\"Id\", \"Saleprice\"]) # Turn into a dataframe and add column titles\n",
	"submission.to_csv('submission1.csv', header=True, index=False) # Output to csv and remove index column"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 2",
	"language": "python",
	"name": "python2"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 2
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython2",
	"version": "2.7.13"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}