Skip to content

Instantly share code, notes, and snippets.

@h5li
Last active October 24, 2018 20:54
Show Gist options
  • Save h5li/73f33a9bda6a0f4aca3c9272e809e45b to your computer and use it in GitHub Desktop.
Save h5li/73f33a9bda6a0f4aca3c9272e809e45b to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd \n",
"import numpy as np\n",
"import random as rand \n",
"import matplotlib.pyplot as plt\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"% matplotlib inline "
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train_df = pd.read_csv(\"train.csv\")\n",
"test_df = pd.read_csv(\"test.csv\", )"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>label</th>\n",
" <th>pixel0</th>\n",
" <th>pixel1</th>\n",
" <th>pixel2</th>\n",
" <th>pixel3</th>\n",
" <th>pixel4</th>\n",
" <th>pixel5</th>\n",
" <th>pixel6</th>\n",
" <th>pixel7</th>\n",
" <th>pixel8</th>\n",
" <th>...</th>\n",
" <th>pixel774</th>\n",
" <th>pixel775</th>\n",
" <th>pixel776</th>\n",
" <th>pixel777</th>\n",
" <th>pixel778</th>\n",
" <th>pixel779</th>\n",
" <th>pixel780</th>\n",
" <th>pixel781</th>\n",
" <th>pixel782</th>\n",
" <th>pixel783</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 785 columns</p>\n",
"</div>"
],
"text/plain": [
" label pixel0 pixel1 pixel2 pixel3 pixel4 pixel5 pixel6 pixel7 \\\n",
"0 1 0 0 0 0 0 0 0 0 \n",
"1 0 0 0 0 0 0 0 0 0 \n",
"2 1 0 0 0 0 0 0 0 0 \n",
"3 4 0 0 0 0 0 0 0 0 \n",
"4 0 0 0 0 0 0 0 0 0 \n",
"\n",
" pixel8 ... pixel774 pixel775 pixel776 pixel777 pixel778 \\\n",
"0 0 ... 0 0 0 0 0 \n",
"1 0 ... 0 0 0 0 0 \n",
"2 0 ... 0 0 0 0 0 \n",
"3 0 ... 0 0 0 0 0 \n",
"4 0 ... 0 0 0 0 0 \n",
"\n",
" pixel779 pixel780 pixel781 pixel782 pixel783 \n",
"0 0 0 0 0 0 \n",
"1 0 0 0 0 0 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 0 0 \n",
"\n",
"[5 rows x 785 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAP8AAAD8CAYAAAC4nHJkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAADyNJREFUeJzt3X+QVfV5x/HP47IshmgianBZMIQUwxDSQNygpoyNY3TQ\n2kEmrRMmsaSmoNGkWmmrtdOomcmUJhp0bJrJ2jBiq2IdY6QT0g4QGrSJyMogIJiAdlEQWRyMQCuw\nwNM/9pBsdM/3LvfXucvzfs3s7L3nueeex4ufPffe7znna+4uAPGcVHQDAIpB+IGgCD8QFOEHgiL8\nQFCEHwiK8ANBEX4gKMIPBDWknhsbai0+TMPruUkglAP6Xx3ygzaQx1YUfjObLuleSU2S/tnd56ce\nP0zDdZ5dXMkmASSs9hUDfmzZb/vNrEnSdyRdJmmipFlmNrHc5wNQX5V85p8qaau7v+zuhyQtljSj\nOm0BqLVKwt8m6dU+97dny36Lmc01s04z6+zRwQo2B6Caav5tv7t3uHu7u7c3q6XWmwMwQJWEf4ek\nMX3uj86WARgEKgn/GknjzexDZjZU0uckLalOWwBqreyhPnc/bGZfkfSf6h3qW+juL1StMwA1VdE4\nv7svlbS0Sr0AqCMO7wWCIvxAUIQfCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQhB8IivADQRF+\nICjCDwRF+IGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBEX4gaAIPxAU4QeCIvxAUIQfCIrwA0ER\nfiCoimbpNbMuSfskHZF02N3bq9FUNNu+fkH6AW5lP/cXZv4kWb/l9PSs6s3WlKx/+NHrkvW//4NH\ncmsThu5Krnvzn16frDetXJusI62i8Gcucvc3qvA8AOqIt/1AUJWG3yUtN7PnzGxuNRoCUB+Vvu2f\n5u47zOwDkpaZ2YvuvqrvA7I/CnMlaZjeU+HmAFRLRXt+d9+R/e6W9ISkqf08psPd2929vVktlWwO\nQBWVHX4zG25mpxy7LelSSRur1RiA2qrkbf9ISU+Y2bHnedjd/6MqXQGoOXP3um3sVBvh59nFddte\nNQ1pG5Vbe+XzY5PrfmPOA8n6Ze/Zl6wf1dFkvZZOKvHmsJa9/aLnSLL+V2PPr9m2B6vVvkJ7fc+A\nDgxhqA8IivADQRF+ICjCDwRF+IGgCD8QVDXO6jsh7Jz3qWT9819cllu7acSTFW175dvpw54f35M+\nU/qZ18bm1oYvfl9y3Y/d/Hyy/suvTUrWd09uTtYfu+6u3Nq45vS6H2lOn078yu3pf7Oz7/xZsh4d\ne34gKMIPBEX4gaAIPxAU4QeCIvxAUIQfCIpx/sz+s9Onpt40YlPZz/3x/74mWT/86vBk/cPznknW\nz9Lm4+7pmK7F6fpQrUnW20pcweE7n70ot3b3qKeT624+lP43Gf2Tt9MbRxJ7fiAowg8ERfiBoAg/\nEBThB4Ii/EBQhB8IinH+zDkPpi+fPfnNG8t+7tE/PZCsN61Mj+M3sqO/PyVZv+K0h3Nrpcbx//Ka\nLyfrTU8xRXcl2PMDQRF+ICjCDwRF+IGgCD8QFOEHgiL8QFAlp+g2s4WSrpDU7e6TsmUjJD0qaayk\nLklXufubpTY2mKforsTbV06taP2Tf/hs2ev+6uoLkvW3fic9m/OLc/4pWe/x9DTaKZ949upkfdTM\n8q+hEFW1p+h+QNL0dyy7VdIKdx8vaUV2H8AgUjL87r5K0p53LJ4haVF2e5GkK6vcF4AaK/cz/0h3\n35ndfl3SyCr1A6BOKv7Cz3u/NMj94sDM5ppZp5l19uhgpZsDUCXlhn+XmbVKUva7O++B7t7h7u3u\n3t6sljI3B6Dayg3/Ekmzs9uzJVU2TS2AuisZfjN7RNLPJX3EzLab2ZckzZd0iZltkfSZ7D6AQaTk\n+fzuPiunFG/APsdLD6XPaV86bUGy3pT/lYkk6YVvfeC4ezpmckv62vgjm9IfxXo8vX84qvQ5+R99\n+M9za+O/vrHEc6OWOMIPCIrwA0ERfiAowg8ERfiBoAg/EBSX7q6CEe/fn6x/cMjQZP2kEn+DRw95\n67h7+o1ij6r89z++O7c2r/2Pkuse/OaEZL3lx+npw5HGnh8IivADQRF+ICjCDwRF+IGgCD8QFOEH\ngip56e5qOlEv3X1o+ieT9X1jijuc4sBle5P1YT8+NVn/2l8vStZ7vPz/tk+0vJasjxqSPkZh6rfS\n06afdc/Pjrunwa7al+4GcAIi/EBQhB8IivADQRF+ICjCDwRF+IGgGOdHYbq/8qlk/Zm/uTdZv7P7\n3GR93aX5U0ge2b07ue5gxTg/gJIIPxAU4QeCIvxAUIQfCIrwA0ERfiCokidjm9lCSVdI6nb3Sdmy\nOyTNkXRssPQ2d19aqyZxYmp9bGuyPuF3r0/W/+eK+5P1O5dPzK09O/Oc5LqHX+5K1k8EA9nzPyBp\nej/LF7j75OyH4AODTMnwu/sqSXvq0AuAOqrkM/9XzWy9mS00s9Oq1hGAuig3/N+VNE7SZEk7JeVO\nyGZmc82s08w6e3SwzM0BqLaywu/uu9z9iLsflXS/pKmJx3a4e7u7tzcXPGkkgN8oK/xm1trn7kxJ\nG6vTDoB6GchQ3yOSPi3pDDPbLul2SZ82s8mSXFKXpGtr2COAGuB8fgxaWxecn6xvuuq+3Nq596Wv\n+d82f3Be85/z+QGURPiBoAg/EBThB4Ii/EBQhB8Iqri5o4EKTbjv9WT9lgsvyK09dt1dyXXnbPmL\nZH3446uT9cGAPT8QFOEHgiL8QFCEHwiK8ANBEX4gKMIPBMU4/wD1XNqeW9t/41sVPffBZWcm62fd\nMzhPL621UpfXfup7+eP8/3D7z5Prtnx5Z3rjj6fLgwF7fiAowg8ERfiBoAg/EBThB4Ii/EBQhB8I\ninH+AZr4jQ25tT87Y1Vy3VmL0ueGn9l1uKyeUDvXn/1fyXqHxtWnkRpizw8ERfiBoAg/EBThB4Ii\n/EBQhB8IivADQZUc5zezMZIelDRSkkvqcPd7zWyEpEcljZXUJekqd3+zdq0W6yQ7mqilpzn3EhMm\nn/zDZ8tpCRU4qcR+r9lO/GMvBrLnPyxpnrtPlHS+pBvMbKKkWyWtcPfxklZk9wEMEiXD7+473X1t\ndnufpM2S2iTNkLQoe9giSVfWqkkA1Xdcn/nNbKykKZJWSxrp7seudfS6ej8WABgkBhx+M3uveq9c\ndpO77+1bc3dX7/cB/a0318w6zayzRwcrahZA9Qwo/GbWrN7gP+TuP8gW7zKz1qzeKqm7v3XdvcPd\n2929vVkt1egZQBWUDL+ZmaTvS9rs7t/uU1oiaXZ2e7akJ6vfHoBasd537IkHmE2T9JSkDZKOjXfd\npt7P/f8m6WxJ29Q71Lcn9Vyn2gg/zy6utOdCbP3XKbm1TRd1VPTcn1zzJ8l689L3J+tn/Whbbu3w\njtfK6qlahrSNyq358JOT6/a0npqsv3RNegx18YXfy619fGhyVbUvuDFZH3VXY15OfbWv0F7fU2Jw\nuVfJcX53f1pS3pMNziQD4Ag/ICrCDwRF+IGgCD8QFOEHgiL8QFAlx/mraTCP8zedPiK39qtLzkmu\n+7Gbn0/W/7Ht6WT9qPJPJ5ake/ZMzK29eiC/74FIncosST96cVKyPq71jdzaZ1vXJtcdOzR/XUm6\n+OT/S9afSxxNfu36LyTXHX1Detr1oo+fyHM84/zs+YGgCD8QFOEHgiL8QFCEHwiK8ANBEX4gKMb5\nG8CWB85N1h++MH29gCkt6bH4SpS6xHWpYxAqsflQ+rlfOXxasj7/7/Kvk3DK4mfK6qnRMc4PoCTC\nDwRF+IGgCD8QFOEHgiL8QFCEHwiKcf5BwM79aLK+7Q/fV/ZzT7lkc7K+aOzyZL3UOP+Epdfn1obt\naE6u2/bTA8l608r09QAiYpwfQEmEHwiK8ANBEX4gKMIPBEX4gaAIPxBUyXF+Mxsj6UFJIyW5pA53\nv9fM7pA0R9Lu7KG3ufvS1HMxzg/U1vGM8w8ZwGMOS5rn7mvN7BRJz5nZsqy2wN3vKrdRAMUpGX53\n3ylpZ3Z7n5ltltRW68YA1NZxfeY3s7GSpkhanS36qpmtN7OFZtbvNZXMbK6ZdZpZZ48S8ycBqKsB\nh9/M3ivpcUk3ufteSd+VNE7SZPW+M7i7v/XcvcPd2929vVktVWgZQDUMKPxm1qze4D/k7j+QJHff\n5e5H3P2opPslTa1dmwCqrWT4zcwkfV/SZnf/dp/lrX0eNlPSxuq3B6BWBvJt/+9JulrSBjNbly27\nTdIsM5us3uG/LknX1qRDADUxkG/7n5bU37hhckwfQGPjCD8gKMIPBEX4gaAIPxAU4QeCIvxAUIQf\nCIrwA0ERfiAowg8ERfiBoAg/EBThB4Ii/EBQdZ2i28x2S9rWZ9EZkt6oWwPHp1F7a9S+JHorVzV7\n+6C7nzmQB9Y1/O/auFmnu7cX1kBCo/bWqH1J9FauonrjbT8QFOEHgio6/B0Fbz+lUXtr1L4keitX\nIb0V+pkfQHGK3vMDKEgh4Tez6Wb2CzPbama3FtFDHjPrMrMNZrbOzDoL7mWhmXWb2cY+y0aY2TIz\n25L97neatIJ6u8PMdmSv3Tozu7yg3saY2Uoz22RmL5jZjdnyQl+7RF+FvG51f9tvZk2SfinpEknb\nJa2RNMvdN9W1kRxm1iWp3d0LHxM2swsl7Zf0oLtPypZ9U9Ied5+f/eE8zd1vaZDe7pC0v+iZm7MJ\nZVr7ziwt6UpJX1SBr12ir6tUwOtWxJ5/qqSt7v6yux+StFjSjAL6aHjuvkrSnncsniFpUXZ7kXr/\n56m7nN4agrvvdPe12e19ko7NLF3oa5foqxBFhL9N0qt97m9XY0357ZKWm9lzZja36Gb6MTKbNl2S\nXpc0sshm+lFy5uZ6esfM0g3z2pUz43W18YXfu01z98mSLpN0Q/b2tiF572e2RhquGdDMzfXSz8zS\nv1bka1fujNfVVkT4d0ga0+f+6GxZQ3D3HdnvbklPqPFmH951bJLU7Hd3wf38WiPN3NzfzNJqgNeu\nkWa8LiL8aySNN7MPmdlQSZ+TtKSAPt7FzIZnX8TIzIZLulSNN/vwEkmzs9uzJT1ZYC+/pVFmbs6b\nWVoFv3YNN+O1u9f9R9Ll6v3G/yVJf1tEDzl9jZP0fPbzQtG9SXpEvW8De9T73ciXJJ0uaYWkLZKW\nSxrRQL39i6QNktarN2itBfU2Tb1v6ddLWpf9XF70a5foq5DXjSP8gKD4wg8IivADQRF+ICjCDwRF\n+IGgCD8QFOEHgiL8QFD/D7ttx5PQf7WWAAAAAElFTkSuQmCC\n",
"text/plain": [
"<matplotlib.figure.Figure at 0x7f6aa155ee10>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"vector = train_df.loc[30000, :][1:]\n",
"img = np.array(vector).reshape(28,28)\n",
"plt.imshow(img)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"clf = RandomForestClassifier()"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"X, y = train_df.iloc[:, 1:] ,train_df['label']"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
" max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,\n",
" oob_score=False, random_state=None, verbose=0,\n",
" warm_start=False)"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"clf.fit(X, y)"
]
},
{
"cell_type": "code",
"execution_count": 120,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"test_predictions = clf.predict(test_df)"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"test_predictions = pd.DataFrame(test_predictions)\n",
"test_predictions.index+=1\n",
"test_predictions.index.name ='ImageId'\n",
"test_predictions.columns = ['Label']\n",
"test_predictions.to_csv('final_submission.csv', index=True)"
]
},
{
"cell_type": "code",
"execution_count": 121,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"DigitRecognizer.ipynb sample_submission.csv test_xgboost.ipynb\r\n",
"final_submission.csv test.csv\t\t train.csv\r\n"
]
}
],
"source": [
"!ls"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment