Skip to content

Instantly share code, notes, and snippets.

@darthgera123
Created March 27, 2020 11:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save darthgera123/ac22597cef7108df0a9514fcb733ae15 to your computer and use it in GitHub Desktop.
Save darthgera123/ac22597cef7108df0a9514fcb733ae15 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Baseline submission for the challenge CRDSM"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split \n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn import metrics"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"train_data = pd.read_csv('../data/public/train.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Analyse Data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>max_ndvi</th>\n",
" <th>20150720_N</th>\n",
" <th>20150602_N</th>\n",
" <th>20150517_N</th>\n",
" <th>20150501_N</th>\n",
" <th>20150415_N</th>\n",
" <th>20150330_N</th>\n",
" <th>20150314_N</th>\n",
" <th>20150226_N</th>\n",
" <th>20150210_N</th>\n",
" <th>...</th>\n",
" <th>20140525_N</th>\n",
" <th>20140509_N</th>\n",
" <th>20140423_N</th>\n",
" <th>20140407_N</th>\n",
" <th>20140322_N</th>\n",
" <th>20140218_N</th>\n",
" <th>20140202_N</th>\n",
" <th>20140117_N</th>\n",
" <th>20140101_N</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>997.904</td>\n",
" <td>637.5950</td>\n",
" <td>658.668</td>\n",
" <td>-1882.030</td>\n",
" <td>-1924.36</td>\n",
" <td>997.904</td>\n",
" <td>-1739.990</td>\n",
" <td>630.087</td>\n",
" <td>-1628.240</td>\n",
" <td>-1325.64</td>\n",
" <td>...</td>\n",
" <td>-1043.160</td>\n",
" <td>-1942.490</td>\n",
" <td>267.138</td>\n",
" <td>366.608</td>\n",
" <td>452.238</td>\n",
" <td>211.328</td>\n",
" <td>-2203.02</td>\n",
" <td>-1180.190</td>\n",
" <td>433.906</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>914.198</td>\n",
" <td>634.2400</td>\n",
" <td>593.705</td>\n",
" <td>-1625.790</td>\n",
" <td>-1672.32</td>\n",
" <td>914.198</td>\n",
" <td>-692.386</td>\n",
" <td>707.626</td>\n",
" <td>-1670.590</td>\n",
" <td>-1408.64</td>\n",
" <td>...</td>\n",
" <td>-933.934</td>\n",
" <td>-625.385</td>\n",
" <td>120.059</td>\n",
" <td>364.858</td>\n",
" <td>476.972</td>\n",
" <td>220.878</td>\n",
" <td>-2250.00</td>\n",
" <td>-1360.560</td>\n",
" <td>524.075</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3800.810</td>\n",
" <td>1671.3400</td>\n",
" <td>1206.880</td>\n",
" <td>449.735</td>\n",
" <td>1071.21</td>\n",
" <td>546.371</td>\n",
" <td>1077.840</td>\n",
" <td>214.564</td>\n",
" <td>849.599</td>\n",
" <td>1283.63</td>\n",
" <td>...</td>\n",
" <td>1566.160</td>\n",
" <td>2208.440</td>\n",
" <td>1056.600</td>\n",
" <td>385.203</td>\n",
" <td>300.560</td>\n",
" <td>293.730</td>\n",
" <td>2762.57</td>\n",
" <td>150.931</td>\n",
" <td>3800.810</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>952.178</td>\n",
" <td>58.0174</td>\n",
" <td>-1599.160</td>\n",
" <td>210.714</td>\n",
" <td>-1052.63</td>\n",
" <td>578.807</td>\n",
" <td>-1564.630</td>\n",
" <td>-858.390</td>\n",
" <td>729.790</td>\n",
" <td>-3162.14</td>\n",
" <td>...</td>\n",
" <td>368.622</td>\n",
" <td>-1786.950</td>\n",
" <td>-1227.800</td>\n",
" <td>304.621</td>\n",
" <td>291.336</td>\n",
" <td>369.214</td>\n",
" <td>-2202.12</td>\n",
" <td>600.359</td>\n",
" <td>-1343.550</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1232.120</td>\n",
" <td>72.5180</td>\n",
" <td>-1220.880</td>\n",
" <td>380.436</td>\n",
" <td>-1256.93</td>\n",
" <td>515.805</td>\n",
" <td>-1413.180</td>\n",
" <td>-802.942</td>\n",
" <td>683.254</td>\n",
" <td>-2829.40</td>\n",
" <td>...</td>\n",
" <td>155.624</td>\n",
" <td>-1189.710</td>\n",
" <td>-924.073</td>\n",
" <td>432.150</td>\n",
" <td>282.833</td>\n",
" <td>298.320</td>\n",
" <td>-2197.36</td>\n",
" <td>626.379</td>\n",
" <td>-826.727</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" max_ndvi 20150720_N 20150602_N 20150517_N 20150501_N 20150415_N \\\n",
"0 997.904 637.5950 658.668 -1882.030 -1924.36 997.904 \n",
"1 914.198 634.2400 593.705 -1625.790 -1672.32 914.198 \n",
"2 3800.810 1671.3400 1206.880 449.735 1071.21 546.371 \n",
"3 952.178 58.0174 -1599.160 210.714 -1052.63 578.807 \n",
"4 1232.120 72.5180 -1220.880 380.436 -1256.93 515.805 \n",
"\n",
" 20150330_N 20150314_N 20150226_N 20150210_N ... 20140525_N \\\n",
"0 -1739.990 630.087 -1628.240 -1325.64 ... -1043.160 \n",
"1 -692.386 707.626 -1670.590 -1408.64 ... -933.934 \n",
"2 1077.840 214.564 849.599 1283.63 ... 1566.160 \n",
"3 -1564.630 -858.390 729.790 -3162.14 ... 368.622 \n",
"4 -1413.180 -802.942 683.254 -2829.40 ... 155.624 \n",
"\n",
" 20140509_N 20140423_N 20140407_N 20140322_N 20140218_N 20140202_N \\\n",
"0 -1942.490 267.138 366.608 452.238 211.328 -2203.02 \n",
"1 -625.385 120.059 364.858 476.972 220.878 -2250.00 \n",
"2 2208.440 1056.600 385.203 300.560 293.730 2762.57 \n",
"3 -1786.950 -1227.800 304.621 291.336 369.214 -2202.12 \n",
"4 -1189.710 -924.073 432.150 282.833 298.320 -2197.36 \n",
"\n",
" 20140117_N 20140101_N class \n",
"0 -1180.190 433.906 4 \n",
"1 -1360.560 524.075 4 \n",
"2 150.931 3800.810 4 \n",
"3 600.359 -1343.550 4 \n",
"4 626.379 -826.727 4 \n",
"\n",
"[5 rows x 29 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>max_ndvi</th>\n",
" <th>20150720_N</th>\n",
" <th>20150602_N</th>\n",
" <th>20150517_N</th>\n",
" <th>20150501_N</th>\n",
" <th>20150415_N</th>\n",
" <th>20150330_N</th>\n",
" <th>20150314_N</th>\n",
" <th>20150226_N</th>\n",
" <th>20150210_N</th>\n",
" <th>...</th>\n",
" <th>20140525_N</th>\n",
" <th>20140509_N</th>\n",
" <th>20140423_N</th>\n",
" <th>20140407_N</th>\n",
" <th>20140322_N</th>\n",
" <th>20140218_N</th>\n",
" <th>20140202_N</th>\n",
" <th>20140117_N</th>\n",
" <th>20140101_N</th>\n",
" <th>class</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>...</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" <td>10545.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>7282.721268</td>\n",
" <td>5713.832981</td>\n",
" <td>4777.434284</td>\n",
" <td>4352.914883</td>\n",
" <td>5077.372030</td>\n",
" <td>2871.423540</td>\n",
" <td>4898.348680</td>\n",
" <td>3338.303406</td>\n",
" <td>4902.600296</td>\n",
" <td>4249.307925</td>\n",
" <td>...</td>\n",
" <td>3640.367446</td>\n",
" <td>3027.313647</td>\n",
" <td>3022.054677</td>\n",
" <td>2041.609136</td>\n",
" <td>2691.604363</td>\n",
" <td>2058.300423</td>\n",
" <td>6109.309315</td>\n",
" <td>2563.511596</td>\n",
" <td>2558.926018</td>\n",
" <td>0.550213</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>1603.782784</td>\n",
" <td>2283.945491</td>\n",
" <td>2735.244614</td>\n",
" <td>2870.619613</td>\n",
" <td>2512.162084</td>\n",
" <td>2675.074079</td>\n",
" <td>2578.318759</td>\n",
" <td>2421.309390</td>\n",
" <td>2691.397266</td>\n",
" <td>2777.809493</td>\n",
" <td>...</td>\n",
" <td>2298.281052</td>\n",
" <td>2054.223951</td>\n",
" <td>2176.307289</td>\n",
" <td>2020.499263</td>\n",
" <td>2408.279935</td>\n",
" <td>2212.018257</td>\n",
" <td>1944.613487</td>\n",
" <td>2336.052498</td>\n",
" <td>2413.851082</td>\n",
" <td>1.009424</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>563.444000</td>\n",
" <td>-433.735000</td>\n",
" <td>-1781.790000</td>\n",
" <td>-2939.740000</td>\n",
" <td>-3536.540000</td>\n",
" <td>-1815.630000</td>\n",
" <td>-5992.080000</td>\n",
" <td>-1677.600000</td>\n",
" <td>-2624.640000</td>\n",
" <td>-3403.050000</td>\n",
" <td>...</td>\n",
" <td>-1043.160000</td>\n",
" <td>-4869.010000</td>\n",
" <td>-1505.780000</td>\n",
" <td>-1445.370000</td>\n",
" <td>-4354.630000</td>\n",
" <td>-232.292000</td>\n",
" <td>-6807.550000</td>\n",
" <td>-2139.860000</td>\n",
" <td>-4145.250000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>7285.310000</td>\n",
" <td>4027.570000</td>\n",
" <td>2060.600000</td>\n",
" <td>1446.940000</td>\n",
" <td>2984.370000</td>\n",
" <td>526.911000</td>\n",
" <td>2456.310000</td>\n",
" <td>1017.710000</td>\n",
" <td>2321.550000</td>\n",
" <td>1379.210000</td>\n",
" <td>...</td>\n",
" <td>1392.390000</td>\n",
" <td>1405.020000</td>\n",
" <td>1010.180000</td>\n",
" <td>429.881000</td>\n",
" <td>766.451000</td>\n",
" <td>494.858000</td>\n",
" <td>5646.670000</td>\n",
" <td>689.922000</td>\n",
" <td>685.680000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>7886.260000</td>\n",
" <td>6737.730000</td>\n",
" <td>5270.020000</td>\n",
" <td>4394.340000</td>\n",
" <td>5584.070000</td>\n",
" <td>1584.970000</td>\n",
" <td>5638.400000</td>\n",
" <td>2872.980000</td>\n",
" <td>5672.730000</td>\n",
" <td>4278.880000</td>\n",
" <td>...</td>\n",
" <td>3596.680000</td>\n",
" <td>2671.400000</td>\n",
" <td>2619.180000</td>\n",
" <td>1245.900000</td>\n",
" <td>1511.180000</td>\n",
" <td>931.713000</td>\n",
" <td>6862.060000</td>\n",
" <td>1506.570000</td>\n",
" <td>1458.870000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>8121.780000</td>\n",
" <td>7589.020000</td>\n",
" <td>7484.110000</td>\n",
" <td>7317.950000</td>\n",
" <td>7440.210000</td>\n",
" <td>5460.080000</td>\n",
" <td>7245.040000</td>\n",
" <td>5516.610000</td>\n",
" <td>7395.610000</td>\n",
" <td>7144.480000</td>\n",
" <td>...</td>\n",
" <td>5817.750000</td>\n",
" <td>4174.010000</td>\n",
" <td>4837.610000</td>\n",
" <td>3016.520000</td>\n",
" <td>4508.510000</td>\n",
" <td>2950.880000</td>\n",
" <td>7378.020000</td>\n",
" <td>4208.730000</td>\n",
" <td>4112.550000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>8650.500000</td>\n",
" <td>8377.720000</td>\n",
" <td>8566.420000</td>\n",
" <td>8650.500000</td>\n",
" <td>8516.100000</td>\n",
" <td>8267.120000</td>\n",
" <td>8499.330000</td>\n",
" <td>8001.700000</td>\n",
" <td>8452.380000</td>\n",
" <td>8422.060000</td>\n",
" <td>...</td>\n",
" <td>7981.820000</td>\n",
" <td>8445.410000</td>\n",
" <td>7919.070000</td>\n",
" <td>8206.780000</td>\n",
" <td>8235.400000</td>\n",
" <td>8247.630000</td>\n",
" <td>8410.330000</td>\n",
" <td>8418.230000</td>\n",
" <td>8502.020000</td>\n",
" <td>5.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" max_ndvi 20150720_N 20150602_N 20150517_N 20150501_N \\\n",
"count 10545.000000 10545.000000 10545.000000 10545.000000 10545.000000 \n",
"mean 7282.721268 5713.832981 4777.434284 4352.914883 5077.372030 \n",
"std 1603.782784 2283.945491 2735.244614 2870.619613 2512.162084 \n",
"min 563.444000 -433.735000 -1781.790000 -2939.740000 -3536.540000 \n",
"25% 7285.310000 4027.570000 2060.600000 1446.940000 2984.370000 \n",
"50% 7886.260000 6737.730000 5270.020000 4394.340000 5584.070000 \n",
"75% 8121.780000 7589.020000 7484.110000 7317.950000 7440.210000 \n",
"max 8650.500000 8377.720000 8566.420000 8650.500000 8516.100000 \n",
"\n",
" 20150415_N 20150330_N 20150314_N 20150226_N 20150210_N \\\n",
"count 10545.000000 10545.000000 10545.000000 10545.000000 10545.000000 \n",
"mean 2871.423540 4898.348680 3338.303406 4902.600296 4249.307925 \n",
"std 2675.074079 2578.318759 2421.309390 2691.397266 2777.809493 \n",
"min -1815.630000 -5992.080000 -1677.600000 -2624.640000 -3403.050000 \n",
"25% 526.911000 2456.310000 1017.710000 2321.550000 1379.210000 \n",
"50% 1584.970000 5638.400000 2872.980000 5672.730000 4278.880000 \n",
"75% 5460.080000 7245.040000 5516.610000 7395.610000 7144.480000 \n",
"max 8267.120000 8499.330000 8001.700000 8452.380000 8422.060000 \n",
"\n",
" ... 20140525_N 20140509_N 20140423_N 20140407_N \\\n",
"count ... 10545.000000 10545.000000 10545.000000 10545.000000 \n",
"mean ... 3640.367446 3027.313647 3022.054677 2041.609136 \n",
"std ... 2298.281052 2054.223951 2176.307289 2020.499263 \n",
"min ... -1043.160000 -4869.010000 -1505.780000 -1445.370000 \n",
"25% ... 1392.390000 1405.020000 1010.180000 429.881000 \n",
"50% ... 3596.680000 2671.400000 2619.180000 1245.900000 \n",
"75% ... 5817.750000 4174.010000 4837.610000 3016.520000 \n",
"max ... 7981.820000 8445.410000 7919.070000 8206.780000 \n",
"\n",
" 20140322_N 20140218_N 20140202_N 20140117_N 20140101_N \\\n",
"count 10545.000000 10545.000000 10545.000000 10545.000000 10545.000000 \n",
"mean 2691.604363 2058.300423 6109.309315 2563.511596 2558.926018 \n",
"std 2408.279935 2212.018257 1944.613487 2336.052498 2413.851082 \n",
"min -4354.630000 -232.292000 -6807.550000 -2139.860000 -4145.250000 \n",
"25% 766.451000 494.858000 5646.670000 689.922000 685.680000 \n",
"50% 1511.180000 931.713000 6862.060000 1506.570000 1458.870000 \n",
"75% 4508.510000 2950.880000 7378.020000 4208.730000 4112.550000 \n",
"max 8235.400000 8247.630000 8410.330000 8418.230000 8502.020000 \n",
"\n",
" class \n",
"count 10545.000000 \n",
"mean 0.550213 \n",
"std 1.009424 \n",
"min 0.000000 \n",
"25% 0.000000 \n",
"50% 0.000000 \n",
"75% 1.000000 \n",
"max 5.000000 \n",
"\n",
"[8 rows x 29 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_data.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Split Data into Train and Validation"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Separate the feature columns from the `class` target.\n",
"# `columns=` is used because a positional axis argument to drop() is rejected by pandas 2.x.\n",
"X = train_data.drop(columns='class')\n",
"y = train_data['class']\n",
"# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible.\n",
"X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Define the Classifier and Train"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/gera/anaconda3/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
" FutureWarning)\n",
"/home/gera/anaconda3/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n",
" \"this warning.\", FutureWarning)\n"
]
},
{
"data": {
"text/plain": [
"LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
" intercept_scaling=1, l1_ratio=None, max_iter=100,\n",
" multi_class='warn', n_jobs=None, penalty='l2',\n",
" random_state=None, solver='warn', tol=0.0001, verbose=0,\n",
" warm_start=False)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"classifier = LogisticRegression()\n",
"classifier.fit(X_train,y_train)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Predict on Validation"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y_pred = classifier.predict(X_val)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Actual</th>\n",
" <th>Predicted</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1669</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4901</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3229</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6521</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2513</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8279</th>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>360</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2816</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7049</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6630</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4687</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7861</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1453</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1084</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10302</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4104</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9728</th>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3869</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4344</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7185</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5964</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2894</th>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4702</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>932</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Actual Predicted\n",
"1669 0 0\n",
"4901 0 0\n",
"3229 0 0\n",
"6521 0 0\n",
"2513 0 0\n",
"33 4 0\n",
"8279 2 0\n",
"360 0 0\n",
"2816 0 0\n",
"7049 0 0\n",
"6630 0 0\n",
"4687 0 0\n",
"7861 1 1\n",
"1453 0 0\n",
"1084 0 0\n",
"10302 3 0\n",
"4104 0 0\n",
"9728 3 0\n",
"3869 0 0\n",
"4344 0 0\n",
"7185 0 0\n",
"5964 0 0\n",
"2894 0 1\n",
"4702 0 0\n",
"932 0 0"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Side-by-side view of true vs. predicted labels for the first 25 validation rows.\n",
"# A dedicated name is used instead of `df`, which the submission cell rebinds to other data.\n",
"val_comparison = pd.DataFrame({'Actual': y_val, 'Predicted': y_pred})\n",
"val_comparison.head(25)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Evaluate the Performance"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"F1 score Score: 0.8378378378378378\n",
"Log Loss Score: 5.424931880355455e-15\n"
]
}
],
"source": [
"print('F1 score Score:', metrics.f1_score(y_val, y_pred,average='micro')) \n",
"# log_loss expects one probability row per sample. Only hard labels are predicted here,\n",
"# so one-hot encode y_pred. Encoding y_val instead would score the truth against itself\n",
"# and always report ~0, regardless of how good the model is.\n",
"labels = list(range(6))  # class ids 0..5\n",
"y_predy = np.zeros((len(y_pred), len(labels)))\n",
"y_predy[np.arange(len(y_pred)), y_pred] = 1\n",
"print('Log Loss Score:',metrics.log_loss(y_val,y_predy,labels=labels)) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load Test Set"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"test_data = pd.read_csv('../data/public/test.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Predict Test Set"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"y_test = classifier.predict(test_data)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Submission file: a single `class` column with one predicted label per test row, no index column.\n",
"df = pd.DataFrame({'class': y_test})\n",
"df.to_csv(path_or_buf='../data/public/submission.csv', index=False)"
]
},
{
"cell_type": "markdown",
"metadata": {
"collapsed": true
},
"source": [
"To participate, visit the [CRDSM challenge page on AIcrowd](https://www.aicrowd.com/challenges/crdsm-crowdsourced-map-land-cover-prediction/)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.10"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment