Created
March 27, 2020 11:15
-
-
Save darthgera123/ac22597cef7108df0a9514fcb733ae15 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Baseline submission for the challenge CRDSM" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"from sklearn.model_selection import train_test_split \n", | |
"from sklearn.linear_model import LogisticRegression\n", | |
"from sklearn import metrics" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Load Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"train_data = pd.read_csv('../data/public/train.csv')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Analyse Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>max_ndvi</th>\n", | |
" <th>20150720_N</th>\n", | |
" <th>20150602_N</th>\n", | |
" <th>20150517_N</th>\n", | |
" <th>20150501_N</th>\n", | |
" <th>20150415_N</th>\n", | |
" <th>20150330_N</th>\n", | |
" <th>20150314_N</th>\n", | |
" <th>20150226_N</th>\n", | |
" <th>20150210_N</th>\n", | |
" <th>...</th>\n", | |
" <th>20140525_N</th>\n", | |
" <th>20140509_N</th>\n", | |
" <th>20140423_N</th>\n", | |
" <th>20140407_N</th>\n", | |
" <th>20140322_N</th>\n", | |
" <th>20140218_N</th>\n", | |
" <th>20140202_N</th>\n", | |
" <th>20140117_N</th>\n", | |
" <th>20140101_N</th>\n", | |
" <th>class</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>997.904</td>\n", | |
" <td>637.5950</td>\n", | |
" <td>658.668</td>\n", | |
" <td>-1882.030</td>\n", | |
" <td>-1924.36</td>\n", | |
" <td>997.904</td>\n", | |
" <td>-1739.990</td>\n", | |
" <td>630.087</td>\n", | |
" <td>-1628.240</td>\n", | |
" <td>-1325.64</td>\n", | |
" <td>...</td>\n", | |
" <td>-1043.160</td>\n", | |
" <td>-1942.490</td>\n", | |
" <td>267.138</td>\n", | |
" <td>366.608</td>\n", | |
" <td>452.238</td>\n", | |
" <td>211.328</td>\n", | |
" <td>-2203.02</td>\n", | |
" <td>-1180.190</td>\n", | |
" <td>433.906</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>914.198</td>\n", | |
" <td>634.2400</td>\n", | |
" <td>593.705</td>\n", | |
" <td>-1625.790</td>\n", | |
" <td>-1672.32</td>\n", | |
" <td>914.198</td>\n", | |
" <td>-692.386</td>\n", | |
" <td>707.626</td>\n", | |
" <td>-1670.590</td>\n", | |
" <td>-1408.64</td>\n", | |
" <td>...</td>\n", | |
" <td>-933.934</td>\n", | |
" <td>-625.385</td>\n", | |
" <td>120.059</td>\n", | |
" <td>364.858</td>\n", | |
" <td>476.972</td>\n", | |
" <td>220.878</td>\n", | |
" <td>-2250.00</td>\n", | |
" <td>-1360.560</td>\n", | |
" <td>524.075</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>3800.810</td>\n", | |
" <td>1671.3400</td>\n", | |
" <td>1206.880</td>\n", | |
" <td>449.735</td>\n", | |
" <td>1071.21</td>\n", | |
" <td>546.371</td>\n", | |
" <td>1077.840</td>\n", | |
" <td>214.564</td>\n", | |
" <td>849.599</td>\n", | |
" <td>1283.63</td>\n", | |
" <td>...</td>\n", | |
" <td>1566.160</td>\n", | |
" <td>2208.440</td>\n", | |
" <td>1056.600</td>\n", | |
" <td>385.203</td>\n", | |
" <td>300.560</td>\n", | |
" <td>293.730</td>\n", | |
" <td>2762.57</td>\n", | |
" <td>150.931</td>\n", | |
" <td>3800.810</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>952.178</td>\n", | |
" <td>58.0174</td>\n", | |
" <td>-1599.160</td>\n", | |
" <td>210.714</td>\n", | |
" <td>-1052.63</td>\n", | |
" <td>578.807</td>\n", | |
" <td>-1564.630</td>\n", | |
" <td>-858.390</td>\n", | |
" <td>729.790</td>\n", | |
" <td>-3162.14</td>\n", | |
" <td>...</td>\n", | |
" <td>368.622</td>\n", | |
" <td>-1786.950</td>\n", | |
" <td>-1227.800</td>\n", | |
" <td>304.621</td>\n", | |
" <td>291.336</td>\n", | |
" <td>369.214</td>\n", | |
" <td>-2202.12</td>\n", | |
" <td>600.359</td>\n", | |
" <td>-1343.550</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1232.120</td>\n", | |
" <td>72.5180</td>\n", | |
" <td>-1220.880</td>\n", | |
" <td>380.436</td>\n", | |
" <td>-1256.93</td>\n", | |
" <td>515.805</td>\n", | |
" <td>-1413.180</td>\n", | |
" <td>-802.942</td>\n", | |
" <td>683.254</td>\n", | |
" <td>-2829.40</td>\n", | |
" <td>...</td>\n", | |
" <td>155.624</td>\n", | |
" <td>-1189.710</td>\n", | |
" <td>-924.073</td>\n", | |
" <td>432.150</td>\n", | |
" <td>282.833</td>\n", | |
" <td>298.320</td>\n", | |
" <td>-2197.36</td>\n", | |
" <td>626.379</td>\n", | |
" <td>-826.727</td>\n", | |
" <td>4</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 29 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" max_ndvi 20150720_N 20150602_N 20150517_N 20150501_N 20150415_N \\\n", | |
"0 997.904 637.5950 658.668 -1882.030 -1924.36 997.904 \n", | |
"1 914.198 634.2400 593.705 -1625.790 -1672.32 914.198 \n", | |
"2 3800.810 1671.3400 1206.880 449.735 1071.21 546.371 \n", | |
"3 952.178 58.0174 -1599.160 210.714 -1052.63 578.807 \n", | |
"4 1232.120 72.5180 -1220.880 380.436 -1256.93 515.805 \n", | |
"\n", | |
" 20150330_N 20150314_N 20150226_N 20150210_N ... 20140525_N \\\n", | |
"0 -1739.990 630.087 -1628.240 -1325.64 ... -1043.160 \n", | |
"1 -692.386 707.626 -1670.590 -1408.64 ... -933.934 \n", | |
"2 1077.840 214.564 849.599 1283.63 ... 1566.160 \n", | |
"3 -1564.630 -858.390 729.790 -3162.14 ... 368.622 \n", | |
"4 -1413.180 -802.942 683.254 -2829.40 ... 155.624 \n", | |
"\n", | |
" 20140509_N 20140423_N 20140407_N 20140322_N 20140218_N 20140202_N \\\n", | |
"0 -1942.490 267.138 366.608 452.238 211.328 -2203.02 \n", | |
"1 -625.385 120.059 364.858 476.972 220.878 -2250.00 \n", | |
"2 2208.440 1056.600 385.203 300.560 293.730 2762.57 \n", | |
"3 -1786.950 -1227.800 304.621 291.336 369.214 -2202.12 \n", | |
"4 -1189.710 -924.073 432.150 282.833 298.320 -2197.36 \n", | |
"\n", | |
" 20140117_N 20140101_N class \n", | |
"0 -1180.190 433.906 4 \n", | |
"1 -1360.560 524.075 4 \n", | |
"2 150.931 3800.810 4 \n", | |
"3 600.359 -1343.550 4 \n", | |
"4 626.379 -826.727 4 \n", | |
"\n", | |
"[5 rows x 29 columns]" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_data.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>max_ndvi</th>\n", | |
" <th>20150720_N</th>\n", | |
" <th>20150602_N</th>\n", | |
" <th>20150517_N</th>\n", | |
" <th>20150501_N</th>\n", | |
" <th>20150415_N</th>\n", | |
" <th>20150330_N</th>\n", | |
" <th>20150314_N</th>\n", | |
" <th>20150226_N</th>\n", | |
" <th>20150210_N</th>\n", | |
" <th>...</th>\n", | |
" <th>20140525_N</th>\n", | |
" <th>20140509_N</th>\n", | |
" <th>20140423_N</th>\n", | |
" <th>20140407_N</th>\n", | |
" <th>20140322_N</th>\n", | |
" <th>20140218_N</th>\n", | |
" <th>20140202_N</th>\n", | |
" <th>20140117_N</th>\n", | |
" <th>20140101_N</th>\n", | |
" <th>class</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" <td>10545.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>7282.721268</td>\n", | |
" <td>5713.832981</td>\n", | |
" <td>4777.434284</td>\n", | |
" <td>4352.914883</td>\n", | |
" <td>5077.372030</td>\n", | |
" <td>2871.423540</td>\n", | |
" <td>4898.348680</td>\n", | |
" <td>3338.303406</td>\n", | |
" <td>4902.600296</td>\n", | |
" <td>4249.307925</td>\n", | |
" <td>...</td>\n", | |
" <td>3640.367446</td>\n", | |
" <td>3027.313647</td>\n", | |
" <td>3022.054677</td>\n", | |
" <td>2041.609136</td>\n", | |
" <td>2691.604363</td>\n", | |
" <td>2058.300423</td>\n", | |
" <td>6109.309315</td>\n", | |
" <td>2563.511596</td>\n", | |
" <td>2558.926018</td>\n", | |
" <td>0.550213</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>1603.782784</td>\n", | |
" <td>2283.945491</td>\n", | |
" <td>2735.244614</td>\n", | |
" <td>2870.619613</td>\n", | |
" <td>2512.162084</td>\n", | |
" <td>2675.074079</td>\n", | |
" <td>2578.318759</td>\n", | |
" <td>2421.309390</td>\n", | |
" <td>2691.397266</td>\n", | |
" <td>2777.809493</td>\n", | |
" <td>...</td>\n", | |
" <td>2298.281052</td>\n", | |
" <td>2054.223951</td>\n", | |
" <td>2176.307289</td>\n", | |
" <td>2020.499263</td>\n", | |
" <td>2408.279935</td>\n", | |
" <td>2212.018257</td>\n", | |
" <td>1944.613487</td>\n", | |
" <td>2336.052498</td>\n", | |
" <td>2413.851082</td>\n", | |
" <td>1.009424</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>563.444000</td>\n", | |
" <td>-433.735000</td>\n", | |
" <td>-1781.790000</td>\n", | |
" <td>-2939.740000</td>\n", | |
" <td>-3536.540000</td>\n", | |
" <td>-1815.630000</td>\n", | |
" <td>-5992.080000</td>\n", | |
" <td>-1677.600000</td>\n", | |
" <td>-2624.640000</td>\n", | |
" <td>-3403.050000</td>\n", | |
" <td>...</td>\n", | |
" <td>-1043.160000</td>\n", | |
" <td>-4869.010000</td>\n", | |
" <td>-1505.780000</td>\n", | |
" <td>-1445.370000</td>\n", | |
" <td>-4354.630000</td>\n", | |
" <td>-232.292000</td>\n", | |
" <td>-6807.550000</td>\n", | |
" <td>-2139.860000</td>\n", | |
" <td>-4145.250000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>7285.310000</td>\n", | |
" <td>4027.570000</td>\n", | |
" <td>2060.600000</td>\n", | |
" <td>1446.940000</td>\n", | |
" <td>2984.370000</td>\n", | |
" <td>526.911000</td>\n", | |
" <td>2456.310000</td>\n", | |
" <td>1017.710000</td>\n", | |
" <td>2321.550000</td>\n", | |
" <td>1379.210000</td>\n", | |
" <td>...</td>\n", | |
" <td>1392.390000</td>\n", | |
" <td>1405.020000</td>\n", | |
" <td>1010.180000</td>\n", | |
" <td>429.881000</td>\n", | |
" <td>766.451000</td>\n", | |
" <td>494.858000</td>\n", | |
" <td>5646.670000</td>\n", | |
" <td>689.922000</td>\n", | |
" <td>685.680000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>7886.260000</td>\n", | |
" <td>6737.730000</td>\n", | |
" <td>5270.020000</td>\n", | |
" <td>4394.340000</td>\n", | |
" <td>5584.070000</td>\n", | |
" <td>1584.970000</td>\n", | |
" <td>5638.400000</td>\n", | |
" <td>2872.980000</td>\n", | |
" <td>5672.730000</td>\n", | |
" <td>4278.880000</td>\n", | |
" <td>...</td>\n", | |
" <td>3596.680000</td>\n", | |
" <td>2671.400000</td>\n", | |
" <td>2619.180000</td>\n", | |
" <td>1245.900000</td>\n", | |
" <td>1511.180000</td>\n", | |
" <td>931.713000</td>\n", | |
" <td>6862.060000</td>\n", | |
" <td>1506.570000</td>\n", | |
" <td>1458.870000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>8121.780000</td>\n", | |
" <td>7589.020000</td>\n", | |
" <td>7484.110000</td>\n", | |
" <td>7317.950000</td>\n", | |
" <td>7440.210000</td>\n", | |
" <td>5460.080000</td>\n", | |
" <td>7245.040000</td>\n", | |
" <td>5516.610000</td>\n", | |
" <td>7395.610000</td>\n", | |
" <td>7144.480000</td>\n", | |
" <td>...</td>\n", | |
" <td>5817.750000</td>\n", | |
" <td>4174.010000</td>\n", | |
" <td>4837.610000</td>\n", | |
" <td>3016.520000</td>\n", | |
" <td>4508.510000</td>\n", | |
" <td>2950.880000</td>\n", | |
" <td>7378.020000</td>\n", | |
" <td>4208.730000</td>\n", | |
" <td>4112.550000</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>8650.500000</td>\n", | |
" <td>8377.720000</td>\n", | |
" <td>8566.420000</td>\n", | |
" <td>8650.500000</td>\n", | |
" <td>8516.100000</td>\n", | |
" <td>8267.120000</td>\n", | |
" <td>8499.330000</td>\n", | |
" <td>8001.700000</td>\n", | |
" <td>8452.380000</td>\n", | |
" <td>8422.060000</td>\n", | |
" <td>...</td>\n", | |
" <td>7981.820000</td>\n", | |
" <td>8445.410000</td>\n", | |
" <td>7919.070000</td>\n", | |
" <td>8206.780000</td>\n", | |
" <td>8235.400000</td>\n", | |
" <td>8247.630000</td>\n", | |
" <td>8410.330000</td>\n", | |
" <td>8418.230000</td>\n", | |
" <td>8502.020000</td>\n", | |
" <td>5.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>8 rows × 29 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" max_ndvi 20150720_N 20150602_N 20150517_N 20150501_N \\\n", | |
"count 10545.000000 10545.000000 10545.000000 10545.000000 10545.000000 \n", | |
"mean 7282.721268 5713.832981 4777.434284 4352.914883 5077.372030 \n", | |
"std 1603.782784 2283.945491 2735.244614 2870.619613 2512.162084 \n", | |
"min 563.444000 -433.735000 -1781.790000 -2939.740000 -3536.540000 \n", | |
"25% 7285.310000 4027.570000 2060.600000 1446.940000 2984.370000 \n", | |
"50% 7886.260000 6737.730000 5270.020000 4394.340000 5584.070000 \n", | |
"75% 8121.780000 7589.020000 7484.110000 7317.950000 7440.210000 \n", | |
"max 8650.500000 8377.720000 8566.420000 8650.500000 8516.100000 \n", | |
"\n", | |
" 20150415_N 20150330_N 20150314_N 20150226_N 20150210_N \\\n", | |
"count 10545.000000 10545.000000 10545.000000 10545.000000 10545.000000 \n", | |
"mean 2871.423540 4898.348680 3338.303406 4902.600296 4249.307925 \n", | |
"std 2675.074079 2578.318759 2421.309390 2691.397266 2777.809493 \n", | |
"min -1815.630000 -5992.080000 -1677.600000 -2624.640000 -3403.050000 \n", | |
"25% 526.911000 2456.310000 1017.710000 2321.550000 1379.210000 \n", | |
"50% 1584.970000 5638.400000 2872.980000 5672.730000 4278.880000 \n", | |
"75% 5460.080000 7245.040000 5516.610000 7395.610000 7144.480000 \n", | |
"max 8267.120000 8499.330000 8001.700000 8452.380000 8422.060000 \n", | |
"\n", | |
" ... 20140525_N 20140509_N 20140423_N 20140407_N \\\n", | |
"count ... 10545.000000 10545.000000 10545.000000 10545.000000 \n", | |
"mean ... 3640.367446 3027.313647 3022.054677 2041.609136 \n", | |
"std ... 2298.281052 2054.223951 2176.307289 2020.499263 \n", | |
"min ... -1043.160000 -4869.010000 -1505.780000 -1445.370000 \n", | |
"25% ... 1392.390000 1405.020000 1010.180000 429.881000 \n", | |
"50% ... 3596.680000 2671.400000 2619.180000 1245.900000 \n", | |
"75% ... 5817.750000 4174.010000 4837.610000 3016.520000 \n", | |
"max ... 7981.820000 8445.410000 7919.070000 8206.780000 \n", | |
"\n", | |
" 20140322_N 20140218_N 20140202_N 20140117_N 20140101_N \\\n", | |
"count 10545.000000 10545.000000 10545.000000 10545.000000 10545.000000 \n", | |
"mean 2691.604363 2058.300423 6109.309315 2563.511596 2558.926018 \n", | |
"std 2408.279935 2212.018257 1944.613487 2336.052498 2413.851082 \n", | |
"min -4354.630000 -232.292000 -6807.550000 -2139.860000 -4145.250000 \n", | |
"25% 766.451000 494.858000 5646.670000 689.922000 685.680000 \n", | |
"50% 1511.180000 931.713000 6862.060000 1506.570000 1458.870000 \n", | |
"75% 4508.510000 2950.880000 7378.020000 4208.730000 4112.550000 \n", | |
"max 8235.400000 8247.630000 8410.330000 8418.230000 8502.020000 \n", | |
"\n", | |
" class \n", | |
"count 10545.000000 \n", | |
"mean 0.550213 \n", | |
"std 1.009424 \n", | |
"min 0.000000 \n", | |
"25% 0.000000 \n", | |
"50% 0.000000 \n", | |
"75% 1.000000 \n", | |
"max 5.000000 \n", | |
"\n", | |
"[8 rows x 29 columns]" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"train_data.describe()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Split Data into Train and Validation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"X = train_data.drop('class',1)\n", | |
"y = train_data['class']\n", | |
"# Validation testing\n", | |
"X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Define the Classifier and Train" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/gera/anaconda3/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:432: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n", | |
" FutureWarning)\n", | |
"/home/gera/anaconda3/lib/python3.6/site-packages/sklearn/linear_model/logistic.py:469: FutureWarning: Default multi_class will be changed to 'auto' in 0.22. Specify the multi_class option to silence this warning.\n", | |
" \"this warning.\", FutureWarning)\n" | |
] | |
}, | |
{ | |
"data": { | |
"text/plain": [ | |
"LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n", | |
" intercept_scaling=1, l1_ratio=None, max_iter=100,\n", | |
" multi_class='warn', n_jobs=None, penalty='l2',\n", | |
" random_state=None, solver='warn', tol=0.0001, verbose=0,\n", | |
" warm_start=False)" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"classifier = LogisticRegression()\n", | |
"classifier.fit(X_train,y_train)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Predict on Validation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"y_pred = classifier.predict(X_val)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Actual</th>\n", | |
" <th>Predicted</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>1669</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4901</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3229</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6521</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2513</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>33</th>\n", | |
" <td>4</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8279</th>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>360</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2816</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7049</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6630</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4687</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7861</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1453</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1084</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10302</th>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4104</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9728</th>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3869</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4344</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7185</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5964</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2894</th>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4702</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>932</th>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Actual Predicted\n", | |
"1669 0 0\n", | |
"4901 0 0\n", | |
"3229 0 0\n", | |
"6521 0 0\n", | |
"2513 0 0\n", | |
"33 4 0\n", | |
"8279 2 0\n", | |
"360 0 0\n", | |
"2816 0 0\n", | |
"7049 0 0\n", | |
"6630 0 0\n", | |
"4687 0 0\n", | |
"7861 1 1\n", | |
"1453 0 0\n", | |
"1084 0 0\n", | |
"10302 3 0\n", | |
"4104 0 0\n", | |
"9728 3 0\n", | |
"3869 0 0\n", | |
"4344 0 0\n", | |
"7185 0 0\n", | |
"5964 0 0\n", | |
"2894 0 1\n", | |
"4702 0 0\n", | |
"932 0 0" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"df = pd.DataFrame({'Actual': y_val, 'Predicted': y_pred})\n", | |
"df1 = df.head(25)\n", | |
"df1" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Evaluate the Performance" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 23, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"F1 score Score: 0.8378378378378378\n", | |
"Log Loss Score: 5.424931880355455e-15\n" | |
] | |
} | |
], | |
"source": [ | |
"print('F1 score Score:', metrics.f1_score(y_val, y_pred,average='micro')) \n", | |
"labels = [i for i in range(0,6)]\n", | |
"y_predy = np.zeros((len(y_val),len(labels)))\n", | |
"for index,val in enumerate(y_val):\n", | |
"\ty_predy[index][val] = 1\n", | |
"print('Log Loss Score:',metrics.log_loss(y_val,y_predy,labels=labels)) " | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Load Test Set" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"test_data = pd.read_csv('../data/public/test.csv')" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Predict Test Set" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 26, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"y_test = classifier.predict(test_data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 28, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"df = pd.DataFrame(y_test,columns=['class'])\n", | |
"df.to_csv('../data/public/submission.csv',index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"To participate in the challenge click [here](https://www.aicrowd.com/challenges/crdsm-crowdsourced-map-land-cover-prediction/)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment