Created
March 27, 2020 11:17
-
-
Save darthgera123/73fbe516c7e3d3aea1c0f7f26dd6f7d5 to your computer and use it in GitHub Desktop.
Baseline for DOTAW
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"# Baseline for the challenge DOTAW" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"import numpy as np\n", | |
"import pandas as pd\n", | |
"from sklearn.model_selection import train_test_split \n", | |
"from sklearn.linear_model import LogisticRegression\n", | |
"from sklearn import metrics" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Load Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Load the public training CSV into a DataFrame.\n", | |
"train_csv_path = '../data/public/train.csv'\n", | |
"train_data = pd.read_csv(train_csv_path)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Analyse Data" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>winner</th>\n", | |
" <th>cluster_id</th>\n", | |
" <th>game_mode</th>\n", | |
" <th>game_type</th>\n", | |
" <th>hero_0</th>\n", | |
" <th>hero_1</th>\n", | |
" <th>hero_2</th>\n", | |
" <th>hero_3</th>\n", | |
" <th>hero_4</th>\n", | |
" <th>hero_5</th>\n", | |
" <th>...</th>\n", | |
" <th>hero_103</th>\n", | |
" <th>hero_104</th>\n", | |
" <th>hero_105</th>\n", | |
" <th>hero_106</th>\n", | |
" <th>hero_107</th>\n", | |
" <th>hero_108</th>\n", | |
" <th>hero_109</th>\n", | |
" <th>hero_110</th>\n", | |
" <th>hero_111</th>\n", | |
" <th>hero_112</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>-1</td>\n", | |
" <td>223</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1</td>\n", | |
" <td>152</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>-1</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1</td>\n", | |
" <td>131</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>1</td>\n", | |
" <td>0</td>\n", | |
" <td>-1</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>1</td>\n", | |
" <td>154</td>\n", | |
" <td>2</td>\n", | |
" <td>2</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>-1</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>-1</td>\n", | |
" <td>171</td>\n", | |
" <td>2</td>\n", | |
" <td>3</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>-1</td>\n", | |
" <td>...</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 117 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" winner cluster_id game_mode game_type hero_0 hero_1 hero_2 hero_3 \\\n", | |
"0 -1 223 2 2 0 0 0 0 \n", | |
"1 1 152 2 2 0 0 0 1 \n", | |
"2 1 131 2 2 0 0 0 1 \n", | |
"3 1 154 2 2 0 0 0 0 \n", | |
"4 -1 171 2 3 0 0 0 0 \n", | |
"\n", | |
" hero_4 hero_5 ... hero_103 hero_104 hero_105 hero_106 hero_107 \\\n", | |
"0 0 0 ... 0 0 0 0 0 \n", | |
"1 0 -1 ... 0 0 0 0 0 \n", | |
"2 0 -1 ... 0 0 0 0 0 \n", | |
"3 0 0 ... -1 0 0 0 0 \n", | |
"4 0 -1 ... 0 0 0 0 0 \n", | |
"\n", | |
" hero_108 hero_109 hero_110 hero_111 hero_112 \n", | |
"0 0 0 0 0 0 \n", | |
"1 0 0 0 0 0 \n", | |
"2 0 0 0 0 0 \n", | |
"3 0 0 0 0 0 \n", | |
"4 0 0 0 0 0 \n", | |
"\n", | |
"[5 rows x 117 columns]" | |
] | |
}, | |
"execution_count": 4, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Preview the first five rows of the training frame.\n", | |
"train_data.head(n=5)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>winner</th>\n", | |
" <th>cluster_id</th>\n", | |
" <th>game_mode</th>\n", | |
" <th>game_type</th>\n", | |
" <th>hero_0</th>\n", | |
" <th>hero_1</th>\n", | |
" <th>hero_2</th>\n", | |
" <th>hero_3</th>\n", | |
" <th>hero_4</th>\n", | |
" <th>hero_5</th>\n", | |
" <th>...</th>\n", | |
" <th>hero_103</th>\n", | |
" <th>hero_104</th>\n", | |
" <th>hero_105</th>\n", | |
" <th>hero_106</th>\n", | |
" <th>hero_107</th>\n", | |
" <th>hero_108</th>\n", | |
" <th>hero_109</th>\n", | |
" <th>hero_110</th>\n", | |
" <th>hero_111</th>\n", | |
" <th>hero_112</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.0</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" <td>92650.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>0.053038</td>\n", | |
" <td>175.864145</td>\n", | |
" <td>3.317572</td>\n", | |
" <td>2.384587</td>\n", | |
" <td>-0.001630</td>\n", | |
" <td>-0.000971</td>\n", | |
" <td>0.000691</td>\n", | |
" <td>-0.000799</td>\n", | |
" <td>-0.002008</td>\n", | |
" <td>0.003173</td>\n", | |
" <td>...</td>\n", | |
" <td>-0.001371</td>\n", | |
" <td>-0.000950</td>\n", | |
" <td>0.000885</td>\n", | |
" <td>0.000594</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.001025</td>\n", | |
" <td>0.000648</td>\n", | |
" <td>-0.000227</td>\n", | |
" <td>-0.000043</td>\n", | |
" <td>0.000896</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>0.998598</td>\n", | |
" <td>35.658214</td>\n", | |
" <td>2.633070</td>\n", | |
" <td>0.486833</td>\n", | |
" <td>0.402004</td>\n", | |
" <td>0.467672</td>\n", | |
" <td>0.165052</td>\n", | |
" <td>0.355393</td>\n", | |
" <td>0.329348</td>\n", | |
" <td>0.483950</td>\n", | |
" <td>...</td>\n", | |
" <td>0.535024</td>\n", | |
" <td>0.206112</td>\n", | |
" <td>0.283985</td>\n", | |
" <td>0.155940</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.220703</td>\n", | |
" <td>0.204166</td>\n", | |
" <td>0.168707</td>\n", | |
" <td>0.189868</td>\n", | |
" <td>0.139033</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>-1.000000</td>\n", | |
" <td>111.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" <td>-1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>-1.000000</td>\n", | |
" <td>152.000000</td>\n", | |
" <td>2.000000</td>\n", | |
" <td>2.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>1.000000</td>\n", | |
" <td>156.000000</td>\n", | |
" <td>2.000000</td>\n", | |
" <td>2.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>1.000000</td>\n", | |
" <td>223.000000</td>\n", | |
" <td>2.000000</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" <td>0.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>1.000000</td>\n", | |
" <td>261.000000</td>\n", | |
" <td>9.000000</td>\n", | |
" <td>3.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>...</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>8 rows × 117 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" winner cluster_id game_mode game_type hero_0 \\\n", | |
"count 92650.000000 92650.000000 92650.000000 92650.000000 92650.000000 \n", | |
"mean 0.053038 175.864145 3.317572 2.384587 -0.001630 \n", | |
"std 0.998598 35.658214 2.633070 0.486833 0.402004 \n", | |
"min -1.000000 111.000000 1.000000 1.000000 -1.000000 \n", | |
"25% -1.000000 152.000000 2.000000 2.000000 0.000000 \n", | |
"50% 1.000000 156.000000 2.000000 2.000000 0.000000 \n", | |
"75% 1.000000 223.000000 2.000000 3.000000 0.000000 \n", | |
"max 1.000000 261.000000 9.000000 3.000000 1.000000 \n", | |
"\n", | |
" hero_1 hero_2 hero_3 hero_4 hero_5 \\\n", | |
"count 92650.000000 92650.000000 92650.000000 92650.000000 92650.000000 \n", | |
"mean -0.000971 0.000691 -0.000799 -0.002008 0.003173 \n", | |
"std 0.467672 0.165052 0.355393 0.329348 0.483950 \n", | |
"min -1.000000 -1.000000 -1.000000 -1.000000 -1.000000 \n", | |
"25% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"50% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"75% 0.000000 0.000000 0.000000 0.000000 0.000000 \n", | |
"max 1.000000 1.000000 1.000000 1.000000 1.000000 \n", | |
"\n", | |
" ... hero_103 hero_104 hero_105 hero_106 \\\n", | |
"count ... 92650.000000 92650.000000 92650.000000 92650.000000 \n", | |
"mean ... -0.001371 -0.000950 0.000885 0.000594 \n", | |
"std ... 0.535024 0.206112 0.283985 0.155940 \n", | |
"min ... -1.000000 -1.000000 -1.000000 -1.000000 \n", | |
"25% ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"50% ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"75% ... 0.000000 0.000000 0.000000 0.000000 \n", | |
"max ... 1.000000 1.000000 1.000000 1.000000 \n", | |
"\n", | |
" hero_107 hero_108 hero_109 hero_110 hero_111 \\\n", | |
"count 92650.0 92650.000000 92650.000000 92650.000000 92650.000000 \n", | |
"mean 0.0 0.001025 0.000648 -0.000227 -0.000043 \n", | |
"std 0.0 0.220703 0.204166 0.168707 0.189868 \n", | |
"min 0.0 -1.000000 -1.000000 -1.000000 -1.000000 \n", | |
"25% 0.0 0.000000 0.000000 0.000000 0.000000 \n", | |
"50% 0.0 0.000000 0.000000 0.000000 0.000000 \n", | |
"75% 0.0 0.000000 0.000000 0.000000 0.000000 \n", | |
"max 0.0 1.000000 1.000000 1.000000 1.000000 \n", | |
"\n", | |
" hero_112 \n", | |
"count 92650.000000 \n", | |
"mean 0.000896 \n", | |
"std 0.139033 \n", | |
"min -1.000000 \n", | |
"25% 0.000000 \n", | |
"50% 0.000000 \n", | |
"75% 0.000000 \n", | |
"max 1.000000 \n", | |
"\n", | |
"[8 rows x 117 columns]" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Per-column summary statistics (count, mean, std, min, quartiles, max).\n", | |
"train_data.describe(percentiles=[0.25, 0.5, 0.75])" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Split Data into Train and Validation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Features are every column except the label. `axis` is passed by keyword\n", | |
"# because pandas 2.0 removed the positional form `drop('winner', 1)`.\n", | |
"X = train_data.drop('winner', axis=1)\n", | |
"y = train_data['winner']\n", | |
"# Hold out 20% of the rows for validation; the fixed seed keeps the split reproducible.\n", | |
"X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Define the Classifier and Train" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Pin the solver explicitly: scikit-learn 0.22 changed the default from\n", | |
"# 'liblinear' to 'lbfgs', so relying on the default makes this baseline\n", | |
"# depend on the installed version (and emits a FutureWarning on 0.20/0.21).\n", | |
"classifier = LogisticRegression(solver='liblinear')\n", | |
"classifier.fit(X_train, y_train)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Predict on Validation" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Hard class labels for the held-out validation rows.\n", | |
"y_pred = classifier.predict(X=X_val)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style>\n", | |
" .dataframe thead tr:only-child th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: left;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Actual</th>\n", | |
" <th>Predicted</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>26389</th>\n", | |
" <td>1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>55196</th>\n", | |
" <td>-1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>51250</th>\n", | |
" <td>-1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25508</th>\n", | |
" <td>1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>24128</th>\n", | |
" <td>1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2442</th>\n", | |
" <td>-1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5638</th>\n", | |
" <td>-1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3714</th>\n", | |
" <td>-1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>36579</th>\n", | |
" <td>-1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>10399</th>\n", | |
" <td>-1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13464</th>\n", | |
" <td>-1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>71600</th>\n", | |
" <td>-1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>80162</th>\n", | |
" <td>1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7077</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>63431</th>\n", | |
" <td>-1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>78584</th>\n", | |
" <td>1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>31413</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13393</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>90845</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23339</th>\n", | |
" <td>-1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>13756</th>\n", | |
" <td>-1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>63563</th>\n", | |
" <td>-1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>81880</th>\n", | |
" <td>-1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>77591</th>\n", | |
" <td>-1</td>\n", | |
" <td>-1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>23311</th>\n", | |
" <td>1</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" Actual Predicted\n", | |
"26389 1 -1\n", | |
"55196 -1 1\n", | |
"51250 -1 1\n", | |
"25508 1 -1\n", | |
"24128 1 -1\n", | |
"2442 -1 -1\n", | |
"5638 -1 -1\n", | |
"3714 -1 1\n", | |
"36579 -1 1\n", | |
"10399 -1 -1\n", | |
"13464 -1 -1\n", | |
"71600 -1 1\n", | |
"80162 1 -1\n", | |
"7077 1 1\n", | |
"63431 -1 1\n", | |
"78584 1 -1\n", | |
"31413 1 1\n", | |
"13393 1 1\n", | |
"90845 1 1\n", | |
"23339 -1 -1\n", | |
"13756 -1 1\n", | |
"63563 -1 -1\n", | |
"81880 -1 1\n", | |
"77591 -1 -1\n", | |
"23311 1 1" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Put true and predicted labels side by side and show the first 25 rows.\n", | |
"df = pd.DataFrame(data={'Actual': y_val, 'Predicted': y_pred})\n", | |
"df1 = df.iloc[:25]\n", | |
"df1" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Evaluate the Performance" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# F1 is computed on the hard labels (positive class is 1, the default pos_label).\n", | |
"print('F1 Score:', metrics.f1_score(y_val, y_pred))\n", | |
"# ROC AUC needs a ranking score, not thresholded labels: with hard -1/1\n", | |
"# predictions the curve collapses to a single operating point.\n", | |
"# Column 1 of predict_proba is the probability of the greater label (1).\n", | |
"y_val_score = classifier.predict_proba(X_val)[:, 1]\n", | |
"print('ROC AUC Score:', metrics.roc_auc_score(y_val, y_val_score))" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Load Test Set" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Load the public test CSV into a DataFrame.\n", | |
"test_csv_path = '../data/public/test.csv'\n", | |
"test_data = pd.read_csv(test_csv_path)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"## Predict Test Set" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Despite the name, `y_test` holds the model's predicted labels for the test rows.\n", | |
"y_test = classifier.predict(X=test_data)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [ | |
"# Write the predictions as a single `winner` column, without the index.\n", | |
"df = pd.DataFrame({'winner': y_test})\n", | |
"df.to_csv(path_or_buf='../data/public/submission.csv', index=False)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"collapsed": true | |
}, | |
"source": [ | |
"To participate in the challenge, visit the [DOTAW challenge page on AIcrowd](https://www.aicrowd.com/challenges/dotaw-dota-2-prediction/)." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"collapsed": true | |
}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.10" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment