Last active
March 31, 2021 00:54
-
-
Save pikonha/afbf2270f1beb6f80b2b9add4b1a7267 to your computer and use it in GitHub Desktop.
cancer-analysis.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Detecção de Câncar de Mama.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/picolloo/afbf2270f1beb6f80b2b9add4b1a7267/detec-o-de-c-ncar-de-mama.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "7yEsLd3bk0h7" | |
}, | |
"source": [ | |
"Disciplina de Data Mining - INE5644\n", | |
"## Exercício 1 - Detecção de Câncer de Mama (Classificação)\n", | |
"\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "iHra4Wdrkz4U" | |
}, | |
"source": [ | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"import pandas as pd" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "xq9DCd3rlcnL" | |
}, | |
"source": [ | |
"###0. Carregamento do conjunto de dados" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "5xA7ndkBlIEe", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "b3aa6001-9e99-4289-a39b-bc1dcba09329" | |
}, | |
"source": [ | |
"from sklearn.datasets import load_breast_cancer\n", | |
"cancer = load_breast_cancer()\n", | |
"\n", | |
"#formato dos datasets no sklearn\n", | |
"cancer.keys()" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 2 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "S_CP2rPyCvUM", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 258 | |
}, | |
"outputId": "a110845c-e7f5-4614-eb6a-1366853c797a" | |
}, | |
"source": [ | |
"df = pd.DataFrame(cancer['data'])\n", | |
"df.columns = cancer['feature_names']\n", | |
"df['target'] = cancer['target']\n", | |
"df.head()" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>mean radius</th>\n", | |
" <th>mean texture</th>\n", | |
" <th>mean perimeter</th>\n", | |
" <th>mean area</th>\n", | |
" <th>mean smoothness</th>\n", | |
" <th>mean compactness</th>\n", | |
" <th>mean concavity</th>\n", | |
" <th>mean concave points</th>\n", | |
" <th>mean symmetry</th>\n", | |
" <th>mean fractal dimension</th>\n", | |
" <th>radius error</th>\n", | |
" <th>texture error</th>\n", | |
" <th>perimeter error</th>\n", | |
" <th>area error</th>\n", | |
" <th>smoothness error</th>\n", | |
" <th>compactness error</th>\n", | |
" <th>concavity error</th>\n", | |
" <th>concave points error</th>\n", | |
" <th>symmetry error</th>\n", | |
" <th>fractal dimension error</th>\n", | |
" <th>worst radius</th>\n", | |
" <th>worst texture</th>\n", | |
" <th>worst perimeter</th>\n", | |
" <th>worst area</th>\n", | |
" <th>worst smoothness</th>\n", | |
" <th>worst compactness</th>\n", | |
" <th>worst concavity</th>\n", | |
" <th>worst concave points</th>\n", | |
" <th>worst symmetry</th>\n", | |
" <th>worst fractal dimension</th>\n", | |
" <th>target</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1.097064</td>\n", | |
" <td>-2.073335</td>\n", | |
" <td>1.269934</td>\n", | |
" <td>0.984375</td>\n", | |
" <td>1.568466</td>\n", | |
" <td>3.283515</td>\n", | |
" <td>2.652874</td>\n", | |
" <td>2.532475</td>\n", | |
" <td>2.217515</td>\n", | |
" <td>2.255747</td>\n", | |
" <td>2.489734</td>\n", | |
" <td>-0.565265</td>\n", | |
" <td>2.833031</td>\n", | |
" <td>2.487578</td>\n", | |
" <td>-0.214002</td>\n", | |
" <td>1.316862</td>\n", | |
" <td>0.724026</td>\n", | |
" <td>0.660820</td>\n", | |
" <td>1.148757</td>\n", | |
" <td>0.907083</td>\n", | |
" <td>1.886690</td>\n", | |
" <td>-1.359293</td>\n", | |
" <td>2.303601</td>\n", | |
" <td>2.001237</td>\n", | |
" <td>1.307686</td>\n", | |
" <td>2.616665</td>\n", | |
" <td>2.109526</td>\n", | |
" <td>2.296076</td>\n", | |
" <td>2.750622</td>\n", | |
" <td>1.937015</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1.829821</td>\n", | |
" <td>-0.353632</td>\n", | |
" <td>1.685955</td>\n", | |
" <td>1.908708</td>\n", | |
" <td>-0.826962</td>\n", | |
" <td>-0.487072</td>\n", | |
" <td>-0.023846</td>\n", | |
" <td>0.548144</td>\n", | |
" <td>0.001392</td>\n", | |
" <td>-0.868652</td>\n", | |
" <td>0.499255</td>\n", | |
" <td>-0.876244</td>\n", | |
" <td>0.263327</td>\n", | |
" <td>0.742402</td>\n", | |
" <td>-0.605351</td>\n", | |
" <td>-0.692926</td>\n", | |
" <td>-0.440780</td>\n", | |
" <td>0.260162</td>\n", | |
" <td>-0.805450</td>\n", | |
" <td>-0.099444</td>\n", | |
" <td>1.805927</td>\n", | |
" <td>-0.369203</td>\n", | |
" <td>1.535126</td>\n", | |
" <td>1.890489</td>\n", | |
" <td>-0.375612</td>\n", | |
" <td>-0.430444</td>\n", | |
" <td>-0.146749</td>\n", | |
" <td>1.087084</td>\n", | |
" <td>-0.243890</td>\n", | |
" <td>0.281190</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1.579888</td>\n", | |
" <td>0.456187</td>\n", | |
" <td>1.566503</td>\n", | |
" <td>1.558884</td>\n", | |
" <td>0.942210</td>\n", | |
" <td>1.052926</td>\n", | |
" <td>1.363478</td>\n", | |
" <td>2.037231</td>\n", | |
" <td>0.939685</td>\n", | |
" <td>-0.398008</td>\n", | |
" <td>1.228676</td>\n", | |
" <td>-0.780083</td>\n", | |
" <td>0.850928</td>\n", | |
" <td>1.181336</td>\n", | |
" <td>-0.297005</td>\n", | |
" <td>0.814974</td>\n", | |
" <td>0.213076</td>\n", | |
" <td>1.424827</td>\n", | |
" <td>0.237036</td>\n", | |
" <td>0.293559</td>\n", | |
" <td>1.511870</td>\n", | |
" <td>-0.023974</td>\n", | |
" <td>1.347475</td>\n", | |
" <td>1.456285</td>\n", | |
" <td>0.527407</td>\n", | |
" <td>1.082932</td>\n", | |
" <td>0.854974</td>\n", | |
" <td>1.955000</td>\n", | |
" <td>1.152255</td>\n", | |
" <td>0.201391</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>-0.768909</td>\n", | |
" <td>0.253732</td>\n", | |
" <td>-0.592687</td>\n", | |
" <td>-0.764464</td>\n", | |
" <td>3.283553</td>\n", | |
" <td>3.402909</td>\n", | |
" <td>1.915897</td>\n", | |
" <td>1.451707</td>\n", | |
" <td>2.867383</td>\n", | |
" <td>4.910919</td>\n", | |
" <td>0.326373</td>\n", | |
" <td>-0.110409</td>\n", | |
" <td>0.286593</td>\n", | |
" <td>-0.288378</td>\n", | |
" <td>0.689702</td>\n", | |
" <td>2.744280</td>\n", | |
" <td>0.819518</td>\n", | |
" <td>1.115007</td>\n", | |
" <td>4.732680</td>\n", | |
" <td>2.047511</td>\n", | |
" <td>-0.281464</td>\n", | |
" <td>0.133984</td>\n", | |
" <td>-0.249939</td>\n", | |
" <td>-0.550021</td>\n", | |
" <td>3.394275</td>\n", | |
" <td>3.893397</td>\n", | |
" <td>1.989588</td>\n", | |
" <td>2.175786</td>\n", | |
" <td>6.046041</td>\n", | |
" <td>4.935010</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1.750297</td>\n", | |
" <td>-1.151816</td>\n", | |
" <td>1.776573</td>\n", | |
" <td>1.826229</td>\n", | |
" <td>0.280372</td>\n", | |
" <td>0.539340</td>\n", | |
" <td>1.371011</td>\n", | |
" <td>1.428493</td>\n", | |
" <td>-0.009560</td>\n", | |
" <td>-0.562450</td>\n", | |
" <td>1.270543</td>\n", | |
" <td>-0.790244</td>\n", | |
" <td>1.273189</td>\n", | |
" <td>1.190357</td>\n", | |
" <td>1.483067</td>\n", | |
" <td>-0.048520</td>\n", | |
" <td>0.828471</td>\n", | |
" <td>1.144205</td>\n", | |
" <td>-0.361092</td>\n", | |
" <td>0.499328</td>\n", | |
" <td>1.298575</td>\n", | |
" <td>-1.466770</td>\n", | |
" <td>1.338539</td>\n", | |
" <td>1.220724</td>\n", | |
" <td>0.220556</td>\n", | |
" <td>-0.313395</td>\n", | |
" <td>0.613179</td>\n", | |
" <td>0.729259</td>\n", | |
" <td>-0.868353</td>\n", | |
" <td>-0.397100</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" mean radius mean texture ... worst fractal dimension target\n", | |
"0 1.097064 -2.073335 ... 1.937015 0\n", | |
"1 1.829821 -0.353632 ... 0.281190 0\n", | |
"2 1.579888 0.456187 ... 0.201391 0\n", | |
"3 -0.768909 0.253732 ... 4.935010 0\n", | |
"4 1.750297 -1.151816 ... -0.397100 0\n", | |
"\n", | |
"[5 rows x 31 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 47 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "g1GBP7FACovY" | |
}, | |
"source": [ | |
"#carrega X e y\n", | |
"X = df[cancer['feature_names']]\n", | |
"y = df['target']\n", | |
"\n", | |
"#divide treino e teste\n", | |
"from sklearn.model_selection import train_test_split\n", | |
"X_train, X_test, y_train, y_test = train_test_split(X, y,test_size=0.15)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 413 | |
}, | |
"id": "hcCyQrN0Ofbs", | |
"outputId": "47456b0d-73fb-4b5b-d616-281583edfa64" | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>mean radius</th>\n", | |
" <th>mean texture</th>\n", | |
" <th>mean perimeter</th>\n", | |
" <th>mean area</th>\n", | |
" <th>mean smoothness</th>\n", | |
" <th>mean compactness</th>\n", | |
" <th>mean concavity</th>\n", | |
" <th>mean concave points</th>\n", | |
" <th>mean symmetry</th>\n", | |
" <th>mean fractal dimension</th>\n", | |
" <th>radius error</th>\n", | |
" <th>texture error</th>\n", | |
" <th>perimeter error</th>\n", | |
" <th>area error</th>\n", | |
" <th>smoothness error</th>\n", | |
" <th>compactness error</th>\n", | |
" <th>concavity error</th>\n", | |
" <th>concave points error</th>\n", | |
" <th>symmetry error</th>\n", | |
" <th>fractal dimension error</th>\n", | |
" <th>worst radius</th>\n", | |
" <th>worst texture</th>\n", | |
" <th>worst perimeter</th>\n", | |
" <th>worst area</th>\n", | |
" <th>worst smoothness</th>\n", | |
" <th>worst compactness</th>\n", | |
" <th>worst concavity</th>\n", | |
" <th>worst concave points</th>\n", | |
" <th>worst symmetry</th>\n", | |
" <th>worst fractal dimension</th>\n", | |
" <th>target</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>17.99</td>\n", | |
" <td>10.38</td>\n", | |
" <td>122.80</td>\n", | |
" <td>1001.0</td>\n", | |
" <td>0.11840</td>\n", | |
" <td>0.27760</td>\n", | |
" <td>0.30010</td>\n", | |
" <td>0.14710</td>\n", | |
" <td>0.2419</td>\n", | |
" <td>0.07871</td>\n", | |
" <td>1.0950</td>\n", | |
" <td>0.9053</td>\n", | |
" <td>8.589</td>\n", | |
" <td>153.40</td>\n", | |
" <td>0.006399</td>\n", | |
" <td>0.04904</td>\n", | |
" <td>0.05373</td>\n", | |
" <td>0.01587</td>\n", | |
" <td>0.03003</td>\n", | |
" <td>0.006193</td>\n", | |
" <td>25.38</td>\n", | |
" <td>17.33</td>\n", | |
" <td>184.60</td>\n", | |
" <td>2019.0</td>\n", | |
" <td>0.1622</td>\n", | |
" <td>0.6656</td>\n", | |
" <td>0.7119</td>\n", | |
" <td>0.2654</td>\n", | |
" <td>0.4601</td>\n", | |
" <td>0.11890</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>20.57</td>\n", | |
" <td>17.77</td>\n", | |
" <td>132.90</td>\n", | |
" <td>1326.0</td>\n", | |
" <td>0.08474</td>\n", | |
" <td>0.07864</td>\n", | |
" <td>0.08690</td>\n", | |
" <td>0.07017</td>\n", | |
" <td>0.1812</td>\n", | |
" <td>0.05667</td>\n", | |
" <td>0.5435</td>\n", | |
" <td>0.7339</td>\n", | |
" <td>3.398</td>\n", | |
" <td>74.08</td>\n", | |
" <td>0.005225</td>\n", | |
" <td>0.01308</td>\n", | |
" <td>0.01860</td>\n", | |
" <td>0.01340</td>\n", | |
" <td>0.01389</td>\n", | |
" <td>0.003532</td>\n", | |
" <td>24.99</td>\n", | |
" <td>23.41</td>\n", | |
" <td>158.80</td>\n", | |
" <td>1956.0</td>\n", | |
" <td>0.1238</td>\n", | |
" <td>0.1866</td>\n", | |
" <td>0.2416</td>\n", | |
" <td>0.1860</td>\n", | |
" <td>0.2750</td>\n", | |
" <td>0.08902</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>19.69</td>\n", | |
" <td>21.25</td>\n", | |
" <td>130.00</td>\n", | |
" <td>1203.0</td>\n", | |
" <td>0.10960</td>\n", | |
" <td>0.15990</td>\n", | |
" <td>0.19740</td>\n", | |
" <td>0.12790</td>\n", | |
" <td>0.2069</td>\n", | |
" <td>0.05999</td>\n", | |
" <td>0.7456</td>\n", | |
" <td>0.7869</td>\n", | |
" <td>4.585</td>\n", | |
" <td>94.03</td>\n", | |
" <td>0.006150</td>\n", | |
" <td>0.04006</td>\n", | |
" <td>0.03832</td>\n", | |
" <td>0.02058</td>\n", | |
" <td>0.02250</td>\n", | |
" <td>0.004571</td>\n", | |
" <td>23.57</td>\n", | |
" <td>25.53</td>\n", | |
" <td>152.50</td>\n", | |
" <td>1709.0</td>\n", | |
" <td>0.1444</td>\n", | |
" <td>0.4245</td>\n", | |
" <td>0.4504</td>\n", | |
" <td>0.2430</td>\n", | |
" <td>0.3613</td>\n", | |
" <td>0.08758</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>11.42</td>\n", | |
" <td>20.38</td>\n", | |
" <td>77.58</td>\n", | |
" <td>386.1</td>\n", | |
" <td>0.14250</td>\n", | |
" <td>0.28390</td>\n", | |
" <td>0.24140</td>\n", | |
" <td>0.10520</td>\n", | |
" <td>0.2597</td>\n", | |
" <td>0.09744</td>\n", | |
" <td>0.4956</td>\n", | |
" <td>1.1560</td>\n", | |
" <td>3.445</td>\n", | |
" <td>27.23</td>\n", | |
" <td>0.009110</td>\n", | |
" <td>0.07458</td>\n", | |
" <td>0.05661</td>\n", | |
" <td>0.01867</td>\n", | |
" <td>0.05963</td>\n", | |
" <td>0.009208</td>\n", | |
" <td>14.91</td>\n", | |
" <td>26.50</td>\n", | |
" <td>98.87</td>\n", | |
" <td>567.7</td>\n", | |
" <td>0.2098</td>\n", | |
" <td>0.8663</td>\n", | |
" <td>0.6869</td>\n", | |
" <td>0.2575</td>\n", | |
" <td>0.6638</td>\n", | |
" <td>0.17300</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>20.29</td>\n", | |
" <td>14.34</td>\n", | |
" <td>135.10</td>\n", | |
" <td>1297.0</td>\n", | |
" <td>0.10030</td>\n", | |
" <td>0.13280</td>\n", | |
" <td>0.19800</td>\n", | |
" <td>0.10430</td>\n", | |
" <td>0.1809</td>\n", | |
" <td>0.05883</td>\n", | |
" <td>0.7572</td>\n", | |
" <td>0.7813</td>\n", | |
" <td>5.438</td>\n", | |
" <td>94.44</td>\n", | |
" <td>0.011490</td>\n", | |
" <td>0.02461</td>\n", | |
" <td>0.05688</td>\n", | |
" <td>0.01885</td>\n", | |
" <td>0.01756</td>\n", | |
" <td>0.005115</td>\n", | |
" <td>22.54</td>\n", | |
" <td>16.67</td>\n", | |
" <td>152.20</td>\n", | |
" <td>1575.0</td>\n", | |
" <td>0.1374</td>\n", | |
" <td>0.2050</td>\n", | |
" <td>0.4000</td>\n", | |
" <td>0.1625</td>\n", | |
" <td>0.2364</td>\n", | |
" <td>0.07678</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>12.45</td>\n", | |
" <td>15.70</td>\n", | |
" <td>82.57</td>\n", | |
" <td>477.1</td>\n", | |
" <td>0.12780</td>\n", | |
" <td>0.17000</td>\n", | |
" <td>0.15780</td>\n", | |
" <td>0.08089</td>\n", | |
" <td>0.2087</td>\n", | |
" <td>0.07613</td>\n", | |
" <td>0.3345</td>\n", | |
" <td>0.8902</td>\n", | |
" <td>2.217</td>\n", | |
" <td>27.19</td>\n", | |
" <td>0.007510</td>\n", | |
" <td>0.03345</td>\n", | |
" <td>0.03672</td>\n", | |
" <td>0.01137</td>\n", | |
" <td>0.02165</td>\n", | |
" <td>0.005082</td>\n", | |
" <td>15.47</td>\n", | |
" <td>23.75</td>\n", | |
" <td>103.40</td>\n", | |
" <td>741.6</td>\n", | |
" <td>0.1791</td>\n", | |
" <td>0.5249</td>\n", | |
" <td>0.5355</td>\n", | |
" <td>0.1741</td>\n", | |
" <td>0.3985</td>\n", | |
" <td>0.12440</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>18.25</td>\n", | |
" <td>19.98</td>\n", | |
" <td>119.60</td>\n", | |
" <td>1040.0</td>\n", | |
" <td>0.09463</td>\n", | |
" <td>0.10900</td>\n", | |
" <td>0.11270</td>\n", | |
" <td>0.07400</td>\n", | |
" <td>0.1794</td>\n", | |
" <td>0.05742</td>\n", | |
" <td>0.4467</td>\n", | |
" <td>0.7732</td>\n", | |
" <td>3.180</td>\n", | |
" <td>53.91</td>\n", | |
" <td>0.004314</td>\n", | |
" <td>0.01382</td>\n", | |
" <td>0.02254</td>\n", | |
" <td>0.01039</td>\n", | |
" <td>0.01369</td>\n", | |
" <td>0.002179</td>\n", | |
" <td>22.88</td>\n", | |
" <td>27.66</td>\n", | |
" <td>153.20</td>\n", | |
" <td>1606.0</td>\n", | |
" <td>0.1442</td>\n", | |
" <td>0.2576</td>\n", | |
" <td>0.3784</td>\n", | |
" <td>0.1932</td>\n", | |
" <td>0.3063</td>\n", | |
" <td>0.08368</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>13.71</td>\n", | |
" <td>20.83</td>\n", | |
" <td>90.20</td>\n", | |
" <td>577.9</td>\n", | |
" <td>0.11890</td>\n", | |
" <td>0.16450</td>\n", | |
" <td>0.09366</td>\n", | |
" <td>0.05985</td>\n", | |
" <td>0.2196</td>\n", | |
" <td>0.07451</td>\n", | |
" <td>0.5835</td>\n", | |
" <td>1.3770</td>\n", | |
" <td>3.856</td>\n", | |
" <td>50.96</td>\n", | |
" <td>0.008805</td>\n", | |
" <td>0.03029</td>\n", | |
" <td>0.02488</td>\n", | |
" <td>0.01448</td>\n", | |
" <td>0.01486</td>\n", | |
" <td>0.005412</td>\n", | |
" <td>17.06</td>\n", | |
" <td>28.14</td>\n", | |
" <td>110.60</td>\n", | |
" <td>897.0</td>\n", | |
" <td>0.1654</td>\n", | |
" <td>0.3682</td>\n", | |
" <td>0.2678</td>\n", | |
" <td>0.1556</td>\n", | |
" <td>0.3196</td>\n", | |
" <td>0.11510</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>13.00</td>\n", | |
" <td>21.82</td>\n", | |
" <td>87.50</td>\n", | |
" <td>519.8</td>\n", | |
" <td>0.12730</td>\n", | |
" <td>0.19320</td>\n", | |
" <td>0.18590</td>\n", | |
" <td>0.09353</td>\n", | |
" <td>0.2350</td>\n", | |
" <td>0.07389</td>\n", | |
" <td>0.3063</td>\n", | |
" <td>1.0020</td>\n", | |
" <td>2.406</td>\n", | |
" <td>24.32</td>\n", | |
" <td>0.005731</td>\n", | |
" <td>0.03502</td>\n", | |
" <td>0.03553</td>\n", | |
" <td>0.01226</td>\n", | |
" <td>0.02143</td>\n", | |
" <td>0.003749</td>\n", | |
" <td>15.49</td>\n", | |
" <td>30.73</td>\n", | |
" <td>106.20</td>\n", | |
" <td>739.3</td>\n", | |
" <td>0.1703</td>\n", | |
" <td>0.5401</td>\n", | |
" <td>0.5390</td>\n", | |
" <td>0.2060</td>\n", | |
" <td>0.4378</td>\n", | |
" <td>0.10720</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>9</th>\n", | |
" <td>12.46</td>\n", | |
" <td>24.04</td>\n", | |
" <td>83.97</td>\n", | |
" <td>475.9</td>\n", | |
" <td>0.11860</td>\n", | |
" <td>0.23960</td>\n", | |
" <td>0.22730</td>\n", | |
" <td>0.08543</td>\n", | |
" <td>0.2030</td>\n", | |
" <td>0.08243</td>\n", | |
" <td>0.2976</td>\n", | |
" <td>1.5990</td>\n", | |
" <td>2.039</td>\n", | |
" <td>23.94</td>\n", | |
" <td>0.007149</td>\n", | |
" <td>0.07217</td>\n", | |
" <td>0.07743</td>\n", | |
" <td>0.01432</td>\n", | |
" <td>0.01789</td>\n", | |
" <td>0.010080</td>\n", | |
" <td>15.09</td>\n", | |
" <td>40.68</td>\n", | |
" <td>97.65</td>\n", | |
" <td>711.4</td>\n", | |
" <td>0.1853</td>\n", | |
" <td>1.0580</td>\n", | |
" <td>1.1050</td>\n", | |
" <td>0.2210</td>\n", | |
" <td>0.4366</td>\n", | |
" <td>0.20750</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" mean radius mean texture ... worst fractal dimension target\n", | |
"0 17.99 10.38 ... 0.11890 0\n", | |
"1 20.57 17.77 ... 0.08902 0\n", | |
"2 19.69 21.25 ... 0.08758 0\n", | |
"3 11.42 20.38 ... 0.17300 0\n", | |
"4 20.29 14.34 ... 0.07678 0\n", | |
"5 12.45 15.70 ... 0.12440 0\n", | |
"6 18.25 19.98 ... 0.08368 0\n", | |
"7 13.71 20.83 ... 0.11510 0\n", | |
"8 13.00 21.82 ... 0.10720 0\n", | |
"9 12.46 24.04 ... 0.20750 0\n", | |
"\n", | |
"[10 rows x 31 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 42 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "UP27uBUBlj6J" | |
}, | |
"source": [ | |
"### 1. Padronizar (z-score) os dados (feature scaling)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "VI3B2NY_lRrq", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 258 | |
}, | |
"outputId": "f7494ab2-ed9c-48d8-d9af-6d0d07fa804d" | |
}, | |
"source": [ | |
"from scipy.stats import zscore\n", | |
"df.apply(zscore)\n", | |
"df.head()" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>mean radius</th>\n", | |
" <th>mean texture</th>\n", | |
" <th>mean perimeter</th>\n", | |
" <th>mean area</th>\n", | |
" <th>mean smoothness</th>\n", | |
" <th>mean compactness</th>\n", | |
" <th>mean concavity</th>\n", | |
" <th>mean concave points</th>\n", | |
" <th>mean symmetry</th>\n", | |
" <th>mean fractal dimension</th>\n", | |
" <th>radius error</th>\n", | |
" <th>texture error</th>\n", | |
" <th>perimeter error</th>\n", | |
" <th>area error</th>\n", | |
" <th>smoothness error</th>\n", | |
" <th>compactness error</th>\n", | |
" <th>concavity error</th>\n", | |
" <th>concave points error</th>\n", | |
" <th>symmetry error</th>\n", | |
" <th>fractal dimension error</th>\n", | |
" <th>worst radius</th>\n", | |
" <th>worst texture</th>\n", | |
" <th>worst perimeter</th>\n", | |
" <th>worst area</th>\n", | |
" <th>worst smoothness</th>\n", | |
" <th>worst compactness</th>\n", | |
" <th>worst concavity</th>\n", | |
" <th>worst concave points</th>\n", | |
" <th>worst symmetry</th>\n", | |
" <th>worst fractal dimension</th>\n", | |
" <th>target</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>1.097064</td>\n", | |
" <td>-2.073335</td>\n", | |
" <td>1.269934</td>\n", | |
" <td>0.984375</td>\n", | |
" <td>1.568466</td>\n", | |
" <td>3.283515</td>\n", | |
" <td>2.652874</td>\n", | |
" <td>2.532475</td>\n", | |
" <td>2.217515</td>\n", | |
" <td>2.255747</td>\n", | |
" <td>2.489734</td>\n", | |
" <td>-0.565265</td>\n", | |
" <td>2.833031</td>\n", | |
" <td>2.487578</td>\n", | |
" <td>-0.214002</td>\n", | |
" <td>1.316862</td>\n", | |
" <td>0.724026</td>\n", | |
" <td>0.660820</td>\n", | |
" <td>1.148757</td>\n", | |
" <td>0.907083</td>\n", | |
" <td>1.886690</td>\n", | |
" <td>-1.359293</td>\n", | |
" <td>2.303601</td>\n", | |
" <td>2.001237</td>\n", | |
" <td>1.307686</td>\n", | |
" <td>2.616665</td>\n", | |
" <td>2.109526</td>\n", | |
" <td>2.296076</td>\n", | |
" <td>2.750622</td>\n", | |
" <td>1.937015</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>1.829821</td>\n", | |
" <td>-0.353632</td>\n", | |
" <td>1.685955</td>\n", | |
" <td>1.908708</td>\n", | |
" <td>-0.826962</td>\n", | |
" <td>-0.487072</td>\n", | |
" <td>-0.023846</td>\n", | |
" <td>0.548144</td>\n", | |
" <td>0.001392</td>\n", | |
" <td>-0.868652</td>\n", | |
" <td>0.499255</td>\n", | |
" <td>-0.876244</td>\n", | |
" <td>0.263327</td>\n", | |
" <td>0.742402</td>\n", | |
" <td>-0.605351</td>\n", | |
" <td>-0.692926</td>\n", | |
" <td>-0.440780</td>\n", | |
" <td>0.260162</td>\n", | |
" <td>-0.805450</td>\n", | |
" <td>-0.099444</td>\n", | |
" <td>1.805927</td>\n", | |
" <td>-0.369203</td>\n", | |
" <td>1.535126</td>\n", | |
" <td>1.890489</td>\n", | |
" <td>-0.375612</td>\n", | |
" <td>-0.430444</td>\n", | |
" <td>-0.146749</td>\n", | |
" <td>1.087084</td>\n", | |
" <td>-0.243890</td>\n", | |
" <td>0.281190</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>1.579888</td>\n", | |
" <td>0.456187</td>\n", | |
" <td>1.566503</td>\n", | |
" <td>1.558884</td>\n", | |
" <td>0.942210</td>\n", | |
" <td>1.052926</td>\n", | |
" <td>1.363478</td>\n", | |
" <td>2.037231</td>\n", | |
" <td>0.939685</td>\n", | |
" <td>-0.398008</td>\n", | |
" <td>1.228676</td>\n", | |
" <td>-0.780083</td>\n", | |
" <td>0.850928</td>\n", | |
" <td>1.181336</td>\n", | |
" <td>-0.297005</td>\n", | |
" <td>0.814974</td>\n", | |
" <td>0.213076</td>\n", | |
" <td>1.424827</td>\n", | |
" <td>0.237036</td>\n", | |
" <td>0.293559</td>\n", | |
" <td>1.511870</td>\n", | |
" <td>-0.023974</td>\n", | |
" <td>1.347475</td>\n", | |
" <td>1.456285</td>\n", | |
" <td>0.527407</td>\n", | |
" <td>1.082932</td>\n", | |
" <td>0.854974</td>\n", | |
" <td>1.955000</td>\n", | |
" <td>1.152255</td>\n", | |
" <td>0.201391</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>-0.768909</td>\n", | |
" <td>0.253732</td>\n", | |
" <td>-0.592687</td>\n", | |
" <td>-0.764464</td>\n", | |
" <td>3.283553</td>\n", | |
" <td>3.402909</td>\n", | |
" <td>1.915897</td>\n", | |
" <td>1.451707</td>\n", | |
" <td>2.867383</td>\n", | |
" <td>4.910919</td>\n", | |
" <td>0.326373</td>\n", | |
" <td>-0.110409</td>\n", | |
" <td>0.286593</td>\n", | |
" <td>-0.288378</td>\n", | |
" <td>0.689702</td>\n", | |
" <td>2.744280</td>\n", | |
" <td>0.819518</td>\n", | |
" <td>1.115007</td>\n", | |
" <td>4.732680</td>\n", | |
" <td>2.047511</td>\n", | |
" <td>-0.281464</td>\n", | |
" <td>0.133984</td>\n", | |
" <td>-0.249939</td>\n", | |
" <td>-0.550021</td>\n", | |
" <td>3.394275</td>\n", | |
" <td>3.893397</td>\n", | |
" <td>1.989588</td>\n", | |
" <td>2.175786</td>\n", | |
" <td>6.046041</td>\n", | |
" <td>4.935010</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>1.750297</td>\n", | |
" <td>-1.151816</td>\n", | |
" <td>1.776573</td>\n", | |
" <td>1.826229</td>\n", | |
" <td>0.280372</td>\n", | |
" <td>0.539340</td>\n", | |
" <td>1.371011</td>\n", | |
" <td>1.428493</td>\n", | |
" <td>-0.009560</td>\n", | |
" <td>-0.562450</td>\n", | |
" <td>1.270543</td>\n", | |
" <td>-0.790244</td>\n", | |
" <td>1.273189</td>\n", | |
" <td>1.190357</td>\n", | |
" <td>1.483067</td>\n", | |
" <td>-0.048520</td>\n", | |
" <td>0.828471</td>\n", | |
" <td>1.144205</td>\n", | |
" <td>-0.361092</td>\n", | |
" <td>0.499328</td>\n", | |
" <td>1.298575</td>\n", | |
" <td>-1.466770</td>\n", | |
" <td>1.338539</td>\n", | |
" <td>1.220724</td>\n", | |
" <td>0.220556</td>\n", | |
" <td>-0.313395</td>\n", | |
" <td>0.613179</td>\n", | |
" <td>0.729259</td>\n", | |
" <td>-0.868353</td>\n", | |
" <td>-0.397100</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" mean radius mean texture ... worst fractal dimension target\n", | |
"0 1.097064 -2.073335 ... 1.937015 0\n", | |
"1 1.829821 -0.353632 ... 0.281190 0\n", | |
"2 1.579888 0.456187 ... 0.201391 0\n", | |
"3 -0.768909 0.253732 ... 4.935010 0\n", | |
"4 1.750297 -1.151816 ... -0.397100 0\n", | |
"\n", | |
"[5 rows x 31 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 50 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "QeQtpbgwmJMP" | |
}, | |
"source": [ | |
"### 2. Construir o modelo" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "dmMOZovnlpsx", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "b08760a2-37d8-409b-9f72-11f1a022a6d3" | |
}, | |
"source": [ | |
"from sklearn.tree import DecisionTreeClassifier\n", | |
"model = DecisionTreeClassifier()\n", | |
"model.fit(X_train, y_train)" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',\n", | |
" max_depth=None, max_features=None, max_leaf_nodes=None,\n", | |
" min_impurity_decrease=0.0, min_impurity_split=None,\n", | |
" min_samples_leaf=1, min_samples_split=2,\n", | |
" min_weight_fraction_leaf=0.0, presort='deprecated',\n", | |
" random_state=None, splitter='best')" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 53 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "pm4vzMgqmRQc" | |
}, | |
"source": [ | |
"###3. Realizar a predição e a avaliação do modelo" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "mNA_NWmUTjVU" | |
}, | |
"source": [ | |
"from sklearn.linear_model import LogisticRegression\n", | |
"\n", | |
"model= LogisticRegression()\n", | |
"model.fit(X_train,y_train)\n", | |
"\n", | |
"y_pred=model.predict(X_test)" | |
], | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Ik02F1cRTsZO", | |
"outputId": "9b56bab3-d7b7-488a-817e-4ac22baeadea" | |
}, | |
"source": [ | |
"from sklearn.metrics import classification_report, confusion_matrix\n", | |
"print(confusion_matrix(y_test, y_pred))\n", | |
"print(classification_report(y_test, y_pred))" | |
], | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[[25 2]\n", | |
" [ 2 57]]\n", | |
" precision recall f1-score support\n", | |
"\n", | |
" 0 0.93 0.93 0.93 27\n", | |
" 1 0.97 0.97 0.97 59\n", | |
"\n", | |
" accuracy 0.95 86\n", | |
" macro avg 0.95 0.95 0.95 86\n", | |
"weighted avg 0.95 0.95 0.95 86\n", | |
"\n" | |
], | |
"name": "stdout" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment