Skip to content

Instantly share code, notes, and snippets.

@pikonha
Last active March 31, 2021 00:54
Show Gist options
  • Save pikonha/afbf2270f1beb6f80b2b9add4b1a7267 to your computer and use it in GitHub Desktop.
Save pikonha/afbf2270f1beb6f80b2b9add4b1a7267 to your computer and use it in GitHub Desktop.
cancer-analysis.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Detecção de Câncer de Mama.ipynb",
"provenance": [],
"collapsed_sections": [],
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/picolloo/afbf2270f1beb6f80b2b9add4b1a7267/detec-o-de-c-ncar-de-mama.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "7yEsLd3bk0h7"
},
"source": [
"Disciplina de Data Mining - INE5644\n",
"## Exercício 1 - Detecção de Câncer de Mama (Classificação)\n",
"\n"
]
},
{
"cell_type": "code",
"metadata": {
"id": "iHra4Wdrkz4U"
},
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "markdown",
"metadata": {
"id": "xq9DCd3rlcnL"
},
"source": [
"### 0. Carregamento do conjunto de dados"
]
},
{
"cell_type": "code",
"metadata": {
"id": "5xA7ndkBlIEe",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "b3aa6001-9e99-4289-a39b-bc1dcba09329"
},
"source": [
"# Load the breast-cancer dataset that ships with scikit-learn.\n",
"from sklearn.datasets import load_breast_cancer\n",
"\n",
"cancer = load_breast_cancer()\n",
"\n",
"# scikit-learn datasets are dict-like containers; list the fields available.\n",
"cancer.keys()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names', 'filename'])"
]
},
"metadata": {
"tags": []
},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "S_CP2rPyCvUM",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 258
},
"outputId": "a110845c-e7f5-4614-eb6a-1366853c797a"
},
"source": [
"# Assemble a single DataFrame: one column per feature (named after the\n",
"# dataset's feature names) plus a 'target' column holding the class labels.\n",
"df = pd.DataFrame(cancer['data'], columns=cancer['feature_names'])\n",
"df = df.assign(target=cancer['target'])\n",
"df.head()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>radius error</th>\n",
" <th>texture error</th>\n",
" <th>perimeter error</th>\n",
" <th>area error</th>\n",
" <th>smoothness error</th>\n",
" <th>compactness error</th>\n",
" <th>concavity error</th>\n",
" <th>concave points error</th>\n",
" <th>symmetry error</th>\n",
" <th>fractal dimension error</th>\n",
" <th>worst radius</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.097064</td>\n",
" <td>-2.073335</td>\n",
" <td>1.269934</td>\n",
" <td>0.984375</td>\n",
" <td>1.568466</td>\n",
" <td>3.283515</td>\n",
" <td>2.652874</td>\n",
" <td>2.532475</td>\n",
" <td>2.217515</td>\n",
" <td>2.255747</td>\n",
" <td>2.489734</td>\n",
" <td>-0.565265</td>\n",
" <td>2.833031</td>\n",
" <td>2.487578</td>\n",
" <td>-0.214002</td>\n",
" <td>1.316862</td>\n",
" <td>0.724026</td>\n",
" <td>0.660820</td>\n",
" <td>1.148757</td>\n",
" <td>0.907083</td>\n",
" <td>1.886690</td>\n",
" <td>-1.359293</td>\n",
" <td>2.303601</td>\n",
" <td>2.001237</td>\n",
" <td>1.307686</td>\n",
" <td>2.616665</td>\n",
" <td>2.109526</td>\n",
" <td>2.296076</td>\n",
" <td>2.750622</td>\n",
" <td>1.937015</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.829821</td>\n",
" <td>-0.353632</td>\n",
" <td>1.685955</td>\n",
" <td>1.908708</td>\n",
" <td>-0.826962</td>\n",
" <td>-0.487072</td>\n",
" <td>-0.023846</td>\n",
" <td>0.548144</td>\n",
" <td>0.001392</td>\n",
" <td>-0.868652</td>\n",
" <td>0.499255</td>\n",
" <td>-0.876244</td>\n",
" <td>0.263327</td>\n",
" <td>0.742402</td>\n",
" <td>-0.605351</td>\n",
" <td>-0.692926</td>\n",
" <td>-0.440780</td>\n",
" <td>0.260162</td>\n",
" <td>-0.805450</td>\n",
" <td>-0.099444</td>\n",
" <td>1.805927</td>\n",
" <td>-0.369203</td>\n",
" <td>1.535126</td>\n",
" <td>1.890489</td>\n",
" <td>-0.375612</td>\n",
" <td>-0.430444</td>\n",
" <td>-0.146749</td>\n",
" <td>1.087084</td>\n",
" <td>-0.243890</td>\n",
" <td>0.281190</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.579888</td>\n",
" <td>0.456187</td>\n",
" <td>1.566503</td>\n",
" <td>1.558884</td>\n",
" <td>0.942210</td>\n",
" <td>1.052926</td>\n",
" <td>1.363478</td>\n",
" <td>2.037231</td>\n",
" <td>0.939685</td>\n",
" <td>-0.398008</td>\n",
" <td>1.228676</td>\n",
" <td>-0.780083</td>\n",
" <td>0.850928</td>\n",
" <td>1.181336</td>\n",
" <td>-0.297005</td>\n",
" <td>0.814974</td>\n",
" <td>0.213076</td>\n",
" <td>1.424827</td>\n",
" <td>0.237036</td>\n",
" <td>0.293559</td>\n",
" <td>1.511870</td>\n",
" <td>-0.023974</td>\n",
" <td>1.347475</td>\n",
" <td>1.456285</td>\n",
" <td>0.527407</td>\n",
" <td>1.082932</td>\n",
" <td>0.854974</td>\n",
" <td>1.955000</td>\n",
" <td>1.152255</td>\n",
" <td>0.201391</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-0.768909</td>\n",
" <td>0.253732</td>\n",
" <td>-0.592687</td>\n",
" <td>-0.764464</td>\n",
" <td>3.283553</td>\n",
" <td>3.402909</td>\n",
" <td>1.915897</td>\n",
" <td>1.451707</td>\n",
" <td>2.867383</td>\n",
" <td>4.910919</td>\n",
" <td>0.326373</td>\n",
" <td>-0.110409</td>\n",
" <td>0.286593</td>\n",
" <td>-0.288378</td>\n",
" <td>0.689702</td>\n",
" <td>2.744280</td>\n",
" <td>0.819518</td>\n",
" <td>1.115007</td>\n",
" <td>4.732680</td>\n",
" <td>2.047511</td>\n",
" <td>-0.281464</td>\n",
" <td>0.133984</td>\n",
" <td>-0.249939</td>\n",
" <td>-0.550021</td>\n",
" <td>3.394275</td>\n",
" <td>3.893397</td>\n",
" <td>1.989588</td>\n",
" <td>2.175786</td>\n",
" <td>6.046041</td>\n",
" <td>4.935010</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.750297</td>\n",
" <td>-1.151816</td>\n",
" <td>1.776573</td>\n",
" <td>1.826229</td>\n",
" <td>0.280372</td>\n",
" <td>0.539340</td>\n",
" <td>1.371011</td>\n",
" <td>1.428493</td>\n",
" <td>-0.009560</td>\n",
" <td>-0.562450</td>\n",
" <td>1.270543</td>\n",
" <td>-0.790244</td>\n",
" <td>1.273189</td>\n",
" <td>1.190357</td>\n",
" <td>1.483067</td>\n",
" <td>-0.048520</td>\n",
" <td>0.828471</td>\n",
" <td>1.144205</td>\n",
" <td>-0.361092</td>\n",
" <td>0.499328</td>\n",
" <td>1.298575</td>\n",
" <td>-1.466770</td>\n",
" <td>1.338539</td>\n",
" <td>1.220724</td>\n",
" <td>0.220556</td>\n",
" <td>-0.313395</td>\n",
" <td>0.613179</td>\n",
" <td>0.729259</td>\n",
" <td>-0.868353</td>\n",
" <td>-0.397100</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean radius mean texture ... worst fractal dimension target\n",
"0 1.097064 -2.073335 ... 1.937015 0\n",
"1 1.829821 -0.353632 ... 0.281190 0\n",
"2 1.579888 0.456187 ... 0.201391 0\n",
"3 -0.768909 0.253732 ... 4.935010 0\n",
"4 1.750297 -1.151816 ... -0.397100 0\n",
"\n",
"[5 rows x 31 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 47
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "g1GBP7FACovY"
},
"source": [
"# Feature matrix X (every measurement column) and label vector y.\n",
"X = df[cancer['feature_names']]\n",
"y = df['target']\n",
"\n",
"# Hold out 15% of the rows for testing. A fixed random_state makes the split\n",
"# (and every metric computed from it) reproducible across kernel restarts;\n",
"# stratify=y keeps the class proportions the same in both partitions.\n",
"from sklearn.model_selection import train_test_split\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.15, random_state=42, stratify=y\n",
")"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 413
},
"id": "hcCyQrN0Ofbs",
"outputId": "47456b0d-73fb-4b5b-d616-281583edfa64"
},
"source": [
"# Preview the first 10 rows of the assembled DataFrame.\n",
"df.head(10)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>radius error</th>\n",
" <th>texture error</th>\n",
" <th>perimeter error</th>\n",
" <th>area error</th>\n",
" <th>smoothness error</th>\n",
" <th>compactness error</th>\n",
" <th>concavity error</th>\n",
" <th>concave points error</th>\n",
" <th>symmetry error</th>\n",
" <th>fractal dimension error</th>\n",
" <th>worst radius</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.30010</td>\n",
" <td>0.14710</td>\n",
" <td>0.2419</td>\n",
" <td>0.07871</td>\n",
" <td>1.0950</td>\n",
" <td>0.9053</td>\n",
" <td>8.589</td>\n",
" <td>153.40</td>\n",
" <td>0.006399</td>\n",
" <td>0.04904</td>\n",
" <td>0.05373</td>\n",
" <td>0.01587</td>\n",
" <td>0.03003</td>\n",
" <td>0.006193</td>\n",
" <td>25.38</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.08690</td>\n",
" <td>0.07017</td>\n",
" <td>0.1812</td>\n",
" <td>0.05667</td>\n",
" <td>0.5435</td>\n",
" <td>0.7339</td>\n",
" <td>3.398</td>\n",
" <td>74.08</td>\n",
" <td>0.005225</td>\n",
" <td>0.01308</td>\n",
" <td>0.01860</td>\n",
" <td>0.01340</td>\n",
" <td>0.01389</td>\n",
" <td>0.003532</td>\n",
" <td>24.99</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.19740</td>\n",
" <td>0.12790</td>\n",
" <td>0.2069</td>\n",
" <td>0.05999</td>\n",
" <td>0.7456</td>\n",
" <td>0.7869</td>\n",
" <td>4.585</td>\n",
" <td>94.03</td>\n",
" <td>0.006150</td>\n",
" <td>0.04006</td>\n",
" <td>0.03832</td>\n",
" <td>0.02058</td>\n",
" <td>0.02250</td>\n",
" <td>0.004571</td>\n",
" <td>23.57</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.24140</td>\n",
" <td>0.10520</td>\n",
" <td>0.2597</td>\n",
" <td>0.09744</td>\n",
" <td>0.4956</td>\n",
" <td>1.1560</td>\n",
" <td>3.445</td>\n",
" <td>27.23</td>\n",
" <td>0.009110</td>\n",
" <td>0.07458</td>\n",
" <td>0.05661</td>\n",
" <td>0.01867</td>\n",
" <td>0.05963</td>\n",
" <td>0.009208</td>\n",
" <td>14.91</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.19800</td>\n",
" <td>0.10430</td>\n",
" <td>0.1809</td>\n",
" <td>0.05883</td>\n",
" <td>0.7572</td>\n",
" <td>0.7813</td>\n",
" <td>5.438</td>\n",
" <td>94.44</td>\n",
" <td>0.011490</td>\n",
" <td>0.02461</td>\n",
" <td>0.05688</td>\n",
" <td>0.01885</td>\n",
" <td>0.01756</td>\n",
" <td>0.005115</td>\n",
" <td>22.54</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>12.45</td>\n",
" <td>15.70</td>\n",
" <td>82.57</td>\n",
" <td>477.1</td>\n",
" <td>0.12780</td>\n",
" <td>0.17000</td>\n",
" <td>0.15780</td>\n",
" <td>0.08089</td>\n",
" <td>0.2087</td>\n",
" <td>0.07613</td>\n",
" <td>0.3345</td>\n",
" <td>0.8902</td>\n",
" <td>2.217</td>\n",
" <td>27.19</td>\n",
" <td>0.007510</td>\n",
" <td>0.03345</td>\n",
" <td>0.03672</td>\n",
" <td>0.01137</td>\n",
" <td>0.02165</td>\n",
" <td>0.005082</td>\n",
" <td>15.47</td>\n",
" <td>23.75</td>\n",
" <td>103.40</td>\n",
" <td>741.6</td>\n",
" <td>0.1791</td>\n",
" <td>0.5249</td>\n",
" <td>0.5355</td>\n",
" <td>0.1741</td>\n",
" <td>0.3985</td>\n",
" <td>0.12440</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>18.25</td>\n",
" <td>19.98</td>\n",
" <td>119.60</td>\n",
" <td>1040.0</td>\n",
" <td>0.09463</td>\n",
" <td>0.10900</td>\n",
" <td>0.11270</td>\n",
" <td>0.07400</td>\n",
" <td>0.1794</td>\n",
" <td>0.05742</td>\n",
" <td>0.4467</td>\n",
" <td>0.7732</td>\n",
" <td>3.180</td>\n",
" <td>53.91</td>\n",
" <td>0.004314</td>\n",
" <td>0.01382</td>\n",
" <td>0.02254</td>\n",
" <td>0.01039</td>\n",
" <td>0.01369</td>\n",
" <td>0.002179</td>\n",
" <td>22.88</td>\n",
" <td>27.66</td>\n",
" <td>153.20</td>\n",
" <td>1606.0</td>\n",
" <td>0.1442</td>\n",
" <td>0.2576</td>\n",
" <td>0.3784</td>\n",
" <td>0.1932</td>\n",
" <td>0.3063</td>\n",
" <td>0.08368</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>13.71</td>\n",
" <td>20.83</td>\n",
" <td>90.20</td>\n",
" <td>577.9</td>\n",
" <td>0.11890</td>\n",
" <td>0.16450</td>\n",
" <td>0.09366</td>\n",
" <td>0.05985</td>\n",
" <td>0.2196</td>\n",
" <td>0.07451</td>\n",
" <td>0.5835</td>\n",
" <td>1.3770</td>\n",
" <td>3.856</td>\n",
" <td>50.96</td>\n",
" <td>0.008805</td>\n",
" <td>0.03029</td>\n",
" <td>0.02488</td>\n",
" <td>0.01448</td>\n",
" <td>0.01486</td>\n",
" <td>0.005412</td>\n",
" <td>17.06</td>\n",
" <td>28.14</td>\n",
" <td>110.60</td>\n",
" <td>897.0</td>\n",
" <td>0.1654</td>\n",
" <td>0.3682</td>\n",
" <td>0.2678</td>\n",
" <td>0.1556</td>\n",
" <td>0.3196</td>\n",
" <td>0.11510</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>13.00</td>\n",
" <td>21.82</td>\n",
" <td>87.50</td>\n",
" <td>519.8</td>\n",
" <td>0.12730</td>\n",
" <td>0.19320</td>\n",
" <td>0.18590</td>\n",
" <td>0.09353</td>\n",
" <td>0.2350</td>\n",
" <td>0.07389</td>\n",
" <td>0.3063</td>\n",
" <td>1.0020</td>\n",
" <td>2.406</td>\n",
" <td>24.32</td>\n",
" <td>0.005731</td>\n",
" <td>0.03502</td>\n",
" <td>0.03553</td>\n",
" <td>0.01226</td>\n",
" <td>0.02143</td>\n",
" <td>0.003749</td>\n",
" <td>15.49</td>\n",
" <td>30.73</td>\n",
" <td>106.20</td>\n",
" <td>739.3</td>\n",
" <td>0.1703</td>\n",
" <td>0.5401</td>\n",
" <td>0.5390</td>\n",
" <td>0.2060</td>\n",
" <td>0.4378</td>\n",
" <td>0.10720</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>12.46</td>\n",
" <td>24.04</td>\n",
" <td>83.97</td>\n",
" <td>475.9</td>\n",
" <td>0.11860</td>\n",
" <td>0.23960</td>\n",
" <td>0.22730</td>\n",
" <td>0.08543</td>\n",
" <td>0.2030</td>\n",
" <td>0.08243</td>\n",
" <td>0.2976</td>\n",
" <td>1.5990</td>\n",
" <td>2.039</td>\n",
" <td>23.94</td>\n",
" <td>0.007149</td>\n",
" <td>0.07217</td>\n",
" <td>0.07743</td>\n",
" <td>0.01432</td>\n",
" <td>0.01789</td>\n",
" <td>0.010080</td>\n",
" <td>15.09</td>\n",
" <td>40.68</td>\n",
" <td>97.65</td>\n",
" <td>711.4</td>\n",
" <td>0.1853</td>\n",
" <td>1.0580</td>\n",
" <td>1.1050</td>\n",
" <td>0.2210</td>\n",
" <td>0.4366</td>\n",
" <td>0.20750</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean radius mean texture ... worst fractal dimension target\n",
"0 17.99 10.38 ... 0.11890 0\n",
"1 20.57 17.77 ... 0.08902 0\n",
"2 19.69 21.25 ... 0.08758 0\n",
"3 11.42 20.38 ... 0.17300 0\n",
"4 20.29 14.34 ... 0.07678 0\n",
"5 12.45 15.70 ... 0.12440 0\n",
"6 18.25 19.98 ... 0.08368 0\n",
"7 13.71 20.83 ... 0.11510 0\n",
"8 13.00 21.82 ... 0.10720 0\n",
"9 12.46 24.04 ... 0.20750 0\n",
"\n",
"[10 rows x 31 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 42
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "UP27uBUBlj6J"
},
"source": [
"### 1. Padronizar (z-score) os dados (feature scaling)"
]
},
{
"cell_type": "code",
"metadata": {
"id": "VI3B2NY_lRrq",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 258
},
"outputId": "f7494ab2-ed9c-48d8-d9af-6d0d07fa804d"
},
"source": [
"# Standardise every feature to zero mean / unit variance (z-score).\n",
"# The transformed values have to be assigned back: a bare df.apply(...) returns\n",
"# a new frame and leaves the data used by the model unchanged.\n",
"# The scaler is fitted on the training rows only and then reused on the test\n",
"# rows, so no test-set statistics leak into training; the labels are untouched.\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"scaler = StandardScaler().fit(X_train)\n",
"X_train = pd.DataFrame(scaler.transform(X_train), columns=X_train.columns, index=X_train.index)\n",
"X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)\n",
"X_train.head()"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>radius error</th>\n",
" <th>texture error</th>\n",
" <th>perimeter error</th>\n",
" <th>area error</th>\n",
" <th>smoothness error</th>\n",
" <th>compactness error</th>\n",
" <th>concavity error</th>\n",
" <th>concave points error</th>\n",
" <th>symmetry error</th>\n",
" <th>fractal dimension error</th>\n",
" <th>worst radius</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" <th>target</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.097064</td>\n",
" <td>-2.073335</td>\n",
" <td>1.269934</td>\n",
" <td>0.984375</td>\n",
" <td>1.568466</td>\n",
" <td>3.283515</td>\n",
" <td>2.652874</td>\n",
" <td>2.532475</td>\n",
" <td>2.217515</td>\n",
" <td>2.255747</td>\n",
" <td>2.489734</td>\n",
" <td>-0.565265</td>\n",
" <td>2.833031</td>\n",
" <td>2.487578</td>\n",
" <td>-0.214002</td>\n",
" <td>1.316862</td>\n",
" <td>0.724026</td>\n",
" <td>0.660820</td>\n",
" <td>1.148757</td>\n",
" <td>0.907083</td>\n",
" <td>1.886690</td>\n",
" <td>-1.359293</td>\n",
" <td>2.303601</td>\n",
" <td>2.001237</td>\n",
" <td>1.307686</td>\n",
" <td>2.616665</td>\n",
" <td>2.109526</td>\n",
" <td>2.296076</td>\n",
" <td>2.750622</td>\n",
" <td>1.937015</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.829821</td>\n",
" <td>-0.353632</td>\n",
" <td>1.685955</td>\n",
" <td>1.908708</td>\n",
" <td>-0.826962</td>\n",
" <td>-0.487072</td>\n",
" <td>-0.023846</td>\n",
" <td>0.548144</td>\n",
" <td>0.001392</td>\n",
" <td>-0.868652</td>\n",
" <td>0.499255</td>\n",
" <td>-0.876244</td>\n",
" <td>0.263327</td>\n",
" <td>0.742402</td>\n",
" <td>-0.605351</td>\n",
" <td>-0.692926</td>\n",
" <td>-0.440780</td>\n",
" <td>0.260162</td>\n",
" <td>-0.805450</td>\n",
" <td>-0.099444</td>\n",
" <td>1.805927</td>\n",
" <td>-0.369203</td>\n",
" <td>1.535126</td>\n",
" <td>1.890489</td>\n",
" <td>-0.375612</td>\n",
" <td>-0.430444</td>\n",
" <td>-0.146749</td>\n",
" <td>1.087084</td>\n",
" <td>-0.243890</td>\n",
" <td>0.281190</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1.579888</td>\n",
" <td>0.456187</td>\n",
" <td>1.566503</td>\n",
" <td>1.558884</td>\n",
" <td>0.942210</td>\n",
" <td>1.052926</td>\n",
" <td>1.363478</td>\n",
" <td>2.037231</td>\n",
" <td>0.939685</td>\n",
" <td>-0.398008</td>\n",
" <td>1.228676</td>\n",
" <td>-0.780083</td>\n",
" <td>0.850928</td>\n",
" <td>1.181336</td>\n",
" <td>-0.297005</td>\n",
" <td>0.814974</td>\n",
" <td>0.213076</td>\n",
" <td>1.424827</td>\n",
" <td>0.237036</td>\n",
" <td>0.293559</td>\n",
" <td>1.511870</td>\n",
" <td>-0.023974</td>\n",
" <td>1.347475</td>\n",
" <td>1.456285</td>\n",
" <td>0.527407</td>\n",
" <td>1.082932</td>\n",
" <td>0.854974</td>\n",
" <td>1.955000</td>\n",
" <td>1.152255</td>\n",
" <td>0.201391</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>-0.768909</td>\n",
" <td>0.253732</td>\n",
" <td>-0.592687</td>\n",
" <td>-0.764464</td>\n",
" <td>3.283553</td>\n",
" <td>3.402909</td>\n",
" <td>1.915897</td>\n",
" <td>1.451707</td>\n",
" <td>2.867383</td>\n",
" <td>4.910919</td>\n",
" <td>0.326373</td>\n",
" <td>-0.110409</td>\n",
" <td>0.286593</td>\n",
" <td>-0.288378</td>\n",
" <td>0.689702</td>\n",
" <td>2.744280</td>\n",
" <td>0.819518</td>\n",
" <td>1.115007</td>\n",
" <td>4.732680</td>\n",
" <td>2.047511</td>\n",
" <td>-0.281464</td>\n",
" <td>0.133984</td>\n",
" <td>-0.249939</td>\n",
" <td>-0.550021</td>\n",
" <td>3.394275</td>\n",
" <td>3.893397</td>\n",
" <td>1.989588</td>\n",
" <td>2.175786</td>\n",
" <td>6.046041</td>\n",
" <td>4.935010</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.750297</td>\n",
" <td>-1.151816</td>\n",
" <td>1.776573</td>\n",
" <td>1.826229</td>\n",
" <td>0.280372</td>\n",
" <td>0.539340</td>\n",
" <td>1.371011</td>\n",
" <td>1.428493</td>\n",
" <td>-0.009560</td>\n",
" <td>-0.562450</td>\n",
" <td>1.270543</td>\n",
" <td>-0.790244</td>\n",
" <td>1.273189</td>\n",
" <td>1.190357</td>\n",
" <td>1.483067</td>\n",
" <td>-0.048520</td>\n",
" <td>0.828471</td>\n",
" <td>1.144205</td>\n",
" <td>-0.361092</td>\n",
" <td>0.499328</td>\n",
" <td>1.298575</td>\n",
" <td>-1.466770</td>\n",
" <td>1.338539</td>\n",
" <td>1.220724</td>\n",
" <td>0.220556</td>\n",
" <td>-0.313395</td>\n",
" <td>0.613179</td>\n",
" <td>0.729259</td>\n",
" <td>-0.868353</td>\n",
" <td>-0.397100</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" mean radius mean texture ... worst fractal dimension target\n",
"0 1.097064 -2.073335 ... 1.937015 0\n",
"1 1.829821 -0.353632 ... 0.281190 0\n",
"2 1.579888 0.456187 ... 0.201391 0\n",
"3 -0.768909 0.253732 ... 4.935010 0\n",
"4 1.750297 -1.151816 ... -0.397100 0\n",
"\n",
"[5 rows x 31 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 50
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "QeQtpbgwmJMP"
},
"source": [
"### 2. Construir o modelo"
]
},
{
"cell_type": "code",
"metadata": {
"id": "dmMOZovnlpsx",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "b08760a2-37d8-409b-9f72-11f1a022a6d3"
},
"source": [
"# Fit a decision tree on the training split. random_state pins the estimator's\n",
"# internal randomness so repeated runs build the same tree.\n",
"from sklearn.tree import DecisionTreeClassifier\n",
"\n",
"model = DecisionTreeClassifier(random_state=42)\n",
"model.fit(X_train, y_train)"
],
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"DecisionTreeClassifier(ccp_alpha=0.0, class_weight=None, criterion='gini',\n",
" max_depth=None, max_features=None, max_leaf_nodes=None,\n",
" min_impurity_decrease=0.0, min_impurity_split=None,\n",
" min_samples_leaf=1, min_samples_split=2,\n",
" min_weight_fraction_leaf=0.0, presort='deprecated',\n",
" random_state=None, splitter='best')"
]
},
"metadata": {
"tags": []
},
"execution_count": 53
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "pm4vzMgqmRQc"
},
"source": [
"### 3. Realizar a predição e a avaliação do modelo"
]
},
{
"cell_type": "code",
"metadata": {
"id": "mNA_NWmUTjVU"
},
"source": [
"from sklearn.linear_model import LogisticRegression\n",
"\n",
"# Note: this rebinds `model`, replacing the decision tree fitted earlier.\n",
"# The default limit of 100 solver iterations can be too low when the features\n",
"# are not standardised (ConvergenceWarning), so allow more iterations.\n",
"model = LogisticRegression(max_iter=10000)\n",
"model.fit(X_train, y_train)\n",
"\n",
"# Predicted class labels for the held-out rows, used by the evaluation cell.\n",
"y_pred = model.predict(X_test)"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Ik02F1cRTsZO",
"outputId": "9b56bab3-d7b7-488a-817e-4ac22baeadea"
},
"source": [
"from sklearn.metrics import classification_report, confusion_matrix\n",
"\n",
"# Confusion matrix first, then the per-class precision / recall / F1 table.\n",
"print(confusion_matrix(y_test, y_pred), classification_report(y_test, y_pred), sep='\\n')"
],
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"text": [
"[[25 2]\n",
" [ 2 57]]\n",
" precision recall f1-score support\n",
"\n",
" 0 0.93 0.93 0.93 27\n",
" 1 0.97 0.97 0.97 59\n",
"\n",
" accuracy 0.95 86\n",
" macro avg 0.95 0.95 0.95 86\n",
"weighted avg 0.95 0.95 0.95 86\n",
"\n"
],
"name": "stdout"
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment