Created
September 27, 2023 13:40
-
-
Save RochaGabriell/6bfe6428a3290f9a11d2848673020790 to your computer and use it in GitHub Desktop.
RandomForestClassificationwithScikitLearn.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"provenance": [], | |
"authorship_tag": "ABX9TyPxXcBgtRZaHY95veoJoSel", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
}, | |
"language_info": { | |
"name": "python" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/RochaGabriell/6bfe6428a3290f9a11d2848673020790/randomforestclassificationwithscikitlearn.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"id": "DSXkj7qn136e" | |
}, | |
"outputs": [], | |
"source": [ | |
"# Data Processing\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"\n", | |
"# Modelling\n", | |
"from sklearn.model_selection import train_test_split #Dividir o data set em conjuntos\n", | |
"from sklearn import metrics\n", | |
"from sklearn.metrics import accuracy_score, confusion_matrix #Para avaliar o modelo\n", | |
"from sklearn.ensemble import RandomForestClassifier\n", | |
"from sklearn.feature_extraction.text import TfidfVectorizer #Para vetorizar o texto representar numericamente o texto\n", | |
"\n", | |
"# Tree Visualisation\n", | |
"from sklearn.tree import export_graphviz\n", | |
"from IPython.display import Image\n", | |
"import graphviz" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"spam = pd.read_csv('spam.csv')" | |
], | |
"metadata": { | |
"id": "KXfFDz2h677L" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"spam.head()" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "-4i2_u497IPb", | |
"outputId": "94c6354d-e6cd-48b7-80c0-1b41e91245e8" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
" Category Message\n", | |
"0 ham Go until jurong point, crazy.. Available only ...\n", | |
"1 ham Ok lar... Joking wif u oni...\n", | |
"2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n", | |
"3 ham U dun say so early hor... U c already then say...\n", | |
"4 ham Nah I don't think he goes to usf, he lives aro..." | |
], | |
"text/html": [ | |
"\n", | |
" <div id=\"df-d7e26e11-aceb-46fa-a6a4-222e0cd79ff7\" class=\"colab-df-container\">\n", | |
" <div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>Category</th>\n", | |
" <th>Message</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>ham</td>\n", | |
" <td>Go until jurong point, crazy.. Available only ...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>ham</td>\n", | |
" <td>Ok lar... Joking wif u oni...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>spam</td>\n", | |
" <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>ham</td>\n", | |
" <td>U dun say so early hor... U c already then say...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>ham</td>\n", | |
" <td>Nah I don't think he goes to usf, he lives aro...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>\n", | |
" <div class=\"colab-df-buttons\">\n", | |
"\n", | |
" <div class=\"colab-df-container\">\n", | |
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d7e26e11-aceb-46fa-a6a4-222e0cd79ff7')\"\n", | |
" title=\"Convert this dataframe to an interactive table.\"\n", | |
" style=\"display:none;\">\n", | |
"\n", | |
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n", | |
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n", | |
" </svg>\n", | |
" </button>\n", | |
"\n", | |
" <style>\n", | |
" .colab-df-container {\n", | |
" display:flex;\n", | |
" gap: 12px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert {\n", | |
" background-color: #E8F0FE;\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: #1967D2;\n", | |
" height: 32px;\n", | |
" padding: 0 0 0 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-convert:hover {\n", | |
" background-color: #E2EBFA;\n", | |
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: #174EA6;\n", | |
" }\n", | |
"\n", | |
" .colab-df-buttons div {\n", | |
" margin-bottom: 4px;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert {\n", | |
" background-color: #3B4455;\n", | |
" fill: #D2E3FC;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-convert:hover {\n", | |
" background-color: #434B5C;\n", | |
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n", | |
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n", | |
" fill: #FFFFFF;\n", | |
" }\n", | |
" </style>\n", | |
"\n", | |
" <script>\n", | |
" const buttonEl =\n", | |
" document.querySelector('#df-d7e26e11-aceb-46fa-a6a4-222e0cd79ff7 button.colab-df-convert');\n", | |
" buttonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
"\n", | |
" async function convertToInteractive(key) {\n", | |
" const element = document.querySelector('#df-d7e26e11-aceb-46fa-a6a4-222e0cd79ff7');\n", | |
" const dataTable =\n", | |
" await google.colab.kernel.invokeFunction('convertToInteractive',\n", | |
" [key], {});\n", | |
" if (!dataTable) return;\n", | |
"\n", | |
" const docLinkHtml = 'Like what you see? Visit the ' +\n", | |
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n", | |
" + ' to learn more about interactive tables.';\n", | |
" element.innerHTML = '';\n", | |
" dataTable['output_type'] = 'display_data';\n", | |
" await google.colab.output.renderOutput(dataTable, element);\n", | |
" const docLink = document.createElement('div');\n", | |
" docLink.innerHTML = docLinkHtml;\n", | |
" element.appendChild(docLink);\n", | |
" }\n", | |
" </script>\n", | |
" </div>\n", | |
"\n", | |
"\n", | |
"<div id=\"df-3a0a8d9d-b9eb-42cc-9c7e-7b02120478e6\">\n", | |
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-3a0a8d9d-b9eb-42cc-9c7e-7b02120478e6')\"\n", | |
" title=\"Suggest charts.\"\n", | |
" style=\"display:none;\">\n", | |
"\n", | |
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n", | |
" width=\"24px\">\n", | |
" <g>\n", | |
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n", | |
" </g>\n", | |
"</svg>\n", | |
" </button>\n", | |
"\n", | |
"<style>\n", | |
" .colab-df-quickchart {\n", | |
" --bg-color: #E8F0FE;\n", | |
" --fill-color: #1967D2;\n", | |
" --hover-bg-color: #E2EBFA;\n", | |
" --hover-fill-color: #174EA6;\n", | |
" --disabled-fill-color: #AAA;\n", | |
" --disabled-bg-color: #DDD;\n", | |
" }\n", | |
"\n", | |
" [theme=dark] .colab-df-quickchart {\n", | |
" --bg-color: #3B4455;\n", | |
" --fill-color: #D2E3FC;\n", | |
" --hover-bg-color: #434B5C;\n", | |
" --hover-fill-color: #FFFFFF;\n", | |
" --disabled-bg-color: #3B4455;\n", | |
" --disabled-fill-color: #666;\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart {\n", | |
" background-color: var(--bg-color);\n", | |
" border: none;\n", | |
" border-radius: 50%;\n", | |
" cursor: pointer;\n", | |
" display: none;\n", | |
" fill: var(--fill-color);\n", | |
" height: 32px;\n", | |
" padding: 0;\n", | |
" width: 32px;\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart:hover {\n", | |
" background-color: var(--hover-bg-color);\n", | |
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n", | |
" fill: var(--button-hover-fill-color);\n", | |
" }\n", | |
"\n", | |
" .colab-df-quickchart-complete:disabled,\n", | |
" .colab-df-quickchart-complete:disabled:hover {\n", | |
" background-color: var(--disabled-bg-color);\n", | |
" fill: var(--disabled-fill-color);\n", | |
" box-shadow: none;\n", | |
" }\n", | |
"\n", | |
" .colab-df-spinner {\n", | |
" border: 2px solid var(--fill-color);\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" animation:\n", | |
" spin 1s steps(1) infinite;\n", | |
" }\n", | |
"\n", | |
" @keyframes spin {\n", | |
" 0% {\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" border-left-color: var(--fill-color);\n", | |
" }\n", | |
" 20% {\n", | |
" border-color: transparent;\n", | |
" border-left-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" }\n", | |
" 30% {\n", | |
" border-color: transparent;\n", | |
" border-left-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" border-right-color: var(--fill-color);\n", | |
" }\n", | |
" 40% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" border-top-color: var(--fill-color);\n", | |
" }\n", | |
" 60% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" }\n", | |
" 80% {\n", | |
" border-color: transparent;\n", | |
" border-right-color: var(--fill-color);\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" }\n", | |
" 90% {\n", | |
" border-color: transparent;\n", | |
" border-bottom-color: var(--fill-color);\n", | |
" }\n", | |
" }\n", | |
"</style>\n", | |
"\n", | |
" <script>\n", | |
" async function quickchart(key) {\n", | |
" const quickchartButtonEl =\n", | |
" document.querySelector('#' + key + ' button');\n", | |
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n", | |
" quickchartButtonEl.classList.add('colab-df-spinner');\n", | |
" try {\n", | |
" const charts = await google.colab.kernel.invokeFunction(\n", | |
" 'suggestCharts', [key], {});\n", | |
" } catch (error) {\n", | |
" console.error('Error during call to suggestCharts:', error);\n", | |
" }\n", | |
" quickchartButtonEl.classList.remove('colab-df-spinner');\n", | |
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n", | |
" }\n", | |
" (() => {\n", | |
" let quickchartButtonEl =\n", | |
" document.querySelector('#df-3a0a8d9d-b9eb-42cc-9c7e-7b02120478e6 button');\n", | |
" quickchartButtonEl.style.display =\n", | |
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n", | |
" })();\n", | |
" </script>\n", | |
"</div>\n", | |
" </div>\n", | |
" </div>\n" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 22 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"spam.shape" | |
], | |
"metadata": { | |
"id": "_6RfpYT_7MeQ", | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"outputId": "19c54f91-f5c3-439d-f62d-8a13a54dd049" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(5572, 2)" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 4 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"spam['Category'].value_counts()" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "U8tfOr0w9rgH", | |
"outputId": "5089e384-2e33-49d3-84f0-4137ff9b4fb1" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"ham 4825\n", | |
"spam 747\n", | |
"Name: Category, dtype: int64" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 5 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"messages = spam['Message']\n", | |
"category = spam['Category']" | |
], | |
"metadata": { | |
"id": "aw0LlApM-vnr" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"#Vetorização do texto\n", | |
"vectorizer = TfidfVectorizer()\n", | |
"text_vectorizer = vectorizer.fit_transform(messages)" | |
], | |
"metadata": { | |
"id": "3DVYx-M7_Fi1" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"text_vectorizer.shape" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "sXajo0J1_4PO", | |
"outputId": "085dd50b-5315-46b7-d4bc-d05f2c248ddd" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(5572, 8709)" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 8 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"X_train, X_test, y_train, y_test = train_test_split(text_vectorizer, category, test_size=0.3)" | |
], | |
"metadata": { | |
"id": "KM7tS78YAG1u" | |
}, | |
"execution_count": null, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"X_train.shape" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "joFWMLgsBp_3", | |
"outputId": "b7570a22-3645-41fe-a1ae-afe104a8cee6" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(3900, 8709)" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 10 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"X_test.shape" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "PmKqq6PuBurK", | |
"outputId": "0378a76d-215a-43ca-b32e-25235be458b4" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"(1672, 8709)" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 11 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Montagem e avaliação do modelo\n", | |
"\n", | |
"Primeiro, criamos uma instância do modelo Random Forest, com quantidade de árvores. Em seguida, ajustamos isso aos nossos dados de treinamento. Passamos os recursos e a variável de destino, para que o modelo possa aprender.\n" | |
], | |
"metadata": { | |
"id": "jvH-pCzpDj9Q" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"rf = RandomForestClassifier(n_estimators=500)\n", | |
"rf.fit(X_train, y_train)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "Rt0buSlOBwke", | |
"outputId": "2a63b74c-4268-469c-e973-01f73ff08e58" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"RandomForestClassifier(n_estimators=500)" | |
], | |
"text/html": [ | |
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(n_estimators=500)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(n_estimators=500)</pre></div></div></div></div></div>" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 12 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Temos um modelo treinado de Floresta Aleatória, mas precisamos descobrir se está fazendo previsões precisas." | |
], | |
"metadata": { | |
"id": "uNCLZmGGDyW7" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"y_pred = rf.predict(X_test)\n", | |
"y_pred" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "GmsK1OLHDIv_", | |
"outputId": "9746543b-c679-4be1-e3e1-324c833c9095" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array(['spam', 'ham', 'ham', ..., 'ham', 'ham', 'ham'], dtype=object)" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 13 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Verificamos as previsões em relação aos valores reais no conjunto de testes e contamos quantas o modelo acertou." | |
], | |
"metadata": { | |
"id": "gseibLs3EC9O" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"accuracy = accuracy_score(y_test, y_pred)\n", | |
"print(\"Accuracy:\", accuracy)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "gPHoQ4EGDK1O", | |
"outputId": "7f2c551d-d00d-4c05-99fb-54646bb0115f" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Accuracy: 0.9820574162679426\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"Determina a matriz de confusão\n", | |
"\n", | |
"![confusion_matrix](https://diegonogare.net/wp-content/uploads/2020/04/matrizConfusao-600x381.png)" | |
], | |
"metadata": { | |
"id": "L_5sSlgQE8_E" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"confusion_matrix(y_test, y_pred)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "alvI7NvvEdjg", | |
"outputId": "f9bda44b-f1b3-40ae-b71e-b0c314e63e0a" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array([[1445, 0],\n", | |
" [ 30, 197]])" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 15 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"print(metrics.classification_report(y_test, y_pred))" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "8T-bm0JNGKT8", | |
"outputId": "94819da0-9884-412b-ddcb-85e4b407eb51" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
" precision recall f1-score support\n", | |
"\n", | |
" ham 0.98 1.00 0.99 1445\n", | |
" spam 1.00 0.87 0.93 227\n", | |
"\n", | |
" accuracy 0.98 1672\n", | |
" macro avg 0.99 0.93 0.96 1672\n", | |
"weighted avg 0.98 0.98 0.98 1672\n", | |
"\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# Testando o modelo Florest criado" | |
], | |
"metadata": { | |
"id": "mqpBNU4VHn3R" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"test_text = ['WINNER!! As a valued network customer you have been selected to receivea £900 prize reward! To claim call 09061701461. Claim code KL341. Valid 12 hours only.']\n", | |
"vectorizer_test_text = vectorizer.transform(test_text)\n", | |
"rf.predict(vectorizer_test_text)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "0cn-U_CBGY6k", | |
"outputId": "a3e44341-65d2-4752-efa7-72aea15f2f05" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"array(['spam'], dtype=object)" | |
] | |
}, | |
"metadata": {}, | |
"execution_count": 17 | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"## Visualizando os resultados\n", | |
"Podemos usar o seguinte código para visualizar nossas três primeiras árvores." | |
], | |
"metadata": { | |
"id": "advjkn6xEI1Q" | |
} | |
}, | |
{ | |
"cell_type": "code", | |
"source": [ | |
"for i in range(3):\n", | |
" tree = rf.estimators_[i]\n", | |
" dot_data = export_graphviz(tree,\n", | |
" filled=True,\n", | |
" max_depth=2,\n", | |
" impurity=False,\n", | |
" proportion=True)\n", | |
" graph = graphviz.Source(dot_data)\n", | |
" display(graph)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "t59Ou8ZxDhCy", | |
"outputId": "28dfc832-c58d-49e8-ca70-f6f49a41ce7a" | |
}, | |
"execution_count": null, | |
"outputs": [ | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 2.43.0 (0)\n -->\n<!-- Title: Tree Pages: 1 -->\n<svg width=\"594pt\" height=\"311pt\"\n viewBox=\"0.00 0.00 594.00 311.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 307)\">\n<title>Tree</title>\n<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-307 590,-307 590,4 -4,4\"/>\n<!-- 0 -->\n<g id=\"node1\" class=\"node\">\n<title>0</title>\n<polygon fill=\"#e99456\" stroke=\"black\" points=\"375,-303 228,-303 228,-250 375,-250 375,-303\"/>\n<text text-anchor=\"middle\" x=\"301.5\" y=\"-287.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[7048] <= 0.148</text>\n<text text-anchor=\"middle\" x=\"301.5\" y=\"-272.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 100.0%</text>\n<text text-anchor=\"middle\" x=\"301.5\" y=\"-257.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.871, 0.129]</text>\n</g>\n<!-- 1 -->\n<g id=\"node2\" class=\"node\">\n<title>1</title>\n<polygon fill=\"#e99355\" stroke=\"black\" points=\"293,-214 146,-214 146,-161 293,-161 293,-214\"/>\n<text text-anchor=\"middle\" x=\"219.5\" y=\"-198.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[8592] <= 0.065</text>\n<text text-anchor=\"middle\" x=\"219.5\" y=\"-183.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 99.1%</text>\n<text text-anchor=\"middle\" x=\"219.5\" y=\"-168.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.876, 0.124]</text>\n</g>\n<!-- 0->1 -->\n<g id=\"edge1\" class=\"edge\">\n<title>0->1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M277.34,-249.87C268.99,-241.01 259.51,-230.95 250.68,-221.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"253.18,-219.13 243.77,-214.25 248.08,-223.93 253.18,-219.13\"/>\n<text text-anchor=\"middle\" x=\"243.03\" y=\"-235.54\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">True</text>\n</g>\n<!-- 432 -->\n<g id=\"node7\" class=\"node\">\n<title>432</title>\n<polygon fill=\"#b2d9f5\" stroke=\"black\" points=\"458,-214 311,-214 311,-161 458,-161 458,-214\"/>\n<text text-anchor=\"middle\" x=\"384.5\" y=\"-198.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[840] <= 0.114</text>\n<text text-anchor=\"middle\" x=\"384.5\" y=\"-183.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.9%</text>\n<text text-anchor=\"middle\" x=\"384.5\" y=\"-168.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.378, 0.622]</text>\n</g>\n<!-- 0->432 -->\n<g id=\"edge6\" class=\"edge\">\n<title>0->432</title>\n<path fill=\"none\" stroke=\"black\" d=\"M325.95,-249.87C334.41,-241.01 344,-230.95 352.94,-221.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"355.56,-223.9 359.93,-214.25 350.5,-219.07 355.56,-223.9\"/>\n<text text-anchor=\"middle\" x=\"360.52\" y=\"-235.54\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">False</text>\n</g>\n<!-- 2 -->\n<g id=\"node3\" class=\"node\">\n<title>2</title>\n<polygon fill=\"#e89152\" stroke=\"black\" points=\"147,-125 0,-125 0,-72 147,-72 147,-125\"/>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[8674] <= 0.096</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 97.7%</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.889, 0.111]</text>\n</g>\n<!-- 1->2 -->\n<g id=\"edge2\" class=\"edge\">\n<title>1->2</title>\n<path fill=\"none\" stroke=\"black\" d=\"M176.48,-160.87C160.37,-151.26 141.89,-140.25 125.08,-130.24\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"126.84,-127.21 116.46,-125.1 123.26,-133.23 126.84,-127.21\"/>\n</g>\n<!-- 431 -->\n<g id=\"node6\" class=\"node\">\n<title>431</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"282,-117.5 165,-117.5 165,-79.5 282,-79.5 282,-117.5\"/>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-102.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 1.4%</text>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-87.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.0, 1.0]</text>\n</g>\n<!-- 1->431 -->\n<g id=\"edge5\" class=\"edge\">\n<title>1->431</title>\n<path fill=\"none\" stroke=\"black\" d=\"M220.68,-160.87C221.16,-150.42 221.72,-138.3 222.21,-127.61\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"225.71,-127.66 222.67,-117.51 218.72,-127.34 225.71,-127.66\"/>\n</g>\n<!-- 3 -->\n<g id=\"node4\" class=\"node\">\n<title>3</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"64.5,-36 10.5,-36 10.5,0 64.5,0 64.5,-36\"/>\n<text text-anchor=\"middle\" x=\"37.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2->3 -->\n<g id=\"edge3\" class=\"edge\">\n<title>2->3</title>\n<path fill=\"none\" stroke=\"black\" d=\"M61.71,-71.8C57.84,-63.35 53.53,-53.95 49.63,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"52.8,-43.96 45.45,-36.33 46.43,-46.88 52.8,-43.96\"/>\n</g>\n<!-- 422 -->\n<g id=\"node5\" class=\"node\">\n<title>422</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"136.5,-36 82.5,-36 82.5,0 136.5,0 136.5,-36\"/>\n<text text-anchor=\"middle\" x=\"109.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2->422 -->\n<g id=\"edge4\" class=\"edge\">\n<title>2->422</title>\n<path fill=\"none\" stroke=\"black\" d=\"M85.29,-71.8C89.16,-63.35 93.47,-53.95 97.37,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"100.57,-46.88 101.55,-36.33 94.2,-43.96 100.57,-46.88\"/>\n</g>\n<!-- 433 -->\n<g id=\"node8\" class=\"node\">\n<title>433</title>\n<polygon fill=\"#cbe5f8\" stroke=\"black\" points=\"451,-125 304,-125 304,-72 451,-72 451,-125\"/>\n<text text-anchor=\"middle\" x=\"377.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[7311] <= 0.068</text>\n<text text-anchor=\"middle\" x=\"377.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.8%</text>\n<text text-anchor=\"middle\" x=\"377.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.424, 0.576]</text>\n</g>\n<!-- 432->433 -->\n<g id=\"edge7\" class=\"edge\">\n<title>432->433</title>\n<path fill=\"none\" stroke=\"black\" d=\"M382.44,-160.87C381.8,-152.89 381.07,-143.93 380.39,-135.38\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"383.86,-134.94 379.57,-125.25 376.89,-135.5 383.86,-134.94\"/>\n</g>\n<!-- 458 -->\n<g id=\"node11\" class=\"node\">\n<title>458</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"586,-117.5 469,-117.5 469,-79.5 586,-79.5 586,-117.5\"/>\n<text text-anchor=\"middle\" x=\"527.5\" y=\"-102.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.1%</text>\n<text text-anchor=\"middle\" x=\"527.5\" y=\"-87.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.0, 1.0]</text>\n</g>\n<!-- 432->458 -->\n<g id=\"edge10\" class=\"edge\">\n<title>432->458</title>\n<path fill=\"none\" stroke=\"black\" d=\"M426.63,-160.87C446.49,-148.78 470.01,-134.47 489.34,-122.71\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"491.18,-125.69 497.9,-117.51 487.54,-119.71 491.18,-125.69\"/>\n</g>\n<!-- 434 -->\n<g id=\"node9\" class=\"node\">\n<title>434</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"368.5,-36 314.5,-36 314.5,0 368.5,0 368.5,-36\"/>\n<text text-anchor=\"middle\" x=\"341.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 433->434 -->\n<g id=\"edge8\" class=\"edge\">\n<title>433->434</title>\n<path fill=\"none\" stroke=\"black\" d=\"M365.71,-71.8C361.84,-63.35 357.53,-53.95 353.63,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"356.8,-43.96 349.45,-36.33 350.43,-46.88 356.8,-43.96\"/>\n</g>\n<!-- 457 -->\n<g id=\"node10\" class=\"node\">\n<title>457</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"440.5,-36 386.5,-36 386.5,0 440.5,0 440.5,-36\"/>\n<text text-anchor=\"middle\" x=\"413.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 433->457 -->\n<g id=\"edge9\" class=\"edge\">\n<title>433->457</title>\n<path fill=\"none\" stroke=\"black\" d=\"M389.29,-71.8C393.16,-63.35 397.47,-53.95 401.37,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"404.57,-46.88 405.55,-36.33 398.2,-43.96 404.57,-46.88\"/>\n</g>\n</g>\n</svg>\n", | |
"text/plain": [ | |
"<graphviz.sources.Source at 0x7d90b47afee0>" | |
] | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 2.43.0 (0)\n -->\n<!-- Title: Tree Pages: 1 -->\n<svg width=\"365pt\" height=\"311pt\"\n viewBox=\"0.00 0.00 365.00 311.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 307)\">\n<title>Tree</title>\n<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-307 361,-307 361,4 -4,4\"/>\n<!-- 0 -->\n<g id=\"node1\" class=\"node\">\n<title>0</title>\n<polygon fill=\"#e99356\" stroke=\"black\" points=\"297,-303 150,-303 150,-250 297,-250 297,-303\"/>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-287.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[2358] <= 0.117</text>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-272.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 100.0%</text>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-257.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.872, 0.128]</text>\n</g>\n<!-- 1 -->\n<g id=\"node2\" class=\"node\">\n<title>1</title>\n<polygon fill=\"#e99356\" stroke=\"black\" points=\"222,-214 75,-214 75,-161 222,-161 222,-214\"/>\n<text text-anchor=\"middle\" x=\"148.5\" y=\"-198.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[332] <= 0.117</text>\n<text text-anchor=\"middle\" x=\"148.5\" y=\"-183.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 99.8%</text>\n<text text-anchor=\"middle\" x=\"148.5\" y=\"-168.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.874, 0.126]</text>\n</g>\n<!-- 0->1 -->\n<g id=\"edge1\" class=\"edge\">\n<title>0->1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M201.4,-249.87C193.84,-241.1 185.27,-231.15 177.26,-221.86\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"179.88,-219.54 170.7,-214.25 174.58,-224.11 179.88,-219.54\"/>\n<text text-anchor=\"middle\" x=\"168.85\" y=\"-235.48\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">True</text>\n</g>\n<!-- 692 -->\n<g id=\"node7\" class=\"node\">\n<title>692</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"357,-206.5 240,-206.5 240,-168.5 357,-168.5 357,-206.5\"/>\n<text text-anchor=\"middle\" x=\"298.5\" y=\"-191.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.2%</text>\n<text text-anchor=\"middle\" x=\"298.5\" y=\"-176.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.0, 1.0]</text>\n</g>\n<!-- 0->692 -->\n<g id=\"edge6\" class=\"edge\">\n<title>0->692</title>\n<path fill=\"none\" stroke=\"black\" d=\"M245.6,-249.87C255.26,-238.66 266.58,-225.53 276.26,-214.29\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"279.1,-216.37 282.98,-206.51 273.8,-211.8 279.1,-216.37\"/>\n<text text-anchor=\"middle\" x=\"284.82\" y=\"-227.74\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">False</text>\n</g>\n<!-- 2 -->\n<g id=\"node3\" class=\"node\">\n<title>2</title>\n<polygon fill=\"#e99355\" stroke=\"black\" points=\"147,-125 0,-125 0,-72 147,-72 147,-125\"/>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[8101] <= 0.103</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 99.4%</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.877, 0.123]</text>\n</g>\n<!-- 1->2 -->\n<g id=\"edge2\" class=\"edge\">\n<title>1->2</title>\n<path fill=\"none\" stroke=\"black\" d=\"M126.4,-160.87C118.84,-152.1 110.27,-142.15 102.26,-132.86\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"104.88,-130.54 95.7,-125.25 99.58,-135.11 104.88,-130.54\"/>\n</g>\n<!-- 691 -->\n<g id=\"node6\" class=\"node\">\n<title>691</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"282,-117.5 165,-117.5 165,-79.5 282,-79.5 282,-117.5\"/>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-102.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.4%</text>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-87.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.0, 1.0]</text>\n</g>\n<!-- 1->691 -->\n<g id=\"edge5\" class=\"edge\">\n<title>1->691</title>\n<path fill=\"none\" stroke=\"black\" d=\"M170.6,-160.87C180.26,-149.66 191.58,-136.53 201.26,-125.29\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"204.1,-127.37 207.98,-117.51 198.8,-122.8 204.1,-127.37\"/>\n</g>\n<!-- 3 -->\n<g id=\"node4\" class=\"node\">\n<title>3</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"64.5,-36 10.5,-36 10.5,0 64.5,0 64.5,-36\"/>\n<text text-anchor=\"middle\" x=\"37.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2->3 -->\n<g id=\"edge3\" class=\"edge\">\n<title>2->3</title>\n<path fill=\"none\" stroke=\"black\" d=\"M61.71,-71.8C57.84,-63.35 53.53,-53.95 49.63,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"52.8,-43.96 45.45,-36.33 46.43,-46.88 52.8,-43.96\"/>\n</g>\n<!-- 614 -->\n<g id=\"node5\" class=\"node\">\n<title>614</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"136.5,-36 82.5,-36 82.5,0 136.5,0 136.5,-36\"/>\n<text text-anchor=\"middle\" x=\"109.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2->614 -->\n<g id=\"edge4\" class=\"edge\">\n<title>2->614</title>\n<path fill=\"none\" stroke=\"black\" d=\"M85.29,-71.8C89.16,-63.35 93.47,-53.95 97.37,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"100.57,-46.88 101.55,-36.33 94.2,-43.96 100.57,-46.88\"/>\n</g>\n</g>\n</svg>\n", | |
"text/plain": [ | |
"<graphviz.sources.Source at 0x7d9075c508b0>" | |
] | |
}, | |
"metadata": {} | |
}, | |
{ | |
"output_type": "display_data", | |
"data": { | |
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 2.43.0 (0)\n -->\n<!-- Title: Tree Pages: 1 -->\n<svg width=\"620pt\" height=\"311pt\"\n viewBox=\"0.00 0.00 620.00 311.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 307)\">\n<title>Tree</title>\n<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-307 616,-307 616,4 -4,4\"/>\n<!-- 0 -->\n<g id=\"node1\" class=\"node\">\n<title>0</title>\n<polygon fill=\"#e99558\" stroke=\"black\" points=\"394,-303 247,-303 247,-250 394,-250 394,-303\"/>\n<text text-anchor=\"middle\" x=\"320.5\" y=\"-287.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[5238] <= 0.096</text>\n<text text-anchor=\"middle\" x=\"320.5\" y=\"-272.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 100.0%</text>\n<text text-anchor=\"middle\" x=\"320.5\" y=\"-257.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.865, 0.135]</text>\n</g>\n<!-- 1 -->\n<g id=\"node2\" class=\"node\">\n<title>1</title>\n<polygon fill=\"#e99457\" stroke=\"black\" points=\"312,-214 165,-214 165,-161 312,-161 312,-214\"/>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-198.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[5931] <= 0.078</text>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-183.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 99.6%</text>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-168.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.868, 0.132]</text>\n</g>\n<!-- 0->1 -->\n<g id=\"edge1\" class=\"edge\">\n<title>0->1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M296.34,-249.87C287.99,-241.01 278.51,-230.95 269.68,-221.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"272.18,-219.13 262.77,-214.25 267.08,-223.93 272.18,-219.13\"/>\n<text text-anchor=\"middle\" x=\"262.03\" y=\"-235.54\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">True</text>\n</g>\n<!-- 854 -->\n<g id=\"node9\" class=\"node\">\n<title>854</title>\n<polygon fill=\"#47a4e7\" stroke=\"black\" points=\"477,-214 330,-214 330,-161 477,-161 477,-214\"/>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-198.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[7947] <= 0.135</text>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-183.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.4%</text>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-168.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.067, 0.933]</text>\n</g>\n<!-- 0->854 -->\n<g id=\"edge8\" class=\"edge\">\n<title>0->854</title>\n<path fill=\"none\" stroke=\"black\" d=\"M344.95,-249.87C353.41,-241.01 363,-230.95 371.94,-221.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"374.56,-223.9 378.93,-214.25 369.5,-219.07 374.56,-223.9\"/>\n<text text-anchor=\"middle\" x=\"379.52\" y=\"-235.54\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">False</text>\n</g>\n<!-- 2 -->\n<g id=\"node3\" class=\"node\">\n<title>2</title>\n<polygon fill=\"#e99356\" stroke=\"black\" points=\"147,-125 0,-125 0,-72 147,-72 147,-125\"/>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[5637] <= 0.142</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 98.8%</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.874, 0.126]</text>\n</g>\n<!-- 1->2 -->\n<g id=\"edge2\" class=\"edge\">\n<title>1->2</title>\n<path fill=\"none\" stroke=\"black\" d=\"M189.89,-160.87C171.42,-151.13 150.22,-139.95 131.01,-129.82\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"132.53,-126.67 122.05,-125.1 129.26,-132.86 132.53,-126.67\"/>\n</g>\n<!-- 835 -->\n<g id=\"node6\" class=\"node\">\n<title>835</title>\n<polygon fill=\"#76bbed\" stroke=\"black\" points=\"312,-125 165,-125 165,-72 312,-72 312,-125\"/>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[5449] <= 0.047</text>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.8%</text>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.235, 0.765]</text>\n</g>\n<!-- 1->835 -->\n<g id=\"edge5\" class=\"edge\">\n<title>1->835</title>\n<path fill=\"none\" stroke=\"black\" d=\"M238.5,-160.87C238.5,-152.89 238.5,-143.93 238.5,-135.38\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"242,-135.25 238.5,-125.25 235,-135.25 242,-135.25\"/>\n</g>\n<!-- 3 -->\n<g id=\"node4\" class=\"node\">\n<title>3</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"85.5,-36 31.5,-36 31.5,0 85.5,0 85.5,-36\"/>\n<text text-anchor=\"middle\" x=\"58.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2->3 -->\n<g id=\"edge3\" class=\"edge\">\n<title>2->3</title>\n<path fill=\"none\" stroke=\"black\" d=\"M68.59,-71.8C67.03,-63.62 65.29,-54.55 63.71,-46.27\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"67.13,-45.5 61.81,-36.33 60.25,-46.81 67.13,-45.5\"/>\n</g>\n<!-- 774 -->\n<g id=\"node5\" class=\"node\">\n<title>774</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"157.5,-36 103.5,-36 103.5,0 157.5,0 157.5,-36\"/>\n<text text-anchor=\"middle\" x=\"130.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2->774 -->\n<g id=\"edge4\" class=\"edge\">\n<title>2->774</title>\n<path fill=\"none\" stroke=\"black\" d=\"M92.16,-71.8C98.5,-63.08 105.56,-53.34 111.89,-44.63\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"114.87,-46.48 117.92,-36.33 109.21,-42.37 114.87,-46.48\"/>\n</g>\n<!-- 836 -->\n<g id=\"node7\" class=\"node\">\n<title>836</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"247.5,-36 193.5,-36 193.5,0 247.5,0 247.5,-36\"/>\n<text text-anchor=\"middle\" x=\"220.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 835->836 -->\n<g id=\"edge6\" class=\"edge\">\n<title>835->836</title>\n<path fill=\"none\" stroke=\"black\" d=\"M232.61,-71.8C230.73,-63.62 228.65,-54.55 226.75,-46.27\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"230.12,-45.3 224.47,-36.33 223.3,-46.86 230.12,-45.3\"/>\n</g>\n<!-- 853 -->\n<g id=\"node8\" class=\"node\">\n<title>853</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"319.5,-36 265.5,-36 265.5,0 319.5,0 319.5,-36\"/>\n<text text-anchor=\"middle\" x=\"292.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 835->853 -->\n<g id=\"edge7\" class=\"edge\">\n<title>835->853</title>\n<path fill=\"none\" stroke=\"black\" d=\"M256.18,-71.8C262.18,-63.08 268.88,-53.34 274.87,-44.63\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"277.79,-46.55 280.58,-36.33 272.03,-42.59 277.79,-46.55\"/>\n</g>\n<!-- 855 -->\n<g id=\"node10\" class=\"node\">\n<title>855</title>\n<polygon fill=\"#48a5e7\" stroke=\"black\" points=\"477,-125 330,-125 330,-72 477,-72 477,-125\"/>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[525] <= 0.113</text>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.3%</text>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.071, 0.929]</text>\n</g>\n<!-- 854->855 -->\n<g id=\"edge9\" class=\"edge\">\n<title>854->855</title>\n<path fill=\"none\" stroke=\"black\" d=\"M403.5,-160.87C403.5,-152.89 403.5,-143.93 403.5,-135.38\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"407,-135.25 403.5,-125.25 400,-135.25 407,-135.25\"/>\n</g>\n<!-- 860 -->\n<g id=\"node13\" class=\"node\">\n<title>860</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"612,-117.5 495,-117.5 495,-79.5 612,-79.5 612,-117.5\"/>\n<text text-anchor=\"middle\" x=\"553.5\" y=\"-102.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.0%</text>\n<text text-anchor=\"middle\" x=\"553.5\" y=\"-87.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.0, 1.0]</text>\n</g>\n<!-- 854->860 -->\n<g id=\"edge12\" class=\"edge\">\n<title>854->860</title>\n<path fill=\"none\" stroke=\"black\" d=\"M447.69,-160.87C468.62,-148.73 493.42,-134.35 513.75,-122.55\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"515.56,-125.55 522.45,-117.51 512.05,-119.5 515.56,-125.55\"/>\n</g>\n<!-- 856 -->\n<g id=\"node11\" class=\"node\">\n<title>856</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"410.5,-36 356.5,-36 356.5,0 410.5,0 410.5,-36\"/>\n<text text-anchor=\"middle\" x=\"383.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 855->856 -->\n<g id=\"edge10\" class=\"edge\">\n<title>855->856</title>\n<path fill=\"none\" stroke=\"black\" d=\"M396.95,-71.8C394.87,-63.62 392.56,-54.55 390.45,-46.27\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"393.78,-45.16 387.92,-36.33 386.99,-46.88 393.78,-45.16\"/>\n</g>\n<!-- 859 -->\n<g id=\"node12\" class=\"node\">\n<title>859</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"482.5,-36 428.5,-36 428.5,0 482.5,0 482.5,-36\"/>\n<text text-anchor=\"middle\" x=\"455.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 855->859 -->\n<g id=\"edge11\" class=\"edge\">\n<title>855->859</title>\n<path fill=\"none\" stroke=\"black\" d=\"M420.53,-71.8C426.24,-63.17 432.62,-53.54 438.34,-44.9\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"441.42,-46.6 444.02,-36.33 435.58,-42.73 441.42,-46.6\"/>\n</g>\n</g>\n</svg>\n", | |
"text/plain": [ | |
"<graphviz.sources.Source at 0x7d90b47afee0>" | |
] | |
}, | |
"metadata": {} | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment