Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save RochaGabriell/6bfe6428a3290f9a11d2848673020790 to your computer and use it in GitHub Desktop.
Save RochaGabriell/6bfe6428a3290f9a11d2848673020790 to your computer and use it in GitHub Desktop.
RandomForestClassificationwithScikitLearn.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"authorship_tag": "ABX9TyPxXcBgtRZaHY95veoJoSel",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/RochaGabriell/6bfe6428a3290f9a11d2848673020790/randomforestclassificationwithscikitlearn.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "DSXkj7qn136e"
},
"outputs": [],
"source": [
"# Data Processing\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# Modelling\n",
"from sklearn.model_selection import train_test_split #Dividir o data set em conjuntos\n",
"from sklearn import metrics\n",
"from sklearn.metrics import accuracy_score, confusion_matrix #Para avaliar o modelo\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"from sklearn.feature_extraction.text import TfidfVectorizer #Para vetorizar o texto representar numericamente o texto\n",
"\n",
"# Tree Visualisation\n",
"from sklearn.tree import export_graphviz\n",
"from IPython.display import Image\n",
"import graphviz"
]
},
{
"cell_type": "code",
"source": [
"spam = pd.read_csv('spam.csv')"
],
"metadata": {
"id": "KXfFDz2h677L"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"spam.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "-4i2_u497IPb",
"outputId": "94c6354d-e6cd-48b7-80c0-1b41e91245e8"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" Category Message\n",
"0 ham Go until jurong point, crazy.. Available only ...\n",
"1 ham Ok lar... Joking wif u oni...\n",
"2 spam Free entry in 2 a wkly comp to win FA Cup fina...\n",
"3 ham U dun say so early hor... U c already then say...\n",
"4 ham Nah I don't think he goes to usf, he lives aro..."
],
"text/html": [
"\n",
" <div id=\"df-d7e26e11-aceb-46fa-a6a4-222e0cd79ff7\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Category</th>\n",
" <th>Message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>ham</td>\n",
" <td>Go until jurong point, crazy.. Available only ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ham</td>\n",
" <td>Ok lar... Joking wif u oni...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>spam</td>\n",
" <td>Free entry in 2 a wkly comp to win FA Cup fina...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>ham</td>\n",
" <td>U dun say so early hor... U c already then say...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>ham</td>\n",
" <td>Nah I don't think he goes to usf, he lives aro...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d7e26e11-aceb-46fa-a6a4-222e0cd79ff7')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-d7e26e11-aceb-46fa-a6a4-222e0cd79ff7 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-d7e26e11-aceb-46fa-a6a4-222e0cd79ff7');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-3a0a8d9d-b9eb-42cc-9c7e-7b02120478e6\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-3a0a8d9d-b9eb-42cc-9c7e-7b02120478e6')\"\n",
" title=\"Suggest charts.\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-3a0a8d9d-b9eb-42cc-9c7e-7b02120478e6 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
" </div>\n",
" </div>\n"
]
},
"metadata": {},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"source": [
"spam.shape"
],
"metadata": {
"id": "_6RfpYT_7MeQ",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "19c54f91-f5c3-439d-f62d-8a13a54dd049"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(5572, 2)"
]
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"source": [
"spam['Category'].value_counts()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "U8tfOr0w9rgH",
"outputId": "5089e384-2e33-49d3-84f0-4137ff9b4fb1"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"ham 4825\n",
"spam 747\n",
"Name: Category, dtype: int64"
]
},
"metadata": {},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"source": [
"messages = spam['Message']\n",
"category = spam['Category']"
],
"metadata": {
"id": "aw0LlApM-vnr"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#Vetorização do texto\n",
"vectorizer = TfidfVectorizer()\n",
"text_vectorizer = vectorizer.fit_transform(messages)"
],
"metadata": {
"id": "3DVYx-M7_Fi1"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"text_vectorizer.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "sXajo0J1_4PO",
"outputId": "085dd50b-5315-46b7-d4bc-d05f2c248ddd"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(5572, 8709)"
]
},
"metadata": {},
"execution_count": 8
}
]
},
{
"cell_type": "code",
"source": [
"X_train, X_test, y_train, y_test = train_test_split(text_vectorizer, category, test_size=0.3)"
],
"metadata": {
"id": "KM7tS78YAG1u"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"X_train.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "joFWMLgsBp_3",
"outputId": "b7570a22-3645-41fe-a1ae-afe104a8cee6"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(3900, 8709)"
]
},
"metadata": {},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"source": [
"X_test.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "PmKqq6PuBurK",
"outputId": "0378a76d-215a-43ca-b32e-25235be458b4"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(1672, 8709)"
]
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "markdown",
"source": [
"## Montagem e avaliação do modelo\n",
"\n",
"Primeiro, criamos uma instância do modelo Random Forest, com quantidade de árvores. Em seguida, ajustamos isso aos nossos dados de treinamento. Passamos os recursos e a variável de destino, para que o modelo possa aprender.\n"
],
"metadata": {
"id": "jvH-pCzpDj9Q"
}
},
{
"cell_type": "code",
"source": [
"rf = RandomForestClassifier(n_estimators=500)\n",
"rf.fit(X_train, y_train)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Rt0buSlOBwke",
"outputId": "2a63b74c-4268-469c-e973-01f73ff08e58"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"RandomForestClassifier(n_estimators=500)"
],
"text/html": [
"<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>RandomForestClassifier(n_estimators=500)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">RandomForestClassifier</label><div class=\"sk-toggleable__content\"><pre>RandomForestClassifier(n_estimators=500)</pre></div></div></div></div></div>"
]
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "markdown",
"source": [
"Temos um modelo treinado de Floresta Aleatória, mas precisamos descobrir se está fazendo previsões precisas."
],
"metadata": {
"id": "uNCLZmGGDyW7"
}
},
{
"cell_type": "code",
"source": [
"y_pred = rf.predict(X_test)\n",
"y_pred"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "GmsK1OLHDIv_",
"outputId": "9746543b-c679-4be1-e3e1-324c833c9095"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array(['spam', 'ham', 'ham', ..., 'ham', 'ham', 'ham'], dtype=object)"
]
},
"metadata": {},
"execution_count": 13
}
]
},
{
"cell_type": "markdown",
"source": [
"Verificamos as previsões em relação aos valores reais no conjunto de testes e contamos quantas o modelo acertou."
],
"metadata": {
"id": "gseibLs3EC9O"
}
},
{
"cell_type": "code",
"source": [
"accuracy = accuracy_score(y_test, y_pred)\n",
"print(\"Accuracy:\", accuracy)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gPHoQ4EGDK1O",
"outputId": "7f2c551d-d00d-4c05-99fb-54646bb0115f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Accuracy: 0.9820574162679426\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"Determina a matriz de confusão\n",
"\n",
"![confusion_matrix](https://diegonogare.net/wp-content/uploads/2020/04/matrizConfusao-600x381.png)"
],
"metadata": {
"id": "L_5sSlgQE8_E"
}
},
{
"cell_type": "code",
"source": [
"confusion_matrix(y_test, y_pred)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "alvI7NvvEdjg",
"outputId": "f9bda44b-f1b3-40ae-b71e-b0c314e63e0a"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array([[1445, 0],\n",
" [ 30, 197]])"
]
},
"metadata": {},
"execution_count": 15
}
]
},
{
"cell_type": "code",
"source": [
"print(metrics.classification_report(y_test, y_pred))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8T-bm0JNGKT8",
"outputId": "94819da0-9884-412b-ddcb-85e4b407eb51"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" precision recall f1-score support\n",
"\n",
" ham 0.98 1.00 0.99 1445\n",
" spam 1.00 0.87 0.93 227\n",
"\n",
" accuracy 0.98 1672\n",
" macro avg 0.99 0.93 0.96 1672\n",
"weighted avg 0.98 0.98 0.98 1672\n",
"\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"# Testando o modelo Florest criado"
],
"metadata": {
"id": "mqpBNU4VHn3R"
}
},
{
"cell_type": "code",
"source": [
"test_text = ['WINNER!! As a valued network customer you have been selected to receivea £900 prize reward! To claim call 09061701461. Claim code KL341. Valid 12 hours only.']\n",
"vectorizer_test_text = vectorizer.transform(test_text)\n",
"rf.predict(vectorizer_test_text)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "0cn-U_CBGY6k",
"outputId": "a3e44341-65d2-4752-efa7-72aea15f2f05"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"array(['spam'], dtype=object)"
]
},
"metadata": {},
"execution_count": 17
}
]
},
{
"cell_type": "markdown",
"source": [
"## Visualizando os resultados\n",
"Podemos usar o seguinte código para visualizar nossas três primeiras árvores."
],
"metadata": {
"id": "advjkn6xEI1Q"
}
},
{
"cell_type": "code",
"source": [
"for i in range(3):\n",
" tree = rf.estimators_[i]\n",
" dot_data = export_graphviz(tree,\n",
" filled=True,\n",
" max_depth=2,\n",
" impurity=False,\n",
" proportion=True)\n",
" graph = graphviz.Source(dot_data)\n",
" display(graph)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "t59Ou8ZxDhCy",
"outputId": "28dfc832-c58d-49e8-ca70-f6f49a41ce7a"
},
"execution_count": null,
"outputs": [
{
"output_type": "display_data",
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 2.43.0 (0)\n -->\n<!-- Title: Tree Pages: 1 -->\n<svg width=\"594pt\" height=\"311pt\"\n viewBox=\"0.00 0.00 594.00 311.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 307)\">\n<title>Tree</title>\n<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-307 590,-307 590,4 -4,4\"/>\n<!-- 0 -->\n<g id=\"node1\" class=\"node\">\n<title>0</title>\n<polygon fill=\"#e99456\" stroke=\"black\" points=\"375,-303 228,-303 228,-250 375,-250 375,-303\"/>\n<text text-anchor=\"middle\" x=\"301.5\" y=\"-287.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[7048] &lt;= 0.148</text>\n<text text-anchor=\"middle\" x=\"301.5\" y=\"-272.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 100.0%</text>\n<text text-anchor=\"middle\" x=\"301.5\" y=\"-257.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.871, 0.129]</text>\n</g>\n<!-- 1 -->\n<g id=\"node2\" class=\"node\">\n<title>1</title>\n<polygon fill=\"#e99355\" stroke=\"black\" points=\"293,-214 146,-214 146,-161 293,-161 293,-214\"/>\n<text text-anchor=\"middle\" x=\"219.5\" y=\"-198.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[8592] &lt;= 0.065</text>\n<text text-anchor=\"middle\" x=\"219.5\" y=\"-183.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 99.1%</text>\n<text text-anchor=\"middle\" x=\"219.5\" y=\"-168.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.876, 0.124]</text>\n</g>\n<!-- 0&#45;&gt;1 -->\n<g id=\"edge1\" class=\"edge\">\n<title>0&#45;&gt;1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M277.34,-249.87C268.99,-241.01 259.51,-230.95 250.68,-221.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"253.18,-219.13 243.77,-214.25 248.08,-223.93 253.18,-219.13\"/>\n<text text-anchor=\"middle\" x=\"243.03\" y=\"-235.54\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">True</text>\n</g>\n<!-- 432 -->\n<g id=\"node7\" class=\"node\">\n<title>432</title>\n<polygon fill=\"#b2d9f5\" stroke=\"black\" points=\"458,-214 311,-214 311,-161 458,-161 458,-214\"/>\n<text text-anchor=\"middle\" x=\"384.5\" y=\"-198.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[840] &lt;= 0.114</text>\n<text text-anchor=\"middle\" x=\"384.5\" y=\"-183.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.9%</text>\n<text text-anchor=\"middle\" x=\"384.5\" y=\"-168.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.378, 0.622]</text>\n</g>\n<!-- 0&#45;&gt;432 -->\n<g id=\"edge6\" class=\"edge\">\n<title>0&#45;&gt;432</title>\n<path fill=\"none\" stroke=\"black\" d=\"M325.95,-249.87C334.41,-241.01 344,-230.95 352.94,-221.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"355.56,-223.9 359.93,-214.25 350.5,-219.07 355.56,-223.9\"/>\n<text text-anchor=\"middle\" x=\"360.52\" y=\"-235.54\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">False</text>\n</g>\n<!-- 2 -->\n<g id=\"node3\" class=\"node\">\n<title>2</title>\n<polygon fill=\"#e89152\" stroke=\"black\" points=\"147,-125 0,-125 0,-72 147,-72 147,-125\"/>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[8674] &lt;= 0.096</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 97.7%</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.889, 0.111]</text>\n</g>\n<!-- 1&#45;&gt;2 -->\n<g id=\"edge2\" class=\"edge\">\n<title>1&#45;&gt;2</title>\n<path fill=\"none\" stroke=\"black\" d=\"M176.48,-160.87C160.37,-151.26 141.89,-140.25 125.08,-130.24\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"126.84,-127.21 116.46,-125.1 123.26,-133.23 126.84,-127.21\"/>\n</g>\n<!-- 431 -->\n<g id=\"node6\" class=\"node\">\n<title>431</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"282,-117.5 165,-117.5 165,-79.5 282,-79.5 282,-117.5\"/>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-102.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 1.4%</text>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-87.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.0, 1.0]</text>\n</g>\n<!-- 1&#45;&gt;431 -->\n<g id=\"edge5\" class=\"edge\">\n<title>1&#45;&gt;431</title>\n<path fill=\"none\" stroke=\"black\" d=\"M220.68,-160.87C221.16,-150.42 221.72,-138.3 222.21,-127.61\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"225.71,-127.66 222.67,-117.51 218.72,-127.34 225.71,-127.66\"/>\n</g>\n<!-- 3 -->\n<g id=\"node4\" class=\"node\">\n<title>3</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"64.5,-36 10.5,-36 10.5,0 64.5,0 64.5,-36\"/>\n<text text-anchor=\"middle\" x=\"37.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2&#45;&gt;3 -->\n<g id=\"edge3\" class=\"edge\">\n<title>2&#45;&gt;3</title>\n<path fill=\"none\" stroke=\"black\" d=\"M61.71,-71.8C57.84,-63.35 53.53,-53.95 49.63,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"52.8,-43.96 45.45,-36.33 46.43,-46.88 52.8,-43.96\"/>\n</g>\n<!-- 422 -->\n<g id=\"node5\" class=\"node\">\n<title>422</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"136.5,-36 82.5,-36 82.5,0 136.5,0 136.5,-36\"/>\n<text text-anchor=\"middle\" x=\"109.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2&#45;&gt;422 -->\n<g id=\"edge4\" class=\"edge\">\n<title>2&#45;&gt;422</title>\n<path fill=\"none\" stroke=\"black\" d=\"M85.29,-71.8C89.16,-63.35 93.47,-53.95 97.37,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"100.57,-46.88 101.55,-36.33 94.2,-43.96 100.57,-46.88\"/>\n</g>\n<!-- 433 -->\n<g id=\"node8\" class=\"node\">\n<title>433</title>\n<polygon fill=\"#cbe5f8\" stroke=\"black\" points=\"451,-125 304,-125 304,-72 451,-72 451,-125\"/>\n<text text-anchor=\"middle\" x=\"377.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[7311] &lt;= 0.068</text>\n<text text-anchor=\"middle\" x=\"377.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.8%</text>\n<text text-anchor=\"middle\" x=\"377.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.424, 0.576]</text>\n</g>\n<!-- 432&#45;&gt;433 -->\n<g id=\"edge7\" class=\"edge\">\n<title>432&#45;&gt;433</title>\n<path fill=\"none\" stroke=\"black\" d=\"M382.44,-160.87C381.8,-152.89 381.07,-143.93 380.39,-135.38\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"383.86,-134.94 379.57,-125.25 376.89,-135.5 383.86,-134.94\"/>\n</g>\n<!-- 458 -->\n<g id=\"node11\" class=\"node\">\n<title>458</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"586,-117.5 469,-117.5 469,-79.5 586,-79.5 586,-117.5\"/>\n<text text-anchor=\"middle\" x=\"527.5\" y=\"-102.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.1%</text>\n<text text-anchor=\"middle\" x=\"527.5\" y=\"-87.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.0, 1.0]</text>\n</g>\n<!-- 432&#45;&gt;458 -->\n<g id=\"edge10\" class=\"edge\">\n<title>432&#45;&gt;458</title>\n<path fill=\"none\" stroke=\"black\" d=\"M426.63,-160.87C446.49,-148.78 470.01,-134.47 489.34,-122.71\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"491.18,-125.69 497.9,-117.51 487.54,-119.71 491.18,-125.69\"/>\n</g>\n<!-- 434 -->\n<g id=\"node9\" class=\"node\">\n<title>434</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"368.5,-36 314.5,-36 314.5,0 368.5,0 368.5,-36\"/>\n<text text-anchor=\"middle\" x=\"341.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 433&#45;&gt;434 -->\n<g id=\"edge8\" class=\"edge\">\n<title>433&#45;&gt;434</title>\n<path fill=\"none\" stroke=\"black\" d=\"M365.71,-71.8C361.84,-63.35 357.53,-53.95 353.63,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"356.8,-43.96 349.45,-36.33 350.43,-46.88 356.8,-43.96\"/>\n</g>\n<!-- 457 -->\n<g id=\"node10\" class=\"node\">\n<title>457</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"440.5,-36 386.5,-36 386.5,0 440.5,0 440.5,-36\"/>\n<text text-anchor=\"middle\" x=\"413.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 433&#45;&gt;457 -->\n<g id=\"edge9\" class=\"edge\">\n<title>433&#45;&gt;457</title>\n<path fill=\"none\" stroke=\"black\" d=\"M389.29,-71.8C393.16,-63.35 397.47,-53.95 401.37,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"404.57,-46.88 405.55,-36.33 398.2,-43.96 404.57,-46.88\"/>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.sources.Source at 0x7d90b47afee0>"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 2.43.0 (0)\n -->\n<!-- Title: Tree Pages: 1 -->\n<svg width=\"365pt\" height=\"311pt\"\n viewBox=\"0.00 0.00 365.00 311.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 307)\">\n<title>Tree</title>\n<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-307 361,-307 361,4 -4,4\"/>\n<!-- 0 -->\n<g id=\"node1\" class=\"node\">\n<title>0</title>\n<polygon fill=\"#e99356\" stroke=\"black\" points=\"297,-303 150,-303 150,-250 297,-250 297,-303\"/>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-287.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[2358] &lt;= 0.117</text>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-272.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 100.0%</text>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-257.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.872, 0.128]</text>\n</g>\n<!-- 1 -->\n<g id=\"node2\" class=\"node\">\n<title>1</title>\n<polygon fill=\"#e99356\" stroke=\"black\" points=\"222,-214 75,-214 75,-161 222,-161 222,-214\"/>\n<text text-anchor=\"middle\" x=\"148.5\" y=\"-198.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[332] &lt;= 0.117</text>\n<text text-anchor=\"middle\" x=\"148.5\" y=\"-183.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 99.8%</text>\n<text text-anchor=\"middle\" x=\"148.5\" y=\"-168.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.874, 0.126]</text>\n</g>\n<!-- 0&#45;&gt;1 -->\n<g id=\"edge1\" class=\"edge\">\n<title>0&#45;&gt;1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M201.4,-249.87C193.84,-241.1 185.27,-231.15 177.26,-221.86\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"179.88,-219.54 170.7,-214.25 174.58,-224.11 179.88,-219.54\"/>\n<text text-anchor=\"middle\" x=\"168.85\" y=\"-235.48\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">True</text>\n</g>\n<!-- 692 -->\n<g id=\"node7\" class=\"node\">\n<title>692</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"357,-206.5 240,-206.5 240,-168.5 357,-168.5 357,-206.5\"/>\n<text text-anchor=\"middle\" x=\"298.5\" y=\"-191.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.2%</text>\n<text text-anchor=\"middle\" x=\"298.5\" y=\"-176.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.0, 1.0]</text>\n</g>\n<!-- 0&#45;&gt;692 -->\n<g id=\"edge6\" class=\"edge\">\n<title>0&#45;&gt;692</title>\n<path fill=\"none\" stroke=\"black\" d=\"M245.6,-249.87C255.26,-238.66 266.58,-225.53 276.26,-214.29\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"279.1,-216.37 282.98,-206.51 273.8,-211.8 279.1,-216.37\"/>\n<text text-anchor=\"middle\" x=\"284.82\" y=\"-227.74\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">False</text>\n</g>\n<!-- 2 -->\n<g id=\"node3\" class=\"node\">\n<title>2</title>\n<polygon fill=\"#e99355\" stroke=\"black\" points=\"147,-125 0,-125 0,-72 147,-72 147,-125\"/>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[8101] &lt;= 0.103</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 99.4%</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.877, 0.123]</text>\n</g>\n<!-- 1&#45;&gt;2 -->\n<g id=\"edge2\" class=\"edge\">\n<title>1&#45;&gt;2</title>\n<path fill=\"none\" stroke=\"black\" d=\"M126.4,-160.87C118.84,-152.1 110.27,-142.15 102.26,-132.86\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"104.88,-130.54 95.7,-125.25 99.58,-135.11 104.88,-130.54\"/>\n</g>\n<!-- 691 -->\n<g id=\"node6\" class=\"node\">\n<title>691</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"282,-117.5 165,-117.5 165,-79.5 282,-79.5 282,-117.5\"/>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-102.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.4%</text>\n<text text-anchor=\"middle\" x=\"223.5\" y=\"-87.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.0, 1.0]</text>\n</g>\n<!-- 1&#45;&gt;691 -->\n<g id=\"edge5\" class=\"edge\">\n<title>1&#45;&gt;691</title>\n<path fill=\"none\" stroke=\"black\" d=\"M170.6,-160.87C180.26,-149.66 191.58,-136.53 201.26,-125.29\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"204.1,-127.37 207.98,-117.51 198.8,-122.8 204.1,-127.37\"/>\n</g>\n<!-- 3 -->\n<g id=\"node4\" class=\"node\">\n<title>3</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"64.5,-36 10.5,-36 10.5,0 64.5,0 64.5,-36\"/>\n<text text-anchor=\"middle\" x=\"37.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2&#45;&gt;3 -->\n<g id=\"edge3\" class=\"edge\">\n<title>2&#45;&gt;3</title>\n<path fill=\"none\" stroke=\"black\" d=\"M61.71,-71.8C57.84,-63.35 53.53,-53.95 49.63,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"52.8,-43.96 45.45,-36.33 46.43,-46.88 52.8,-43.96\"/>\n</g>\n<!-- 614 -->\n<g id=\"node5\" class=\"node\">\n<title>614</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"136.5,-36 82.5,-36 82.5,0 136.5,0 136.5,-36\"/>\n<text text-anchor=\"middle\" x=\"109.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2&#45;&gt;614 -->\n<g id=\"edge4\" class=\"edge\">\n<title>2&#45;&gt;614</title>\n<path fill=\"none\" stroke=\"black\" d=\"M85.29,-71.8C89.16,-63.35 93.47,-53.95 97.37,-45.45\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"100.57,-46.88 101.55,-36.33 94.2,-43.96 100.57,-46.88\"/>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.sources.Source at 0x7d9075c508b0>"
]
},
"metadata": {}
},
{
"output_type": "display_data",
"data": {
"image/svg+xml": "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Generated by graphviz version 2.43.0 (0)\n -->\n<!-- Title: Tree Pages: 1 -->\n<svg width=\"620pt\" height=\"311pt\"\n viewBox=\"0.00 0.00 620.00 311.00\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n<g id=\"graph0\" class=\"graph\" transform=\"scale(1 1) rotate(0) translate(4 307)\">\n<title>Tree</title>\n<polygon fill=\"white\" stroke=\"transparent\" points=\"-4,4 -4,-307 616,-307 616,4 -4,4\"/>\n<!-- 0 -->\n<g id=\"node1\" class=\"node\">\n<title>0</title>\n<polygon fill=\"#e99558\" stroke=\"black\" points=\"394,-303 247,-303 247,-250 394,-250 394,-303\"/>\n<text text-anchor=\"middle\" x=\"320.5\" y=\"-287.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[5238] &lt;= 0.096</text>\n<text text-anchor=\"middle\" x=\"320.5\" y=\"-272.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 100.0%</text>\n<text text-anchor=\"middle\" x=\"320.5\" y=\"-257.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.865, 0.135]</text>\n</g>\n<!-- 1 -->\n<g id=\"node2\" class=\"node\">\n<title>1</title>\n<polygon fill=\"#e99457\" stroke=\"black\" points=\"312,-214 165,-214 165,-161 312,-161 312,-214\"/>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-198.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[5931] &lt;= 0.078</text>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-183.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 99.6%</text>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-168.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.868, 0.132]</text>\n</g>\n<!-- 0&#45;&gt;1 -->\n<g id=\"edge1\" class=\"edge\">\n<title>0&#45;&gt;1</title>\n<path fill=\"none\" stroke=\"black\" d=\"M296.34,-249.87C287.99,-241.01 278.51,-230.95 269.68,-221.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"272.18,-219.13 262.77,-214.25 267.08,-223.93 272.18,-219.13\"/>\n<text text-anchor=\"middle\" x=\"262.03\" y=\"-235.54\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">True</text>\n</g>\n<!-- 854 -->\n<g id=\"node9\" class=\"node\">\n<title>854</title>\n<polygon fill=\"#47a4e7\" stroke=\"black\" points=\"477,-214 330,-214 330,-161 477,-161 477,-214\"/>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-198.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[7947] &lt;= 0.135</text>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-183.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.4%</text>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-168.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.067, 0.933]</text>\n</g>\n<!-- 0&#45;&gt;854 -->\n<g id=\"edge8\" class=\"edge\">\n<title>0&#45;&gt;854</title>\n<path fill=\"none\" stroke=\"black\" d=\"M344.95,-249.87C353.41,-241.01 363,-230.95 371.94,-221.58\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"374.56,-223.9 378.93,-214.25 369.5,-219.07 374.56,-223.9\"/>\n<text text-anchor=\"middle\" x=\"379.52\" y=\"-235.54\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">False</text>\n</g>\n<!-- 2 -->\n<g id=\"node3\" class=\"node\">\n<title>2</title>\n<polygon fill=\"#e99356\" stroke=\"black\" points=\"147,-125 0,-125 0,-72 147,-72 147,-125\"/>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[5637] &lt;= 0.142</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 98.8%</text>\n<text text-anchor=\"middle\" x=\"73.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.874, 0.126]</text>\n</g>\n<!-- 1&#45;&gt;2 -->\n<g id=\"edge2\" class=\"edge\">\n<title>1&#45;&gt;2</title>\n<path fill=\"none\" stroke=\"black\" d=\"M189.89,-160.87C171.42,-151.13 150.22,-139.95 131.01,-129.82\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"132.53,-126.67 122.05,-125.1 129.26,-132.86 132.53,-126.67\"/>\n</g>\n<!-- 835 -->\n<g id=\"node6\" class=\"node\">\n<title>835</title>\n<polygon fill=\"#76bbed\" stroke=\"black\" points=\"312,-125 165,-125 165,-72 312,-72 312,-125\"/>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[5449] &lt;= 0.047</text>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.8%</text>\n<text text-anchor=\"middle\" x=\"238.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.235, 0.765]</text>\n</g>\n<!-- 1&#45;&gt;835 -->\n<g id=\"edge5\" class=\"edge\">\n<title>1&#45;&gt;835</title>\n<path fill=\"none\" stroke=\"black\" d=\"M238.5,-160.87C238.5,-152.89 238.5,-143.93 238.5,-135.38\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"242,-135.25 238.5,-125.25 235,-135.25 242,-135.25\"/>\n</g>\n<!-- 3 -->\n<g id=\"node4\" class=\"node\">\n<title>3</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"85.5,-36 31.5,-36 31.5,0 85.5,0 85.5,-36\"/>\n<text text-anchor=\"middle\" x=\"58.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2&#45;&gt;3 -->\n<g id=\"edge3\" class=\"edge\">\n<title>2&#45;&gt;3</title>\n<path fill=\"none\" stroke=\"black\" d=\"M68.59,-71.8C67.03,-63.62 65.29,-54.55 63.71,-46.27\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"67.13,-45.5 61.81,-36.33 60.25,-46.81 67.13,-45.5\"/>\n</g>\n<!-- 774 -->\n<g id=\"node5\" class=\"node\">\n<title>774</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"157.5,-36 103.5,-36 103.5,0 157.5,0 157.5,-36\"/>\n<text text-anchor=\"middle\" x=\"130.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 2&#45;&gt;774 -->\n<g id=\"edge4\" class=\"edge\">\n<title>2&#45;&gt;774</title>\n<path fill=\"none\" stroke=\"black\" d=\"M92.16,-71.8C98.5,-63.08 105.56,-53.34 111.89,-44.63\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"114.87,-46.48 117.92,-36.33 109.21,-42.37 114.87,-46.48\"/>\n</g>\n<!-- 836 -->\n<g id=\"node7\" class=\"node\">\n<title>836</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"247.5,-36 193.5,-36 193.5,0 247.5,0 247.5,-36\"/>\n<text text-anchor=\"middle\" x=\"220.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 835&#45;&gt;836 -->\n<g id=\"edge6\" class=\"edge\">\n<title>835&#45;&gt;836</title>\n<path fill=\"none\" stroke=\"black\" d=\"M232.61,-71.8C230.73,-63.62 228.65,-54.55 226.75,-46.27\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"230.12,-45.3 224.47,-36.33 223.3,-46.86 230.12,-45.3\"/>\n</g>\n<!-- 853 -->\n<g id=\"node8\" class=\"node\">\n<title>853</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"319.5,-36 265.5,-36 265.5,0 319.5,0 319.5,-36\"/>\n<text text-anchor=\"middle\" x=\"292.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 835&#45;&gt;853 -->\n<g id=\"edge7\" class=\"edge\">\n<title>835&#45;&gt;853</title>\n<path fill=\"none\" stroke=\"black\" d=\"M256.18,-71.8C262.18,-63.08 268.88,-53.34 274.87,-44.63\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"277.79,-46.55 280.58,-36.33 272.03,-42.59 277.79,-46.55\"/>\n</g>\n<!-- 855 -->\n<g id=\"node10\" class=\"node\">\n<title>855</title>\n<polygon fill=\"#48a5e7\" stroke=\"black\" points=\"477,-125 330,-125 330,-72 477,-72 477,-125\"/>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-109.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">x[525] &lt;= 0.113</text>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-94.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.3%</text>\n<text text-anchor=\"middle\" x=\"403.5\" y=\"-79.8\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.071, 0.929]</text>\n</g>\n<!-- 854&#45;&gt;855 -->\n<g id=\"edge9\" class=\"edge\">\n<title>854&#45;&gt;855</title>\n<path fill=\"none\" stroke=\"black\" d=\"M403.5,-160.87C403.5,-152.89 403.5,-143.93 403.5,-135.38\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"407,-135.25 403.5,-125.25 400,-135.25 407,-135.25\"/>\n</g>\n<!-- 860 -->\n<g id=\"node13\" class=\"node\">\n<title>860</title>\n<polygon fill=\"#399de5\" stroke=\"black\" points=\"612,-117.5 495,-117.5 495,-79.5 612,-79.5 612,-117.5\"/>\n<text text-anchor=\"middle\" x=\"553.5\" y=\"-102.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">samples = 0.0%</text>\n<text text-anchor=\"middle\" x=\"553.5\" y=\"-87.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">value = [0.0, 1.0]</text>\n</g>\n<!-- 854&#45;&gt;860 -->\n<g id=\"edge12\" class=\"edge\">\n<title>854&#45;&gt;860</title>\n<path fill=\"none\" stroke=\"black\" d=\"M447.69,-160.87C468.62,-148.73 493.42,-134.35 513.75,-122.55\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"515.56,-125.55 522.45,-117.51 512.05,-119.5 515.56,-125.55\"/>\n</g>\n<!-- 856 -->\n<g id=\"node11\" class=\"node\">\n<title>856</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"410.5,-36 356.5,-36 356.5,0 410.5,0 410.5,-36\"/>\n<text text-anchor=\"middle\" x=\"383.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 855&#45;&gt;856 -->\n<g id=\"edge10\" class=\"edge\">\n<title>855&#45;&gt;856</title>\n<path fill=\"none\" stroke=\"black\" d=\"M396.95,-71.8C394.87,-63.62 392.56,-54.55 390.45,-46.27\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"393.78,-45.16 387.92,-36.33 386.99,-46.88 393.78,-45.16\"/>\n</g>\n<!-- 859 -->\n<g id=\"node12\" class=\"node\">\n<title>859</title>\n<polygon fill=\"#c0c0c0\" stroke=\"black\" points=\"482.5,-36 428.5,-36 428.5,0 482.5,0 482.5,-36\"/>\n<text text-anchor=\"middle\" x=\"455.5\" y=\"-14.3\" font-family=\"Helvetica,sans-Serif\" font-size=\"14.00\">(...)</text>\n</g>\n<!-- 855&#45;&gt;859 -->\n<g id=\"edge11\" class=\"edge\">\n<title>855&#45;&gt;859</title>\n<path fill=\"none\" stroke=\"black\" d=\"M420.53,-71.8C426.24,-63.17 432.62,-53.54 438.34,-44.9\"/>\n<polygon fill=\"black\" stroke=\"black\" points=\"441.42,-46.6 444.02,-36.33 435.58,-42.73 441.42,-46.6\"/>\n</g>\n</g>\n</svg>\n",
"text/plain": [
"<graphviz.sources.Source at 0x7d90b47afee0>"
]
},
"metadata": {}
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment