Created
May 15, 2019 19:52
-
-
Save VictoriaMaia/0e275243a19f98469b4654ed70a8b7ef to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 36, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"import matplotlib.pyplot as plt\n", | |
"%matplotlib inline" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Vamos utilizar o KNN e o train_test_split\n", | |
"# Importe os módulos necessários\n" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"colunas = ['buying',\n", | |
"'maint',\n", | |
"'doors',\n", | |
"'persons',\n", | |
"'lug_boot',\n", | |
"'safety',\n", | |
"'y']" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Leia o dataset\n", | |
"data = _____('data_car.csv', names=colunas)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Veja os primeiros 5 dados\n", | |
"data.____" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Verifique se há dados nulos\n", | |
"data.___.___" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Transforme os dados categóricos em numéricos\n", | |
"y_mapping = __________\n", | |
"\n", | |
"data[___] = data[___].____" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 21, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Crie o conjunto de rótulos e o conjunto de features\n", | |
"y = ______\n", | |
"\n", | |
"X = ______" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Construa o conjunto de treino e teste usando:\n", | |
"# - 0.2 para o tamanho do conjunto de testes\n", | |
"# - 42 para o estado aleatório\n", | |
"# - Estratifique a divisão com o conjunto de rótulos \n", | |
"\n", | |
"X_train, X_test, y_train, y_test = ______(___, ___, test_size=___, random_state=___, stratify=___)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"#### Gerando um gráfico com a acurácia do modelo nos conjuntos de treino e teste, para ver qual quantidade de vizinhos dá o melhor resultado e quais valores levam a overfitting" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Verifiquem o gráfico com o limite superior de vizinhos em 100 e depois\n", | |
"# façam outro gráfico com o limite em 30\n", | |
"\n", | |
"neighbors = np.arange(1, 100)\n", | |
"train_accuracy = np.empty(len(neighbors))\n", | |
"test_accuracy = np.empty(len(neighbors))\n", | |
"\n", | |
"# Defina o número de vizinhos do modelo KNN igual a k\n", | |
"for i, k in enumerate(neighbors):\n", | |
" knn = _____\n", | |
" knn.fit(___, ___)\n", | |
" train_accuracy[i] = knn.score(___, ___)\n", | |
" test_accuracy[i] = knn.score(___, ___)\n", | |
"\n", | |
"# Gerando o gráfico\n", | |
"plt.title('k-NN: Número de vizinhos')\n", | |
"plt.plot(neighbors, test_accuracy, label = 'Accuracia teste')\n", | |
"plt.plot(neighbors, train_accuracy, label = 'Accuracia treino')\n", | |
"plt.legend()\n", | |
"plt.xlabel('Número de vizinhos')\n", | |
"plt.ylabel('Accuracia')\n", | |
"plt.show()" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment