Created
November 29, 2022 01:50
-
-
Save BrunoBiz/dd45a1b7608a1cda38e0c08d10dedf0d to your computer and use it in GitHub Desktop.
MachineLearning
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from sklearn.model_selection import KFold | |
from sklearn.model_selection import cross_val_score | |
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report | |
from sklearn.model_selection import train_test_split | |
# Modelos | |
from sklearn.naive_bayes import GaussianNB | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn.naive_bayes import ComplementNB | |
from sklearn.naive_bayes import BernoulliNB | |
from sklearn.naive_bayes import CategoricalNB | |
# FUNÇÃO GENÉRICA PARA OS MODELOS DE NAIVE BAYES
def f_modelo(funcao, funcao_name):
    """Train one Naive Bayes estimator and print its accuracy two ways.

    First the estimator is fitted on the hold-out training split and scored
    on the test split; then the same estimator object is handed to
    ``cross_val_score`` over the full dataset.

    The data is not passed in: this function reads the module-level names
    ``x_treino``, ``y_treino``, ``x_teste``, ``y_teste``, ``previsores``,
    ``alvo`` and ``kfold``, which must exist before it is called.

    Args:
        funcao: Estimator instance exposing ``fit`` and ``predict``.
        funcao_name: Label printed in the banner above the results.
    """
    banner = "============================ " + funcao_name
    print(banner)

    # --- Hold-out evaluation -------------------------------------------
    funcao.fit(x_treino, y_treino)
    predicted = funcao.predict(x_teste)
    # For a per-class breakdown, print confusion_matrix(y_teste, predicted)
    # or classification_report(y_teste, predicted) here.
    holdout_pct = accuracy_score(y_teste, predicted) * 100.0
    print("Acurácia...: %.2f%%" % holdout_pct)

    # --- Cross-validation ----------------------------------------------
    fold_scores = cross_val_score(funcao, previsores, alvo, cv=kfold)
    cv_pct = fold_scores.mean() * 100.0
    print("Acurácia CV: %.2f%%" % cv_pct)

    print("\n")
# Cross-validation splitter used by f_modelo (read there as a module-level name).
# An earlier run used: KFold(n_splits=30, shuffle=True, random_state=10)
kfold = KFold(n_splits=30, shuffle=True, random_state=0)

# Console display settings: widen the output and raise the column limit so
# that all dataset columns are shown when printing.
desired_width = 320
pd.set_option('display.width', desired_width)
np.set_printoptions(linewidth=desired_width)
pd.set_option('display.max_columns', 15)

# Load the CSV directly from the project folder.
ds_stroke = pd.read_csv('STROKE.csv', sep=',', encoding='iso-8859-1')

# Quick inspection helpers (uncomment when exploring the data):
# print(ds_stroke.head(20))
# print(ds_stroke.dtypes)
# print(ds_stroke.shape)

# Missing values: nulls in the 'bmi' column are replaced with 0.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selection: under pandas Copy-on-Write the
# chained in-place form operates on a temporary and leaves ds_stroke untouched.
ds_stroke['bmi'] = ds_stroke['bmi'].fillna(0)

# Encode the text (object) columns as integer codes:
#    1  gender
#    5  ever_married
#    6  work_type
#    7  Residence_type
#   10  smoking_status
category_codes = {
    'gender': {'Female': 0, 'Male': 1, 'Other': 2},
    'ever_married': {'Yes': 1, 'No': 0},
    'work_type': {
        'Private': 0,
        'Self-employed': 1,
        'children': 2,
        'Govt_job': 3,
        'Never_worked': 4,
    },
    'Residence_type': {'Urban': 0, 'Rural': 1},
    'smoking_status': {
        'never smoked': 0,
        'Unknown': 1,
        'formerly smoked': 2,
        'smokes': 3,
    },
}
for column, codes in category_codes.items():
    # Same reason as for 'bmi': assign back rather than replace(inplace=True).
    ds_stroke[column] = ds_stroke[column].replace(codes)

# Predictors (first 11 columns) and target (12th column).
# NOTE(review): the column list above starts at index 1, so column 0 is
# presumably a row identifier that is being fed to the models as a feature;
# confirm against the CSV and consider slicing 1:11 instead.
previsores = ds_stroke.iloc[:, 0:11].values
alvo = ds_stroke.iloc[:, 11].values

# Hold-out split: 80% train / 20% test.
x_treino, x_teste, y_treino, y_teste = train_test_split(
    previsores, alvo, test_size=0.20, random_state=0
)

# Evaluate each Naive Bayes variant.
f_modelo(GaussianNB(), "Gauss")
f_modelo(MultinomialNB(), "Multinomial")
f_modelo(ComplementNB(), "Complement")
f_modelo(BernoulliNB(), "Bernoulli")
# f_modelo(CategoricalNB(), "Categorical")  # raised an error when tried (cause not recorded)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment