Skip to content

Instantly share code, notes, and snippets.

@BrunoBiz
Created November 29, 2022 01:50
Show Gist options
  • Save BrunoBiz/dd45a1b7608a1cda38e0c08d10dedf0d to your computer and use it in GitHub Desktop.
Save BrunoBiz/dd45a1b7608a1cda38e0c08d10dedf0d to your computer and use it in GitHub Desktop.
MachineLearning
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
# Modelos
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import ComplementNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import CategoricalNB
# GENERIC FUNCTION FOR THE NAIVE BAYES MODELS
def f_modelo(funcao, funcao_name):
    """Fit one Naive Bayes estimator and print its accuracy figures.

    The estimator is fitted on the module-level ``x_treino``/``y_treino``
    split and scored on ``x_teste``/``y_teste``; the same estimator object
    is then handed to ``cross_val_score`` over the full
    ``previsores``/``alvo`` arrays using the module-level ``kfold``
    splitter. Both accuracies are printed as percentages.

    Args:
        funcao: Estimator instance exposing ``fit`` and ``predict``.
        funcao_name: Label printed in the section header for this model.

    Returns:
        None. Results are only written to standard output.
    """
    # Section header identifying the model under evaluation.
    print("============================ " + funcao_name)

    # Hold-out evaluation on the train/test split.
    funcao.fit(x_treino, y_treino)
    previsoes_teste = funcao.predict(x_teste)
    # Extra diagnostics, left disabled as in the original:
    # print(confusion_matrix(y_teste, previsoes_teste))
    # print(classification_report(y_teste, previsoes_teste))
    acuracia_teste = accuracy_score(y_teste, previsoes_teste)
    print("Acurácia...: %.2f%%" % (acuracia_teste * 100.0))

    # Cross-validation over the complete dataset.
    pontuacoes_cv = cross_val_score(funcao, previsores, alvo, cv=kfold)
    acuracia_media_cv = pontuacoes_cv.mean()
    print("Acurácia CV: %.2f%%" % (acuracia_media_cv * 100.0))
    print("\n")
# Cross-validation splitter read by f_modelo: 30 shuffled folds with a fixed
# seed. (A commented-out earlier variant of this line used random_state=10.)
kfold = KFold(n_splits=30, shuffle=True, random_state=0)

# Console display settings: widen the printed output and raise the column
# limit so the dataset's columns are shown on the console (the original
# author notes that only 5 were shown by default).
desired_width = 320
pd.set_option('display.width', desired_width)
np.set_printoptions(linewidth=desired_width)
pd.set_option('display.max_columns', 15)

# Load the CSV straight from the project folder (path is relative to the
# current working directory).
ds_stroke = pd.read_csv('STROKE.csv', sep=',', encoding='iso-8859-1')

# Uncomment to inspect the raw data:
# print(ds_stroke.head(20))
# print(ds_stroke.dtypes)
# print(ds_stroke.shape)

# Null handling: missing BMI values are replaced with 0.
# The result is assigned back to the column instead of calling
# fillna(..., inplace=True) on `ds_stroke['bmi']`: an in-place call on a
# column selection triggers a chained-assignment warning in recent pandas and
# leaves the DataFrame untouched when Copy-on-Write is active.
ds_stroke['bmi'] = ds_stroke['bmi'].fillna(0)

# Data standardisation: text categories are converted to integer codes.
# Columns being encoded (index, name, dtype as reported by the author):
#    1  gender          object
#    5  ever_married    object
#    6  work_type       object
#    7  Residence_type  object
#   10  smoking_status  object
category_codes = {
    'gender': {'Female': 0, 'Male': 1, 'Other': 2},
    'ever_married': {'Yes': 1, 'No': 0},
    'work_type': {'Private': 0, 'Self-employed': 1, 'children': 2,
                  'Govt_job': 3, 'Never_worked': 4},
    'Residence_type': {'Urban': 0, 'Rural': 1},
    'smoking_status': {'never smoked': 0, 'Unknown': 1,
                       'formerly smoked': 2, 'smokes': 3},
}
for column_name, codes in category_codes.items():
    encoded = ds_stroke[column_name].map(codes)
    # map() yields NaN for anything missing from the table; stop here with a
    # clear message rather than letting unencoded values reach the models.
    unmapped = ds_stroke.loc[encoded.isna(), column_name].unique()
    if len(unmapped) > 0:
        raise ValueError(
            "Unexpected values in column %r: %r" % (column_name, list(unmapped))
        )
    # Assigned back for the same reason as the BMI column above.
    ds_stroke[column_name] = encoded

# Predictors (columns 0-10) and target (column 11).
# NOTE(review): column 0 is included as a predictor; if it is only a record
# identifier it probably should be excluded - confirm against the CSV.
previsores = ds_stroke.iloc[:, 0:11].values
alvo = ds_stroke.iloc[:, 11].values

# Train/test split: 20% of the rows are held out for testing, fixed seed.
x_treino, x_teste, y_treino, y_teste = train_test_split(
    previsores, alvo, test_size=0.20, random_state=0
)

# Run every learning method through the shared evaluation routine.
f_modelo(GaussianNB(), "Gauss")
f_modelo(MultinomialNB(), "Multinomial")
f_modelo(ComplementNB(), "Complement")
f_modelo(BernoulliNB(), "Bernoulli")
# CategoricalNB is left disabled: the original author reported it raised an error.
# f_modelo(CategoricalNB(), "Categorical")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment