Skip to content

Instantly share code, notes, and snippets.

View aateg's full-sized avatar
😛

aateg aateg

😛
View GitHub Profile
import nltk
from nltk.probability import FreqDist
from nltk.tokenize import word_tokenize

# --- Dom Casmurro corpus ---
# The 'machado' corpus and the 'punkt' resource are downloaded before the
# corpus file is read.
nltk.download('machado')
nltk.download('punkt')
# Raw text of the corpus file 'romance/marm08.txt' (Dom Casmurro).
corpus_dom_casmurro = nltk.corpus.machado.raw('romance/marm08.txt')

# --- Preprocessing ---
nltk.download('stopwords')
# List of Portuguese stopwords.
stopwords = nltk.corpus.stopwords.words('portuguese')

# --- Corpus access by id ---
# Id of the corpus; in this case the 'machado' id is used. The corpus has to
# be downloaded first, after which it is accessible.
nltk_id = 'machado'
nltk.download(nltk_id)
from sklearn import metrics
from sklearn.metrics import confusion_matrix, roc_auc_score

# Flatten the confusion matrix of the test labels vs. the predictions and
# unpack its four counts (true negatives, false positives, false negatives,
# true positives).
tn, fp, fn, tp = confusion_matrix(y_test, y_predito).ravel()

# Rates derived from the four counts above.
precision = tp / (tp + fp)
recall = tp / (tp + fn)
fpr = fp / (fp + tn)

# Area under the ROC curve. It takes the test values and the predicted
# values, and the result is a value in the interval [0, 1].
auc_score = roc_auc_score(y_true, y_scores)

# The sklearn function returns a tuple of NumPy arrays holding the false
# positive rate (FPR), the true positive rate (TPR) and the thresholds.
# Note that this rebinds `fpr`, replacing the scalar computed above.
fpr, tpr, thresholds = metrics.roc_curve(y_test, y_predito)
@aateg
aateg / nn_train.py
Last active October 27, 2019 06:07
Treinamento da Rede Neural
# Neural-network training routine.
# NOTE(review): only the beginning of this function is present here. The
# listing stops inside the nnRegCostFunction(...) call below and the body
# lines carry no indentation, so how epsilon and alpha are used, and what is
# returned, cannot be told from these lines.
def nnTrain(epsilon, alpha, max_iter):
# The input layer size is the second dimension of x_train; x_train and
# y_train are not parameters, they are read from the enclosing scope.
input_layer_size = x_train.shape[1]
hidden_layer_size = 800
num_labels = 10
# Initial parameter matrices for the two layers.
theta_1, theta_2 = randomInit(input_layer_size, hidden_layer_size, num_labels)
# One nnRegCostFunction call per iteration, for max_iter iterations; its
# result is unpacked into the cost value and one gradient per theta matrix.
for i in range(max_iter):
J_theta, Theta1_grad, Theta2_grad = nnRegCostFunction(
theta_1, theta_2, x_train, y_train,
@aateg
aateg / accuracy.py
Last active October 27, 2019 06:06
Cálculo da Acurácia
# Feeds X forward through the two parameter matrices theta_1 and theta_2.
# NOTE(review): the listing appears to be cut off after the output
# activation is computed (no return statement is visible) and the body lines
# carry no indentation.
def classifications(theta_1, theta_2, X,):
# Prepend a column of ones to X (one per row of X).
a1 = np.append(np.ones(shape=(X.shape[0], 1)), X, axis=1)
# First layer: multiply by the transposed theta_1, then apply the sigmoid
# helper (defined outside this block).
z2 = a1 @ theta_1.transpose()
a2 = sigmoid(z2)
# Prepend a column of ones to the first-layer activations as well.
a2 = np.append(np.ones(shape=(a2.shape[0], 1)), a2, axis=1)
# Second layer: multiply by the transposed theta_2, then apply sigmoid.
z3 = a2 @ theta_2.transpose()
a3 = sigmoid(z3)
@aateg
aateg / cost_forward_back.py
Last active October 27, 2019 06:08
Função de Custo Regularizada, Algoritmos de Feedforward e Backpropagation
# Regularized cost function with the feedforward and backpropagation steps.
# NOTE(review): only the first feedforward lines are present here. The
# listing is cut off after the first activation and the body lines carry no
# indentation, so the cost, regularization and gradient code is not visible.
def nnRegCostFunction(theta_1, theta_2, X, y, input_layer_size, hidden_layer_size, num_labels):
# Useful variables (m = number of images, n = number of pixels per image)
[m, n] = X.shape
# Feedforward algorithm
# Adds the bias unit (a column of ones) to the dataset
a1 = np.append(np.ones(shape=(X.shape[0], 1)), X, axis=1)
# First layer: multiply by the transposed theta_1, then apply the sigmoid
# helper (defined outside this block).
z2 = a1 @ theta_1.transpose()
a2 = sigmoid(z2)
@aateg
aateg / init_theta_nn.py
Last active October 27, 2019 06:08
Inicialização aleatória dos parâmetros theta em redes neurais
def randomInit(input_layer_size, hidden_layer_size, num_labels):
    """Randomly initialize the two parameter matrices of the neural network.

    Every entry is drawn from a normal distribution with mean 0 and standard
    deviation 0.1. Each matrix has one row per unit of the layer it produces
    and one column per unit of the layer it reads from, plus one extra column
    for the bias term.

    Args:
        input_layer_size: number of input units.
        hidden_layer_size: number of hidden units.
        num_labels: number of output units.

    Returns:
        A tuple ``(theta1_ini, theta2_ini)`` of NumPy arrays with shapes
        ``(hidden_layer_size, input_layer_size + 1)`` and
        ``(num_labels, hidden_layer_size + 1)``.
    """
    # First-layer parameters (including bias).
    size1 = (hidden_layer_size, input_layer_size + 1)
    theta1_ini = np.random.normal(0, .1, size=size1)

    # Second-layer parameters (including bias).
    size2 = (num_labels, hidden_layer_size + 1)
    theta2_ini = np.random.normal(0, .1, size=size2)

    # The training routine unpacks two values from this call, so both
    # matrices are handed back together.
    return theta1_ini, theta2_ini