Created
August 21, 2020 12:20
-
-
Save agibsonccc/bb9e2cdc4ab1058c6ffc600de8c26a33 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow as tf | |
import numpy as np | |
import keras | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import LabelEncoder,OneHotEncoder | |
from keras import backend as K | |
from keras.utils import to_categorical | |
# Custom recall / precision / F1 metric functions adapted from:
# https://datascience.stackexchange.com/questions/45165/how-to-get-accuracy-f1-precision-and-recall-for-a-keras-model
def recall_m(y_true, y_pred):
    """Compute recall of ``y_pred`` against ``y_true`` with Keras backend ops.

    Both the element-wise product and the labels are clipped to [0, 1] and
    rounded before being counted. The sums are taken without an axis
    argument, so one value covering every element passed in is produced.

    Args:
        y_true: Ground-truth tensor (presumably one-hot labels -- confirm
            against the caller).
        y_pred: Prediction tensor, multiplied element-wise with ``y_true``.

    Returns:
        Matched positives divided by labelled positives; ``K.epsilon()`` is
        added to the denominator so an all-zero label tensor does not divide
        by zero.
    """
    matched = K.round(K.clip(y_true * y_pred, 0, 1))
    labelled = K.round(K.clip(y_true, 0, 1))
    return K.sum(matched) / (K.sum(labelled) + K.epsilon())
def precision_m(y_true, y_pred):
    """Compute precision of ``y_pred`` against ``y_true`` with Keras backend ops.

    Both the element-wise product and the predictions are clipped to [0, 1]
    and rounded before being counted. The sums are taken without an axis
    argument, so one value covering every element passed in is produced.

    Args:
        y_true: Ground-truth tensor (presumably one-hot labels -- confirm
            against the caller).
        y_pred: Prediction tensor, multiplied element-wise with ``y_true``.

    Returns:
        Matched positives divided by predicted positives; ``K.epsilon()`` is
        added to the denominator so a prediction tensor that rounds to all
        zeros does not divide by zero.
    """
    matched = K.round(K.clip(y_true * y_pred, 0, 1))
    predicted = K.round(K.clip(y_pred, 0, 1))
    return K.sum(matched) / (K.sum(predicted) + K.epsilon())
def f1_m(y_true, y_pred):
    """Combine ``precision_m`` and ``recall_m`` into an F1 score.

    Args:
        y_true: Ground-truth tensor, forwarded unchanged to both helpers.
        y_pred: Prediction tensor, forwarded unchanged to both helpers.

    Returns:
        ``2 * (p * r) / (p + r + K.epsilon())`` where ``p`` and ``r`` are the
        precision and recall; the epsilon keeps the division defined when
        both are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    denominator = p + r + K.epsilon()
    return 2 * ((p * r) / denominator)
def create_model(input_dim=512, num_classes=2, dropout_rate=0.2):
    """Build and compile the dense softmax classifier.

    The network is Dense(600, relu) -> Dropout -> Dense(300, relu) ->
    Dropout -> Dense(num_classes, softmax), compiled with the Adam optimizer,
    categorical cross-entropy loss, and the custom ``f1_m``, ``precision_m``
    and ``recall_m`` metrics (in that order).

    Args:
        input_dim: Number of input features per sample. Defaults to 512.
        num_classes: Width of the softmax output layer. Defaults to 2.
        dropout_rate: Rate used by both Dropout layers. Defaults to 0.2.

    Returns:
        The compiled Sequential model.
    """
    # Build the container from the same `keras` namespace as the layers and
    # as the backend (`K`) used by the custom metrics. Combining a
    # `tf.keras` Sequential with standalone `keras.layers` objects fails on
    # installations where the two packages are separate implementations.
    model = keras.models.Sequential([
        keras.layers.Dense(600, activation='relu', input_shape=(input_dim,)),
        keras.layers.Dropout(dropout_rate),
        keras.layers.Dense(300, activation='relu'),
        keras.layers.Dropout(dropout_rate),
        keras.layers.Dense(num_classes, activation='softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=[f1_m, precision_m, recall_m],
    )
    return model
def get_data_from_csv():
    """Load ``spam.csv`` and split it into training and test sets.

    The first 512 columns of each row are taken as float32 features and the
    last column as an integer class label, which is one-hot encoded with
    ``to_categorical``. The types, dtypes and shapes of both arrays are
    printed before splitting.

    Returns:
        The result of ``train_test_split`` with 25% of the rows held out,
        which the caller unpacks as ``X_train, X_test, y_train, y_test``.
    """
    table = np.genfromtxt('spam.csv', delimiter=',')
    features = table[:, :512].astype('float32')
    labels = to_categorical(table[:, -1].astype('int'))
    print(type(features), features.dtype, type(labels), labels.dtype)
    print(features.shape, labels.shape)
    return train_test_split(features, labels, test_size=0.25)
# Train the classifier on the spam data set.
model = create_model()
X_train, X_test, y_train, y_test = get_data_from_csv()
model.fit(X_train, y_train, epochs=20, batch_size=100)

# Persist the exact split used for this run; the split is random, so these
# files are the only record of which rows were trained on and which were
# held out.
np.save('X_train.npy', X_train, allow_pickle=False)
np.save('X_test.npy', X_test, allow_pickle=False)
np.save('y_train.npy', y_train, allow_pickle=False)
np.save('y_test.npy', y_test, allow_pickle=False)

# evaluate() yields the loss followed by the metrics passed to compile(),
# i.e. f1_m, precision_m, recall_m. The first metric is the F1 score, so it
# is reported under that name instead of being labelled "Accuracy".
_, f1_score, precision, recall = model.evaluate(X_test, y_test)
print('F1: %.2f' % (f1_score * 100))
print('Precision: %.2f' % (precision * 100))
print('Recall: %.2f' % (recall * 100))

model.save('test-spam.hdf5')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment