Created
August 21, 2019 20:08
-
-
Save rjurney/d4a9f6e50105834a640c8a78a38378e3 to your computer and use it in GitHub Desktop.
Working code using MirroredStrategy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Model imports | |
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping | |
from tensorflow.keras.layers import ( Input, Embedding, GlobalMaxPooling1D, Conv1D, Dense, Activation, | |
Dropout, Lambda, BatchNormalization, concatenate ) | |
from tensorflow.keras.models import Model, Sequential | |
from tensorflow.keras.optimizers import Adam, SGD | |
from tensorflow.keras.preprocessing.text import Tokenizer | |
# Fit imports | |
from tensorflow.keras.losses import hinge, mae, binary_crossentropy, kld, Huber, squared_hinge | |
# Hyperparameter/method search space | |
import itertools | |
# Per-replica batch size used by train_model's fit() call.
# NOTE(review): the divisor assumes 4 GPUs, but build_model's MirroredStrategy
# lists only two devices ("/gpu:2", "/gpu:3"). Also, Keras fit() under a
# distribution strategy is documented as taking the *global* batch size —
# confirm this division is still intended.
DIST_BATCH_SIZE = int(BATCH_SIZE / 4)

print('Starting experiment loop...')

# Hyperparameter/method search space: every combination below is trained once.
learning_rates = [0.01, 0.001, 0.0005, 0.0001]  # , 0.00005]

# Each entry must be usable as `loss=` in Model.compile(), i.e. a callable
# taking (y_true, y_pred) or a Loss *instance*. `Huber` is a Loss class, so it
# has to be instantiated; passing the bare class would invoke its constructor
# with the tensors instead of computing a loss.
# NOTE(review): `hamming_loss` is not imported in this excerpt — presumably it
# is defined elsewhere; verify it is a Keras-compatible loss function.
losses = [binary_crossentropy, hinge, squared_hinge, mae, kld, Huber(), hamming_loss]

activations = ['selu']
optimizers = ['adam']
dropout_ratios = [0.2]
filter_lengths = [128]

# `None` means "train without this kind of weighting".
class_weight_set = [None, train_class_weights]
sample_weight_set = [None, train_sample_weights]
test_sample_weight_set = [None]  # , test_sample_weights]

# Lazy cartesian product; the order of the factors here must match the order
# of the names unpacked by the experiment loop's `for` statement.
args = itertools.product(
    learning_rates,
    losses,
    activations,
    optimizers,
    dropout_ratios,
    filter_lengths,
    class_weight_set,
    sample_weight_set,
    test_sample_weight_set,
)
# tqdm_notebook | |
for learning_rate, loss_function, activation, optimizer, dropout_ratio, filter_length, class_weights, \ | |
sample_weights, test_sample_weights in args: | |
# | |
# Build ze model... | |
# | |
def build_model(
    token_count=20000,
    max_words=100,
    embedding_dim=50,
    label_count=y_train.shape[1],
    dropout_ratio=0.2,
    filter_length=filter_length,
    loss_function='binary_crossentropy',
    learning_rate=0.001,
    optimizer=Adam,
    activation='relu'
):
    """Build and compile the multi-kernel Conv1D text model for one experiment.

    The network embeds the integer token input, runs four parallel
    Conv1D -> BatchNormalization -> Dropout -> Activation -> GlobalMaxPooling1D
    branches (kernel sizes 3, 4, 5 and 6), concatenates them, and finishes with
    a 32-unit dense layer and a sigmoid output layer of `label_count` units.

    Args:
        token_count: Vocabulary size of the embedding layer.
        max_words: Currently unused; the input width is taken from
            `X_train.shape[1]`. Kept so existing callers keep working.
        embedding_dim: Width of each embedding vector.
        label_count: Number of units in the sigmoid output layer.
        dropout_ratio: Rate used by every Dropout layer.
        filter_length: Number of filters in each Conv1D branch.
        loss_function: Loss passed to `Model.compile`.
        learning_rate: Learning rate given to the optimizer built here.
        optimizer: `'adam'` or `'sgd'` (case-insensitive) to build that
            optimizer with `learning_rate`; an optimizer class, which is
            instantiated with `learning_rate`; or anything else (an optimizer
            instance or another Keras identifier), which is passed through to
            `Model.compile` unchanged.
        activation: Activation used in the convolution branches and the
            hidden dense layer.

    Returns:
        The compiled Keras `Model`.
    """
    mirrored_strategy = tf.distribute.MirroredStrategy(devices=["/gpu:2", "/gpu:3"])

    # The layers, optimizer and metrics are all created inside the strategy
    # scope so that their variables are mirrored across the listed devices.
    with mirrored_strategy.scope():
        print('Number of devices: {}'.format(mirrored_strategy.num_replicas_in_sync))

        # NOTE: an earlier attempt to wrap X_train / X_test with
        # experimental_make_numpy_dataset + experimental_distribute_dataset
        # was disabled; this function only builds the model.

        hashed_input = Input(shape=(X_train.shape[1],), dtype='int64')
        emb = Embedding(token_count, embedding_dim, weights=[embedding_matrix])(hashed_input)

        # One convolution branch per kernel size, i.e. per n-gram width.
        pooled_branches = []
        for kernel_size in (3, 4, 5, 6):
            branch = Conv1D(filters=filter_length, kernel_size=kernel_size)(emb)
            branch = BatchNormalization()(branch)
            branch = Dropout(dropout_ratio)(branch)
            branch = Activation(activation)(branch)
            pooled_branches.append(GlobalMaxPooling1D()(branch))

        # Gather all convolution branches
        cnct = concatenate(pooled_branches, axis=1)
        drp1 = Dropout(dropout_ratio)(cnct)

        dns1 = Dense(32, activation=activation)(drp1)
        btch1 = BatchNormalization()(dns1)
        drp2 = Dropout(dropout_ratio)(btch1)

        # Honour the `label_count` argument instead of re-reading y_train.
        out = Dense(label_count, activation='sigmoid')(drp2)

        text_model = Model(
            inputs=hashed_input,
            outputs=out
        )

        # Resolve the optimizer so that `learning_rate` is actually applied.
        # (The previous code tested and reassigned `activation` here, so the
        # optimizer was always left as a bare string with its default rate.)
        if isinstance(optimizer, str):
            optimizer_key = optimizer.lower()
            if optimizer_key == 'adam':
                optimizer = Adam(
                    learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08
                )
            elif optimizer_key == 'sgd':
                optimizer = SGD(learning_rate=learning_rate)
        elif isinstance(optimizer, type):
            # An optimizer class (e.g. the default `Adam`) must be instantiated
            # before it can be handed to compile().
            optimizer = optimizer(learning_rate=learning_rate)

        # train_model derives its F1 score from the Precision and Recall
        # entries below, so keep both in this list.
        text_model.compile(
            optimizer=optimizer,
            loss=loss_function,
            metrics=[
                'categorical_accuracy',
                tf.keras.metrics.Precision(),
                tf.keras.metrics.Recall(),
                tf.keras.metrics.BinaryAccuracy(),
                tf.keras.metrics.Hinge(),
                tf.keras.metrics.AUC(),
                tf.keras.metrics.Accuracy(),
                tf.keras.metrics.MeanAbsoluteError(),
                tf.keras.metrics.MeanAbsolutePercentageError(),
                tf.keras.metrics.TruePositives(),
                tf.keras.metrics.FalsePositives(),
                tf.keras.metrics.TrueNegatives(),
                tf.keras.metrics.FalseNegatives()
            ]
        )

    text_model.summary()

    return text_model
# | |
# Train ze model... | |
# | |
def train_model(
    model=None,
    X_train=None,
    X_test=None,
    dropout_ratio=0.1,
    learning_rate=0.001,
    optimizer='adam',
    activation='relu',
    epochs=5,
    class_weights=None,
    sample_weights=None,
    test_sample_weights=None,
):
    """Train the model using the current parameters and evaluate performance.

    Args:
        model: Compiled model to fit and evaluate. When `None`, the
            module-level `text_model` is used (the previous behaviour).
        X_train: Training inputs, fitted against the module-level `y_train`.
        X_test: Validation/test inputs, scored against the module-level
            `y_test`.
        dropout_ratio: Unused here; accepted for call-site compatibility.
        learning_rate: Only used to build the description string.
        optimizer: Only used to build the description string.
        activation: Only used to build the description string.
        epochs: Number of epochs passed to `fit`.
        class_weights: Passed to `fit` as `class_weight`.
        sample_weights: Passed to `fit` as `sample_weight`.
        test_sample_weights: Currently unused (evaluation is unweighted).

    Returns:
        A `(description, metrics)` tuple where `description` is a
        space-separated summary of the experiment settings and `metrics` is a
        list of `(value, name)` pairs: one per entry of the model's
        `metrics_names`, followed by `(f1, 'val_f1_score')`.
    """
    # Use the model we were handed; fall back to the global for old callers.
    net = text_model if model is None else model

    model_name = ' '.join([
        str(loss_function),
        str(learning_rate),
        str(optimizer),
        str(activation),
        str(epochs),
        'class_weights' if isinstance(class_weights, dict) else 'no_class_weights',
        'sample_weights' if isinstance(sample_weights, np.ndarray) else 'no_sample_weights',
    ])
    print(model_name)

    # ReduceLROnPlateau / EarlyStopping / ModelCheckpoint are currently disabled.
    callbacks = []

    net.fit(
        X_train,
        y_train,
        class_weight=class_weights,
        sample_weight=sample_weights,
        epochs=epochs,
        batch_size=DIST_BATCH_SIZE,
        validation_data=(X_test, y_test),
        callbacks=callbacks
    )

    # Evaluate to our log and return a description key and a list of metrics
    accr = net.evaluate(X_test, y_test)  # , sample_weight=test_sample_weights)

    metric_names = list(net.metrics_names)
    # float() accepts both NumPy scalars and plain Python floats.
    scores = [float(value) for value in accr]

    def _score_for(prefix, fallback_index):
        """Return the score whose metric name starts with `prefix`."""
        # Keras may suffix repeated metric names (e.g. 'precision_3'), so
        # match by prefix rather than by exact name.
        for name, score in zip(metric_names, scores):
            if name.startswith(prefix):
                return score
        # Position in evaluate() output given build_model's compile order:
        # [loss, categorical_accuracy, precision, recall, ...].
        return scores[fallback_index]

    precision = _score_for('precision', 2)
    recall = _score_for('recall', 3)

    # F1 is the harmonic mean of precision and recall; define it as 0 when
    # both are 0 instead of dividing by zero.
    denominator = precision + recall
    f1_score = 2.0 * precision * recall / denominator if denominator > 0 else 0.0

    labelled_scores = list(zip(scores + [f1_score], metric_names + ['val_f1_score']))
    return model_name, labelled_scores
# | |
# main() | |
# | |
# Body of the experiment loop: build, train and log one hyper-parameter combination.
# Pass the swept `filter_length` and `dropout_ratio` through to the model
# instead of hard-coded literals, so the search space is actually honoured.
text_model = build_model(
    token_count=TOKEN_COUNT,
    max_words=100,
    embedding_dim=50,
    label_count=y_train.shape[1],
    filter_length=filter_length,
    loss_function=loss_function,
    learning_rate=learning_rate,
    optimizer=optimizer,
    activation=activation,
    dropout_ratio=dropout_ratio
)

description_key, accuracies = train_model(
    model=text_model,
    X_train=X_train,
    X_test=X_test,
    dropout_ratio=dropout_ratio,
    learning_rate=learning_rate,
    optimizer=optimizer,
    activation=activation,
    epochs=1,
    class_weights=class_weights,
    sample_weights=sample_weights,
    test_sample_weights=test_sample_weights,
)

log_record = (description_key, accuracies)
performance_log.append(log_record)

# Rewrite the whole log after every experiment so the file on disk is always
# complete, even if a later combination crashes the run.
with open('data/performance_log.jsonl', 'w') as f:
    for record in performance_log:
        f.write(json.dumps(record) + '\n')

print(log_record)
print()

# Runs once, after the loop has exhausted every combination.
print('Completed experiment loop!')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment