Skip to content

Instantly share code, notes, and snippets.

@rjurney
Created August 21, 2019 20:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rjurney/d4a9f6e50105834a640c8a78a38378e3 to your computer and use it in GitHub Desktop.
Save rjurney/d4a9f6e50105834a640c8a78a38378e3 to your computer and use it in GitHub Desktop.
Working code using MirroredStrategy
## Model imports
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint, EarlyStopping
from tensorflow.keras.layers import ( Input, Embedding, GlobalMaxPooling1D, Conv1D, Dense, Activation,
Dropout, Lambda, BatchNormalization, concatenate )
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing.text import Tokenizer
# Fit imports
from tensorflow.keras.losses import hinge, mae, binary_crossentropy, kld, Huber, squared_hinge
# Hyperparameter/method search space
import itertools
# Per-call batch size handed to model.fit(): the configured BATCH_SIZE split four ways.
# NOTE(review): the divisor assumes 4 GPUs, but build_model() creates its
# MirroredStrategy on two devices ("/gpu:2", "/gpu:3") — confirm the intended value.
DIST_BATCH_SIZE = int(BATCH_SIZE / 4)

print('Starting experiment loop...')

# Hyperparameter / method search space. Every combination of the values below
# is trained and evaluated once by the experiment loop.
learning_rates = [0.01, 0.001, 0.0005, 0.0001]  # , 0.00005]
losses = [
    binary_crossentropy,
    hinge,
    squared_hinge,
    mae,
    kld,
    # Huber is a Loss *class*; Model.compile() needs an instance (or a plain
    # loss function), so it is instantiated here rather than passed bare.
    Huber(),
    # NOTE(review): hamming_loss is not imported in the visible part of the
    # file — presumably defined or imported elsewhere; verify.
    hamming_loss,
]
activations = ['selu']
optimizers = ['adam']
dropout_ratios = [0.2]
filter_lengths = [128]
class_weight_set = [None, train_class_weights]
sample_weight_set = [None, train_sample_weights]
test_sample_weight_set = [None]  # , test_sample_weights]

# Lazy cartesian product; the tuple order here must match the unpacking order
# in the experiment loop's `for` statement.
args = itertools.product(
    learning_rates,
    losses,
    activations,
    optimizers,
    dropout_ratios,
    filter_lengths,
    class_weight_set,
    sample_weight_set,
    test_sample_weight_set,
)
# tqdm_notebook
# Run one experiment per hyperparameter combination yielded by `args`; the
# unpacking order below mirrors the argument order of itertools.product().
# NOTE(review): indentation was lost when this file was extracted. The
# statements that follow (model build, training, logging) presumably form this
# loop's body — confirm against the original gist before running.
for learning_rate, loss_function, activation, optimizer, dropout_ratio, filter_length, class_weights, \
sample_weights, test_sample_weights in args:
#
# Build ze model...
#
def build_model(
    token_count=20000,
    max_words=100,
    embedding_dim=50,
    label_count=y_train.shape[1],
    dropout_ratio=0.2,
    filter_length=filter_length,
    loss_function='binary_crossentropy',
    learning_rate=0.001,
    optimizer=Adam,
    activation='relu'
):
    """Build and compile the multi-kernel Conv1D text classifier for one experiment.

    The network embeds the integer token input, runs four parallel
    Conv1D -> BatchNormalization -> Dropout -> Activation -> GlobalMaxPooling1D
    branches (kernel sizes 3, 4, 5 and 6), concatenates them, and finishes with
    a 32-unit dense layer and a sigmoid output layer. The model is created and
    compiled inside a ``tf.distribute.MirroredStrategy`` scope on ``/gpu:2``
    and ``/gpu:3``.

    Relies on names from the enclosing scope: ``tf``, ``X_train`` (its second
    dimension fixes the input length) and ``embedding_matrix`` (initial
    embedding weights).

    Args:
        token_count: Vocabulary size of the embedding layer.
        max_words: Currently unused; the input length comes from
            ``X_train.shape[1]``.
        embedding_dim: Width of each embedding vector.
        label_count: Number of units in the sigmoid output layer.
        dropout_ratio: Dropout rate used by every Dropout layer.
        filter_length: Number of filters in each Conv1D branch.
        loss_function: Loss passed to ``Model.compile``.
        learning_rate: Learning rate applied when the optimizer is built here.
        optimizer: ``'adam'`` or ``'sgd'`` (built here with ``learning_rate``),
            an optimizer class (instantiated with ``learning_rate``), or
            anything else ``Model.compile`` accepts, which is passed through.
        activation: Activation used in the conv branches and the dense layer.

    Returns:
        The compiled ``tf.keras`` ``Model``.
    """
    kernel_sizes = (3, 4, 5, 6)  # one convolution branch per n-gram width

    mirrored_strategy = tf.distribute.MirroredStrategy(devices=["/gpu:2", "/gpu:3"])
    with mirrored_strategy.scope():
        print('Number of devices: {}'.format(mirrored_strategy.num_replicas_in_sync))

        hashed_input = Input(shape=(X_train.shape[1],), dtype='int64')
        emb = Embedding(token_count, embedding_dim, weights=[embedding_matrix])(hashed_input)

        # Specify each convolution layer and its kernel size, i.e. n-grams.
        # Layers are created branch by branch, in increasing kernel size.
        pooled_branches = []
        for kernel_size in kernel_sizes:
            branch = Conv1D(filters=filter_length, kernel_size=kernel_size)(emb)
            branch = BatchNormalization()(branch)
            branch = Dropout(dropout_ratio)(branch)
            branch = Activation(activation)(branch)
            pooled_branches.append(GlobalMaxPooling1D()(branch))

        # Gather all convolution branches
        cnct = concatenate(pooled_branches, axis=1)
        drp1 = Dropout(dropout_ratio)(cnct)
        dns1 = Dense(32, activation=activation)(drp1)
        btch1 = BatchNormalization()(dns1)
        drp2 = Dropout(dropout_ratio)(btch1)
        out = Dense(label_count, activation='sigmoid')(drp2)

        text_model = Model(
            inputs=hashed_input,
            outputs=out
        )

        # Turn the optimizer spec into an instance that carries learning_rate.
        # (Previously this branch tested `activation`, so a string optimizer
        # was compiled with Keras' default learning rate.)
        if isinstance(optimizer, str):
            optimizer_key = optimizer.lower()
            if optimizer_key == 'adam':
                optimizer = Adam(learning_rate=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
            elif optimizer_key == 'sgd':
                optimizer = SGD(learning_rate=learning_rate)
            # Any other string is left for Keras to resolve by name.
        elif isinstance(optimizer, type):
            # An optimizer class (e.g. the default `Adam`) must be instantiated.
            optimizer = optimizer(learning_rate=learning_rate)

        # NOTE: train_model() locates precision/recall among these metrics, so
        # keep the order of the first entries stable.
        text_model.compile(
            optimizer=optimizer,
            loss=loss_function,
            metrics=[
                'categorical_accuracy',
                tf.keras.metrics.Precision(),
                tf.keras.metrics.Recall(),
                tf.keras.metrics.BinaryAccuracy(),
                tf.keras.metrics.Hinge(),
                tf.keras.metrics.AUC(),
                tf.keras.metrics.Accuracy(),
                tf.keras.metrics.MeanAbsoluteError(),
                tf.keras.metrics.MeanAbsolutePercentageError(),
                tf.keras.metrics.TruePositives(),
                tf.keras.metrics.FalsePositives(),
                tf.keras.metrics.TrueNegatives(),
                tf.keras.metrics.FalseNegatives()
            ]
        )

    text_model.summary()
    return text_model
#
# Train ze model...
#
def train_model(
    model=None,
    X_train=None,
    X_test=None,
    dropout_ratio=0.1,
    learning_rate=0.001,
    optimizer='adam',
    activation='relu',
    epochs=5,
    class_weights=None,
    sample_weights=None,
    test_sample_weights=None,
):
    """Train the model with the current parameters and evaluate its performance.

    Fits ``model`` on ``X_train`` / ``y_train``, validating on
    ``X_test`` / ``y_test``, then evaluates on the test split and appends an
    F1 score derived from the reported precision and recall.

    Relies on names from the enclosing scope: ``y_train``, ``y_test``,
    ``DIST_BATCH_SIZE``, ``loss_function`` (used only in the run label) and
    ``np``.

    Args:
        model: Compiled Keras model to train. When ``None``, falls back to the
            enclosing scope's ``text_model``.
        X_train: Training inputs.
        X_test: Test inputs, used for validation and for the final evaluation.
        dropout_ratio: Unused here.
        learning_rate: Only used to label the run.
        optimizer: Only used to label the run.
        activation: Only used to label the run.
        epochs: Number of training epochs.
        class_weights: Passed to ``fit`` as ``class_weight``.
        sample_weights: Passed to ``fit`` as ``sample_weight``.
        test_sample_weights: Currently unused.

    Returns:
        A ``(model_name, metrics)`` tuple, where ``metrics`` is a list of
        ``(value, metric_name)`` pairs for every evaluated metric, followed by
        ``(f1, 'val_f1_score')``.
    """
    if model is None:
        # Older call sites relied on the enclosing scope's model.
        model = text_model

    model_name = ' '.join([
        str(loss_function),
        str(learning_rate),
        str(optimizer),
        str(activation),
        str(epochs),
        'class_weights' if isinstance(class_weights, dict) else 'no_class_weights',
        'sample_weights' if isinstance(sample_weights, np.ndarray) else 'no_sample_weights',
    ])
    print(model_name)

    # No callbacks are active for these short search runs.
    callbacks = []

    model.fit(
        X_train,
        y_train,
        class_weight=class_weights,
        sample_weight=sample_weights,
        epochs=epochs,
        batch_size=DIST_BATCH_SIZE,
        validation_data=(X_test, y_test),
        callbacks=callbacks
    )

    # Evaluate to our log and return a description key and a list of metrics.
    # float() accepts both NumPy scalars and plain Python floats.
    metric_values = [float(value) for value in model.evaluate(X_test, y_test)]
    metric_names = list(model.metrics_names)

    def metric_value(prefix, fallback_index):
        # Keras may suffix metric names ('precision_1', ...) when several
        # models are built in one process, so match on the prefix. The fallback
        # index is the metric's position in build_model()'s compile() list,
        # offset by one for the leading loss value.
        for name, value in zip(metric_names, metric_values):
            if name.startswith(prefix):
                return value
        return metric_values[fallback_index]

    precision = metric_value('precision', 2)
    recall = metric_value('recall', 3)

    # F1 is the harmonic mean of precision and recall; define it as 0 when both
    # are 0 rather than dividing by zero / logging NaN.
    denominator = precision + recall
    f1_score = 2.0 * precision * recall / denominator if denominator else 0.0

    scored_metrics = list(zip(metric_values + [f1_score], metric_names + ['val_f1_score']))
    return model_name, scored_metrics
#
# main()
#
# Build a fresh model for this hyperparameter combination. filter_length and
# dropout_ratio come from the search space instead of being hard-coded, so
# extending filter_lengths / dropout_ratios actually changes the model.
text_model = build_model(
    token_count=TOKEN_COUNT,
    max_words=100,
    embedding_dim=50,
    label_count=y_train.shape[1],
    filter_length=filter_length,
    loss_function=loss_function,
    learning_rate=learning_rate,
    optimizer=optimizer,
    activation=activation,
    dropout_ratio=dropout_ratio
)

# Train for a single epoch and collect the evaluation metrics.
description_key, accuracies = train_model(
    model=text_model,
    X_train=X_train,
    X_test=X_test,
    dropout_ratio=dropout_ratio,
    learning_rate=learning_rate,
    optimizer=optimizer,
    activation=activation,
    epochs=1,
    class_weights=class_weights,
    sample_weights=sample_weights,
    test_sample_weights=test_sample_weights,
)

log_record = (description_key, accuracies)
performance_log.append(log_record)

# Rewrite the complete JSON-lines log so the file on disk always reflects every
# experiment finished so far.
with open('data/performance_log.jsonl', 'w') as f:
    f.writelines(json.dumps(record) + '\n' for record in performance_log)

print(log_record)
print()

# NOTE(review): presumably runs once, after the experiment loop has finished —
# the original indentation was lost in extraction; confirm.
print('Completed experiment loop!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment