@tlkh
Created October 14, 2019 12:51
tf_transformers
import os
import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

import tensorflow_datasets
from transformers import BertTokenizer, TFBertForSequenceClassification, glue_convert_examples_to_features

# script parameters
BATCH_SIZE = 8
EVAL_BATCH_SIZE = BATCH_SIZE
USE_XLA = False
USE_AMP = False

tf.config.optimizer.set_jit(USE_XLA)
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": USE_AMP})

# Load tokenizer and model from pretrained model/vocabulary
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
model = TFBertForSequenceClassification.from_pretrained('bert-base-cased')

# Load dataset via TensorFlow Datasets
data, info = tensorflow_datasets.load('glue/mrpc', with_info=True)
train_examples = info.splits['train'].num_examples
valid_examples = info.splits['validation'].num_examples

# Prepare dataset for GLUE as a tf.data.Dataset instance
train_dataset = glue_convert_examples_to_features(data['train'], tokenizer, 512, 'mrpc')
valid_dataset = glue_convert_examples_to_features(data['validation'], tokenizer, 512, 'mrpc')
train_dataset = train_dataset.shuffle(128).batch(BATCH_SIZE).repeat(-1)
valid_dataset = valid_dataset.batch(EVAL_BATCH_SIZE)

# Prepare training: compile tf.keras model with optimizer, loss and metric
opt = tf.keras.optimizers.Adam(learning_rate=3e-5, epsilon=1e-08)
if USE_AMP:
    # loss scaling is currently required when using mixed precision
    opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(opt, 'dynamic')

loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
model.compile(optimizer=opt, loss=loss, metrics=[metric])

# Train and evaluate using tf.keras.Model.fit()
train_steps = train_examples // BATCH_SIZE
valid_steps = valid_examples // EVAL_BATCH_SIZE
history = model.fit(train_dataset, epochs=2, steps_per_epoch=train_steps,
                    validation_data=valid_dataset, validation_steps=valid_steps)
@iliaschalkidis

I just made a working example by loading the dataset as a tf.data.Dataset:

import numpy as np
import tensorflow as tf
from transformers import TFBertForSequenceClassification

gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)

# script parameters
BATCH_SIZE = 8
EVAL_BATCH_SIZE = BATCH_SIZE
USE_XLA = False
USE_AMP = False

tf.config.optimizer.set_jit(USE_XLA)
tf.config.optimizer.set_experimental_options({"auto_mixed_precision": USE_AMP})

# Load model from pretrained model/vocabulary
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=1000)


def gen():
    for x1, x2, x3, y in zip(np.zeros((80, 512), dtype=np.int32),
                             np.zeros((80, 512), dtype=np.int32),
                             np.zeros((80, 512), dtype=np.int32),
                             np.zeros((80, 1000), dtype=np.int32)):
        yield ({'input_ids': x1,
                'attention_mask': x2,
                'token_type_ids': x3}, y)


# Prepare dataset as a tf.data.Dataset from a generator
dataset = tf.data.Dataset.from_generator(gen,
            ({'input_ids': tf.int32,
              'attention_mask': tf.int32,
              'token_type_ids': tf.int32},
             tf.int32),
            ({'input_ids': tf.TensorShape([None]),
              'attention_mask': tf.TensorShape([None]),
              'token_type_ids': tf.TensorShape([None])},
             tf.TensorShape([None])))

train_dataset = dataset.shuffle(128).batch(BATCH_SIZE).repeat(-1)

# Prepare training: Compile tf.keras model with optimizer, loss and learning rate schedule
opt = tf.keras.optimizers.Adam(learning_rate=3e-5)
if USE_AMP:
    # loss scaling is currently required when using mixed precision
    opt = tf.keras.mixed_precision.experimental.LossScaleOptimizer(opt, 'dynamic')

# the model outputs logits, so use from_logits=True with the one-hot labels
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

# Train and evaluate using tf.keras.Model.fit()
model.fit(train_dataset, epochs=2, steps_per_epoch=80 // BATCH_SIZE)

It seems tf.data.Dataset.from_generator() is better optimized than Keras generators.
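
For comparison, the same dummy batches could also be served through a tf.keras.utils.Sequence. The DummySequence class below is an illustrative sketch, not part of the gist; its names and shapes are assumptions chosen to match the gen() generator above:

import numpy as np
import tensorflow as tf

class DummySequence(tf.keras.utils.Sequence):
    # Illustrative Keras Sequence yielding the same all-zero batches as gen() above
    def __init__(self, num_examples=80, seq_len=512, num_labels=1000, batch_size=8):
        self.num_examples = num_examples
        self.seq_len = seq_len
        self.num_labels = num_labels
        self.batch_size = batch_size

    def __len__(self):
        # number of batches per epoch; replaces steps_per_epoch in model.fit()
        return self.num_examples // self.batch_size

    def __getitem__(self, idx):
        # one batch of all-zero inputs and labels, matching the tf.data pipeline
        features = {'input_ids': np.zeros((self.batch_size, self.seq_len), dtype=np.int32),
                    'attention_mask': np.zeros((self.batch_size, self.seq_len), dtype=np.int32),
                    'token_type_ids': np.zeros((self.batch_size, self.seq_len), dtype=np.int32)}
        labels = np.zeros((self.batch_size, self.num_labels), dtype=np.int32)
        return features, labels

# model.fit(DummySequence(), epochs=2)  # same model; no steps_per_epoch needed

Timing both pipelines against the same model is the simplest way to check the claim on your own hardware.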
