Skip to content

Instantly share code, notes, and snippets.

Hannes Hapke hanneshapke

Block or report user

Report or block hanneshapke

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
@hanneshapke
hanneshapke / tfx-pipeline-for-bert-preprocessing.ipynb
Last active Apr 2, 2020
TFX Pipeline for Bert Preprocessing.ipynb
View tfx-pipeline-for-bert-preprocessing.ipynb
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
View model_architecture.py
x = tf.keras.layers.Dense(256, activation='relu')(pooled_output)
dense = tf.keras.layers.Dense(64, activation='relu')(x)
pred = tf.keras.layers.Dense(1, activation='sigmoid')(dense)
model = tf.keras.Model(
inputs=[inputs['input_word_ids'],
inputs['input_mask'],
inputs['input_type_ids']],
outputs=pred
)
View load_bert.py
bert_layer = load_bert_layer()
pooled_output, _ = bert_layer(
[input_word_ids,
input_mask,
input_type_ids
]
)
View model_input.py
feature_spec = tf_transform_output.transformed_feature_spec()
feature_spec.pop(_LABEL_KEY)
inputs = {
key: tf.keras.layers.Input(
shape=(max_seq_length),
name=key,
dtype=tf.int32)
for key in feature_spec.keys()
}
View model_architecture.py
feature_spec = tf_transform_output.transformed_feature_spec()
feature_spec.pop(_LABEL_KEY)
inputs = {key: tf.keras.layers.Input(shape=(max_seq_length),
name=key, dtype=tf.int32)
for key in feature_spec.keys()}
input_word_ids = tf.cast(inputs["input_word_ids"], dtype=tf.int32)
input_mask = tf.cast(inputs["input_mask"], dtype=tf.int32)
input_type_ids = tf.cast(inputs["input_type_ids"], dtype=tf.int32)
View using_tft_with_model_fit.py
tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
train_dataset = _input_fn(fn_args.train_files, tf_transform_output, 32)
eval_dataset = _input_fn(fn_args.eval_files, tf_transform_output, 32)
...
model.fit(
train_dataset,
validation_data=eval_dataset,
...
)
View bert_data_structure.py
{
'input_mask': array(
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]),
'input_type_ids': array(
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),
'input_word_ids': array(
[ 101, 2023, 3319, 3397, 27594, 2545, 2005, 2216, 2040, ..., 2014, 102]),
'label': array([0])
}
View mirrored_strategy.py
mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():
model = get_model(tf_transform_output=tf_transform_output)
View run_fn.py
def run_fn(fn_args: TrainerFnArgs):
tf_transform_output = tft.TFTransformOutput(fn_args.transform_output)
train_dataset = _input_fn(
fn_args.train_files, tf_transform_output, 32)
eval_dataset = _input_fn(
fn_args.eval_files, tf_transform_output, 32)
mirrored_strategy = tf.distribute.MirroredStrategy()
with mirrored_strategy.scope():
model = get_model(tf_transform_output=tf_transform_output)
View cast_between_tft_tfhub.py
input_word_ids = tf.cast(inputs["input_word_ids"], dtype=tf.int32)
input_mask = tf.cast(inputs["input_mask"], dtype=tf.int32)
input_type_ids = tf.cast(inputs["input_type_ids"], dtype=tf.int32)
You can’t perform that action at this time.