Skip to content

Instantly share code, notes, and snippets.

Avatar

Hannes Hapke hanneshapke

View GitHub Profile
View cast_between_tft_tfhub.py
# Cast each of the three BERT input features to int32 in one pass; the
# unpacking order matches the order of the keys below.
input_word_ids, input_mask, input_type_ids = (
    tf.cast(inputs[feature_key], dtype=tf.int32)
    for feature_key in ("input_word_ids", "input_mask", "input_type_ids")
)
View model_architecture.py
# Feature spec of the transformed data; the label entry is removed so that
# only the model's input features remain.
feature_spec = tf_transform_output.transformed_feature_spec()
feature_spec.pop(_LABEL_KEY)

# One Keras input per remaining feature, named after the feature key.
# `shape` has to be a tuple: `(max_seq_length)` is just a parenthesized int,
# so the trailing comma is required to describe a 1-D sequence per example.
inputs = {
    key: tf.keras.layers.Input(
        shape=(max_seq_length,), name=key, dtype=tf.int32
    )
    for key in feature_spec
}

# NOTE(review): the inputs are already declared as int32 above, so these
# casts look like no-ops here; presumably they exist because the transformed
# features arrive with a different integer dtype -- confirm before removing.
input_word_ids = tf.cast(inputs["input_word_ids"], dtype=tf.int32)
input_mask = tf.cast(inputs["input_mask"], dtype=tf.int32)
input_type_ids = tf.cast(inputs["input_type_ids"], dtype=tf.int32)
View input_type_ids.py
# Build input_type_ids as an all-zero tensor with the same shape and dtype as
# input_mask (presumably because every token belongs to a single segment --
# confirm against the preprocessing code).
input_type_ids = tf.zeros_like(input_mask)
View preprocessing_fn.py
def preprocessing_fn(inputs):
def tokenize_text(text, sequence_length=MAX_SEQ_LEN):
...
return tf.reshape(tokens, [-1, sequence_length])
def preprocess_bert_input(text, segment_id=0):
input_word_ids = tokenize_text(text)
...
return (
View adding_of_CLS_and_SEP_tokens.py
# Ids of the special tokens that wrap every tokenized sequence.
CLS_ID = tf.constant(101, dtype=tf.int64)
SEP_ID = tf.constant(102, dtype=tf.int64)

# Both special-token columns have one entry per row of `text`.
batch_size = tf.shape(text)[0]
special_token_shape = [batch_size, 1]
start_tokens = tf.fill(special_token_shape, CLS_ID)
end_tokens = tf.fill(special_token_shape, SEP_ID)

# Keep at most `sequence_length - 2` tokens per row so that the two special
# tokens added around them still fit within `sequence_length`.
max_body_length = sequence_length - 2
tokens = tf.concat(
    [start_tokens, tokens[:, :max_body_length], end_tokens],
    axis=1,
)
View partial_setup_of_berttokenizer_part_3.py
# Create the BERT tokenizer from the vocabulary referenced by
# `vocab_file_path`, emitting int64 token ids and applying lower-casing
# according to `do_lower_case`.
# NOTE(review): `vocab_file_path` and `do_lower_case` are defined outside this
# snippet -- confirm they come from the same BERT module as the model.
bert_tokenizer = text.BertTokenizer(
vocab_lookup_table=vocab_file_path,
token_out_type=tf.int64,
lower_case=do_lower_case
)
View partial_setup_of_berttokenizer.py
import tensorflow_hub as hub
# TF Hub handle of the BERT module loaded below.
BERT_TFHUB_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/2"
# Load the module as a Keras layer; it is marked trainable.
bert_layer = hub.KerasLayer(handle=BERT_TFHUB_URL, trainable=True)
# Read the vocabulary file path from the loaded module's `vocab_file` asset;
# `.numpy()` extracts the concrete value from the tensor.
vocab_file_path = bert_layer.resolved_object.vocab_file.asset_path.numpy()
View example_dataset.csv
We can make this file beautiful and searchable if this error is corrected: No commas found in this CSV file in line 0.
‘This is the best movie I have ever seen ...’ -> 1
‘Probably the worst movie produced in 2019 ...’ -> 0
‘Tom Hanks\’s performance turns this movie into ...’ -> ?
You can’t perform that action at this time.