Skip to content

Instantly share code, notes, and snippets.

@zmjjmz
Created March 7, 2019 23:39
Show Gist options
  • Save zmjjmz/2ee2090233217c0d1be2dc975e5c3a47 to your computer and use it in GitHub Desktop.
TF2 Upgrade Script testing - part 1
import itertools
import numpy
import tensorflow
class TokenizeLookupLayer(tensorflow.keras.layers.Layer):
    """Tokenize strings and look the tokens up in a fixed vocabulary.

    Layer that encapsulates the following:
    - Tokenizing sentences by space (or a given delimiter)
    - Looking up the words with a given vocabulary list / table
      (out-of-vocabulary tokens map to ``oov_value``)
    - Resetting the shape of the above to be batch_size x pad_len
      by padding then slicing

    # Input Shape
        2D string tensor with shape `(batch_size, 1)`
    # Output Shape
        2D int32 tensor with shape `(batch_size, pad_len)`
    """

    def __init__(self, word_ind_map, pad_len, pad_value=0, oov_value=1,
                 delimiter=' ', **kwargs):
        """Configure the layer.

        Args:
            word_ind_map: dict mapping token string -> integer index.
            pad_len: fixed output width; longer sequences are truncated.
            pad_value: index used to pad sequences shorter than pad_len.
            oov_value: index returned for tokens absent from word_ind_map.
            delimiter: token separator handed to tf.string_split. Defaults
                to ' ', which matches the previous hard-coded behavior.
            **kwargs: forwarded to tensorflow.keras.layers.Layer.
        """
        super(TokenizeLookupLayer, self).__init__(**kwargs)
        self.input_spec = tensorflow.keras.layers.InputSpec(
            ndim=2, dtype=tensorflow.string)
        self.pad_len = pad_len
        self.pad_value = pad_value
        self.oov_value = oov_value
        self.word_ind_map = word_ind_map
        self.delimiter = delimiter

    def get_config(self):
        """Return the layer configuration for serialization."""
        config = {
            'word_ind_map': self.word_ind_map,
            'pad_len': self.pad_len,
            'pad_value': self.pad_value,
            'oov_value': self.oov_value,
            'delimiter': self.delimiter,
        }
        base_config = super(TokenizeLookupLayer, self).get_config()
        config.update(base_config)
        return config

    def build(self, input_shape):
        # NOTE(review): tensorflow.contrib was removed in TF2 and the v2
        # upgrade script does NOT rewrite this line ("UNCAUGHT"); the TF2
        # equivalent is tensorflow.lookup.StaticHashTable with
        # tensorflow.lookup.KeyValueTensorInitializer.
        self.lookup_tab = tensorflow.contrib.lookup.HashTable(
            tensorflow.contrib.lookup.KeyValueTensorInitializer(
                *zip(*self.word_ind_map.items())),
            default_value=self.oov_value)
        super(TokenizeLookupLayer, self).build(input_shape)

    def call(self, str_inp):
        # (batch_size, 1) -> (batch_size,), then split into a SparseTensor
        # of tokens. tf.string_split supports no name argument.
        tokenized_inp = tensorflow.string_split(
            tensorflow.squeeze(str_inp, axis=1),
            delimiter=self.delimiter)
        sparse_inp_lookedup = self.lookup_tab.lookup(
            tokenized_inp,
            name='lookup'
        )
        # Densifying yields width max_seq_len_in_batch, which bears no
        # relation to pad_len — so we pad then slice to force a fixed width.
        dense_inp = tensorflow.sparse_tensor_to_dense(
            sparse_inp_lookedup,
            default_value=self.pad_value,
            name='dense'
        )
        # Append pad_len pad columns so the tensor is at least pad_len wide.
        pad_full = tensorflow.pad(
            dense_inp,
            paddings=tensorflow.constant([[0, 0], [0, self.pad_len]]),
            mode='CONSTANT',
            constant_values=self.pad_value,
            name='pad'
        )
        # Then truncate the second dimension to exactly pad_len.
        sliced = pad_full[:, :self.pad_len]
        return sliced

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.pad_len,)
def test_TokenizeLookupLayer():
    """Smoke-test TokenizeLookupLayer end to end.

    Builds a one-layer Keras model over random sentences (some longer than
    pad_len, to exercise truncation) and prints the looked-up output and its
    shape for manual inspection. Requires a TF1-style Session.
    """
    # Vocabulary: 'a'..'k' -> 1..11 (0 is implicitly the pad index).
    word_ind_map = {w: ind + 1 for ind, w in enumerate('abcdefghijk')}
    n_examples = 1000
    pad_len = 162
    word_iterator = itertools.cycle(word_ind_map.keys())
    # Random-length, randomly ordered sentences of vocabulary words.
    # (The original shuffled via sorted() with a random key; use the
    # idiomatic numpy.random.shuffle instead.)
    strs = []
    for _ in range(n_examples):
        n_words = numpy.random.randint(1, high=pad_len + 100)
        words = [next(word_iterator) for _ in range(n_words)]
        numpy.random.shuffle(words)
        strs.append(' '.join(words))
    str_inp = tensorflow.keras.layers.Input(shape=(1,), dtype='string')
    looked_up = TokenizeLookupLayer(word_ind_map, pad_len)(str_inp)
    tokenize_lookup_model = tensorflow.keras.models.Model(
        inputs=str_inp, outputs=looked_up)
    compile_kwargs = {
        "optimizer": "sgd",
        "loss": "mean_squared_error",
        "metrics": []
    }
    # tokenize_lookup_model.compile(**compile_kwargs)
    with tensorflow.Session() as sess:
        # Lookup tables need explicit initialization in TF1 graph mode.
        tensorflow.tables_initializer().run(session=sess)
        strs_looked_up = sess.run(looked_up, feed_dict={
            str_inp: numpy.expand_dims(strs, axis=1)})
        print(strs_looked_up)
        print(strs_looked_up.shape)
# Run the smoke test when executed as a script (no-op on import).
if __name__ == "__main__":
    test_TokenizeLookupLayer()
TensorFlow 2.0 Upgrade Script
-----------------------------
Converted 1 files
Detected 0 issues that require attention
--------------------------------------------------------------------------------
================================================================================
Detailed log follows:
================================================================================
--------------------------------------------------------------------------------
Processing file 'tf2_upgrade_test.py'
outputting to 'tf2_upgrade_test_upgraded.py'
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment