Created
December 7, 2017 17:28
-
-
Save zmjjmz/0e9dd22c175ce6cde1c9120c8fbea464 to your computer and use it in GitHub Desktop.
Admittedly janky model.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def keras_avgpool_linear_pad_endtoend(word_map_emb_pair, pad_length,
                                      n_classes, random_seed, oov_thresh=0.9,
                                      embed_config=None, model_config=None):
    """Build an end-to-end Keras text classifier.

    Pipeline: raw string input -> tokenize + vocabulary lookup (padded to
    ``pad_length``) -> embedding lookup -> masked global average pooling ->
    dense layer -> softmax.

    Parameters
    ----------
    word_map_emb_pair : tuple
        ``(word_ind_map, embedding_mat)`` in the order ``filter_embeddings``
        returns them (see comment below).
    pad_length : int
        Fixed token-sequence length used for padding/lookup.
    n_classes : int
        Number of output classes for the softmax head.
    random_seed : int
        Seed for numpy's global RNG, for reproducible initialization.
    oov_thresh : float, optional
        Threshold forwarded to ``ml_utils.OOVCodeLayer``.
    embed_config : dict, optional
        Extra keyword arguments for the ``Embedding`` layer.
    model_config : dict, optional
        Extra keyword arguments for the ``Dense`` layer.

    Returns
    -------
    keras.models.Model
        Model with outputs ``[softmax, oov_code, pooler]``.
    """
    # Fix: avoid mutable default arguments ({} shared across calls); use
    # None sentinels instead -- behavior for existing callers is unchanged.
    embed_config = {} if embed_config is None else embed_config
    model_config = {} if model_config is None else model_config

    # expect these in the order filter_embeddings returns them
    word_ind_map, embedding_mat = word_map_emb_pair
    numpy.random.seed(random_seed)
    inp = keras.layers.Input(shape=(1,), name='text', dtype='string')
    # assumes word_ind_map and embedding_mat were preprocessed so that
    # index 0 is reserved for padding and index 1 for OOV tokens
    pad_value = 0
    oov_value = 1
    lookedup = ml_utils.TokenizeLookupLayer(word_ind_map, pad_length,
                                            pad_value=pad_value,
                                            oov_value=oov_value,
                                            name='lookedup')(inp)
    # counting nonzero entries as sequence length only makes sense if
    # pad_value is 0
    lengths = ml_utils.CountNonZeroLayer(name='get_len')(lookedup)
    oov_code = ml_utils.OOVCodeLayer(
        oov_value=oov_value,
        oov_thresh=oov_thresh,
        name='oov_code'
    )([lookedup, lengths])
    emb = keras.layers.Embedding(*(embedding_mat.shape),
                                 weights=[embedding_mat],
                                 input_length=pad_length, name='embed',
                                 **embed_config)(lookedup)
    pooler = ml_utils.MaskedGlobalAveragePooling1D(name='avg')([emb, lengths])
    model = keras.layers.core.Dense(n_classes,
                                    input_shape=(embedding_mat.shape[1],),
                                    name='weights', **model_config)(pooler)
    softmax = keras.layers.core.Activation('softmax', name='softmax')(model)
    return keras.models.Model(inputs=[inp], outputs=[softmax, oov_code, pooler])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment