import numpy as np
import keras
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Embedding, GlobalAveragePooling1D

# x_train/x_test are assumed to be lists of word-index sequences and
# y_train/y_test their integer labels (an 80/20 split), prepared earlier
max_words, batch_size, maxlen, epochs, ngram_range = 10000, 32, 500, 5, 2
embedding_dims = 50  # used below but never set in this snippet; 50 matches the Keras fastText example
# Determine the number of categories (i.e. sentence types);
# np.max(y_train) + 1 assumes labels are 0-indexed integers
num_classes = np.max(y_train) + 1
# Vectorize the output sentence-type classifications into one-hot vectors for Keras
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
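
# create_ngram_set() and add_ngram() are called below but not defined in this
# gist; a minimal sketch following the Keras fastText example (imdb_fasttext.py):
def create_ngram_set(input_list, ngram_value=2):
    # Extract the set of n-grams (as tuples) from a list of word indices, e.g.
    # create_ngram_set([1, 4, 9, 4], ngram_value=2) -> {(1, 4), (4, 9), (9, 4)}
    return set(zip(*[input_list[i:] for i in range(ngram_value)]))

def add_ngram(sequences, token_indice, ngram_range=2):
    # Append the integer index of every known n-gram to each sequence
    new_sequences = []
    for input_list in sequences:
        new_list = input_list[:]
        for ngram_value in range(2, ngram_range + 1):
            for i in range(len(new_list) - ngram_value + 1):
                ngram = tuple(new_list[i:i + ngram_value])
                if ngram in token_indice:
                    new_list.append(token_indice[ngram])
        new_sequences.append(new_list)
    return new_sequences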
if ngram_range > 1:
    # Create the set of unique n-grams from the training set
    ngram_set = set()
    for input_list in x_train:
        for i in range(2, ngram_range + 1):
            set_of_ngram = create_ngram_set(input_list, ngram_value=i)
            ngram_set.update(set_of_ngram)

    # Dictionary mapping each n-gram token to a unique integer.
    # Integer values start above max_words in order to avoid
    # collisions with the existing word indices.
    start_index = max_words + 1
    token_indice = {v: k + start_index for k, v in enumerate(ngram_set)}
    indice_token = {token_indice[k]: k for k in token_indice}

    # max_words becomes the highest integer index in the dataset + 1
    max_words = np.max(list(indice_token.keys())) + 1

    # Augment x_train and x_test with n-gram features
    x_train = add_ngram(x_train, token_indice, ngram_range)
    x_test = add_ngram(x_test, token_indice, ngram_range)
# Pad the input vectors to ensure a consistent length
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
model = Sequential()
# Create the embedding (input) layer (max_words) --> pooling layer
model.add(Embedding(max_words, embedding_dims, input_length=maxlen))
# Create the average Pooling Layer
model.add(GlobalAveragePooling1D())
# Create the output layer (num_classes)
model.add(Dense(num_classes, activation='softmax'))
# Compile the model: loss function, optimizer, and reported metric
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['accuracy'])
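# Optional sanity check (not in the original gist): print the layer stack
model.summary()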
# "Fit the model" (train model), using training data (80% of datset)
model.fit(x_train, y_train, batch_size=batch_size,
epochs=epochs, validation_data=(x_test, y_test))
# Evaluate the trained model using the test data (20% of the dataset)
score = model.evaluate(x_test, y_test, batch_size=batch_size)
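# Report the results; with metrics=['accuracy'], evaluate() returns [loss, accuracy]
print('Test loss:', score[0])
print('Test accuracy:', score[1])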