This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Front of the text model: token ids -> embeddings -> dropout -> 1D convolution -> pooling.
# The input is one sequence of `maxlen_text` token ids per sample.
sequence_input = Input(shape=(maxlen_text,))
# Look up a `token_vec_size`-dimensional vector for each of the
# `max_words_to_keep` vocabulary ids.
x = Embedding(name='embedding_layer',
              input_dim=max_words_to_keep,
              output_dim=token_vec_size,
              input_length=maxlen_text)(sequence_input)
x = Dropout(.20)(x)
# The layer is named explicitly so it can later be fetched with model.get_layer('1-conv1d').
# padding='same' keeps the sequence length unchanged through the convolution.
x = Conv1D(64, 5, activation='relu', name='1-conv1d', padding='same')(x)
x = MaxPooling1D(pool_size=4)(x)
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_conv_layer(model, layer_name):
    """Look up a layer by name and report the size of its second output axis.

    Args:
        model: Object exposing ``get_layer(name)`` (e.g. a Keras model).
        layer_name: Name of the layer to fetch.

    Returns:
        A ``(layer, output_dim)`` tuple, where ``output_dim`` is
        ``layer.output_shape[1]``, i.e. the axis right after the first one
        (presumably the sequence length for a Conv1D layer; confirm against
        the model definition).
    """
    conv_layer = model.get_layer(layer_name)
    output_dim = conv_layer.output_shape[1]
    return conv_layer, output_dim
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_heatmap(model, layer_name, matrix, y_labels): | |
# obtain probability of the label with the highest certainty | |
network_output = model.get_output_at(0)[:, np.argmax(y_labels)] | |
# obtain the output vector and its dimension of the convolutional layer we want to visualize | |
conv_layer, layer_output_dim = get_conv_layer(model, layer_name) | |
# Setting up the calculation of the gradients between the output and the conv layer. Will be executed in the iteration step | |
grads = K.gradients(network_output, conv_layer.output)[0] | |
# average the gradients across our samples (one sample) and all filters |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def norm_heatmap(heatmap):
    """Clip a heatmap at zero and scale it into the range [0, 1].

    Negative entries are set to zero, then every entry is divided by the
    largest remaining value.

    Args:
        heatmap: Array-like of numeric activation values.

    Returns:
        A float array of the same shape. If the heatmap is empty or has no
        positive entry, the result is all zeros (or empty) instead of the
        NaNs a division by zero would produce.
    """
    # element-wise maximum calculation, basically setting all negative values to zero
    clipped = np.maximum(heatmap, 0)
    if clipped.size == 0:
        # np.max raises on empty input; there is nothing to normalize
        return clipped.astype(float)
    peak = np.max(clipped)
    if peak == 0:
        # no positive activation at all: avoid 0 / 0 -> NaN
        return np.zeros_like(clipped, dtype=float)
    # normalizing the heatmap to values between 0 and 1
    return clipped / peak
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_heatmap(heatmap, height_ratio=0.05):
    """Display a 1-D heatmap as a horizontal strip using ``plt.matshow``.

    The vector is stacked on top of itself so the strip gets a visible
    height relative to its length.

    Args:
        heatmap: Array with a ``shape`` attribute; ``shape[0]`` is taken as
            the vector length.
        height_ratio: Desired strip height as a fraction of the vector length.
    """
    # calculating how often the vector should be repeated to display a height relative to the vector length;
    # at least one row is kept, since short vectors would otherwise truncate to zero rows
    repeat_vector_n_times = max(1, int(heatmap.shape[0] * height_ratio))
    plt.matshow([heatmap] * repeat_vector_n_times)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def cstr(s, color='black'):
    """Wrap ``s`` in a ``<text>`` element with an inline CSS color.

    Args:
        s: Value to display; it is converted to ``str`` and HTML-escaped so
            that ``&``, ``<`` and ``>`` in the text cannot break the markup.
        color: CSS color value inserted into the ``style`` attribute.

    Returns:
        The HTML snippet as a string.
    """
    # Imported locally so the function does not depend on a module-level import.
    import html

    return "<text style=\"color:{}\">{}</text>".format(color, html.escape(str(s), quote=False))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def color(hvalue, threshold, max=1, cdefault='black',
          colors=('red', 'yellow', 'green', 'cyan', 'blue')):
    """Map a heat value to a color name.

    The interval from ``threshold`` to ``max`` is split into ``len(colors)``
    equally wide bands; the highest band maps to ``colors[0]`` and the lowest
    to ``colors[-1]``.

    Args:
        hvalue: Heat value to classify.
        threshold: Values below this get ``cdefault``.
        max: Upper end of the value range (the name shadows the builtin but
            is kept because it is part of the call interface).
        cdefault: Color returned for values below ``threshold``, for NaN,
            and when ``colors`` is empty.
        colors: Sequence of color names ordered from hottest to coolest.

    Returns:
        A color name; never ``None``.
    """
    num_colors = len(colors)
    # `not >=` instead of `<` so that NaN also falls back to the default color.
    if num_colors == 0 or not hvalue >= threshold:
        return cdefault
    band_width = (max - threshold) / num_colors
    for band, name in enumerate(colors, start=1):
        if hvalue > max - band_width * band:
            return name
    # hvalue equals the threshold, or rounding pushed the last bound just
    # above it: it still belongs to the coolest band.
    return colors[-1]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_token_indices(model, layer_name, threshold, matrix, y_labels): | |
heatmap = get_heatmap(model=model, layer_name=layer_name, matrix=matrix, y_labels=y_labels) | |
_, output_dim = get_conv_layer(model, layer_name) | |
# depending on the ratio between the input and layer output shape, we need to calculate
# how many original tokens have contributed to the layer output | |
dim_ratio = matrix.shape[1] / output_dim | |
if dim_ratio < 1.5: | |
window_size = 1 | |
else: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def get_highlighted_tokens(tokens, matrix, model, layer_name, threshold, y_labels): | |
indices = get_token_indices(model, layer_name, threshold, matrix, y_labels) | |
ctokens = [] | |
for i, t in enumerate(tokens): | |
if i in indices.keys(): | |
_color = color(indices[i], threshold=threshold) | |
ctokens.append(cstr(t, color=_color)) | |
else: | |
ctokens.append(t) |