Hannes Hapke hanneshapke

## model.py
# Model input: one vector of length `maxlen_text` per sample.
sequence_input = Input(shape=(maxlen_text,))

# Embed the input, then apply 20% dropout to the embedded sequence.
x = Embedding(
    input_dim=max_words_to_keep,
    output_dim=token_vec_size,
    input_length=maxlen_text,
    name='embedding_layer',
)(sequence_input)
x = Dropout(0.2)(x)

# First convolution block. The layer name '1-conv1d' is the handle that can
# later be passed to `model.get_layer(...)` to inspect this layer.
x = Conv1D(
    filters=64,
    kernel_size=5,
    padding='same',
    activation='relu',
    name='1-conv1d',
)(x)
x = MaxPooling1D(pool_size=4)(x)

## visualize_conv_nets_for_nlp.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                hanneshapke
                / visualize_conv_nets_for_nlp.ipynb
            
            
              Last active
              April 16, 2018 15:40
            
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## extract_layer.py
def get_conv_layer(model, layer_name):
    """Fetch a layer from ``model`` by name.

    Args:
        model: object exposing ``get_layer(name)``.
        layer_name: name of the layer to look up.

    Returns:
        A ``(layer, dim)`` tuple, where ``dim`` is ``layer.output_shape[1]``
        (presumably the sequence axis of the layer output -- confirm for
        your model).
    """
    layer = model.get_layer(layer_name)
    return layer, layer.output_shape[1]

## generate_layer_heat_map.py
def get_heatmap(model, layer_name, matrix, y_labels):
    """Set up the gradient of the top-label output w.r.t. a layer's output.

    NOTE(review): this snippet ends right after the gradient tensor is
    defined -- nothing is returned, and ``matrix`` and ``layer_output_dim``
    are not used in the visible lines. The remainder presumably lives in the
    full file; confirm before relying on this function.
    """

    # obtain probability of the label with the highest certainty
    # (the column of the model output selected by np.argmax(y_labels))
    network_output = model.get_output_at(0)[:, np.argmax(y_labels)]
    # obtain the output vector and its dimension of the convolutional layer we want to visualize
    conv_layer, layer_output_dim = get_conv_layer(model, layer_name)

    # Setting up the calculation of the gradients between the output and the conv layer. Will be executed in the iteration step
    # K.gradients returns a list; [0] picks the gradient for conv_layer.output
    grads = K.gradients(network_output, conv_layer.output)[0]
    # average the gradients across our samples (one sample) and all filters

## norm_heatmap.py
def norm_heatmap(heatmap):
    """Clip negative values to zero and scale the result by its maximum.

    Args:
        heatmap: array-like of numbers.

    Returns:
        An array with negatives replaced by 0 and all values divided by the
        largest remaining value, so the peak becomes 1. If nothing is
        positive (or the input is empty) an all-zero float array of the same
        shape is returned instead of dividing by zero.
    """
    # element-wise maximum: every negative entry becomes zero
    clipped = np.maximum(heatmap, 0)
    peak = np.max(clipped) if np.size(clipped) else 0
    if peak == 0:
        # 0 / 0 would produce NaN (and a RuntimeWarning); a flat map is all zeros
        return np.zeros_like(clipped, dtype=float)
    return clipped / peak

## plot_heatmap.py
def plot_heatmap(heatmap, height_ratio=0.05):
    """Draw a 1-D heat map as a strip using ``plt.matshow``.

    The vector is stacked on top of itself so the strip has a visible height
    relative to its length.

    Args:
        heatmap: 1-D array; ``heatmap.shape[0]`` is read as its length.
        height_ratio: strip height as a fraction of the vector length.
    """
    # calculating how often the vector should be repeated to display a height
    # relative to the vector length; always keep at least one row, because
    # int() truncates to 0 for short vectors and matshow cannot draw an
    # empty matrix
    repeat_vector_n_times = max(1, int(heatmap.shape[0] * height_ratio))
    plt.matshow([heatmap] * repeat_vector_n_times)

## generate_text_tag.py
def cstr(s, color='black'):
    """Wrap ``s`` in a ``<text>`` tag whose inline style sets the given color."""
    template = '<text style="color:{}">{}</text>'
    return template.format(color, s)

## generate_color_for_heatmap_value.py
def color(hvalue, threshold, max=1, cdefault='black',
          colors=('red', 'yellow', 'green', 'cyan', 'blue')):
    """Map a heat-map value to a color name.

    The range ``threshold .. max`` is split into ``len(colors)`` equal bands;
    the first entry of ``colors`` is used for the highest band and the last
    entry for the lowest one.

    Args:
        hvalue: heat-map value to classify.
        threshold: values below this get ``cdefault``.
        max: upper end of the value range (the name shadows the builtin but
            is kept because callers may pass it by keyword).
        cdefault: color returned for values below ``threshold``.
        colors: color names ordered from the highest band to the lowest.

    Returns:
        A color name. Values below ``threshold``, NaN values, and any call
        with an empty ``colors`` yield ``cdefault``.
    """
    # `not >=` instead of `<` so that NaN also falls back to the default color
    if not hvalue >= threshold or len(colors) == 0:
        return cdefault
    band_width = (max - threshold) / len(colors)
    for band, name in enumerate(colors, start=1):
        if hvalue > max - band_width * band:
            return name
    # hvalue sits exactly on the threshold (or float rounding put the last
    # boundary just above it): it still belongs to the lowest band
    return colors[-1]

## get_token_indicies.py
def get_token_indices(model, layer_name, threshold, matrix, y_labels):
    """Relate the heat map of a layer back to positions in the input.

    NOTE(review): the snippet is cut off inside the ``else`` branch, so the
    window size for larger ratios, the use of ``threshold`` and the return
    value are not visible here. ``get_highlighted_tokens`` treats the result
    as a mapping keyed by token index -- confirm against the full file.
    """
    heatmap = get_heatmap(model=model, layer_name=layer_name, matrix=matrix, y_labels=y_labels)
    _, output_dim = get_conv_layer(model, layer_name)

    # depending on the ratio between the input and layer output shape, we need to calculate
    # how many original tokens have contributed to the layer output
    dim_ratio = matrix.shape[1] / output_dim
    if dim_ratio < 1.5:
        # input and layer output have (nearly) the same length: one token per position
        window_size = 1
    else:
        # NOTE(review): the body of this branch is missing from the snippet

## get_highlighted_tokens.py
def get_highlighted_tokens(tokens, matrix, model, layer_name, threshold, y_labels):
    """Wrap tokens in colored ``<text>`` tags where an index entry exists.

    NOTE(review): the visible snippet ends without returning ``ctokens``;
    the rest of the function is presumably cut off -- confirm against the
    full file.
    """
    # mapping from token position to a heat-map value (read via .keys() and [i] below)
    indices = get_token_indices(model, layer_name, threshold, matrix, y_labels)

    ctokens = []
    for i, t in enumerate(tokens):
        if i in indices.keys():
            # token has a heat-map value: pick its color and wrap it in a tag
            _color = color(indices[i], threshold=threshold)
            ctokens.append(cstr(t, color=_color))
        else:
            # no entry for this position: keep the token unstyled
            ctokens.append(t)
	# Model input: one vector of length `maxlen_text` per sample.
	sequence_input = Input(shape=(maxlen_text,))

	# Embed the input, then apply 20% dropout to the embedded sequence.
	x = Embedding(
	    input_dim=max_words_to_keep,
	    output_dim=token_vec_size,
	    input_length=maxlen_text,
	    name='embedding_layer',
	)(sequence_input)
	x = Dropout(0.2)(x)

	# First convolution block, named '1-conv1d' so it can be looked up later.
	x = Conv1D(
	    filters=64,
	    kernel_size=5,
	    padding='same',
	    activation='relu',
	    name='1-conv1d',
	)(x)
	x = MaxPooling1D(pool_size=4)(x)
	def get_conv_layer(model, layer_name):
	    """Fetch a layer from ``model`` by name.

	    Args:
	        model: object exposing ``get_layer(name)``.
	        layer_name: name of the layer to look up.

	    Returns:
	        A ``(layer, dim)`` tuple, where ``dim`` is ``layer.output_shape[1]``.
	    """
	    # body indentation restored; it had been flattened to the `def` level
	    conv_layer = model.get_layer(layer_name)
	    output_dim = conv_layer.output_shape[1]
	    return conv_layer, output_dim
	def get_heatmap(model, layer_name, matrix, y_labels):
	    """Set up the gradient of the top-label output w.r.t. a layer's output.

	    NOTE(review): this snippet ends right after the gradient tensor is
	    defined -- nothing is returned, and ``matrix`` and ``layer_output_dim``
	    are not used in the visible lines. Only the lost body indentation has
	    been restored here; confirm the remainder against the full file.
	    """

	    # obtain probability of the label with the highest certainty
	    network_output = model.get_output_at(0)[:, np.argmax(y_labels)]
	    # obtain the output vector and its dimension of the convolutional layer we want to visualize
	    conv_layer, layer_output_dim = get_conv_layer(model, layer_name)

	    # Setting up the calculation of the gradients between the output and the conv layer. Will be executed in the iteration step
	    grads = K.gradients(network_output, conv_layer.output)[0]
	    # average the gradients across our samples (one sample) and all filters
	def norm_heatmap(heatmap):
	    """Clip negative values to zero and scale the result by its maximum.

	    Args:
	        heatmap: array-like of numbers.

	    Returns:
	        An array with negatives replaced by 0 and all values divided by
	        the largest remaining value. If nothing is positive (or the input
	        is empty) an all-zero float array of the same shape is returned
	        instead of dividing by zero.
	    """
	    # element-wise maximum: every negative entry becomes zero
	    clipped = np.maximum(heatmap, 0)
	    peak = np.max(clipped) if np.size(clipped) else 0
	    if peak == 0:
	        # 0 / 0 would produce NaN; a flat map is simply all zeros
	        return np.zeros_like(clipped, dtype=float)
	    return clipped / peak
	def plot_heatmap(heatmap, height_ratio=0.05):
	    """Draw a 1-D heat map as a strip using ``plt.matshow``.

	    Args:
	        heatmap: 1-D array; ``heatmap.shape[0]`` is read as its length.
	        height_ratio: strip height as a fraction of the vector length.
	    """
	    # calculating how often the vector should be repeated to display a
	    # height relative to the vector length; keep at least one row because
	    # int() truncates to 0 for short vectors
	    repeat_vector_n_times = max(1, int(heatmap.shape[0] * height_ratio))
	    plt.matshow([heatmap] * repeat_vector_n_times)
	def cstr(s, color='black'):
	    """Wrap ``s`` in a ``<text>`` tag whose inline style sets the given color."""
	    # body indentation restored; it had been flattened to the `def` level
	    return "<text style=\"color:{}\">{}</text>".format(color, s)
	def color(hvalue, threshold, max=1, cdefault='black',
	          colors=('red', 'yellow', 'green', 'cyan', 'blue')):
	    """Map a heat-map value to a color name.

	    The range ``threshold .. max`` is split into ``len(colors)`` equal
	    bands; the first entry of ``colors`` is used for the highest band and
	    the last entry for the lowest one.

	    Args:
	        hvalue: heat-map value to classify.
	        threshold: values below this get ``cdefault``.
	        max: upper end of the value range (shadows the builtin; kept for
	            keyword callers).
	        cdefault: color returned for values below ``threshold``.
	        colors: color names ordered from the highest band to the lowest.

	    Returns:
	        A color name. Values below ``threshold``, NaN values, and any
	        call with an empty ``colors`` yield ``cdefault``.
	    """
	    # `not >=` instead of `<` so that NaN also falls back to the default
	    if not hvalue >= threshold or len(colors) == 0:
	        return cdefault
	    band_width = (max - threshold) / len(colors)
	    for band, name in enumerate(colors, start=1):
	        if hvalue > max - band_width * band:
	            return name
	    # exactly on the threshold (or float rounding at the last boundary):
	    # the value still belongs to the lowest band
	    return colors[-1]
	def get_token_indices(model, layer_name, threshold, matrix, y_labels):
	heatmap = get_heatmap(model=model, layer_name=layer_name, matrix=matrix, y_labels=y_labels)
	_, output_dim = get_conv_layer(model, layer_name)

	# depending on the ration between the input and layer output shape, we need to calculate
	# how many original tokens have contributed to the layer output
	dim_ratio = matrix.shape[1] / output_dim
	if dim_ratio < 1.5:
	window_size = 1
	else:
	def get_highlighted_tokens(tokens, matrix, model, layer_name, threshold, y_labels):
	    """Wrap tokens in colored ``<text>`` tags where an index entry exists.

	    NOTE(review): the visible snippet ends without returning ``ctokens``;
	    the rest of the function is presumably cut off. Only the lost body
	    indentation has been restored here.
	    """
	    indices = get_token_indices(model, layer_name, threshold, matrix, y_labels)

	    ctokens = []
	    for i, t in enumerate(tokens):
	        if i in indices.keys():
	            # token has a heat-map value: pick its color and wrap it in a tag
	            _color = color(indices[i], threshold=threshold)
	            ctokens.append(cstr(t, color=_color))
	        else:
	            ctokens.append(t)