quick_code_to_tensorflow
import numpy as np
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib import rnn

### tf.contrib.seq2seq.AttentionWrapper already provides a ready-made attention layer;
### the function below is a hand-rolled alternative using a learned context vector.
def task_specific_attention(self, inputs, output_size,
                            initializer=layers.xavier_initializer(),
                            activation_fn=tf.tanh, scope=None):
    """Dot-product (multiplicative) attention against a learned context vector.

    Performs task-specific attention reduction, using a learned
    attention context vector (constant within the task of interest).

    Args:
        inputs: Tensor of shape [batch_size, units, input_size];
            `input_size` must be static (known),
            the `units` axis is attended over (reduced away),
            `batch_size` is preserved.
        output_size: size of the projection's inner (feature) dimension.
    Returns:
        outputs: Tensor of shape [batch_size, input_size].
    """
    assert len(inputs.get_shape()) == 3 and inputs.get_shape()[-1].value is not None
    with tf.variable_scope(scope or 'attention') as scope:
        # u_w: the learned attention context vector.
        attention_context_vector = tf.get_variable(name='attention_context_vector',
                                                   shape=[output_size],
                                                   initializer=initializer,
                                                   dtype=tf.float32)
        # Fully connected layer projecting h_i to u_i:
        # [batch_size, units, input_size] -> [batch_size, units, output_size]
        input_projection = layers.fully_connected(inputs, output_size,
                                                  activation_fn=activation_fn,
                                                  scope=scope)
        # Dot product with the context vector -> [batch_size, units, 1]
        vector_attn = tf.reduce_sum(tf.multiply(input_projection, attention_context_vector),
                                    axis=2, keep_dims=True)
        # Softmax over the `units` (time) axis.
        attention_weights = tf.nn.softmax(vector_attn, dim=1)
        tf.summary.histogram('attention_weights', attention_weights)
        weighted_projection = tf.multiply(inputs, attention_weights)
        outputs = tf.reduce_sum(weighted_projection, axis=1)
        return outputs  # [batch_size, input_size] (i.e. hidden_size*2 for bi-GRU inputs)
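### A minimal sketch (not from the gist) of the built-in alternative mentioned at the top:
### tf.contrib.seq2seq.AttentionWrapper wraps an RNN cell with an attention mechanism.
### The placeholder shape and unit sizes below are illustrative assumptions.
encoder_memory = tf.placeholder(tf.float32, [None, 50, 256])  # [batch, time, units], assumed
attention_mechanism = tf.contrib.seq2seq.LuongAttention(num_units=256, memory=encoder_memory)
attn_cell = tf.contrib.seq2seq.AttentionWrapper(tf.contrib.rnn.GRUCell(256),
                                                attention_mechanism,
                                                attention_layer_size=256)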
def batchnorm(self, Ylogits, offset, convolutional=False):
    """Batch normalization.

    Args:
        Ylogits: a 2D tensor (fully connected) or a 4D convolutional output.
        offset: beta, the learned per-feature shift; with ReLU activations it is
            usually initialized around 0.1.
        convolutional: if True, compute moments over batch and spatial axes.
    Note: the scale (gamma) is omitted (None below); it matters for sigmoid
        activations but has little effect with ReLU.
    Returns:
        Ybn: same shape as Ylogits, the batch-normalized result.
        update_moving_averages: op that updates the moving mean and variance,
            which are used at test time.
    """
    # Passing the global step prevents averaging over non-existing iterations.
    exp_moving_avg = tf.train.ExponentialMovingAverage(0.999, self._global_step)
    bnepsilon = 1e-5  # small constant to avoid division by zero
    if convolutional:
        mean, variance = tf.nn.moments(Ylogits, [0, 1, 2])
    else:
        mean, variance = tf.nn.moments(Ylogits, [0])
    update_moving_averages = exp_moving_avg.apply([mean, variance])
    # At test time (self.tst is True) use the moving averages, otherwise the batch statistics.
    m = tf.cond(self.tst, lambda: exp_moving_avg.average(mean), lambda: mean)
    v = tf.cond(self.tst, lambda: exp_moving_avg.average(variance), lambda: variance)
    Ybn = tf.nn.batch_normalization(Ylogits, m, v, offset, None, bnepsilon)
    return Ybn, update_moving_averages
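### Hedged usage sketch (not from the gist): wiring batchnorm into a dense ReLU layer.
### The names below (logits, n_units, train_step, self.tst) are illustrative assumptions.
# offset = tf.Variable(tf.constant(0.1, shape=[n_units]))            # beta, per-feature shift
# bn, update_ema = self.batchnorm(logits, offset, convolutional=False)
# y = tf.nn.relu(bn)
# # During training, run the moving-average update alongside the train step:
# sess.run([train_step, update_ema], feed_dict={..., self.tst: False})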
def bi_gru(self, inputs):
    """Build the multi-layer bi-GRU network; return the top layer's outputs at every
    time step (forward and backward states concatenated)."""
    cells_fw = [self.gru_cell() for _ in range(self.n_layer)]
    cells_bw = [self.gru_cell() for _ in range(self.n_layer)]
    initial_states_fw = [cell_fw.zero_state(self.batch_size, tf.float32) for cell_fw in cells_fw]
    initial_states_bw = [cell_bw.zero_state(self.batch_size, tf.float32) for cell_bw in cells_bw]
    outputs, _, _ = rnn.stack_bidirectional_dynamic_rnn(cells_fw, cells_bw, inputs,
                                                        initial_states_fw=initial_states_fw,
                                                        initial_states_bw=initial_states_bw,
                                                        dtype=tf.float32)
    return outputs
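### Hedged sketch: the gru_cell helper that bi_gru assumes; the hidden size and dropout
### attributes (self.hidden_size, self.keep_prob) are illustrative assumptions.
def gru_cell(self):
    cell = tf.contrib.rnn.GRUCell(self.hidden_size)
    # Dropout on the cell outputs only; keep_prob should be fed as 1.0 at test time.
    return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)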
from collections import Counter

def build_vocab(corpus):
    """
    Build a vocabulary with word frequencies for an entire corpus.

    Returns a dictionary `w -> (i, f)`, mapping word strings to pairs of
    word ID and word corpus frequency.
    """
    vocab = Counter()
    for line in corpus:
        tokens = line.strip().split()
        vocab.update(tokens)
    return {word: (i, freq) for i, (word, freq) in enumerate(vocab.items())}
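### Example (illustrative data): building a vocabulary from a tiny in-memory corpus.
corpus = ["the cat sat", "the dog sat"]
vocab = build_vocab(corpus)
# e.g. {'the': (0, 2), 'cat': (1, 1), 'sat': (2, 2), 'dog': (3, 1)}
# Note: IDs follow Counter iteration order, so they are not guaranteed to be stable
# across Python versions; use vocab.most_common() if a deterministic ordering matters.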
from tqdm import tqdm

def gen_batch(X, y, batch_size=128):
    """Yield successive (X_batch, y_batch) mini-batches."""
    sample_num = len(X)
    for start in tqdm(range(0, sample_num, batch_size)):
        end = min(start + batch_size, sample_num)
        X_batch = X[start:end]
        y_batch = y[start:end]
        yield X_batch, y_batch
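### Hedged sketch (not from the gist): consuming gen_batch in a feed_dict training loop;
### X_train / y_train, the placeholders and train_op are illustrative assumptions.
# for epoch in range(n_epochs):
#     for X_batch, y_batch in gen_batch(X_train, y_train, batch_size=128):
#         sess.run(train_op, feed_dict={x_ph: X_batch, y_ph: y_batch})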
### L2 regularization over all trainable variables except biases and "noreg" variables.
# loss = <your regular task loss>, e.g. a cross-entropy term
l2 = lambda_l2_reg * sum(
    tf.nn.l2_loss(tf_var)
    for tf_var in tf.trainable_variables()
    if not ("noreg" in tf_var.name or "Bias" in tf_var.name)
)
loss += l2
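### Hedged sketch (not from the gist): a concrete base loss and train step the L2 term
### above could plug into; the logits/labels tensors and lambda value are assumptions.
# lambda_l2_reg = 1e-4
# loss = tf.reduce_mean(
#     tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels))
# loss += l2                      # add the regularizer computed above
# train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)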
def to_categorical(topics):
    """One-hot encode `topics`, an array of integer class IDs starting from 0."""
    n_topics = len(set(topics))
    n_sample = len(topics)
    y = np.zeros(shape=(n_sample, n_topics))
    for i in range(n_sample):
        topic_index = topics[i]
        y[i, topic_index] = 1
    return y
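### Example (illustrative data): one-hot encode four labels drawn from three classes.
print(to_categorical(np.array([0, 2, 1, 2])))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]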