@kaniblu
Created October 26, 2017 05:14
PyTorch LSTM and GRU Orthogonal Initialization and Positive Bias
import torch.nn.init as I


def init_gru(cell, gain=1):
    cell.reset_parameters()
    # orthogonal initialization of recurrent weights: hh stacks the
    # per-gate recurrent matrices, so initialize one
    # (hidden_size x hidden_size) gate block at a time
    for _, hh, _, _ in cell.all_weights:
        for i in range(0, hh.size(0), cell.hidden_size):
            # I.orthogonal was renamed I.orthogonal_ in PyTorch >= 0.4
            I.orthogonal(hh[i:i + cell.hidden_size], gain=gain)

def init_lstm(cell, gain=1):
    init_gru(cell, gain)
    # positive forget gate bias (Jozefowicz et al., 2015): PyTorch
    # orders LSTM gates as (input, forget, cell, output), so the
    # forget gate bias is the second quarter of each bias vector
    for _, _, ih_b, hh_b in cell.all_weights:
        l = len(ih_b)
        ih_b[l // 4:l // 2].data.fill_(1.0)
        hh_b[l // 4:l // 2].data.fill_(1.0)
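
A minimal usage sketch: the layer sizes below are arbitrary, and both functions assume the default bias=True so that all_weights yields four tensors per layer.

import torch.nn as nn

# hypothetical sizes, chosen only for illustration
gru = nn.GRU(input_size=100, hidden_size=128, num_layers=2)
lstm = nn.LSTM(input_size=100, hidden_size=128, num_layers=2)

init_gru(gru)
init_lstm(lstm)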