
@hccho2
Last active July 25, 2019 06:49
Matrix multiplication in the attention computation: projecting the encoder and decoder states separately and summing gives the same result as concatenating them first and applying a single combined projection.
import numpy as np

N = 2   # batch size
T = 20  # encoder time length
D1 = 30 # encoder hidden dim
D2 = 6  # decoder hidden dim
D3 = 11 # attention dim
h = np.random.randn(N, T, D1) # all encoder hidden states
s = np.random.randn(N, D2)    # decoder hidden state at one time step
Wm = np.random.randn(D1, D3)  # projection for encoder states
Wq = np.random.randn(D2, D3)  # projection for decoder state

# Variant 1: project separately, then add (s is broadcast over the T axis)
A = np.matmul(h, Wm) + np.expand_dims(np.matmul(s, Wq), axis=1)

# Variant 2: concatenate inputs and weights, then do a single matmul
hs = np.concatenate([h, np.tile(np.expand_dims(s, 1), (1, T, 1))], axis=-1)
Wmq = np.concatenate([Wm, Wq], axis=0)
B = np.matmul(hs, Wmq)

print(np.allclose(A, B))  # True: [h, s] @ [[Wm], [Wq]] == h @ Wm + s @ Wq
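The snippet stops at the projected energies `A`. As a sketch of how those energies might be used in Bahdanau-style additive attention, the following continues the computation: a `tanh` nonlinearity, a score vector `v`, a softmax over the time axis, and a weighted sum of the encoder states. Note that `v`, the softmax, and the context step are assumptions of mine and are not part of the original gist.

```python
import numpy as np

N, T, D1, D2, D3 = 2, 20, 30, 6, 11
h = np.random.randn(N, T, D1)   # encoder hidden states
s = np.random.randn(N, D2)      # decoder hidden state
Wm = np.random.randn(D1, D3)
Wq = np.random.randn(D2, D3)
v = np.random.randn(D3)         # hypothetical score vector

# Projected energies, as in the gist: (N, T, D3)
A = np.matmul(h, Wm) + np.expand_dims(np.matmul(s, Wq), axis=1)

e = np.tanh(A) @ v                                     # scores, (N, T)
w = np.exp(e) / np.exp(e).sum(axis=1, keepdims=True)   # attention weights
context = np.einsum('nt,ntd->nd', w, h)                # context vector, (N, D1)

print(w.shape, context.shape)
```

Each row of `w` sums to 1, and `context` is the attention-weighted average of the encoder states for that batch element.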