brendano/sim.py

## gistfile1.txt
In [9]: run -i sim
for each word: prob connect to root
[ 0.28026994  0.16394082  0.10616135  0.17767563  0.12675216  0.1452001 ]
for (head,child) entries: P(head <- child)
[[ 0.          0.12563458  0.27335659  0.17451717  0.24229165  0.24617475]
 [ 0.25410789  0.          0.21883649  0.09361327  0.17785164  0.2048422 ]
 [ 0.12280784  0.12047786  0.          0.13921346  0.12119944  0.11342211]
 [ 0.11823039  0.27609723  0.15487263  0.          0.22541093  0.15197973]
 [ 0.11249058  0.1968143   0.09766069  0.21988013  0.          0.13838112]
 [ 0.11209336  0.11703521  0.14911225  0.19510033  0.10649417  0.        ]]
sum of upward marginals, per word: should be 1's
[ 1.  1.  1.  1.  1.  1.]

In [10]: run -i sim
for each word: prob connect to root
[ 0.1441609   0.18915872  0.20356234  0.1635163   0.18355205  0.11604969]
for (head,child) entries: P(head <- child)
[[ 0.          0.18785295  0.11732568  0.16197518  0.11481071  0.21501431]
 [ 0.17023724  0.          0.12637202  0.1875958   0.19765703  0.13494025]
 [ 0.2075128   0.13650031  0.          0.16724631  0.18643813  0.21004565]
 [ 0.20507604  0.11666844  0.1447081   0.          0.17073895  0.12750111]
 [ 0.16216366  0.22632245  0.2347626   0.22055959  0.          0.19644899]
 [ 0.11084936  0.14349712  0.17326925  0.09910682  0.14680313  0.        ]]
sum of upward marginals, per word: should be 1's
[ 1.  1.  1.  1.  1.  1.]

In [11]: run -i sim
for each word: prob connect to root
[ 0.26605198  0.08445264  0.1239581   0.13587276  0.23178177  0.15788275]
for (head,child) entries: P(head <- child)
[[ 0.          0.15971026  0.2217024   0.20083989  0.23890931  0.25569957]
 [ 0.12212709  0.          0.14543613  0.21355699  0.09285587  0.10903569]
 [ 0.11753579  0.24068976  0.          0.1363988   0.12173247  0.11395757]
 [ 0.16735723  0.13767001  0.165252    0.          0.14712714  0.17750318]
 [ 0.18037912  0.18870707  0.16609228  0.16464597  0.          0.18592124]
 [ 0.14654878  0.18877027  0.17755908  0.14868559  0.16759344  0.        ]]
sum of upward marginals, per word: should be 1's
[ 1.  1.  1.  1.  1.  1.]

## sim.py
# omg it really works?
# Section 3 of Koo et al: http://acl.ldc.upenn.edu/D/D07/D07-1015.pdf
import numpy as np

N = 6

# Exp scores for (head,child) edge
A = np.exp(np.random.random((N,N)))
np.fill_diagonal(A,0)
# Scores to root
rs = np.exp(np.random.random(N))

L = -A
np.fill_diagonal(L, A.sum(0))
L[0] = rs
Linv = np.linalg.inv(L)

root_marginals = rs*Linv[:,0]
marginals = [[ A[h,m]*( int(0!=m)*Linv[m,m] - int(0!=h)*Linv[m,h] )
    for m in range(N)] for h in range(N)]
marginals = np.array(marginals)

print "for each word: prob connect to root"
print root_marginals

print "for (head,child) entries: P(head <- child)"
print marginals

print "sum of upward marginals, per word: should be 1's"
print marginals.sum(0)+root_marginals
	In [9]: run -i sim
	for each word: prob connect to root
	[ 0.28026994 0.16394082 0.10616135 0.17767563 0.12675216 0.1452001 ]
	for (head,child) entries: P(head <- child)
	[[ 0. 0.12563458 0.27335659 0.17451717 0.24229165 0.24617475]
	[ 0.25410789 0. 0.21883649 0.09361327 0.17785164 0.2048422 ]
	[ 0.12280784 0.12047786 0. 0.13921346 0.12119944 0.11342211]
	[ 0.11823039 0.27609723 0.15487263 0. 0.22541093 0.15197973]
	[ 0.11249058 0.1968143 0.09766069 0.21988013 0. 0.13838112]
	[ 0.11209336 0.11703521 0.14911225 0.19510033 0.10649417 0. ]]
	sum of upward marginals, per word: should be 1's
	[ 1. 1. 1. 1. 1. 1.]

	In [10]: run -i sim
	for each word: prob connect to root
	[ 0.1441609 0.18915872 0.20356234 0.1635163 0.18355205 0.11604969]
	for (head,child) entries: P(head <- child)
	[[ 0. 0.18785295 0.11732568 0.16197518 0.11481071 0.21501431]
	[ 0.17023724 0. 0.12637202 0.1875958 0.19765703 0.13494025]
	[ 0.2075128 0.13650031 0. 0.16724631 0.18643813 0.21004565]
	[ 0.20507604 0.11666844 0.1447081 0. 0.17073895 0.12750111]
	[ 0.16216366 0.22632245 0.2347626 0.22055959 0. 0.19644899]
	[ 0.11084936 0.14349712 0.17326925 0.09910682 0.14680313 0. ]]
	sum of upward marginals, per word: should be 1's
	[ 1. 1. 1. 1. 1. 1.]

	In [11]: run -i sim
	for each word: prob connect to root
	[ 0.26605198 0.08445264 0.1239581 0.13587276 0.23178177 0.15788275]
	for (head,child) entries: P(head <- child)
	[[ 0. 0.15971026 0.2217024 0.20083989 0.23890931 0.25569957]
	[ 0.12212709 0. 0.14543613 0.21355699 0.09285587 0.10903569]
	[ 0.11753579 0.24068976 0. 0.1363988 0.12173247 0.11395757]
	[ 0.16735723 0.13767001 0.165252 0. 0.14712714 0.17750318]
	[ 0.18037912 0.18870707 0.16609228 0.16464597 0. 0.18592124]
	[ 0.14654878 0.18877027 0.17755908 0.14868559 0.16759344 0. ]]
	sum of upward marginals, per word: should be 1's
	[ 1. 1. 1. 1. 1. 1.]
	# omg it really works?
	# Section 3 of Koo et al: http://acl.ldc.upenn.edu/D/D07/D07-1015.pdf
	import numpy as np

	N = 6

	# Exp scores for (head,child) edge
	A = np.exp(np.random.random((N,N)))
	np.fill_diagonal(A,0)
	# Scores to root
	rs = np.exp(np.random.random(N))

	L = -A
	np.fill_diagonal(L, A.sum(0))
	L[0] = rs
	Linv = np.linalg.inv(L)

	root_marginals = rs*Linv[:,0]
	marginals = [[ A[h,m]( int(0!=m)Linv[m,m] - int(0!=h)*Linv[m,h] )
	for m in range(N)] for h in range(N)]
	marginals = np.array(marginals)

	print "for each word: prob connect to root"
	print root_marginals

	print "for (head,child) entries: P(head <- child)"
	print marginals

	print "sum of upward marginals, per word: should be 1's"
	print marginals.sum(0)+root_marginals