# samuelazran/grouping.py

## grouping.py
"""
Feature extraction for representing groups of items.

The order of items in a group is irrelevant.
Each token X[i] gets a feature dict F[i] holding the token itself plus one
"group_has__<token>" counter for every earlier token that is still part of
the running group.  The running group restarts after a token tagged "S"
(presumably "S" = start and "C" = continue -- confirm with the tagger).
==================================================
X = list of tokens:               [t1,t2,t3,t4,t5]
Y = list of group tags:           [S ,C ,C ,S ,C ]
F = list of features representing X:
--------------------------------------------------
X[0] = t1    Y[0] = S    F[0] = {token=t1}
X[1] = t2    Y[1] = C    F[1] = {token=t2, group_has__t1=1}
X[2] = t3    Y[2] = C    F[2] = {token=t3, group_has__t1=1, group_has__t2=1}
X[3] = t4    Y[3] = S    F[3] = {token=t4, group_has__t1=1, group_has__t2=1, group_has__t3=1}
X[4] = t5    Y[4] = C    F[4] = {token=t5, group_has__t4=1}
"""


def group_features(tokens, tags):
    """Build one feature dict per token describing the group seen so far.

    Args:
        tokens: sequence of tokens (the X of the module docstring).
        tags: sequence of group tags aligned with ``tokens``; only
            ``tags[i - 1]`` is read when building the features of token ``i``.

    Returns:
        A list with one dict per token.  Each dict maps ``"token"`` to the
        token itself and ``"group_has__<t>"`` to the number of times ``t``
        occurred among the preceding tokens of the running group.
    """
    features = []
    for i, token in enumerate(tokens):
        if i == 0 or tags[i - 1] == "S":
            # The previous token opened a new group: forget older members.
            current = {}
        else:
            # A shallow copy is enough: the copied "token" entry is
            # overwritten below and every other value is an int counter.
            current = dict(features[i - 1])
        if i > 0:
            # The previous token always belongs to the running group.
            key = "group_has__{}".format(tokens[i - 1])
            current[key] = current.get(key, 0) + 1
        current["token"] = token
        features.append(current)
    return features


X = ["t1", "t2", "t3", "t4", "t5"]
Y = ["S", "C", "C", "S", "C"]
F = group_features(X, Y)
	"""
	Feature extraction for representing groups of items.

	The order of items in a group is irrelevant.
	Each token X[i] gets a feature dict F[i] holding the token itself plus one
	"group_has__<token>" counter for every earlier token that is still part of
	the running group.  The running group restarts after a token tagged "S"
	(presumably "S" = start and "C" = continue -- confirm with the tagger).
	==================================================
	X = list of tokens:               [t1,t2,t3,t4,t5]
	Y = list of group tags:           [S ,C ,C ,S ,C ]
	F = list of features representing X:
	--------------------------------------------------
	X[0] = t1    Y[0] = S    F[0] = {token=t1}
	X[1] = t2    Y[1] = C    F[1] = {token=t2, group_has__t1=1}
	X[2] = t3    Y[2] = C    F[2] = {token=t3, group_has__t1=1, group_has__t2=1}
	X[3] = t4    Y[3] = S    F[3] = {token=t4, group_has__t1=1, group_has__t2=1, group_has__t3=1}
	X[4] = t5    Y[4] = C    F[4] = {token=t5, group_has__t4=1}
	"""


	def group_features(tokens, tags):
		"""Build one feature dict per token describing the group seen so far.

		Args:
			tokens: sequence of tokens (the X of the docstring above).
			tags: sequence of group tags aligned with ``tokens``; only
				``tags[i - 1]`` is read when building the features of token ``i``.

		Returns:
			A list with one dict per token.  Each dict maps ``"token"`` to the
			token itself and ``"group_has__<t>"`` to the number of times ``t``
			occurred among the preceding tokens of the running group.
		"""
		features = []
		for i, token in enumerate(tokens):
			if i == 0 or tags[i - 1] == "S":
				# The previous token opened a new group: forget older members.
				current = {}
			else:
				# A shallow copy is enough: the copied "token" entry is
				# overwritten below and every other value is an int counter.
				current = dict(features[i - 1])
			if i > 0:
				# The previous token always belongs to the running group.
				key = "group_has__{}".format(tokens[i - 1])
				current[key] = current.get(key, 0) + 1
			current["token"] = token
			features.append(current)
		return features


	X = ["t1", "t2", "t3", "t4", "t5"]
	Y = ["S", "C", "C", "S", "C"]
	F = group_features(X, Y)