lemire/printcommonsubsets.py

## printcommonsubsets.py

from itertools import product

# Find every attribute subset of size 1 to 3 that is contained in at least
# half of the attribute sets below, print those subsets by decreasing support,
# then report the sets that contain at most one of the frequent subsets having
# two or more attributes.

# Each inner list is one set of attribute names.
rawdata = [
    ["age", "workclass", "capital_loss"],
    ["age", "capital_gain", "capital_loss"],
    ["age", "workclass", "capital_loss", "x"],
    ["age", "capital_gain", "capital_loss", "x"],
    ["workclass", "hours_per_week", "native_country"],
    ["age", "capital_loss", "native_country"],
    ["workclass", "hours_per_week", "native_country", "x"],
    ["age", "capital_loss", "native_country", "x"],
    ["age", "capital_loss", "x"],
    ["age", "capital_gain", "native_country"],
    ["age", "workclass", "capital_gain"],
    ["age", "workclass", "capital_gain", "x"],
    ["age", "capital_gain", "native_country", "x"],
    ["age", "capital_gain", "x"],
]
print("how many sets", len(rawdata))

# A real list (not a lazy map object): it is measured with len() and scanned
# once per candidate subset.
sdata = [set(row) for row in rawdata]
total = len(sdata)

# Every attribute that appears in at least one set; also fine for empty input.
allkeys = set().union(*sdata)
print("how many attributes ", len(allkeys))

# l: (support, sorted attribute list rendered as a string) per frequent subset;
#    doubles as the "already seen" index.
# d: the frequent subsets themselves, in first-seen order.
l = set()
d = []
# Drawing three attributes with repetition yields every subset of size 1, 2
# and 3 (repeated picks collapse inside the set).
for k1, k2, k3 in product(allkeys, repeat=3):
    ts = {k1, k2, k3}
    c = sum(ts.issubset(s) for s in sdata)
    # "At least half of the sets", written without division so that the result
    # does not depend on integer vs. true division.
    if 2 * c < total:
        continue
    key = (c, str(sorted(ts)))
    if key not in l:
        l.add(key)
        d.append(ts)

# Highest support first; ties are ordered by the string form, descending.
l = sorted(l, reverse=True)
for x in l:
    print(x)

# Frequent subsets with at least two attributes.
d2 = [fs for fs in d if len(fs) > 1]
print("all sets contain at least two of the following sets (except for some exceptions)")
print(d2)
for r in sdata:
    # Tally against d2 so the count refers to the list printed just above;
    # single-attribute subsets must not contribute.
    c = sum(td.issubset(r) for td in d2)
    if c <= 1:
        print(r, " has count ", c)

	from itertools import product

	# Find every attribute subset of size 1 to 3 that is contained in at least
	# half of the attribute sets below, print those subsets by decreasing support,
	# then report the sets that contain at most one of the frequent subsets having
	# two or more attributes.

	# Each inner list is one set of attribute names.
	rawdata = [
	    ["age", "workclass", "capital_loss"],
	    ["age", "capital_gain", "capital_loss"],
	    ["age", "workclass", "capital_loss", "x"],
	    ["age", "capital_gain", "capital_loss", "x"],
	    ["workclass", "hours_per_week", "native_country"],
	    ["age", "capital_loss", "native_country"],
	    ["workclass", "hours_per_week", "native_country", "x"],
	    ["age", "capital_loss", "native_country", "x"],
	    ["age", "capital_loss", "x"],
	    ["age", "capital_gain", "native_country"],
	    ["age", "workclass", "capital_gain"],
	    ["age", "workclass", "capital_gain", "x"],
	    ["age", "capital_gain", "native_country", "x"],
	    ["age", "capital_gain", "x"],
	]
	print("how many sets", len(rawdata))

	# A real list (not a lazy map object): it is measured with len() and scanned
	# once per candidate subset.
	sdata = [set(row) for row in rawdata]
	total = len(sdata)

	# Every attribute that appears in at least one set; also fine for empty input.
	allkeys = set().union(*sdata)
	print("how many attributes ", len(allkeys))

	# l: (support, sorted attribute list rendered as a string) per frequent subset;
	#    doubles as the "already seen" index.
	# d: the frequent subsets themselves, in first-seen order.
	l = set()
	d = []
	# Drawing three attributes with repetition yields every subset of size 1, 2
	# and 3 (repeated picks collapse inside the set).
	for k1, k2, k3 in product(allkeys, repeat=3):
	    ts = {k1, k2, k3}
	    c = sum(ts.issubset(s) for s in sdata)
	    # "At least half of the sets", written without division so that the result
	    # does not depend on integer vs. true division.
	    if 2 * c < total:
	        continue
	    key = (c, str(sorted(ts)))
	    if key not in l:
	        l.add(key)
	        d.append(ts)

	# Highest support first; ties are ordered by the string form, descending.
	l = sorted(l, reverse=True)
	for x in l:
	    print(x)

	# Frequent subsets with at least two attributes.
	d2 = [fs for fs in d if len(fs) > 1]
	print("all sets contain at least two of the following sets (except for some exceptions)")
	print(d2)
	for r in sdata:
	    # Tally against d2 so the count refers to the list printed just above;
	    # single-attribute subsets must not contribute.
	    c = sum(td.issubset(r) for td in d2)
	    if c <= 1:
	        print(r, " has count ", c)