Skip to content

Instantly share code, notes, and snippets.

@lemire
Created December 7, 2018 19:58
Show Gist options
  • Save lemire/215228f075d9d03fa2122814ca0ef5c8 to your computer and use it in GitHub Desktop.
Save lemire/215228f075d9d03fa2122814ca0ef5c8 to your computer and use it in GitHub Desktop.
# Frequent attribute-subset analysis.
#
# Each row of `rawdata` is a list of attribute names.  The script:
#   1. counts, for every attribute subset of size 1 to 3, how many rows
#      contain it;
#   2. keeps the subsets contained in at least half of the rows (floor);
#   3. prints them from most to least frequent;
#   4. reports the rows that contain at most one of the retained subsets.
#
# Runs unchanged on Python 2.6+ and Python 3 (the printed formatting differs
# between the two because `print(a, b)` prints a tuple on Python 2).
from itertools import combinations

rawdata = [
    ["age", "workclass", "capital_loss"],
    ["age", "capital_gain", "capital_loss"],
    ["age", "workclass", "capital_loss", "x"],
    ["age", "capital_gain", "capital_loss", "x"],
    ["workclass", "hours_per_week", "native_country"],
    ["age", "capital_loss", "native_country"],
    ["workclass", "hours_per_week", "native_country", "x"],
    ["age", "capital_loss", "native_country", "x"],
    ["age", "capital_loss", "x"],
    ["age", "capital_gain", "native_country"],
    ["age", "workclass", "capital_gain"],
    ["age", "workclass", "capital_gain", "x"],
    ["age", "capital_gain", "native_country", "x"],
    ["age", "capital_gain", "x"],
]
print("how many sets", len(rawdata))

# A real list (not a lazy `map` object) is required: it is measured with
# len() and iterated many times below.
sdata = [set(row) for row in rawdata]
total = len(sdata)

# Union of every attribute seen in any row; also well defined for no rows.
allkeys = set().union(*sdata)
print("how many attributes ", len(allkeys))

# l: set of (row count, "['attr', ...]") pairs for the retained subsets.
# d: the retained subsets themselves, as sets.
l = set()
d = []

# Sorting the keys makes the enumeration order (and hence the order of `d`)
# deterministic; combinations() yields each subset exactly once, so no
# de-duplication is needed.
ordered_keys = sorted(allkeys)
for size in (1, 2, 3):
    for combo in combinations(ordered_keys, size):
        ts = set(combo)
        c = sum(ts.issubset(s) for s in sdata)
        # Floor division keeps the original (Python 2) threshold semantics.
        if c >= total // 2:
            l.add((c, str(list(combo))))
            d.append(ts)

# Most frequent first; ties are broken by the subset's string form.
l = sorted(l, reverse=True)
for x in l:
    print(x)

# Retained subsets that involve more than one attribute.
d2 = [ts for ts in d if len(ts) > 1]
print("all sets contain at least two of the following sets (except for some exceptions)")
print(d2)

# Rows that contain at most one retained subset, with their count.
exceptions = []
for r in sdata:
    # NOTE(review): `len(d) > 1` tests the whole list, so it has the same
    # value for every `td`; it was presumably meant to be `len(td) > 1`
    # (count only the subsets in d2).  Kept as written because changing it
    # alters which rows are reported -- confirm the intent before changing.
    c = sum(td.issubset(r) and len(d) > 1 for td in d)
    if c <= 1:
        exceptions.append((r, c))
        print(r, " has count ", c)
@lemire
Copy link
Author

lemire commented Dec 7, 2018

('how many sets', 14)
('how many attributes ', 7)
(12, "['age']")
(8, "['x']")
(7, "['capital_loss']")
(7, "['capital_gain']")
(7, "['age', 'x']")
(7, "['age', 'capital_loss']")
(7, "['age', 'capital_gain']")
all sets contain at least two of the following sets (except for some exceptions)
[set(['x', 'age']), set(['age', 'capital_loss']), set(['age', 'capital_gain'])]
(set(['native_country', 'hours_per_week', 'workclass']), ' has count ', 0)
(set(['native_country', 'hours_per_week', 'workclass', 'x']), ' has count ', 1)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment