Created
December 7, 2018 19:58
-
-
Save lemire/215228f075d9d03fa2122814ca0ef5c8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find every attribute combination (one to three attributes) that occurs in at
# least half of the attribute sets below, then report the sets that contain
# fewer than two of the frequent multi-attribute combinations.
rawdata = [
    ["age", "workclass", "capital_loss"],
    ["age", "capital_gain", "capital_loss"],
    ["age", "workclass", "capital_loss", "x"],
    ["age", "capital_gain", "capital_loss", "x"],
    ["workclass", "hours_per_week", "native_country"],
    ["age", "capital_loss", "native_country"],
    ["workclass", "hours_per_week", "native_country", "x"],
    ["age", "capital_loss", "native_country", "x"],
    ["age", "capital_loss", "x"],
    ["age", "capital_gain", "native_country"],
    ["age", "workclass", "capital_gain"],
    ["age", "workclass", "capital_gain", "x"],
    ["age", "capital_gain", "native_country", "x"],
    ["age", "capital_gain", "x"],
]
print("how many sets", len(rawdata))

# Built as a real list (not a lazy map object) because it is measured with
# len() and iterated many times below.
sdata = [set(row) for row in rawdata]
total = len(sdata)

# Union of every attribute name that appears in any set; also safe when
# rawdata is empty.
allkeys = set().union(*sdata)
print("how many attributes ", len(allkeys))

# l: (count, sorted attribute names rendered as a string) per frequent
#    combination; the string form makes each combination hashable/sortable.
# d: the frequent combinations themselves, one entry per distinct combination.
l = set()
d = []
for k1 in allkeys:
    for k2 in allkeys:
        for k3 in allkeys:
            # Repeated keys collapse, so ts holds between one and three
            # distinct attributes.
            ts = {k1, k2, k3}
            # Number of input sets that contain every attribute of ts.
            c = sum(ts.issubset(s) for s in sdata)
            tss = sorted(ts)
            # Keep only combinations present in at least half of the sets.
            if c >= total / 2:
                key = (c, str(tss))
                # The same combination is reached through several (k1, k2, k3)
                # orderings; record it only the first time.
                if key not in l:
                    l.add(key)
                    d.append(ts)

# Most frequent combinations first.
l = sorted(l, reverse=True)
for x in l:
    print(x)

# Frequent combinations made of at least two attributes (a list, so that it
# prints its contents and can be iterated more than once).
d2 = [td for td in d if len(td) > 1]
print("all sets contain at least two of the following sets (except for some exceptions)")
print(d2)

# The exceptions: input sets containing fewer than two of the combinations in
# d2.  Only multi-attribute combinations are counted, matching the message
# printed above (the size check has to apply to each combination, not to the
# list of combinations as a whole).
exceptions = []
for r in sdata:
    c = sum(td.issubset(r) for td in d2)
    if c <= 1:
        exceptions.append((r, c))
        print(r, " has count ", c)
Author
lemire
commented
Dec 7, 2018
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment