Skip to content

Instantly share code, notes, and snippets.

@marcelcaraciolo
Created December 4, 2011 18:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save marcelcaraciolo/1430848 to your computer and use it in GitHub Desktop.
Save marcelcaraciolo/1430848 to your computer and use it in GitHub Desktop.
apriori_rules.py
def generateRules(L, support_data, min_confidence=0.7):
    """Create the association rules.

    L: list of frequent item sets, grouped by level; L[0] is skipped
        (presumably the single-item sets, which cannot be split into an
        antecedent and a consequent -- confirm against the caller)
    support_data: support data for those itemsets
    min_confidence: minimum confidence threshold

    Returns the list of rules collected while scoring every item set.
    """
    rules = []
    for i, level in enumerate(L[1:], start=1):
        for freqSet in level:
            H1 = [frozenset([item]) for item in freqSet]
            # Single formatted string: same output as the old print
            # statement, and valid on both Python 2 and Python 3.
            print("freqSet %s H1 %s" % (freqSet, H1))
            # Always score the single-item consequents. Previously, for
            # i > 1, H1 went straight to rules_from_conseq, which only scores
            # the merged (larger) consequents, so rules such as
            # {a, b} ---> {c} were never evaluated.
            survivors = calc_confidence(
                freqSet, H1, support_data, rules, min_confidence)
            # Growing a consequent can only lower the confidence, so only the
            # consequents that passed are worth merging; at least two are
            # needed to build a larger one.
            if i > 1 and len(survivors) > 1:
                rules_from_conseq(
                    freqSet, survivors, support_data, rules, min_confidence)
    return rules
def calc_confidence(freqSet, H, support_data, rules, min_confidence=0.7):
    """Evaluate the rules generated from one frequent item set.

    Each candidate consequent ``conseq`` in H is scored as
    support(freqSet) / support(freqSet - conseq).

    freqSet: item set the rules are drawn from
    H: list of candidate consequent item sets
    support_data: mapping from item set to its support
    rules: list extended in place with (antecedent, consequent, confidence)
        tuples for every rule that meets the threshold
    min_confidence: minimum confidence threshold

    Returns the list of consequents whose rule met the threshold.
    Raises KeyError if H is non-empty and freqSet is not in support_data.
    """
    pruned_H = []
    if not H:
        return pruned_H
    # float() forces true division: with integer support counts, Python 2's
    # "/" would truncate every confidence below 1 down to 0.
    itemset_support = float(support_data[freqSet])
    for conseq in H:
        antecedent = freqSet - conseq
        antecedent_support = support_data.get(antecedent)
        if not antecedent_support:
            # Support missing (previously a KeyError) or zero (previously a
            # ZeroDivisionError): the confidence is undefined, so no rule
            # can be emitted for this candidate.
            continue
        conf = itemset_support / antecedent_support
        if conf >= min_confidence:
            # Single formatted string: same output as the old print
            # statement, and valid on both Python 2 and Python 3.
            print("%s ---> %s conf: %s" % (antecedent, conseq, conf))
            rules.append((antecedent, conseq, conf))
            pruned_H.append(conseq)
    return pruned_H
def rules_from_conseq(freqSet, H, support_data, rules, min_confidence=0.7):
    """Generate and score candidate rules with progressively larger consequents.

    The sets in H are not scored by this function; only the candidates merged
    from them are passed to calc_confidence.

    freqSet: item set the rules are drawn from
    H: list of candidate consequents; the size of the first one is taken as
        the current consequent size
    support_data: support lookup forwarded to calc_confidence
    rules: list that calc_confidence appends the accepted rules to
    min_confidence: minimum confidence threshold

    Returns None; results are delivered through ``rules``.
    """
    # Nothing to merge. Without this guard H[0] raised IndexError.
    if not H:
        return
    m = len(H[0])
    # A consequent of size m + 1 must leave at least one item for the
    # antecedent, hence the strict comparison.
    if len(freqSet) > m + 1:
        # aprioriGen is defined outside this block; presumably it joins the
        # size-m sets into size-(m + 1) candidates -- confirm.
        Hmp1 = aprioriGen(H, m + 1)
        Hmp1 = calc_confidence(freqSet, Hmp1, support_data, rules, min_confidence)
        # At least two surviving consequents are needed to build a larger one.
        if len(Hmp1) > 1:
            rules_from_conseq(freqSet, Hmp1, support_data, rules, min_confidence)
@fgadaleta
Copy link

There is a problem in calc_confidence.
With a low minimum support, line 24 raises a KeyError on support_data.
Has nobody else run into that?

@ParitoshGoyal
Copy link

Yes, I am also getting the same problem. Are there any possible solutions?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment