Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
PatternDistance
# db is a dictionary mapping each pattern (as a tuple) to its support
# P1 and P2 are lists or arrays of items; tuple(P1) and tuple(P2) must be keys of db
# the result is only meaningful if one of P1 and P2 is a subset of the other
def pattern_distance(P1, P2, db):
    """Return the pattern distance between two nested patterns.

    The distance is ``1 - support(superset) / support(subset)``, where the
    supports are looked up in ``db`` rather than counted from transactions.

    Args:
        P1: List or array of items forming the first pattern.
        P2: List or array of items forming the second pattern.
        db: Dictionary keyed by ``tuple(pattern)`` with the pattern's
            support as value.

    Returns:
        The distance when one pattern is a subset of the other. ``0`` when
        neither pattern contains the other (the measure is not defined for
        that case, so this value is only a placeholder). ``0.0`` when the
        subset pattern has zero support.

    Raises:
        KeyError: If ``tuple(P1)`` or ``tuple(P2)`` is not a key of ``db``.
    """
    P1_support = db[tuple(P1)]
    P2_support = db[tuple(P2)]

    # Build each item set once instead of once per subset test.
    items1 = set(P1)
    items2 = set(P2)

    if items1.issubset(items2):
        # P1 is the smaller pattern, so its support is the denominator.
        sub_support, super_support = P1_support, P2_support
    elif items2.issubset(items1):
        # P2 is the smaller pattern: same formula with the roles flipped.
        sub_support, super_support = P2_support, P1_support
    else:
        return 0

    # A zero denominator would raise ZeroDivisionError. Assuming supports
    # are anti-monotone, the superset's support is then zero as well, so
    # both patterns match the same (empty) set of transactions.
    if sub_support == 0:
        return 0.0

    return 1 - (super_support / sub_support)
# takes two patterns and a complete DB and returns the
# pattern distance, calculating the supports from the db
def pattern_distance2(P1, P2, db):
# initialize our counts
P1orP2_count = 0
P1andP2_count = 0
# convert P1 and P2 to sets so we can do subset testing
P1 = set(P1)
P2 = set(P2)
# loop through the DB getting the support for (P1 and P2) and (P1 or P2)
for transaction in db:
trans_set = set(transaction)
# if both P1 and P2 are in the transaction then we increment both counts
if P1.issubset(trans_set) and P2.issubset(trans_set):
P1andP2_count += 1
P1orP2_count += 1
# else if only one of them is we increment the intersect count
elif P1.issubset(trans_set) or P2.issubset(trans_set):
P1orP2_count += 1
return 1 - (P1andP2_count / P1orP2_count)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.