Skip to content
{{ message }}

Instantly share code, notes, and snippets.

# escuccim/gist:3ad7f9b4b538384602ef6df0462564d8

Last active Feb 3, 2019
PatternDistance
 # Pattern-distance helpers: one works from precomputed supports, one from raw transactions.
def pattern_distance(P1, P2, db):
    """Return the pattern distance between two nested patterns.

    The distance is ``1 - support(superset) / support(subset)``, read from
    precomputed supports. It is only meaningful when one pattern is a
    subset of the other.

    Args:
        P1: Iterable of items (list, array, tuple) forming the first pattern.
        P2: Iterable of items forming the second pattern.
        db: Mapping whose keys are patterns as tuples and whose values are
            the supports of those patterns. ``tuple(P1)`` and ``tuple(P2)``
            must both be keys, in the same item order as stored.

    Returns:
        ``1 - support(superset) / support(subset)`` when the patterns are
        nested. ``0`` when neither pattern is a subset of the other (the
        measure is not defined there, so treat this as "not applicable").
        ``0.0`` when the subset pattern has zero support.

    Raises:
        KeyError: If either pattern is missing from ``db``.
    """
    # Look both supports up first so a missing pattern always raises KeyError,
    # even when the patterns turn out not to be nested.
    P1_support = db[tuple(P1)]
    P2_support = db[tuple(P2)]

    # Build each item set once; both subset tests reuse them.
    items1 = set(P1)
    items2 = set(P2)

    if items1.issubset(items2):
        # P1 is the smaller pattern, so its support is the denominator.
        sub_support, super_support = P1_support, P2_support
    elif items2.issubset(items1):
        # Same relation with the roles flipped.
        sub_support, super_support = P2_support, P1_support
    else:
        return 0

    # A subset pattern with zero support would divide by zero; report
    # distance 0.0 instead of raising ZeroDivisionError.
    if sub_support == 0:
        return 0.0
    return 1 - (super_support / sub_support)


def pattern_distance2(P1, P2, db):
    """Return the pattern distance computed directly from transactions.

    Counts the transactions that contain both patterns and those that
    contain at least one of them, then returns
    ``1 - count(P1 and P2) / count(P1 or P2)``.

    Args:
        P1: Iterable of items forming the first pattern.
        P2: Iterable of items forming the second pattern.
        db: Iterable of transactions, each an iterable of items.

    Returns:
        The distance as a float in ``[0, 1]``. ``0.0`` when no transaction
        contains either pattern (including an empty ``db``), instead of
        raising ZeroDivisionError.
    """
    # Sets allow subset testing against each transaction.
    P1 = set(P1)
    P2 = set(P2)

    both_count = 0    # transactions containing P1 and P2
    either_count = 0  # transactions containing P1 or P2 (or both)

    for transaction in db:
        trans_set = set(transaction)
        has_p1 = P1.issubset(trans_set)
        has_p2 = P2.issubset(trans_set)
        if has_p1 and has_p2:
            both_count += 1
        if has_p1 or has_p2:
            # Any match counts towards the union ("or") count.
            either_count += 1

    # Neither pattern occurs anywhere: avoid 0/0 and report distance 0.0.
    if either_count == 0:
        return 0.0
    return 1 - (both_count / either_count)
to join this conversation on GitHub. Already have an account? Sign in to comment