Skip to content

Instantly share code, notes, and snippets.

@mmxgn
Last active August 29, 2015 14:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mmxgn/2693bebb28d5a71f42c7 to your computer and use it in GitHub Desktop.
Save mmxgn/2693bebb28d5a71f42c7 to your computer and use it in GitHub Desktop.
def find_frequent_subsequences(seq, K, minsup, maxgap):
S = seq
L = []
# Map each individual symbol to its position in the sequence
for k in range(0, len(seq)):
L.append((seq[k],k))
# Reduce [(symbol1, position1), (symbol1, position2), ... ] to {symbol1: [position1, position2, ...]}
Lr = {}
for k in range(0, len(L)):
if L[k][0] in Lr:
Lr[L[k][0]].append(L[k][1])
else:
Lr[L[k][0]] = [L[k][1]]
# Filter out the symbols with less than minsup support
Lk = dict((k,v) for k,v in Lr.items() if len(v) >= minsup)
Lr = Lk
# Recursively combine->reduce
for k in range(1, K):
L = []
for l1 in Lk:
for l2 in Lr:
# Combine symbol1,..,symbolN-1 and symbolN
subseq = tuple(l1)+tuple(l2)
for v1 in Lk[l1]:
for v2 in Lr[l2]:
if k == 1:
v = [v1, v2]
else:
v = v1 + [v2]
# Keep only those that are sorted in order with a maximum gap of maxgap
if all([v[i] < v[i+1] and v[i+1]-v[i] < maxgap+1 for i in range(0, len(v)-1)]):
L.append((subseq, v))
# Reduce again and filter
Lk = {}
for k in range(0, len(L)):
if L[k][0] in Lk:
Lk[L[k][0]].append(L[k][1])
else:
Lk[L[k][0]] = [L[k][1]]
Lk = dict((k,v) for k,v in Lk.items() if len(v) >= minsup)
# Return a record with gap symbols and frequent subsequences
record = {}
for lk in Lk:
length = len(lk)
gap_symbols = []
for i in range(0, len(Lk[lk])):
gap = S[Lk[lk][i][0]+1:Lk[lk][i][1]]
if gap:
gap_symbols += gap
record[lk] = (len(Lk[lk]), gap_symbols)
return record
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment