Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
The original minimal 15 lines implementation of PrefixSpan. Full library at https://github.com/chuanconggao/PrefixSpan-py.
from collections import defaultdict
def frequent_rec(patt, mdb):
results.append((len(mdb), patt))
occurs = defaultdict(list)
for (i, startpos) in mdb:
seq = db[i]
for j in range(startpos + 1, len(seq)):
l = occurs[seq[j]]
if len(l) == 0 or l[-1][0] != i:
l.append((i, j))
for (c, newmdb) in occurs.items():
if len(newmdb) >= minsup:
frequent_rec(patt + [c], newmdb)
db = [
[0, 1, 2, 3, 4],
[1, 1, 1, 3, 4],
[2, 1, 2, 2, 0],
[1, 1, 1, 2, 2],
]
minsup = 2
results = []
frequent_rec([], [(i, -1) for i in range(len(db))])
print(results)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.