Skip to content

Instantly share code, notes, and snippets.

@evertheylen
Last active January 2, 2017 10:44
Show Gist options
  • Save evertheylen/ece1e3494c233e17d04be0f8a23dd464 to your computer and use it in GitHub Desktop.
Save evertheylen/ece1e3494c233e17d04be0f8a23dd464 to your computer and use it in GitHub Desktop.
Simple Eclat algorithm
import sys
from collections import defaultdict
import random
def tidlists(transactions):
tl = defaultdict(set)
for tid, t in enumerate(transactions):
for item in t:
tl[item].add(tid)
return list(tl.items())
class IntersectAll:
def __and__(self, other):
return other
IntersectAll = IntersectAll()
def eclat(items, minsup=0, minlen=1):
frequent_itemsets = {(): IntersectAll}
def recurse(items, prefix):
while len(items) > 0:
item, item_tidlist = items.pop()
l = prefix + (item,) # l is the (ordered) tuple of items we are looking for
new_tidlist = frequent_itemsets[prefix] & item_tidlist
if len(new_tidlist) >= minsup: # add frequent_itemsets to the new frequent_itemsets
frequent_itemsets[l] = new_tidlist
# define the new l-conditional database
new_items = []
for new_item, _item_tidlist in items:
new_item_tidlist = _item_tidlist & item_tidlist
if len(new_item_tidlist) >= minsup:
new_items.append((new_item, new_item_tidlist))
# recurse, with l as prefix
recurse(new_items, l)
recurse(items.copy(), ())
return {k: len(v) for k, v in frequent_itemsets.items() if len(k) >= minlen}
if __name__ == "__main__":
transaction_db = [
{1,2,3,4,5},
{1,2,3},
{2,3,4},
{2,3,5},
{2,4,5},
{3,4,5}
]
# As is obvious, {2,3} is rather frequent
tl = tidlists(transaction_db)
print(eclat(tl, minsup=4, minlen=2))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment