Created
February 7, 2013 10:58
-
-
Save Stiivi/4730288 to your computer and use it in GitHub Desktop.
Data toy: Apriori algorithm in Python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import Counter | |
from itertools import combinations | |
def distinct_items(transactions, support=None): | |
"""Returns counted set of distinct items in transactions""" | |
counter = Counter() | |
for trans in transactions: | |
counter.update(trans) | |
if support is not None: | |
return set(item for item in counter if counter[item] >= support) | |
else: | |
return set(counter) | |
def frequent_single_itemsets(transactions, support): | |
"""Return one-item itemsets with at least `support` support.""" | |
distinct = distinct_items(transactions, support) | |
return set(frozenset([i]) for i in distinct) | |
def generate_candidates(L, k): | |
"""Generate candidate set from `L` with size `k`""" | |
candidates = set() | |
for a in L: | |
for b in L: | |
union = a | b | |
if len(union) == k and a != b: | |
candidates.add(union) | |
return candidates | |
def itemsets_support(transactions, itemsets): | |
"""Get support for `itemsets`""" | |
support_set = Counter() | |
for trans in transactions: | |
subsets = [itemset for itemset in itemsets if itemset < trans] | |
support_set.update(subsets) | |
return support_set | |
def min_support_set(counter, support): | |
"""Return sets with minimal `support`""" | |
items = [item for item in counter if counter[item] >= support] | |
return set(items) | |
def apriori(transactions, support): | |
candidates = frequent_single_itemsets(transactions, support) | |
result = list() | |
k = 2 | |
while candidates: | |
# Get candidate set | |
candidates = generate_candidates(candidates, k) | |
supported = itemsets_support(transactions, candidates) | |
candidates = min_support_set(supported, support) | |
result += candidates | |
k += 1 | |
return result |
Thank you for the coding. It works perfectly after modifying from:
subsets = [itemset for itemset in itemsets if itemset < trans]
to:
subsets = [itemset for itemset in itemsets if itemset <= trans]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I wanted to know what is the data type of variable "transactions" in the above code. Also please tell me how to call the function Apriori.