Skip to content

Instantly share code, notes, and snippets.

@mukund-murali
Created July 27, 2018 08:06
Show Gist options
  • Save mukund-murali/e113427e51b083e42bfa61d934a41a22 to your computer and use it in GitHub Desktop.
Save mukund-murali/e113427e51b083e42bfa61d934a41a22 to your computer and use it in GitHub Desktop.
Apriori algorithm implementation
"""
Implements Apriori algorithm as defined in https://www.slideshare.net/INSOFE/apriori-algorithm-36054672
Usage:
from apriori import apriori
dataset = [
[1, 2],
[2, 3],
[1, 3, 4],
[2, 3, 4],
[2, 3, 5],
[2, 3, 5],
]
df = apriori(dataset, 0.2)
print(df)
"""
from collections import defaultdict
from collections import namedtuple
from itertools import combinations
import pandas as pd
FrequentItemset = namedtuple('FrequentItemset', 'itemset support')
FrequentItemsetResponse = namedtuple('FrequentItemsetResponse', 'keys_only itemsets')
def apriori(transactions, min_support):
l1_candidate_itemsets = set()
new_transactions = []
for row in transactions:
new_transactions.append(frozenset(row))
all_frequent_itemsets = {'support': [], 'itemset': []}
# Step 1
for transaction in new_transactions:
for item in transaction:
l1_candidate_itemsets.add(frozenset([item]))
l1_all = _get_frequent_itemsets(l1_candidate_itemsets, new_transactions, min_support)
_append_to_all(all_frequent_itemsets, l1_all)
lk_minus_1 = l1_all.keys_only
while True:
# Step 2
candidate_itemsets = _generate_candidate_itemsets(lk_minus_1)
# Step 3
lk_all = _get_frequent_itemsets(candidate_itemsets, new_transactions, min_support)
_append_to_all(all_frequent_itemsets, lk_all)
lk = lk_all.keys_only
if len(lk) <= 0:
break
lk_minus_1 = lk
df = pd.DataFrame(all_frequent_itemsets)
return df
def _append_to_all(all_frequent_itemsets, lk_all):
for frequent_itemset in lk_all.itemsets:
all_frequent_itemsets['itemset'].append(frequent_itemset.itemset)
all_frequent_itemsets['support'].append(frequent_itemset.support)
def _get_frequent_itemsets(candidate_itemsets, new_transactions, min_support):
num_transactions = float(len(new_transactions))
itemset_counts = defaultdict(int)
for itemset in candidate_itemsets:
for transaction in new_transactions:
if itemset.issubset(transaction):
itemset_counts[itemset] += 1
frequent_itemsets = []
keys = []
for k, v in itemset_counts.iteritems():
support = v / num_transactions
if support < min_support:
continue
frequent_itemsets.append(FrequentItemset(k, support))
keys.append(k)
return FrequentItemsetResponse(keys, frequent_itemsets)
def _generate_candidate_itemsets(lk_minus_1):
if len(lk_minus_1) <= 0:
return set()
candidate_itemsets = set()
prev_level = len(lk_minus_1[0])
for i in range(len(lk_minus_1)):
for j in range(i+1, len(lk_minus_1)):
for item in lk_minus_1[j]:
candidate = lk_minus_1[i].union(frozenset([item]))
# Handles set having same item multiple times.
# Example: Merging {1, 2} + {2, 3} => {1, 3}. This is not a new level item.
if len(candidate) <= prev_level:
continue
# Applying apriori property
for combination in combinations(candidate, prev_level):
if frozenset(combination) not in lk_minus_1:
break
else:
candidate_itemsets.add(candidate)
return candidate_itemsets
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment