Instantly share code, notes, and snippets.

Embed
What would you like to do?
Apriori.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from random import randint, sample
ALPHABET = list('ABCDE')
# 生成数据集
def generate_data(quantity: int = 20):
return [sample(ALPHABET, randint(1, len(ALPHABET))) for i in range(quantity)]
# 构建单项集C_1
def generate_C1(dateset):
C_1 = []
for row in dateset:
for item in row:
if [item] not in C_1:
C_1.append([item])
C_1.sort()
return list(map(frozenset, C_1))
# 筛选满足最小支持度的项集
def scan(transactions, C_k, minsup_count):
Cnt, res, Freq = {}, [], {}
for t in transactions:
for c in C_k:
if c.issubset(t):
if c not in Cnt:
Cnt[c] = 1
else:
Cnt[c] += 1
for key in Cnt:
if Cnt[key] >= minsup_count:
res.append(key)
Freq[key] = Cnt[key]
return res, Freq
# 由频繁(k-1)-项集生成候选k-项集
def apriori_gen(L_k, k):
res, length = [], len(L_k)
for i in range(length):
for j in range(i + 1, length):
L1 = list(L_k[i])[:k - 2]
L2 = list(L_k[j])[:k - 2]
L1.sort()
L2.sort()
if L1 == L2:
res.append(L_k[i] | L_k[j])
return res
# 寻找频繁项目集
def apriori(dataset, minsup_count):
C_1 = generate_C1(dataset)
T = list(map(set, dataset))
L_1, Freq = scan(T, C_1, minsup_count)
L = [L_1]
k = 2
while len(L[k - 2]) > 0:
C_k = apriori_gen(L[k - 2], k)
L_k, FreqK = scan(T, C_k, minsup_count)
Freq.update(FreqK)
L.append(L_k)
k += 1
return L, Freq
data = generate_data()
L = apriori(data, 8)
[print(f"{item}=>{L[1][item]}") if item in L[1] else None for li in L[0] for item in li]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment