Created
April 8, 2017 03:41
-
-
Save chiaki64/3a43f44a8e53500a749aa2c44e808795 to your computer and use it in GitHub Desktop.
Apriori.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8 -*- | |
from random import randint, sample | |
ALPHABET = list('ABCDE') | |
# 生成数据集 | |
def generate_data(quantity: int = 20): | |
return [sample(ALPHABET, randint(1, len(ALPHABET))) for i in range(quantity)] | |
# 构建单项集C_1 | |
def generate_C1(dateset): | |
C_1 = [] | |
for row in dateset: | |
for item in row: | |
if [item] not in C_1: | |
C_1.append([item]) | |
C_1.sort() | |
return list(map(frozenset, C_1)) | |
# 筛选满足最小支持度的项集 | |
def scan(transactions, C_k, minsup_count): | |
Cnt, res, Freq = {}, [], {} | |
for t in transactions: | |
for c in C_k: | |
if c.issubset(t): | |
if c not in Cnt: | |
Cnt[c] = 1 | |
else: | |
Cnt[c] += 1 | |
for key in Cnt: | |
if Cnt[key] >= minsup_count: | |
res.append(key) | |
Freq[key] = Cnt[key] | |
return res, Freq | |
# 由频繁(k-1)-项集生成候选k-项集 | |
def apriori_gen(L_k, k): | |
res, length = [], len(L_k) | |
for i in range(length): | |
for j in range(i + 1, length): | |
L1 = list(L_k[i])[:k - 2] | |
L2 = list(L_k[j])[:k - 2] | |
L1.sort() | |
L2.sort() | |
if L1 == L2: | |
res.append(L_k[i] | L_k[j]) | |
return res | |
# 寻找频繁项目集 | |
def apriori(dataset, minsup_count): | |
C_1 = generate_C1(dataset) | |
T = list(map(set, dataset)) | |
L_1, Freq = scan(T, C_1, minsup_count) | |
L = [L_1] | |
k = 2 | |
while len(L[k - 2]) > 0: | |
C_k = apriori_gen(L[k - 2], k) | |
L_k, FreqK = scan(T, C_k, minsup_count) | |
Freq.update(FreqK) | |
L.append(L_k) | |
k += 1 | |
return L, Freq | |
data = generate_data() | |
L = apriori(data, 8) | |
[print(f"{item}=>{L[1][item]}") if item in L[1] else None for li in L[0] for item in li] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment