Skip to content

Instantly share code, notes, and snippets.

@billju
Created December 1, 2021 03:23
Show Gist options
  • Save billju/ee0ed4d08b974d8ba25e8aebac65e1d5 to your computer and use it in GitHub Desktop.
Save billju/ee0ed4d08b974d8ba25e8aebac65e1d5 to your computer and use it in GitHub Desktop.
import io
import pandas as pd
from itertools import permutations, combinations
CSV = """
交易紀錄 牛奶(A) 麵包(B) 餅乾(C) 柳橙汁(D) 汽水(E) 泡麵(F) 水果(G)
101 1 1 1 1 0 0 0
102 0 1 1 0 1 1 0
103 1 0 1 0 0 0 1
104 1 1 0 1 0 1 1
105 0 0 1 0 1 0 1
"""
df = pd.read_csv(io.StringIO(CSV), sep=' ').drop(columns=['交易紀錄'])
prob = lambda cols: (df[cols].sum(axis=1)==len(cols)).mean() # 計算某集合的出現比例
min_support = 0.4 # 出現比例
min_confidence = 0.8 # A交集B比例 / A比例
cols = [c for c in df.columns if prob([c])>=min_support] # 達標的單一項
comb = [tup for i in range(2,len(cols)+1) for tup in combinations(cols,i) if prob([*tup])>=min_support] # 達標的組合(至少兩個)
perm = [(t[:i],t[i:]) for tup in comb for t in permutations(tup) for i in range(1,len(t))] # 排列組合
result = []
for A,B in perm:
support = prob([*A,*B])
confidence = support/prob([*B])
lift = confidence/prob([*A])
result.append([A,B,support,confidence,lift])
pd.DataFrame(result,columns=['A','B','SUP','CONF','LIFT'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment