Skip to content

Instantly share code, notes, and snippets.

@axjack
Created August 3, 2019 09:23
Show Gist options
  • Save axjack/066a79ad07bde41fcd9dc07d9719988e to your computer and use it in GitHub Desktop.
Save axjack/066a79ad07bde41fcd9dc07d9719988e to your computer and use it in GitHub Desktop.
アソシエーション分析の断片。length == 1を除外すると、transaction数を減らすことができる。
# load library ####
library(dplyr)
library(readr)
# NOTE: the workspace is intentionally NOT cleared here. `rm(list = ls())`
# silently destroys whatever the caller had in their session and does not
# reset loaded packages or options, so it provides no real isolation.

# Read the "single"-format CSV: one (transaction ID, item) pair per row.
# Both columns are read as character. Column names are supplied explicitly,
# so the first row of the file is treated as data, not as a header.
mytemp <- read_csv(
  file = "dat/dat_single.v2.csv",
  col_types = "cc",
  col_names = c("tranID", "item"),
  quote = "\""
  # , skip = 1 # skip the first row when the file has a header
)

# Drop duplicated (tranID, item) rows, then keep only the transactions that
# still have more than one row, i.e. discard transactions of length 1.
# ungroup() so the result does not carry the grouping into later steps.
mytemp.f <- mytemp %>%
  distinct() %>%
  group_by(tranID) %>%
  filter(n() > 1) %>%
  ungroup()

# Write the filtered pairs without a header row, matching the input layout.
# The path is passed positionally because the argument was renamed from
# `path` to `file` in readr 1.4.0; positional use works with both.
write_csv(mytemp.f, "dat/dat_single.f.csv", col_names = FALSE)
###########################
# アソシエーション分析 ####
###########################
# load library ####
library(arules)
# load CSV ####

# Read a "single"-format CSV (column 1 = transaction ID, column 2 = item)
# into an arules transactions object, removing duplicated items within a
# transaction. Both inputs go through this helper so that they are parsed
# with identical options and the two summaries are directly comparable.
read_single_csv <- function(path) {
  read.transactions(
    file = path,
    format = "single",
    sep = ",",
    cols = c(1, 2),
    rm.duplicates = TRUE
    # , skip = 1 # enable when the file has a header row
  )
}

# (1) original data
dsv2 <- read_single_csv("dat/dat_single.v2.csv")

# (2) original data after processing (the filtered file)
dsf <- read_single_csv("dat/dat_single.f.csv")

# Compare the number and size distribution of transactions in both sets.
summary(dsv2)
summary(dsf)
# NOTE(review): `aparam` (minlen = maxlen = 2) is defined but is not passed
# to either apriori() call below. It is kept so nothing that relies on it
# breaks; confirm whether the rules were meant to be limited to length 2.
aparam <- list(minlen = 2, maxlen = 2)

# Mining parameters shared by both runs, so the two rule sets are produced
# with exactly the same support threshold.
rule_param <- list(support = 0.01)

# No length constraint (original transactions).
dsv2.a <- apriori(dsv2, parameter = rule_param)
# Length == 1 excluded (filtered transactions).
dsf.a <- apriori(dsf, parameter = rule_param)

# Show the rules whose left-hand side contains all of the listed items.
# %ain% matches on the full set, so the order of the items does not matter;
# the same order is used in both queries for readability.
inspect(subset(dsv2.a, subset = lhs %ain% c("おむつ", "ビール")))
inspect(subset(dsf.a, subset = lhs %ain% c("おむつ", "ビール")))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment