Skip to content

Instantly share code, notes, and snippets.

@brooksandrew
Last active May 19, 2022 08:14
Show Gist options
  • Star 6 You must be signed in to star a gist
  • Fork 5 You must be signed in to fork a gist
  • Save brooksandrew/706a28f832a33e90283b to your computer and use it in GitHub Desktop.
Save brooksandrew/706a28f832a33e90283b to your computer and use it in GitHub Desktop.
Launches a Shiny App that provides an interactive interface to the arules and arulesViz package which train and visualize association rules
#' @title Assocation Rules Visualization Shiny App
#' @description Launches a Shiny App that provides an interactive interface to the visualizations of the \code{arulesViz} package.
#' The app allows users to mine rules based on all or just subsets of features, sort by criteria (lift, support, confidence) and visualize
#' using network graph, grouped bubble and scatter plots. \cr
#' Users filter rules to target only those with a certain variable on the RHS or LHS of the rule.
#' Rule mining is computed using the \link{apriori} algorithm from \code{arules}.
#'
#' @param dataset data.frame, this is the dataset that association rules will be mined from. Each row is treated as a transaction. Seems to work
#' OK when a the S4 transactions class from \code{arules} is used, however this is not thoroughly tested.
#' @param bin logical, \code{TRUE} will automatically discretize/bin numerical data into categorical features that can be used for association analysis.
#' @param vars integer, how many variables to include in initial rule mining
#' @param supp numeric, the support parameter for initializing visualization. Useful when it is known that a high support is needed to not crash computationally.
#' @param conf numeric, the confidence parameter for initializing visualization. Similarly useful when it is known that a high confidence is needed to not crash computationally.
#' @seealso \code{arulesViz}, \code{arules}
#' @return Shiny App
#' @import shiny arulesViz arules
#' @export
#'
#' @examples
#' ## creating some data
#' n <- 10000 # of obs
#' d <- data.frame(
#' eye = sample(c('brown', 'green', 'blue', 'hazel'), n, replace=T),
#' gender = sample(c('male', 'female'), n, replace=T),
#' height = sort(sample(c('dwarf', 'short', 'average', 'above average', 'giant'), n, replace=T)),
#' wealth = sort(sample(c('poor', 'struggling', 'middle', 'uppermiddle', 'comfortable', 'rich', '1%', 'millionaire', 'billionaire'), n, replace=T)),
#' favoriteAnimal = sample(c('dog', 'cat', 'bat', 'frog', 'lion', 'cheetah', 'lion', 'walrus', 'squirrel'), n, replace=T),
#' numkids = abs(round(rnorm(n, 2, 1)))
#' )
#'
#' ## adding some pattern
#' d$numkids[d$gender=='male'] <- d$numkids[d$gender=='male'] + sample(0:3, sum(d$gender=='male'), replace=T)
#' d$numkids <- factor(d$numkids)
#'
#' ## calling Shiny App to visualize association rules
#' arulesApp(d)
# dependencies:
devtools::source_url('https://raw.githubusercontent.com/brooksandrew/Rsenal/master/R/rules2df.R')
devtools::source_url('https://raw.githubusercontent.com/brooksandrew/Rsenal/master/R/bin.R')
arulesApp <- function (dataset, bin=T, vars=5, supp=0.1, conf=0.5) {
## binning numeric data
for(i in 1:ncol(dataset)) {
if(class(dataset[,i]) %in% c('numeric', 'integer')) dataset[,i] <- Rsenal::depthbin(dataset[,i], nbins=10)
}
## calling Shiny App
shinyApp(ui = shinyUI(pageWithSidebar(
headerPanel("Association Rules"),
sidebarPanel(
conditionalPanel(
condition = "input.samp=='Sample'",
numericInput("nrule", 'Number of Rules', 5), br()
),
conditionalPanel(
condition = "input.mytab=='graph'",
radioButtons('graphType', label='Graph Type', choices=c('itemsets','items'), inline=T), br()
),
conditionalPanel(
condition = "input.lhsv=='Subset'",
uiOutput("choose_lhs"), br()
),
conditionalPanel(
condition = "input.rhsv=='Subset'",
uiOutput("choose_rhs"), br()
),
conditionalPanel(
condition = "input.mytab=='grouped'",
sliderInput('k', label='Choose # of rule clusters', min=1, max=150, step=1, value=15), br()
),
conditionalPanel(
condition = "input.mytab %in%' c('grouped', 'graph', 'table', 'datatable', 'scatter', 'paracoord', 'matrix', 'itemFreq')",
radioButtons('samp', label='Sample', choices=c('All Rules', 'Sample'), inline=T), br(),
uiOutput("choose_columns"), br(),
sliderInput("supp", "Support:", min = 0, max = 1, value = supp , step = 1/10000), br(),
sliderInput("conf", "Confidence:", min = 0, max = 1, value = conf , step = 1/10000), br(),
selectInput('sort', label='Sorting Criteria:', choices = c('lift', 'confidence', 'support')), br(), br(),
numericInput("minL", "Min. items per set:", 2), br(),
numericInput("maxL", "Max. items per set::", 3), br(),
radioButtons('lhsv', label='LHS variables', choices=c('All', 'Subset')), br(),
radioButtons('rhsv', label='RHS variables', choices=c('All', 'Subset')), br(),
downloadButton('downloadData', 'Download Rules as CSV')
)
),
mainPanel(
tabsetPanel(id='mytab',
tabPanel('Grouped', value='grouped', plotOutput("groupedPlot", width='100%', height='100%')),
tabPanel('Graph', value='graph', plotOutput("graphPlot", width='100%', height='100%')),
tabPanel('Scatter', value='scatter', plotOutput("scatterPlot", width='100%', height='100%')),
tabPanel('Parallel Coordinates', value='paracoord', plotOutput("paracoordPlot", width='100%', height='100%')),
tabPanel('Matrix', value='matrix', plotOutput("matrixPlot", width='100%', height='100%')),
tabPanel('ItemFreq', value='itemFreq', plotOutput("itemFreqPlot", width='100%', height='100%')),
tabPanel('Table', value='table', verbatimTextOutput("rulesTable")),
tabPanel('Data Table', value='datatable', dataTableOutput("rulesDataTable"))
)
)
)),
server = function(input, output) {
output$choose_columns <- renderUI({
checkboxGroupInput("cols", "Choose variables:",
choices = colnames(dataset),
selected = colnames(dataset)[1:vars])
})
output$choose_lhs <- renderUI({
checkboxGroupInput("colsLHS", "Choose LHS variables:",
choices = input$cols,
selected = input$cols[1])
})
output$choose_rhs <- renderUI({
checkboxGroupInput("colsRHS", "Choose RHS variables:",
choices = input$cols,
selected = input$cols[1])
})
## Extracting and Defining arules
rules <- reactive({
tr <- as(dataset[,input$cols], 'transactions')
arAll <- apriori(tr, parameter=list(support=input$supp, confidence=input$conf, minlen=input$minL, maxlen=input$maxL))
if(input$rhsv=='Subset' & input$lhsv!='Subset'){
varsR <- character()
for(i in 1:length(input$colsRHS)){
tmp <- with(dataset, paste(input$colsRHS[i], '=', levels(as.factor(get(input$colsRHS[i]))), sep=''))
varsR <- c(varsR, tmp)
}
ar <- subset(arAll, subset=rhs %in% varsR)
} else if(input$lhsv=='Subset' & input$rhsv!='Subset') {
varsL <- character()
for(i in 1:length(input$colsLHS)){
tmp <- with(dataset, paste(input$colsLHS[i], '=', levels(as.factor(get(input$colsLHS[i]))), sep=''))
varsL <- c(varsL, tmp)
}
ar <- subset(arAll, subset=lhs %in% varsL)
} else if(input$lhsv=='Subset' & input$rhsv=='Subset') {
varsL <- character()
for(i in 1:length(input$colsLHS)){
tmp <- with(dataset, paste(input$colsLHS[i], '=', levels(as.factor(get(input$colsLHS[i]))), sep=''))
varsL <- c(varsL, tmp)
}
varsR <- character()
for(i in 1:length(input$colsRHS)){
tmp <- with(dataset, paste(input$colsRHS[i], '=', levels(as.factor(get(input$colsRHS[i]))), sep=''))
varsR <- c(varsR, tmp)
}
ar <- subset(arAll, subset=lhs %in% varsL & rhs %in% varsR)
} else {
ar <- arAll
}
quality(ar)$conviction <- interestMeasure(ar, method='conviction', transactions=tr)
quality(ar)$hyperConfidence <- interestMeasure(ar, method='hyperConfidence', transactions=tr)
quality(ar)$cosine <- interestMeasure(ar, method='cosine', transactions=tr)
quality(ar)$chiSquare <- interestMeasure(ar, method='chiSquare', transactions=tr)
quality(ar)$coverage <- interestMeasure(ar, method='coverage', transactions=tr)
quality(ar)$doc <- interestMeasure(ar, method='doc', transactions=tr)
quality(ar)$gini <- interestMeasure(ar, method='gini', transactions=tr)
quality(ar)$hyperLift <- interestMeasure(ar, method='hyperLift', transactions=tr)
ar
})
# Rule length
nR <- reactive({
nRule <- ifelse(input$samp == 'All Rules', length(rules()), input$nrule)
})
## Grouped Plot #########################
output$groupedPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='grouped', control=list(k=input$k))
}, height=800, width=800)
## Graph Plot ##########################
output$graphPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='graph', control=list(type=input$graphType))
}, height=800, width=800)
## Scatter Plot ##########################
output$scatterPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='scatterplot')
}, height=800, width=800)
## Parallel Coordinates Plot ###################
output$paracoordPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='paracoord')
}, height=800, width=800)
## Matrix Plot ###################
output$matrixPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='matrix', control=list(reorder=T))
}, height=800, width=800)
## Item Frequency Plot ##########################
output$itemFreqPlot <- renderPlot({
trans <- as(dataset[,input$cols], 'transactions')
itemFrequencyPlot(trans)
}, height=800, width=800)
## Rules Data Table ##########################
output$rulesDataTable <- renderDataTable({
ar <- rules()
rulesdt <- rules2df(ar)
rulesdt
})
## Rules Printed ########################
output$rulesTable <- renderPrint({
#hack to disply results... make sure this match line above!!
#ar <- apriori(dataset[,input$cols], parameter=list(support=input$supp, confidence=input$conf, minlen=input$minL, maxlen=input$maxL))
ar <- rules()
inspect(sort(ar, by=input$sort))
})
## Download data to csv ########################
output$downloadData <- downloadHandler(
filename = 'arules_data.csv',
content = function(file) {
write.csv(rules2df(rules()), file)
}
)
}
)
}
@helciopdelima
Copy link

Hi Andrew. Thanks for your arulesApp(), it's really amazing. I have a problem to publish into shinyapps.io. When I run the app locally, it works fine but when I publish into shinyapps, the interface is ok but no data is presented. Could you please help me with that? I'm trying to do an app to analyze rules using the web and your app will be unbelievable for that!!

These is the modified code that I'm using to do that:

library(shiny)
library (arules)
library (arulesSequences)
library (arulesViz)
library(shinydashboard)
library(shinyapps)
library(googleVis)

data('Adult')

dataset <- AdultUCI

rules2df <- function(rules, list=F){
df <- as(rules, 'data.frame')
df[,1] <- as.character(df[,1])
df$lhs <- sapply(df[,1], function(x) strsplit(x, split=' => ')[[1]][1])
df$rhs <- sapply(df[,1], function(x) strsplit(x, split=' => ')[[1]][2])
df$lhs <- gsub(pattern='\{', replacement='', x=df$lhs)
df$lhs <- gsub(pattern='}', replacement='', x=df$lhs)
df$rhs <- gsub(pattern='\{', replacement='', x=df$rhs)
df$rhs <- gsub(pattern='}', replacement='', x=df$rhs)

if(list==T){
p <- rules@lhs@data@p
i <- rules@lhs@data@i+1
lhsItems <- unlist(rules@lhs@itemInfo@.Data)
lhsL <- list()
for(j in 2:length(p)) lhsL[[j-1]] <- lhsItems[i[(p[j-1]+1):(p[j])]]
df$lhs <- lhsL

p <- rules@rhs@data@p
i <- rules@rhs@data@i+1
rhsItems <- unlist(rules@rhs@itemInfo@.Data)
rhsL <- list()
for(j in 2:length(p)) rhsL[[j-1]] <- rhsItems[i[(p[j-1]+1):(p[j])]]
df$rhs <- rhsL

}
return(df)
}

depthbin <- function(ser, nbins=10, qtype=7, digits=10, labelRange=T, labelPct=F, labelOrder=F) {
cutpts <- quantile(ser, probs=seq(0, 1, 1/nbins), na.rm=T, type=qtype)
if(length(unique(cutpts))==nbins+1) {
returnser <- cut(ser, breaks=cutpts, right=T, include.lowest=T)
} else {
alldup <- vector()
while(length(unique(cutpts))+length(alldup) < nbins+1) {
dup <- cutpts[duplicated(cutpts)]
dups <- unique(dup)
alldup <- c(alldup, dups)
dupL <- length(alldup) + length(dups)
ser2 <- ser[which(!ser %in% alldup)]
cutpts <- quantile(ser2, probs=seq(0, 1, 1/(nbins-length(dups))), na.rm=T, type=qtype)
}
cutpts <- c(unique(cutpts), alldup)
returnser <- cut(ser, breaks=cutpts, include.lowest=T, dig.lab=digits, right=F)
}
if(sum(labelRange, labelPct, labelOrder)==0) {
labelRange <- T
warning('arguments labelRange, labelOrder, labelPct should not all be set to FALSE. Setting labelRange to TRUE.')
}
rawlev <- levels(returnser)
if (labelRange==T) levels(returnser) <- paste0(levels(returnser), rawlev)
if (labelOrder==T) levels(returnser) <- paste0(levels(returnser), ' ', 1:length(rawlev), '/', length(rawlev))
if (labelPct==T) levels(returnser) <- paste0(levels(returnser), ' ', paste0('(', as.character(round(table(returnser)/length(returnser)*100, 1)), '%)'))
for(i in 1:length(levels(returnser))) levels(returnser)[i] <- substr(levels(returnser)[i], nchar(rawlev[i])+1, nchar(levels(returnser)[i]))
return(returnser)
}

roundCut <- function(x, r=1){
x <- as.character(x)
b <- substr(x,0,1)
e <- substr(x, nchar(x), nchar(x))
xx <- substr(x, 2, nchar(x)-1)
xx1 <- round(as.numeric(sapply(xx, function(z) strsplit(z, ',')[[1]][1])), r)
xx2 <- round(as.numeric(sapply(xx, function(z) strsplit(z, ',')[[1]][2])), r)
return(paste(b, xx1, ', ', xx2, e, sep=''))
}

binCat <- function(x, ncat=NULL, maxp=NULL, results=F, setNA=NA, keepNA=F) {
if(is.null(maxp)==F & is.null(ncat)==F) warning("Parameters 'ncat' and 'maxp' are both specified. It is advisable to only specify one of these criteria. Algorithm will stop at the first criteria met.")
if(is.na(setNA)==F) x[is.na(x)] <- setNA

ncat <- min(ncat, length(unique(x)))
x <- as.character(x)
n <- length(x)
if(is.null(maxp)) maxp <- 1

for(i in 1:length(unique(x))){
xc <- x
x1 <- sort(table(xc, exclude=NULL), decreasing=T)[1:i]
catp <- sum(x1)/n
if(i==ncat | catp>maxp) {
x2 <- sort(table(xc, exclude=NULL), decreasing=T)[1:(i+1)]
if(keepNA==T) {xc[which(!xc %in% c(names(x2), setNA))] <- 'other'
} else {xc[which(!xc %in% names(x2))] <- 'other'}
returnser <- xc
break
}
}
if(results==T) print(sort(table(returnser)/n, decreasing=T))
return(returnser)
}

supp <- 0.1

conf <- 0.5

vars <- 15

bin <- T

for(i in 1:ncol(dataset)) {
if(class(dataset[,i]) %in% c('numeric', 'integer')) dataset[,i] <- depthbin(dataset[,i], nbins=10)
}

ui <- shinyUI(pageWithSidebar(

headerPanel("TTL Drive"),

sidebarPanel(

conditionalPanel(
  condition = "input.samp=='Amostra'",
  numericInput("nrule", 'Numero de Regras', 5), br()
),

conditionalPanel(
  condition = "input.mytab=='graph'",
  radioButtons('graphType', label='Tipo de Grafico', choices=c('itemsets','items'), inline=T), br()
),

conditionalPanel(
  condition = "input.lhsv=='Subset'", 
  uiOutput("choose_lhs"), br()
),

conditionalPanel(
  condition = "input.rhsv=='Subset'", 
  uiOutput("choose_rhs"), br()
),

conditionalPanel(
  condition = "input.mytab=='grouped'",
  sliderInput('k', label='Selecione # de grupos de regras', min=1, max=150, step=1, value=15), br()
),

conditionalPanel(
  condition = "input.mytab %in%' c('grouped', 'graph', 'table', 'datatable', 'scatter', 'paracoord', 'matrix', 'itemFreq')", 
  radioButtons('samp', label='Amostra', choices=c('Todas as regras', 'Amostra'), inline=T), br(),
  uiOutput("choose_columns"), br(),
  sliderInput("supp", "Popularidade da Regra (support):", min = 0, max = 1, value = supp , step = 1/10000), br(),
  sliderInput("conf", "Certeza da inferencia da Regra (confidence):", min = 0, max = 1, value = conf , step = 1/10000), br(),
  selectInput('sort', label='Criterio de Ordenacao:', choices = c('lift', 'confidence', 'support')), br(), br(),
  numericInput("minL", "Min. itens por regra:", 2), br(), 
  numericInput("maxL", "Max. itens por regra:", 3), br(),
  radioButtons('lhsv', label='variaveis LHS', choices=c('All', 'Subset')), br(),
  radioButtons('rhsv', label='variaveis RHS', choices=c('All', 'Subset')), br()
)

),

mainPanel(
tabsetPanel(id='mytab',
tabPanel('GruposRegras', value='grouped', plotOutput("groupedPlot", width='100%', height='100%')),
tabPanel('Grafo', value='graph', plotOutput("graphPlot", width='100%', height='100%')),
tabPanel('Scatterplot', value='scatter', plotOutput("scatterPlot", width='100%', height='100%')),
tabPanel('CoordenadasParalelas', value='paracoord', plotOutput("paracoordPlot", width='100%', height='100%')),
tabPanel('Matriz', value='matrix', plotOutput("matrixPlot", width='100%', height='100%')),
tabPanel('DistribuicaoFrequencia', value='itemFreq', plotOutput("itemFreqPlot", width='100%', height='100%')),
tabPanel('TabelaRegras', value='table', verbatimTextOutput("rulesTable")),
tabPanel('TabelaDadosRegras', value='datatable', dataTableOutput("rulesDataTable"))
)
)

))

server = function(input, output) {

output$choose_columns <- renderUI({
checkboxGroupInput("cols", "Selecione as variaveis:",
choices = colnames(dataset),
selected = colnames(dataset)[1:vars])
})

output$choose_lhs <- renderUI({
checkboxGroupInput("colsLHS", "Selecione as variaveis LHS:",
choices = input$cols,
selected = input$cols[1])
})

output$choose_rhs <- renderUI({
checkboxGroupInput("colsRHS", "Selecione as variaveis RHS:",
choices = input$cols,
selected = input$cols[1])
})

Extracting and Defining arules

rules <- reactive({
tr <- as(dataset[,input$cols], 'transactions')
arAll <- apriori(tr, parameter=list(support=input$supp, confidence=input$conf, minlen=input$minL, maxlen=input$maxL))

if(input$rhsv=='Subset' & input$lhsv!='Subset'){
  varsR <- character()
  for(i in 1:length(input$colsRHS)){
    tmp <- with(dataset, paste(input$colsRHS[i], '=', levels(as.factor(get(input$colsRHS[i]))), sep=''))
    varsR <- c(varsR, tmp)
  }
  ar <- subset(arAll, subset=rhs %in% varsR)
  
} else if(input$lhsv=='Subset' & input$rhsv!='Subset') {
  varsL <- character()
  for(i in 1:length(input$colsLHS)){
    tmp <- with(dataset, paste(input$colsLHS[i], '=', levels(as.factor(get(input$colsLHS[i]))), sep=''))
    varsL <- c(varsL, tmp)
  }
  ar <- subset(arAll, subset=lhs %in% varsL)
  
} else if(input$lhsv=='Subset' & input$rhsv=='Subset') {
  varsL <- character()
  for(i in 1:length(input$colsLHS)){
    tmp <- with(dataset, paste(input$colsLHS[i], '=', levels(as.factor(get(input$colsLHS[i]))), sep=''))
    varsL <- c(varsL, tmp)
  }
  varsR <- character()
  for(i in 1:length(input$colsRHS)){
    tmp <- with(dataset, paste(input$colsRHS[i], '=', levels(as.factor(get(input$colsRHS[i]))), sep=''))
    varsR <- c(varsR, tmp)
  }
  ar <- subset(arAll, subset=lhs %in% varsL & rhs %in% varsR)
  
} else {
  ar <- arAll
}
quality(ar)$conviction <- interestMeasure(ar, method='conviction', transactions=tr)
quality(ar)$hyperConfidence <- interestMeasure(ar, method='hyperConfidence', transactions=tr)
quality(ar)$cosine <- interestMeasure(ar, method='cosine', transactions=tr)
quality(ar)$chiSquare <- interestMeasure(ar, method='chiSquare', transactions=tr)
quality(ar)$coverage <- interestMeasure(ar, method='coverage', transactions=tr)
quality(ar)$doc <- interestMeasure(ar, method='doc', transactions=tr)
quality(ar)$gini <- interestMeasure(ar, method='gini', transactions=tr)
quality(ar)$hyperLift <- interestMeasure(ar, method='hyperLift', transactions=tr)
ar

})

Rule length

nR <- reactive({
nRule <- ifelse(input$samp == 'Todas as regras', length(rules()), input$nrule)
})

Grouped Plot

output$groupedPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='grouped', control=list(k=input$k))
}, height=800, width=800)

Graph Plot

output$graphPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='graph', control=list(type=input$graphType))
}, height=800, width=800)

Scatter Plot

output$scatterPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='scatterplot')
}, height=800, width=800)

Parallel Coordinates Plot

output$paracoordPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='paracoord')
}, height=800, width=800)

Matrix Plot

output$matrixPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='matrix', control=list(reorder=T))
}, height=800, width=800)

Item Frequency Plot

output$itemFreqPlot <- renderPlot({
trans <- as(dataset[,input$cols], 'transactions')
itemFrequencyPlot(trans)
}, height=800, width=800)

Rules Data Table

output$rulesDataTable <- renderDataTable({
ar <- rules()
rulesdt <- rules2df(ar)
rulesdt
})

Rules Printed

output$rulesTable <- renderPrint({
#hack to disply results... make sure this match line above!!
ar <- apriori(dataset[,input$cols], parameter=list(support=input$supp, confidence=input$conf, minlen=input$minL, maxlen=input$maxL))
ar <- rules()
inspect(sort(ar, by=input$sort))
})

}

shinyApp(ui,server)

Cheers,

@tfuzi
Copy link

tfuzi commented Nov 22, 2017

Hello,
Please it doen't work for me.
Could you help me please.
ASPA

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment