Skip to content

Instantly share code, notes, and snippets.

@semenoffalex
Forked from brooksandrew/shiny_arules.R
Created December 31, 2017 18:56
Show Gist options
  • Save semenoffalex/2e440a87002785fdd40953f3261faf30 to your computer and use it in GitHub Desktop.
Save semenoffalex/2e440a87002785fdd40953f3261faf30 to your computer and use it in GitHub Desktop.
Launches a Shiny App that provides an interactive interface to the arules and arulesViz package which train and visualize association rules
#' @title Assocation Rules Visualization Shiny App
#' @description Launches a Shiny App that provides an interactive interface to the visualizations of the \code{arulesViz} package.
#' The app allows users to mine rules based on all or just subsets of features, sort by criteria (lift, support, confidence) and visualize
#' using network graph, grouped bubble and scatter plots. \cr
#' Users filter rules to target only those with a certain variable on the RHS or LHS of the rule.
#' Rule mining is computed using the \link{apriori} algorithm from \code{arules}.
#'
#' @param dataset data.frame, this is the dataset that association rules will be mined from. Each row is treated as a transaction. Seems to work
#' OK when a the S4 transactions class from \code{arules} is used, however this is not thoroughly tested.
#' @param bin logical, \code{TRUE} will automatically discretize/bin numerical data into categorical features that can be used for association analysis.
#' @param vars integer, how many variables to include in initial rule mining
#' @param supp numeric, the support parameter for initializing visualization. Useful when it is known that a high support is needed to not crash computationally.
#' @param conf numeric, the confidence parameter for initializing visualization. Similarly useful when it is known that a high confidence is needed to not crash computationally.
#' @seealso \code{arulesViz}, \code{arules}
#' @return Shiny App
#' @import shiny arulesViz arules
#' @export
#'
#' @examples
#' ## creating some data
#' n <- 10000 # of obs
#' d <- data.frame(
#' eye = sample(c('brown', 'green', 'blue', 'hazel'), n, replace=T),
#' gender = sample(c('male', 'female'), n, replace=T),
#' height = sort(sample(c('dwarf', 'short', 'average', 'above average', 'giant'), n, replace=T)),
#' wealth = sort(sample(c('poor', 'struggling', 'middle', 'uppermiddle', 'comfortable', 'rich', '1%', 'millionaire', 'billionaire'), n, replace=T)),
#' favoriteAnimal = sample(c('dog', 'cat', 'bat', 'frog', 'lion', 'cheetah', 'lion', 'walrus', 'squirrel'), n, replace=T),
#' numkids = abs(round(rnorm(n, 2, 1)))
#' )
#'
#' ## adding some pattern
#' d$numkids[d$gender=='male'] <- d$numkids[d$gender=='male'] + sample(0:3, sum(d$gender=='male'), replace=T)
#' d$numkids <- factor(d$numkids)
#'
#' ## calling Shiny App to visualize association rules
#' arulesApp(d)
# dependencies:
devtools::source_url('https://raw.githubusercontent.com/brooksandrew/Rsenal/master/R/rules2df.R')
devtools::source_url('https://raw.githubusercontent.com/brooksandrew/Rsenal/master/R/bin.R')
arulesApp <- function (dataset, bin=T, vars=5, supp=0.1, conf=0.5) {
## binning numeric data
for(i in 1:ncol(dataset)) {
if(class(dataset[,i]) %in% c('numeric', 'integer')) dataset[,i] <- Rsenal::depthbin(dataset[,i], nbins=10)
}
## calling Shiny App
shinyApp(ui = shinyUI(pageWithSidebar(
headerPanel("Association Rules"),
sidebarPanel(
conditionalPanel(
condition = "input.samp=='Sample'",
numericInput("nrule", 'Number of Rules', 5), br()
),
conditionalPanel(
condition = "input.mytab=='graph'",
radioButtons('graphType', label='Graph Type', choices=c('itemsets','items'), inline=T), br()
),
conditionalPanel(
condition = "input.lhsv=='Subset'",
uiOutput("choose_lhs"), br()
),
conditionalPanel(
condition = "input.rhsv=='Subset'",
uiOutput("choose_rhs"), br()
),
conditionalPanel(
condition = "input.mytab=='grouped'",
sliderInput('k', label='Choose # of rule clusters', min=1, max=150, step=1, value=15), br()
),
conditionalPanel(
condition = "input.mytab %in%' c('grouped', 'graph', 'table', 'datatable', 'scatter', 'paracoord', 'matrix', 'itemFreq')",
radioButtons('samp', label='Sample', choices=c('All Rules', 'Sample'), inline=T), br(),
uiOutput("choose_columns"), br(),
sliderInput("supp", "Support:", min = 0, max = 1, value = supp , step = 1/10000), br(),
sliderInput("conf", "Confidence:", min = 0, max = 1, value = conf , step = 1/10000), br(),
selectInput('sort', label='Sorting Criteria:', choices = c('lift', 'confidence', 'support')), br(), br(),
numericInput("minL", "Min. items per set:", 2), br(),
numericInput("maxL", "Max. items per set::", 3), br(),
radioButtons('lhsv', label='LHS variables', choices=c('All', 'Subset')), br(),
radioButtons('rhsv', label='RHS variables', choices=c('All', 'Subset')), br(),
downloadButton('downloadData', 'Download Rules as CSV')
)
),
mainPanel(
tabsetPanel(id='mytab',
tabPanel('Grouped', value='grouped', plotOutput("groupedPlot", width='100%', height='100%')),
tabPanel('Graph', value='graph', plotOutput("graphPlot", width='100%', height='100%')),
tabPanel('Scatter', value='scatter', plotOutput("scatterPlot", width='100%', height='100%')),
tabPanel('Parallel Coordinates', value='paracoord', plotOutput("paracoordPlot", width='100%', height='100%')),
tabPanel('Matrix', value='matrix', plotOutput("matrixPlot", width='100%', height='100%')),
tabPanel('ItemFreq', value='itemFreq', plotOutput("itemFreqPlot", width='100%', height='100%')),
tabPanel('Table', value='table', verbatimTextOutput("rulesTable")),
tabPanel('Data Table', value='datatable', dataTableOutput("rulesDataTable"))
)
)
)),
server = function(input, output) {
output$choose_columns <- renderUI({
checkboxGroupInput("cols", "Choose variables:",
choices = colnames(dataset),
selected = colnames(dataset)[1:vars])
})
output$choose_lhs <- renderUI({
checkboxGroupInput("colsLHS", "Choose LHS variables:",
choices = input$cols,
selected = input$cols[1])
})
output$choose_rhs <- renderUI({
checkboxGroupInput("colsRHS", "Choose RHS variables:",
choices = input$cols,
selected = input$cols[1])
})
## Extracting and Defining arules
rules <- reactive({
tr <- as(dataset[,input$cols], 'transactions')
arAll <- apriori(tr, parameter=list(support=input$supp, confidence=input$conf, minlen=input$minL, maxlen=input$maxL))
if(input$rhsv=='Subset' & input$lhsv!='Subset'){
varsR <- character()
for(i in 1:length(input$colsRHS)){
tmp <- with(dataset, paste(input$colsRHS[i], '=', levels(as.factor(get(input$colsRHS[i]))), sep=''))
varsR <- c(varsR, tmp)
}
ar <- subset(arAll, subset=rhs %in% varsR)
} else if(input$lhsv=='Subset' & input$rhsv!='Subset') {
varsL <- character()
for(i in 1:length(input$colsLHS)){
tmp <- with(dataset, paste(input$colsLHS[i], '=', levels(as.factor(get(input$colsLHS[i]))), sep=''))
varsL <- c(varsL, tmp)
}
ar <- subset(arAll, subset=lhs %in% varsL)
} else if(input$lhsv=='Subset' & input$rhsv=='Subset') {
varsL <- character()
for(i in 1:length(input$colsLHS)){
tmp <- with(dataset, paste(input$colsLHS[i], '=', levels(as.factor(get(input$colsLHS[i]))), sep=''))
varsL <- c(varsL, tmp)
}
varsR <- character()
for(i in 1:length(input$colsRHS)){
tmp <- with(dataset, paste(input$colsRHS[i], '=', levels(as.factor(get(input$colsRHS[i]))), sep=''))
varsR <- c(varsR, tmp)
}
ar <- subset(arAll, subset=lhs %in% varsL & rhs %in% varsR)
} else {
ar <- arAll
}
quality(ar)$conviction <- interestMeasure(ar, method='conviction', transactions=tr)
quality(ar)$hyperConfidence <- interestMeasure(ar, method='hyperConfidence', transactions=tr)
quality(ar)$cosine <- interestMeasure(ar, method='cosine', transactions=tr)
quality(ar)$chiSquare <- interestMeasure(ar, method='chiSquare', transactions=tr)
quality(ar)$coverage <- interestMeasure(ar, method='coverage', transactions=tr)
quality(ar)$doc <- interestMeasure(ar, method='doc', transactions=tr)
quality(ar)$gini <- interestMeasure(ar, method='gini', transactions=tr)
quality(ar)$hyperLift <- interestMeasure(ar, method='hyperLift', transactions=tr)
ar
})
# Rule length
nR <- reactive({
nRule <- ifelse(input$samp == 'All Rules', length(rules()), input$nrule)
})
## Grouped Plot #########################
output$groupedPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='grouped', control=list(k=input$k))
}, height=800, width=800)
## Graph Plot ##########################
output$graphPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='graph', control=list(type=input$graphType))
}, height=800, width=800)
## Scatter Plot ##########################
output$scatterPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='scatterplot')
}, height=800, width=800)
## Parallel Coordinates Plot ###################
output$paracoordPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='paracoord')
}, height=800, width=800)
## Matrix Plot ###################
output$matrixPlot <- renderPlot({
ar <- rules()
plot(sort(ar, by=input$sort)[1:nR()], method='matrix', control=list(reorder=T))
}, height=800, width=800)
## Item Frequency Plot ##########################
output$itemFreqPlot <- renderPlot({
trans <- as(dataset[,input$cols], 'transactions')
itemFrequencyPlot(trans)
}, height=800, width=800)
## Rules Data Table ##########################
output$rulesDataTable <- renderDataTable({
ar <- rules()
rulesdt <- rules2df(ar)
rulesdt
})
## Rules Printed ########################
output$rulesTable <- renderPrint({
#hack to disply results... make sure this match line above!!
#ar <- apriori(dataset[,input$cols], parameter=list(support=input$supp, confidence=input$conf, minlen=input$minL, maxlen=input$maxL))
ar <- rules()
inspect(sort(ar, by=input$sort))
})
## Download data to csv ########################
output$downloadData <- downloadHandler(
filename = 'arules_data.csv',
content = function(file) {
write.csv(rules2df(rules()), file)
}
)
}
)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment