Skip to content

Instantly share code, notes, and snippets.

@gluc
Last active September 9, 2015 20:29
Show Gist options
  • Save gluc/be4e684c91e7f1f3dd93 to your computer and use it in GitHub Desktop.
Save gluc/be4e684c91e7f1f3dd93 to your computer and use it in GitHub Desktop.
responses
.Rproj.user
.Rhistory
.RData
#library(devtools)
#devtools::install_github("gluc/data.tree@v0.2.1")
library(data.tree)
#load data
csv <- "location,type,customer,response
suburban,ranch,yes,no
rural,ranch,yes,yes
urban,ranch,yes,no
suburban,multi-story,yes,yes
rural,multi-story,yes,yes
urban,multi-story,yes,no
suburban,ranch,no,no
rural,ranch,no,yes
urban,ranch,no,yes
suburban,multi-story,no,yes
rural,multi-story,no,yes
urban,multi-story,no,yes"
#Convert to tree
df <- read.csv(textConnection(csv), stringsAsFactors = FALSE)
df$pathString <- paste("response", df$location, df$type, df$customer, rownames(df), sep = "/")
responses <- as.Node(df[,c(4,5)])
responses$Set(type = "location", filterFun = function(x) x$level == 2)
responses$Set(type = "type", filterFun = function(x) x$level == 3)
responses$Set(type = "customer", filterFun = function(x) x$level == 4)
#Aggregate responses
Aggregate(responses, "response", function(responses) {
if (all(responses == "yes")) return ("yes")
if (all(responses == "no")) return ("no")
return ("mixed")
}, "response")
#Make a clone, in case we want to compare later
responsesOrig <- Clone(responses)
#Now, we simplify the tree by mergin nodes that are not discriminating
#if two nodes are equal in terms of their child decisions, they are not discriminating
AreNodesEqual <- function(node1, node2) {
n1t <- node1$Get("response", filterFun = isNotLeaf)
n2t <- node2$Get("response", filterFun = isNotLeaf)
names(n1t) <- node1$Get("type", filterFun = isNotLeaf)
names(n2t) <- node2$Get("type", filterFun = isNotLeaf)
isTRUE(all.equal(n1t, n2t))
}
#group siblings into a single else node if they are non-discriminating
GroupNodes <- function(node1, node2) {
CreateElse <- function(node) {
nc <- Clone(node)
nc$parent <- NULL
nc$name <- "else"
nc$type <- "else"
nc$response <- node$response
return (nc)
}
n1c <- CreateElse(node1)
n2c <- CreateElse(node2)
elseNode <- as.Node( rbind(ToDataFrameTree(n1c, "pathString", "type", "response"),
ToDataFrameTree(n2c, "pathString", "type", "response"))
)
parent <- node1$parent
parent$RemoveChild(node1$name)
parent$RemoveChild(node2$name)
parent$AddChildNode(elseNode)
}
GroupChildren <- function(parent) {
if (parent$count <= 1) return()
combos <- combn(lapply(parent$children, function(x) x$name), 2)
for (i in 1: ncol(combos)) {
n1 <- parent$children[[combos[[1, i]]]]
n2 <- parent$children[[combos[[2, i]]]]
if (AreNodesEqual(n1, n2)) {
GroupNodes(n1, n2)
GroupChildren(parent)
break
}
}
}
DoGroupChildren <- function(node) {
if (node$height <= 2) return()
GroupChildren(node)
sapply(node$children, function(x) DoGroupChildren(x))
}
DoGroupChildren(responses)
responses$Do(function(x) x$obsCount <- x$totalCount)
responses$Prune(pruneFun = isNotLeaf)
print(responses, "response", "type", "obsCount")
#print(responsesOrig, "response", "type")
#Finally, print the statement
GetIfStatement <- function(node) {
paste0("IF(", node$type, " = ", node$name, ") ")
}
GetStatement <- function(node) {
paste(c(rev(node$Get(GetIfStatement,
traversal = "ancestor",
filterFun = function(x) !x$isRoot && ( x$type != "else" || x$parent$count > 1) )
),
", THEN (response = ", node$response, ") with ", node$obsCount, " obs"),
collapse = ""
)
}
stmt <- paste(responses$Get(GetStatement, filterFun = isLeaf), collapse = "\n")
cat(stmt)
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment