Skip to content

Instantly share code, notes, and snippets.

@mertedali
Last active August 21, 2019 08:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mertedali/ab7078b9c29dea18c72525239d636b96 to your computer and use it in GitHub Desktop.
Save mertedali/ab7078b9c29dea18c72525239d636b96 to your computer and use it in GitHub Desktop.
#####################################################
#####################################################
### R Script for Metamodeling and Rule Extraction ###
#####################################################
#####################################################
#############################
### Step I: Load Packages ###
#############################
library(e1071) # For support vector regression metamodeling
library(lhs) # For maximin Latin hypercube sampling
library(RNetLogo) # For connecting R to NetLogo
library(rpart) # For decision tree fitting
library(rpart.plot) # For decision tree visualization
#####################################
### Step II: Connect R to NetLogo ###
#####################################
NLStart("/your/directory/here", gui = FALSE, nl.jarname = "netlogo-X.X.X.jar")
NLLoadModel("/your/directory/here/yourmodel.nlogo")
# See examples below:
# NLStart("C:/Program Files/NetLogo 6.0.1/app", gui = FALSE, nl.jarname = "netlogo-6.0.1.jar")
# NLLoadModel("C:/Users/User/Desktop/Traffic Basic.nlogo")
# For detailed information, see RNetLogo document (https://cran.r-project.org/web/packages/RNetLogo/RNetLogo.pdf) and RNetLogo website (http://rnetlogo.r-forge.r-project.org/)
###########################################################
### Step III: Generate a Training Set using Maximin LHS ###
###########################################################
nofinstances = 30 # Number of instances in the training set
nofrep = 30 # Number of replications for each instance
nofparams = 3 # Number of input parameters of the agent-based model
set.seed(1)
dat = maximinLHS(n = nofinstances, k = nofparams, dup = 5)
# Since maximinLHS generates a sample whose interval is [0, 1] in each dimension,
# we map each parameter to its respective interval.
# See the example below:
training = matrix(NA, nrow = nrow(dat), ncol = (ncol(dat) + 1)) # Generate a matrix of NA's
training[, 1] = round(qunif(dat[, 1], 1, 41)) # Number of cars: [1, 41]
training[, 2] = qunif(dat[, 2], 0, 0.01) # Acceleration: [0, 0.01]
training[, 3] = qunif(dat[, 3], 0, 0.1) # Deceleration: [0, 0.1]
colnames(training) = c("nofcars", "acc", "dec", "avg_sp")
for (i in 1:nrow(training)){
result = matrix(NA, ncol = nofrep, nrow = 1) # Save the result of each replication
for (j in 1:nofrep){
NLCommand("set number-of-cars", training[i, 1])
NLCommand("set acceleration", training[i, 2])
NLCommand("set deceleration", training[i, 3])
NLCommand("setup")
ts = NLDoReport(1000, "go", "[speed] of sample-car", as.data.frame = TRUE)
result[j] = mean(ts[, 1])
}
training[i, 4] = mean(result)
}
######################################
### Step IV: Develop the Metamodel ###
######################################
# Generate the error function (RMSE)
rmse = function(real, pred){
error = pred - real
return(sqrt(mean(error^2)))
}
# Tune the hyperparameters of support vector regression model with radial basis kernel
# Then, train the metamodel with the optimum hyperparameter combination
tuning = tune(svm, avg_sp ~ nofcars + acc + dec, data = as.data.frame(training), ranges = list(cost = 10^(-3:3), epsilon = 10^(-3:0), gamma = 10^(-3:3)), tunecontrol = tune.control(sampling = "cross", cross = nrow(training), error.fun = rmse))
s = svm(avg_sp ~ nofcars + acc + dec, data = training, cost = as.numeric(tuning$best.parameters$cost), epsilon = as.numeric(tuning$best.parameters$epsilon), gamma = as.numeric(tuning$best.parameters$gamma), kernel = "radial", type = "eps-regression")
# Generate a matrix of an unlabeled instance and predict its output
test = matrix(NA, ncol = 3, nrow = 1)
colnames(test) = c("nofcars", "acc", "dec")
test[1, 1] = 28
test[1, 2] = 0.005
test[1, 3] = 0.08
predict(s, newdata = test)
# It is also possible to generate a matrix of random instances
# For example, generate 100 random instances and predict their outputs
test = matrix(NA, ncol = 3, nrow = 100)
test[, 1] = round(runif(100, 1, 41)) # Number of cars: [0, 41]
test[, 2] = runif(100, 0, 0.01) # Acceleration: [0, 0.01]
test[, 3] = runif(100, 0, 0.1) # Deceleration: [0, 0.1]
colnames(test) = c("nofcars", "acc", "dec")
predict(s, newdata = test)
###################################
### Step V: Fit a Decision Tree ###
###################################
# First generate an unlabeled dataset
usetsize = 500 # Number of instances in the unlabeled dataset
training_tree = matrix(NA, ncol = 4, nrow = usetsize)
training_tree[, 1] = round(runif(usetsize, 1, 41)) # Number of cars: [0, 41]
training_tree[, 2] = runif(usetsize, 0, 0.01) # Acceleration: [0, 0.01]
training_tree[, 3] = runif(usetsize, 0, 0.1) # Deceleration: [0, 0.1]
colnames(training_tree) = c("nofcars", "acc", "dec", "avg_sp")
# Store the predictions returned by the support vector regression metamodel
training_tree[, 4] = predict(s, newdata = training_tree[, 1:3])
# Fit the decision tree
regtree = rpart(avg_sp ~ nofcars + acc + dec, data = as.data.frame(training_tree))
# Visualize the fitted tree
rpart.plot(regtree, extra = 1, tweak = 1.1, digits = 3)
###############################
### Step VI: List the Rules ###
###############################
frm = regtree$frame
frmsorted = frm[order(frm[, "yval"]), ]
rn = as.numeric(row.names(frmsorted[which(frmsorted$var == "<leaf>"), ]))
rn = rev(rn)
ruleset = NULL
for (i in 1:length(rn)){
p = path.rpart(regtree, node = c(1, rn[i]), print.it = FALSE)
p = unlist(p)[-1:-2]
ruleset = rbind(ruleset, c(i, paste("IF", paste(p, collapse=" AND ")), round(frm[toString(rn[i]), "yval"], 4)))
}
colnames(ruleset) = c("RuleID", "Rule", "Output")
ruleset = as.data.frame(ruleset)
print(ruleset)
#####################
### Optional Step ###
#####################
# rulesample() function generates unlabeled samples for each rule
# regtree: Regression tree object
# nofparams: Number of input parameters
# bounds: maximum and minimum values of each input parameter
# paramnames: input parameter names
# outputname: model output name
# nofsamples: number of samples (instances) for each rule
rulesample = function(regtree, nofparams, bounds, paramnames, outputname, nofsamples){
frm = regtree$frame
frmsorted = frm[order(frm[, "yval"]), ]
rn = as.numeric(row.names(frmsorted[which(frmsorted$var == "<leaf>"), ]))
rn = rev(rn)
dataset = NULL
for (i in 1:length(rn)){
p = path.rpart(regtree, node = c(1, rn[i]), print.it = FALSE)
p = unlist(p)[-1:-2]
evl = NULL
boundstemp = bounds
boundstemp = as.data.frame(boundstemp)
colnames(boundstemp) = c("lower", "upper")
rownames(boundstemp) = paramnames
for (j in 1:length(p)){
upper = NA
lower = NA
if(length(unlist(strsplit(as.character(p[j]), "<"))) > 1){
upper = as.numeric(unlist(strsplit(as.character(p[j]), "<"))[2])
evl = rbind(evl, c(unlist(strsplit(as.character(p[j]), "<"))[1], boundstemp[unlist(strsplit(as.character(p[j]), "<"))[1], "lower"], upper))
} else{
lower = as.numeric(unlist(strsplit(as.character(p[j]), ">="))[2])
evl = rbind(evl, c(unlist(strsplit(as.character(p[j]), ">="))[1], lower, boundstemp[unlist(strsplit(as.character(p[j]), ">="))[1], "upper"]))
}
}
for(k in 1:nofparams){
boundselect = subset(evl, evl[, 1] == rownames(boundstemp)[k])
if(nrow(boundselect) > 0)
boundstemp[k, c(1:2)] = c(max(boundselect[, 2]), min(boundselect[, 3]))
}
dataset = rbind(dataset, cbind(i, sample(ceiling(as.numeric(boundstemp[1, 1])):floor(as.numeric(boundstemp[1, 2])), nofsamples, replace = TRUE), runif(nofsamples, as.numeric(boundstemp[2, 1]), as.numeric(boundstemp[2, 2])), runif(nofsamples, as.numeric(boundstemp[3, 1]), as.numeric(boundstemp[3, 2]))))
}
colnames(dataset) = c("RuleID", paramnames)
dataset = cbind(dataset, NA)
colnames(dataset)[nofparams + 2] = outputname
return(as.data.frame(dataset))
}
# Example
bounds = rbind(c(1, 41), c(0, 0.01), c(0, 0.1))
unl = rulesample(regtree, 3, bounds, c("nofcars", "acc", "dec"), "avg_sp", 10)
# Obtain the actual simulation model outputs of the unlabeled instances
nofrep = 5
for (i in 1:nrow(unl)){
res = matrix(NA, ncol = nofrep, nrow = 1)
for (j in 1:nofrep){
NLCommand("set number-of-cars", unl[i, 2])
NLCommand("set acceleration", unl[i, 3])
NLCommand("set deceleration", unl[i, 4])
NLCommand("setup")
th = NLDoReport(1000, "go", "[speed] of sample-car", as.data.frame = TRUE)
res[j] = mean(th[, 1])
}
unl[i, 5] = mean(res)
}
# Plot
w = 10
h = 8
x11(width = w / 2.54, height = h / 2.54)
par(cex = 1, mar = c(2.5, 2.5 , 1, 1), mgp = c(1.5,0.5,0), tcl = -0.3)
boxplot(unl$avg_sp ~ unl$RuleID, xlab = "Decision Rule Number", ylab = "Output")
# Alternative approach for ruleset() function
# Generate a pool of unlabeled instances (e.g., 1000 unlabeled instances)
set.seed(1000)
unl = matrix(NA, nrow = 1000, ncol = 3)
unl[, 1] = sample(1:41, 1000, replace = TRUE)
unl[, 2] = runif(1000, 0, 0.01)
unl[, 3] = runif(1000, 0, 0.1)
unl = as.data.frame(unl)
colnames(unl) = c("nofcars", "acc", "dec")
# For each rule, randomly select 10 unlabeled instances
frm = regtree$frame
frmsorted = frm[order(frm[, "yval"]), ]
rn = as.numeric(row.names(frmsorted[which(frmsorted$var == "<leaf>"), ]))
rn = rev(rn)
unlfinal = NULL
for (i in 1:length(rn)){
p = path.rpart(regtree, node = c(1, rn[i]), print.it = FALSE)
p = unlist(p)[-1:-2]
txt = NULL
evl = NULL
evlfinal = NULL
for (j in 1:length(p)){
txt = rbind(txt, paste("unl$", p[j]))
evl = cbind(evl, eval(parse(text = txt[j])))
}
for (k in 1:nrow(unl))
evlfinal = rbind(evlfinal, all(evl[k, ]))
unlfinal = rbind(unlfinal, cbind(unl[sample(which(evlfinal == TRUE), 10), ], i, paste(p, collapse=" AND ")))
}
colnames(unlfinal)[4:5] = c("RuleID", "Rule")
unlfinal = cbind(unlfinal, NA)
colnames(unlfinal)[6] = "avg_sp"
# Obtain the actual simulation model outputs of the unlabeled instances
nofrep = 5
for (i in 1:nrow(unlfinal)){
res = matrix(NA, ncol = nofrep, nrow = 1)
for (j in 1:nofrep){
NLCommand("set number-of-cars", unlfinal[i, 1])
NLCommand("set acceleration", unlfinal[i, 2])
NLCommand("set deceleration", unlfinal[i, 3])
NLCommand("setup")
th = NLDoReport(1000, "go", "[speed] of sample-car", as.data.frame = TRUE)
res[j] = mean(th[, 1])
}
unlfinal[i, 6] = mean(res)
}
# Plot
w = 10
h = 8
x11(width = w / 2.54, height = h / 2.54)
par(cex = 1, mar = c(2.5, 2.5 , 1, 1), mgp = c(1.5,0.5,0), tcl = -0.3)
boxplot(unlfinal$avg_sp ~ unlfinal$RuleID, xlab = "Decision Rule Number", ylab = "Output")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment