Skip to content

Instantly share code, notes, and snippets.

@mertedali mertedali/JASSSCode.r
Last active Jul 13, 2018

Embed
What would you like to do?
#####################################################
#####################################################
### R Script for Metamodeling and Rule Extraction ###
#####################################################
#####################################################
#############################
### Step I: Load Packages ###
#############################
library(e1071) # For support vector regression metamodeling
library(lhs) # For maximin Latin hypercube sampling
library(RNetLogo) # For connecting R to NetLogo
library(rpart) # For decision tree fitting
library(rpart.plot) # For decision tree visualization
#####################################
### Step II: Connect R to NetLogo ###
#####################################
NLStart("/your/directory/here", gui = FALSE, nl.jarname = "netlogo-X.X.X.jar")
NLLoadModel("/your/directory/here/yourmodel.nlogo")
# See examples below:
# NLStart("C:/Program Files/NetLogo 6.0.1/app", gui = FALSE, nl.jarname = "netlogo-6.0.1.jar")
# NLLoadModel("C:/Users/User/Desktop/Traffic Basic.nlogo")
# For detailed information, see RNetLogo document (https://cran.r-project.org/web/packages/RNetLogo/RNetLogo.pdf) and RNetLogo website (http://rnetlogo.r-forge.r-project.org/)
###########################################################
### Step III: Generate a Training Set using Maximin LHS ###
###########################################################
nofinstances = 30 # Number of instances in the training set
nofrep = 30 # Number of replications for each instance
nofparams = 3 # Number of input parameters of the agent-based model
set.seed(1)
dat = maximinLHS(n = nofinstances, k = nofparams, dup = 5)
# Since maximinLHS generates a sample whose interval is [0, 1] in each dimension,
# we map each parameter to its respective interval.
# See the example below:
training = matrix(NA, nrow = nrow(dat), ncol = (ncol(dat) + 1)) # Generate a matrix of NA's
training[, 1] = round(qunif(dat[, 1], 1, 41)) # Number of cars: [1, 41]
training[, 2] = qunif(dat[, 2], 0, 0.01) # Acceleration: [0, 0.01]
training[, 3] = qunif(dat[, 3], 0, 0.1) # Deceleration: [0, 0.1]
colnames(training) = c("nofcars", "acc", "dec", "avg_sp")
for (i in 1:nrow(training)){
result = matrix(NA, ncol = nofrep, nrow = 1) # Save the result of each replication
for (j in 1:nofrep){
NLCommand("set number-of-cars", training[i, 1])
NLCommand("set acceleration", training[i, 2])
NLCommand("set deceleration", training[i, 3])
NLCommand("setup")
ts = NLDoReport(1000, "go", "[speed] of sample-car", as.data.frame = TRUE)
result[j] = mean(ts[, 1])
}
training[i, 4] = mean(result)
}
######################################
### Step IV: Develop the Metamodel ###
######################################
# Generate the error function (RMSE)
rmse = function(real, pred){
error = pred - real
return(sqrt(mean(error^2)))
}
# Tune the hyperparameters of support vector regression model with radial basis kernel
# Then, train the metamodel with the optimum hyperparameter combination
tuning = tune(svm, avg_sp ~ nofcars + acc + dec, data = as.data.frame(training), ranges = list(cost = 10^(-3:3), epsilon = 10^(-3:0), gamma = 10^(-3:3)), tunecontrol = tune.control(sampling = "cross", cross = nrow(training), error.fun = rmse))
s = svm(avg_sp ~ nofcars + acc + dec, data = training, cost = as.numeric(tuning$best.parameters$cost), epsilon = as.numeric(tuning$best.parameters$epsilon), gamma = as.numeric(tuning$best.parameters$gamma), kernel = "radial", type = "eps-regression")
# Generate a matrix of an unlabeled instance and predict its output
test = matrix(NA, ncol = 3, nrow = 1)
colnames(test) = c("nofcars", "acc", "dec")
test[1, 1] = 28
test[1, 2] = 0.005
test[1, 3] = 0.08
predict(s, newdata = test)
# It is also possible to generate a matrix of random instances
# For example, generate 100 random instances and predict their outputs
test = matrix(NA, ncol = 3, nrow = 100)
test[, 1] = round(runif(100, 1, 41)) # Number of cars: [0, 41]
test[, 2] = runif(100, 0, 0.01) # Acceleration: [0, 0.01]
test[, 3] = runif(100, 0, 0.1) # Deceleration: [0, 0.1]
colnames(test) = c("nofcars", "acc", "dec")
predict(s, newdata = test)
###################################
### Step V: Fit a Decision Tree ###
###################################
# First generate an unlabeled dataset
usetsize = 500 # Number of instances in the unlabeled dataset
training_tree = matrix(NA, ncol = 4, nrow = usetsize)
training_tree[, 1] = round(runif(usetsize, 1, 41)) # Number of cars: [0, 41]
training_tree[, 2] = runif(usetsize, 0, 0.01) # Acceleration: [0, 0.01]
training_tree[, 3] = runif(usetsize, 0, 0.1) # Deceleration: [0, 0.1]
colnames(training_tree) = c("nofcars", "acc", "dec", "avg_sp")
# Store the predictions returned by the support vector regression metamodel
training_tree[, 4] = predict(s, newdata = training_tree[, 1:3])
# Fit the decision tree
regtree = rpart(avg_sp ~ nofcars + acc + dec, data = as.data.frame(training_tree))
# Visualize the fitted tree
rpart.plot(regtree, extra = 1, tweak = 1.1, digits = 3)
###############################
### Step VI: List the Rules ###
###############################
frm = regtree$frame
frmsorted = frm[order(frm[, "yval"]), ]
rn = as.numeric(row.names(frmsorted[which(frmsorted$var == "<leaf>"), ]))
rn = rev(rn)
ruleset = NULL
for (i in 1:length(rn)){
p = path.rpart(regtree, node = c(1, rn[i]), print.it = FALSE)
p = unlist(p)[-1:-2]
ruleset = rbind(ruleset, c(i, paste("IF", paste(p, collapse=" AND ")), round(frm[toString(rn[i]), "yval"], 4)))
}
colnames(ruleset) = c("RuleID", "Rule", "Output")
ruleset = as.data.frame(ruleset)
print(ruleset)
#####################
### Optional Step ###
#####################
# rulesample() function generates unlabeled samples for each rule
# regtree: Regression tree object
# nofparams: Number of input parameters
# bounds: maximum and minimum values of each input parameter
# paramnames: input parameter names
# outputname: model output name
# nofsamples: number of samples (instances) for each rule
rulesample = function(regtree, nofparams, bounds, paramnames, outputname, nofsamples){
frm = regtree$frame
frmsorted = frm[order(frm[, "yval"]), ]
rn = as.numeric(row.names(frmsorted[which(frmsorted$var == "<leaf>"), ]))
rn = rev(rn)
dataset = NULL
for (i in 1:length(rn)){
p = path.rpart(regtree, node = c(1, rn[i]), print.it = FALSE)
p = unlist(p)[-1:-2]
evl = NULL
boundstemp = bounds
boundstemp = as.data.frame(boundstemp)
colnames(boundstemp) = c("lower", "upper")
rownames(boundstemp) = paramnames
for (j in 1:length(p)){
upper = NA
lower = NA
if(length(unlist(strsplit(as.character(p[j]), "<"))) > 1){
upper = as.numeric(unlist(strsplit(as.character(p[j]), "<"))[2])
evl = rbind(evl, c(unlist(strsplit(as.character(p[j]), "<"))[1], boundstemp[unlist(strsplit(as.character(p[j]), "<"))[1], "lower"], upper))
} else{
lower = as.numeric(unlist(strsplit(as.character(p[j]), ">="))[2])
evl = rbind(evl, c(unlist(strsplit(as.character(p[j]), ">="))[1], lower, boundstemp[unlist(strsplit(as.character(p[j]), ">="))[1], "upper"]))
}
}
for(k in 1:nofparams){
boundselect = subset(evl, evl[, 1] == rownames(boundstemp)[k])
if(nrow(boundselect) > 0)
boundstemp[k, c(1:2)] = c(max(boundselect[, 2]), min(boundselect[, 3]))
}
dataset = rbind(dataset, cbind(i, sample(ceiling(as.numeric(boundstemp[1, 1])):floor(as.numeric(boundstemp[1, 2])), nofsamples, replace = TRUE), runif(nofsamples, as.numeric(boundstemp[2, 1]), as.numeric(boundstemp[2, 2])), runif(nofsamples, as.numeric(boundstemp[3, 1]), as.numeric(boundstemp[3, 2]))))
}
colnames(dataset) = c("RuleID", paramnames)
dataset = cbind(dataset, NA)
colnames(dataset)[nofparams + 2] = outputname
return(as.data.frame(dataset))
}
# Example
bounds = rbind(c(1, 41), c(0, 0.01), c(0, 0.1))
unl = rulesample(regtree, 3, bounds, c("nofcars", "acc", "dec"), "avg_sp", 10)
# Obtain the actual simulation model outputs of the unlabeled instances
nofrep = 5
for (i in 1:nrow(unl)){
res = matrix(NA, ncol = nofrep, nrow = 1)
for (j in 1:nofrep){
NLCommand("set number-of-cars", unl[i, 2])
NLCommand("set acceleration", unl[i, 3])
NLCommand("set deceleration", unl[i, 4])
NLCommand("setup")
th = NLDoReport(1000, "go", "[speed] of sample-car", as.data.frame = TRUE)
res[j] = mean(th[, 1])
}
unl[i, 5] = mean(res)
}
# Plot
w = 10
h = 8
x11(width = w / 2.54, height = h / 2.54)
par(cex = 1, mar = c(2.5, 2.5 , 1, 1), mgp = c(1.5,0.5,0), tcl = -0.3)
boxplot(unl$avg_sp ~ unl$RuleID, xlab = "Decision Rule Number", ylab = "Output")
# Alternative approach for ruleset() function
# Generate a pool of unlabeled instances (e.g., 1000 unlabeled instances)
set.seed(1000)
unl = matrix(NA, nrow = 1000, ncol = 3)
unl[, 1] = sample(1:41, 1000, replace = TRUE)
unl[, 2] = runif(1000, 0, 0.01)
unl[, 3] = runif(1000, 0, 0.1)
unl = as.data.frame(unl)
colnames(unl) = c("nofcars", "acc", "dec")
# For each rule, randomly select 10 unlabeled instances
frm = regtree$frame
frmsorted = frm[order(frm[, "yval"]), ]
rn = as.numeric(row.names(frmsorted[which(frmsorted$var == "<leaf>"), ]))
rn = rev(rn)
unlfinal = NULL
for (i in 1:length(rn)){
p = path.rpart(regtree, node = c(1, rn[i]), print.it = FALSE)
p = unlist(p)[-1:-2]
txt = NULL
evl = NULL
evlfinal = NULL
for (j in 1:length(p)){
txt = rbind(txt, paste("unl$", p[j]))
evl = cbind(evl, eval(parse(text = txt[j])))
}
for (k in 1:nrow(unl))
evlfinal = rbind(evlfinal, all(evl[k, ]))
unlfinal = rbind(unlfinal, cbind(unl[sample(which(evlfinal == TRUE), 10), ], i, paste(p, collapse=" AND ")))
}
colnames(unlfinal)[4:5] = c("RuleID", "Rule")
unlfinal = cbind(unlfinal, NA)
colnames(unlfinal)[6] = "avg_sp"
# Obtain the actual simulation model outputs of the unlabeled instances
nofrep = 5
for (i in 1:nrow(unlfinal)){
res = matrix(NA, ncol = nofrep, nrow = 1)
for (j in 1:nofrep){
NLCommand("set number-of-cars", unlfinal[i, 1])
NLCommand("set acceleration", unlfinal[i, 2])
NLCommand("set deceleration", unlfinal[i, 3])
NLCommand("setup")
th = NLDoReport(1000, "go", "[speed] of sample-car", as.data.frame = TRUE)
res[j] = mean(th[, 1])
}
unlfinal[i, 6] = mean(res)
}
# Plot
w = 10
h = 8
x11(width = w / 2.54, height = h / 2.54)
par(cex = 1, mar = c(2.5, 2.5 , 1, 1), mgp = c(1.5,0.5,0), tcl = -0.3)
boxplot(unlfinal$avg_sp ~ unlfinal$RuleID, xlab = "Decision Rule Number", ylab = "Output")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.