Last active
August 21, 2019 08:01
-
-
Save mertedali/ab7078b9c29dea18c72525239d636b96 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##################################################### | |
##################################################### | |
### R Script for Metamodeling and Rule Extraction ### | |
##################################################### | |
##################################################### | |
############################# | |
### Step I: Load Packages ### | |
############################# | |
library(e1071) # For support vector regression metamodeling | |
library(lhs) # For maximin Latin hypercube sampling | |
library(RNetLogo) # For connecting R to NetLogo | |
library(rpart) # For decision tree fitting | |
library(rpart.plot) # For decision tree visualization | |
##################################### | |
### Step II: Connect R to NetLogo ### | |
##################################### | |
NLStart("/your/directory/here", gui = FALSE, nl.jarname = "netlogo-X.X.X.jar") | |
NLLoadModel("/your/directory/here/yourmodel.nlogo") | |
# See examples below: | |
# NLStart("C:/Program Files/NetLogo 6.0.1/app", gui = FALSE, nl.jarname = "netlogo-6.0.1.jar") | |
# NLLoadModel("C:/Users/User/Desktop/Traffic Basic.nlogo") | |
# For detailed information, see RNetLogo document (https://cran.r-project.org/web/packages/RNetLogo/RNetLogo.pdf) and RNetLogo website (http://rnetlogo.r-forge.r-project.org/) | |
########################################################### | |
### Step III: Generate a Training Set using Maximin LHS ### | |
########################################################### | |
nofinstances = 30 # Number of instances in the training set | |
nofrep = 30 # Number of replications for each instance | |
nofparams = 3 # Number of input parameters of the agent-based model | |
set.seed(1) | |
dat = maximinLHS(n = nofinstances, k = nofparams, dup = 5) | |
# Since maximinLHS generates a sample whose interval is [0, 1] in each dimension, | |
# we map each parameter to its respective interval. | |
# See the example below: | |
training = matrix(NA, nrow = nrow(dat), ncol = (ncol(dat) + 1)) # Generate a matrix of NA's | |
training[, 1] = round(qunif(dat[, 1], 1, 41)) # Number of cars: [1, 41] | |
training[, 2] = qunif(dat[, 2], 0, 0.01) # Acceleration: [0, 0.01] | |
training[, 3] = qunif(dat[, 3], 0, 0.1) # Deceleration: [0, 0.1] | |
colnames(training) = c("nofcars", "acc", "dec", "avg_sp") | |
for (i in 1:nrow(training)){ | |
result = matrix(NA, ncol = nofrep, nrow = 1) # Save the result of each replication | |
for (j in 1:nofrep){ | |
NLCommand("set number-of-cars", training[i, 1]) | |
NLCommand("set acceleration", training[i, 2]) | |
NLCommand("set deceleration", training[i, 3]) | |
NLCommand("setup") | |
ts = NLDoReport(1000, "go", "[speed] of sample-car", as.data.frame = TRUE) | |
result[j] = mean(ts[, 1]) | |
} | |
training[i, 4] = mean(result) | |
} | |
###################################### | |
### Step IV: Develop the Metamodel ### | |
###################################### | |
# Generate the error function (RMSE) | |
rmse = function(real, pred){ | |
error = pred - real | |
return(sqrt(mean(error^2))) | |
} | |
# Tune the hyperparameters of support vector regression model with radial basis kernel | |
# Then, train the metamodel with the optimum hyperparameter combination | |
tuning = tune(svm, avg_sp ~ nofcars + acc + dec, data = as.data.frame(training), ranges = list(cost = 10^(-3:3), epsilon = 10^(-3:0), gamma = 10^(-3:3)), tunecontrol = tune.control(sampling = "cross", cross = nrow(training), error.fun = rmse)) | |
s = svm(avg_sp ~ nofcars + acc + dec, data = training, cost = as.numeric(tuning$best.parameters$cost), epsilon = as.numeric(tuning$best.parameters$epsilon), gamma = as.numeric(tuning$best.parameters$gamma), kernel = "radial", type = "eps-regression") | |
# Generate a matrix of an unlabeled instance and predict its output | |
test = matrix(NA, ncol = 3, nrow = 1) | |
colnames(test) = c("nofcars", "acc", "dec") | |
test[1, 1] = 28 | |
test[1, 2] = 0.005 | |
test[1, 3] = 0.08 | |
predict(s, newdata = test) | |
# It is also possible to generate a matrix of random instances | |
# For example, generate 100 random instances and predict their outputs | |
test = matrix(NA, ncol = 3, nrow = 100) | |
test[, 1] = round(runif(100, 1, 41)) # Number of cars: [0, 41] | |
test[, 2] = runif(100, 0, 0.01) # Acceleration: [0, 0.01] | |
test[, 3] = runif(100, 0, 0.1) # Deceleration: [0, 0.1] | |
colnames(test) = c("nofcars", "acc", "dec") | |
predict(s, newdata = test) | |
################################### | |
### Step V: Fit a Decision Tree ### | |
################################### | |
# First generate an unlabeled dataset | |
usetsize = 500 # Number of instances in the unlabeled dataset | |
training_tree = matrix(NA, ncol = 4, nrow = usetsize) | |
training_tree[, 1] = round(runif(usetsize, 1, 41)) # Number of cars: [0, 41] | |
training_tree[, 2] = runif(usetsize, 0, 0.01) # Acceleration: [0, 0.01] | |
training_tree[, 3] = runif(usetsize, 0, 0.1) # Deceleration: [0, 0.1] | |
colnames(training_tree) = c("nofcars", "acc", "dec", "avg_sp") | |
# Store the predictions returned by the support vector regression metamodel | |
training_tree[, 4] = predict(s, newdata = training_tree[, 1:3]) | |
# Fit the decision tree | |
regtree = rpart(avg_sp ~ nofcars + acc + dec, data = as.data.frame(training_tree)) | |
# Visualize the fitted tree | |
rpart.plot(regtree, extra = 1, tweak = 1.1, digits = 3) | |
############################### | |
### Step VI: List the Rules ### | |
############################### | |
frm = regtree$frame | |
frmsorted = frm[order(frm[, "yval"]), ] | |
rn = as.numeric(row.names(frmsorted[which(frmsorted$var == "<leaf>"), ])) | |
rn = rev(rn) | |
ruleset = NULL | |
for (i in 1:length(rn)){ | |
p = path.rpart(regtree, node = c(1, rn[i]), print.it = FALSE) | |
p = unlist(p)[-1:-2] | |
ruleset = rbind(ruleset, c(i, paste("IF", paste(p, collapse=" AND ")), round(frm[toString(rn[i]), "yval"], 4))) | |
} | |
colnames(ruleset) = c("RuleID", "Rule", "Output") | |
ruleset = as.data.frame(ruleset) | |
print(ruleset) | |
##################### | |
### Optional Step ### | |
##################### | |
# rulesample() function generates unlabeled samples for each rule | |
# regtree: Regression tree object | |
# nofparams: Number of input parameters | |
# bounds: maximum and minimum values of each input parameter | |
# paramnames: input parameter names | |
# outputname: model output name | |
# nofsamples: number of samples (instances) for each rule | |
rulesample = function(regtree, nofparams, bounds, paramnames, outputname, nofsamples){ | |
frm = regtree$frame | |
frmsorted = frm[order(frm[, "yval"]), ] | |
rn = as.numeric(row.names(frmsorted[which(frmsorted$var == "<leaf>"), ])) | |
rn = rev(rn) | |
dataset = NULL | |
for (i in 1:length(rn)){ | |
p = path.rpart(regtree, node = c(1, rn[i]), print.it = FALSE) | |
p = unlist(p)[-1:-2] | |
evl = NULL | |
boundstemp = bounds | |
boundstemp = as.data.frame(boundstemp) | |
colnames(boundstemp) = c("lower", "upper") | |
rownames(boundstemp) = paramnames | |
for (j in 1:length(p)){ | |
upper = NA | |
lower = NA | |
if(length(unlist(strsplit(as.character(p[j]), "<"))) > 1){ | |
upper = as.numeric(unlist(strsplit(as.character(p[j]), "<"))[2]) | |
evl = rbind(evl, c(unlist(strsplit(as.character(p[j]), "<"))[1], boundstemp[unlist(strsplit(as.character(p[j]), "<"))[1], "lower"], upper)) | |
} else{ | |
lower = as.numeric(unlist(strsplit(as.character(p[j]), ">="))[2]) | |
evl = rbind(evl, c(unlist(strsplit(as.character(p[j]), ">="))[1], lower, boundstemp[unlist(strsplit(as.character(p[j]), ">="))[1], "upper"])) | |
} | |
} | |
for(k in 1:nofparams){ | |
boundselect = subset(evl, evl[, 1] == rownames(boundstemp)[k]) | |
if(nrow(boundselect) > 0) | |
boundstemp[k, c(1:2)] = c(max(boundselect[, 2]), min(boundselect[, 3])) | |
} | |
dataset = rbind(dataset, cbind(i, sample(ceiling(as.numeric(boundstemp[1, 1])):floor(as.numeric(boundstemp[1, 2])), nofsamples, replace = TRUE), runif(nofsamples, as.numeric(boundstemp[2, 1]), as.numeric(boundstemp[2, 2])), runif(nofsamples, as.numeric(boundstemp[3, 1]), as.numeric(boundstemp[3, 2])))) | |
} | |
colnames(dataset) = c("RuleID", paramnames) | |
dataset = cbind(dataset, NA) | |
colnames(dataset)[nofparams + 2] = outputname | |
return(as.data.frame(dataset)) | |
} | |
# Example | |
bounds = rbind(c(1, 41), c(0, 0.01), c(0, 0.1)) | |
unl = rulesample(regtree, 3, bounds, c("nofcars", "acc", "dec"), "avg_sp", 10) | |
# Obtain the actual simulation model outputs of the unlabeled instances | |
nofrep = 5 | |
for (i in 1:nrow(unl)){ | |
res = matrix(NA, ncol = nofrep, nrow = 1) | |
for (j in 1:nofrep){ | |
NLCommand("set number-of-cars", unl[i, 2]) | |
NLCommand("set acceleration", unl[i, 3]) | |
NLCommand("set deceleration", unl[i, 4]) | |
NLCommand("setup") | |
th = NLDoReport(1000, "go", "[speed] of sample-car", as.data.frame = TRUE) | |
res[j] = mean(th[, 1]) | |
} | |
unl[i, 5] = mean(res) | |
} | |
# Plot | |
w = 10 | |
h = 8 | |
x11(width = w / 2.54, height = h / 2.54) | |
par(cex = 1, mar = c(2.5, 2.5 , 1, 1), mgp = c(1.5,0.5,0), tcl = -0.3) | |
boxplot(unl$avg_sp ~ unl$RuleID, xlab = "Decision Rule Number", ylab = "Output") | |
# Alternative approach for ruleset() function | |
# Generate a pool of unlabeled instances (e.g., 1000 unlabeled instances) | |
set.seed(1000) | |
unl = matrix(NA, nrow = 1000, ncol = 3) | |
unl[, 1] = sample(1:41, 1000, replace = TRUE) | |
unl[, 2] = runif(1000, 0, 0.01) | |
unl[, 3] = runif(1000, 0, 0.1) | |
unl = as.data.frame(unl) | |
colnames(unl) = c("nofcars", "acc", "dec") | |
# For each rule, randomly select 10 unlabeled instances | |
frm = regtree$frame | |
frmsorted = frm[order(frm[, "yval"]), ] | |
rn = as.numeric(row.names(frmsorted[which(frmsorted$var == "<leaf>"), ])) | |
rn = rev(rn) | |
unlfinal = NULL | |
for (i in 1:length(rn)){ | |
p = path.rpart(regtree, node = c(1, rn[i]), print.it = FALSE) | |
p = unlist(p)[-1:-2] | |
txt = NULL | |
evl = NULL | |
evlfinal = NULL | |
for (j in 1:length(p)){ | |
txt = rbind(txt, paste("unl$", p[j])) | |
evl = cbind(evl, eval(parse(text = txt[j]))) | |
} | |
for (k in 1:nrow(unl)) | |
evlfinal = rbind(evlfinal, all(evl[k, ])) | |
unlfinal = rbind(unlfinal, cbind(unl[sample(which(evlfinal == TRUE), 10), ], i, paste(p, collapse=" AND "))) | |
} | |
colnames(unlfinal)[4:5] = c("RuleID", "Rule") | |
unlfinal = cbind(unlfinal, NA) | |
colnames(unlfinal)[6] = "avg_sp" | |
# Obtain the actual simulation model outputs of the unlabeled instances | |
nofrep = 5 | |
for (i in 1:nrow(unlfinal)){ | |
res = matrix(NA, ncol = nofrep, nrow = 1) | |
for (j in 1:nofrep){ | |
NLCommand("set number-of-cars", unlfinal[i, 1]) | |
NLCommand("set acceleration", unlfinal[i, 2]) | |
NLCommand("set deceleration", unlfinal[i, 3]) | |
NLCommand("setup") | |
th = NLDoReport(1000, "go", "[speed] of sample-car", as.data.frame = TRUE) | |
res[j] = mean(th[, 1]) | |
} | |
unlfinal[i, 6] = mean(res) | |
} | |
# Plot | |
w = 10 | |
h = 8 | |
x11(width = w / 2.54, height = h / 2.54) | |
par(cex = 1, mar = c(2.5, 2.5 , 1, 1), mgp = c(1.5,0.5,0), tcl = -0.3) | |
boxplot(unlfinal$avg_sp ~ unlfinal$RuleID, xlab = "Decision Rule Number", ylab = "Output") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment