Last active
March 21, 2017 14:51
-
-
Save ngopal/0c9c825e1db7dd8dfd81044c1638dea5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(randomForest) | |
library(RJSONIO) | |
library(ROCR) | |
# Research Questions | |
# 1. What are the ranked importances of node encodings? | |
# 2. What are the ranked importances of edge encodings? | |
# 3. How important is network structure to noticeability? | |
# 3a. Where do participants tend to click? | |
#Converting HEX color to INT | |
rgbToInt <- function(red,green,blue) { | |
# formula from https://www.shodor.org/stella2java/rgbint.html | |
return(256*256*red+256*green+blue) | |
} | |
calcPerceivedBrightness <- function(red,green,blue) { | |
# formula from http://stackoverflow.com/questions/596216/formula-to-determine-brightness-of-rgb-color | |
# Another potential option http://stackoverflow.com/questions/12043187/how-to-check-if-hex-color-is-too-black | |
return( (0.299*red + 0.587*green + 0.114*blue) ) | |
} | |
### Connecting through MONGO #### | |
## http://stackoverflow.com/questions/30738974/rjava-load-error-in-rstudio-r-after-upgrading-to-osx-yosemite | |
library(rJava) | |
library(RMongo) | |
library(plyr) | |
pilotdb <- mongoDbConnect('pilot') | |
dbGetQuery(pilotdb, 'evaldata', '{}')['X_id'] | |
connectSurveyToClickData <- function() { | |
uniqueSessions <- unlist(unique(dbGetQuery(pilotdb, 'evaldata', '{}', skip=0, limit=1000000)['user'])) | |
connectedData <- c() | |
for (u in uniqueSessions) { | |
cat(u,'\n') | |
cldata <- dbGetQuery(pilotdb, 'evaldata', paste('{ user : "',u,'", "page" : { "$ne" : "survey"} }', sep='')) | |
sudata <- dbGetQuery(pilotdb, 'evaldata', paste('{ user : "',u,'", "page" : "survey" }', sep='')) | |
if (dim(sudata)[1] == 0 || dim(sudata)[2] == 0) { | |
next | |
} | |
else { | |
sudata <- sudata[c("question1", "question2", "question3", "question4", "question5", "question6")] | |
} | |
combinedData <- cbind(cldata, sudata) | |
connectedData <- rbind(connectedData, combinedData) | |
} | |
return( data.frame(connectedData) ) | |
} | |
surveydata <- dbGetQuery(pilotdb, 'evaldata', '{ page: "survey" }') | |
# Survey Data | |
survey.df <- data.frame(surveydata[c("question1", "question2", "question3", "question4", "question5", "question6")]) | |
par(mar=c(5.1, 13 ,4.1 ,2.1)) | |
barplot(table(survey.df[,1]), las=2, horiz = T, main="Level of Education") | |
barplot(table(survey.df[,2]), las=2, horiz = T) | |
barplot(table(survey.df[,3]), las=2, horiz = T) | |
barplot(table(survey.df[,4]), las=2, horiz = T) | |
barplot(table(survey.df[,5]), las=2, horiz = T) | |
# Click Data | |
#collectedData <- dbGetQuery(pilotdb, 'evaldata', '{ "page": {"$ne":"survey"} }') | |
#collectedData <- dbGetQuery(pilotdb, 'evaldata', '{ "page": {"$ne":"survey"}, "user":"488238d8-99be-e65d-ebb8-ce7c04c92b25" }') | |
#expd.dat <- data.frame(collectedData[names(head(collectedData))]) | |
expd.dat <- connectSurveyToClickData() | |
expd.dat$linewidth <- as.numeric(gsub('px','',expd.dat$linewidth)) | |
expd.dat$nodeheight <- as.numeric(gsub('px','',expd.dat$nodeheight)) | |
expd.dat$nodeborderwidth <- as.numeric(gsub('px','',expd.dat$nodeborderwidth)) | |
#replace "cy.js selection blue" with "normal gray" | |
#expd.dat$nodebackground <- revalue(expd.dat$nodebackground, c("#0169D9"="#999999")) | |
expd.dat$nodebackground <- revalue(expd.dat$nodebackground, c("#999"="#999999")) | |
#expd.dat$linecolor <- revalue(expd.dat$linecolor, c("#0169D9"="#999999")) | |
expd.dat$linecolor <- revalue(expd.dat$linecolor, c("#999"="#999999")) | |
#rgbtpint | |
# tt <- makeRGBMat(expd.dat, 5) | |
# expd.dat[,5] <- as.numeric(rgbToInt(tt[,1], tt[,2], tt[,3])) | |
# tt2 <- makeRGBMat(expd.dat, 15) | |
# expd.dat[,15] <- as.numeric(rgbToInt(tt2[,1], tt2[,2], tt2[,3])) | |
#brightness | |
# expd.dat <- cbind(expd.dat, as.numeric(calcPerceivedBrightness(tt[,1], tt[,2], tt[,3])), as.numeric(calcPerceivedBrightness(tt2[,1], tt2[,2], tt2[,3]))) | |
# colnames(expd.dat) <- c(colnames(expd.dat)[c(-35,-36)],"nodeBrightness", "lineBrightness") | |
nodett <- t(rgb2hsv((col2rgb(expd.dat$nodebackground)))) | |
edgett <- t(rgb2hsv((col2rgb(expd.dat$linecolor)))) | |
expd.dat <- cbind(expd.dat, | |
as.numeric(nodett[,1]), as.numeric(nodett[,2]), as.numeric(nodett[,3]), | |
as.numeric(edgett[,1]), as.numeric(edgett[,2]), as.numeric(edgett[,3])) | |
colnames(expd.dat) <- c(colnames(expd.dat)[1:(length(colnames(expd.dat))-6)], "nodeHue", "nodeSaturation", "nodeValue", "edgeHue", "edgeSaturation", "edgeValue") | |
# Without Binned Data | |
expd.dat.nobin <- expd.dat[Reduce(intersect, list( grep("bin", expd.dat$nodeEncoding2, invert = T), grep("bin", expd.dat$nodeEncoding1, invert = T), grep("bin", expd.dat$edgeEncoding1, invert = T), grep("bin", expd.dat$edgeEncoding2, invert = T) )), ] | |
expd.dat <- expd.dat.nobin | |
# sampleBalancedData <- function(ds,type) { | |
# pos <- 0; | |
# if (type == "nodes") { | |
# pos = "NA" | |
# } | |
# else { | |
# pos = "edge" | |
# } | |
# users <- unique(ds$user) | |
# networks <- unique(ds$network) | |
# newds <- c() | |
# for (u in users) { | |
# for (n in networks) { | |
# selected <- ds[which( (ds$user == u) & (ds$network == n) & (ds$xposition != pos) & (ds$selected == 1) ),] | |
# numNotSelected <- length(which( (ds$user == u) & (ds$network == n) & (ds$xposition != pos) & (ds$selected != 1) )) | |
# notSelected <- ds[which( (ds$user == u) & (ds$network == n) & (ds$xposition != pos) & (ds$selected != 1) ),][sample(1:numNotSelected, 1, replace = F),] | |
# newds <- rbind(newds, selected, notSelected) | |
# #cat(selected$selected, '\n') | |
# #cat(notSelected$selected, '\n') | |
# } | |
# } | |
# return( newds ) | |
# } | |
sampleBalancedData <- function(ds,type) { | |
pos <- 0; | |
if (type == "nodes") { | |
users <- unique(ds$user) | |
networks <- unique(ds$network) | |
newds <- c() | |
for (u in users) { | |
for (n in networks) { | |
selected <- ds[which( (ds$user == u) & (ds$network == n) & (ds$eletype == "node") & (ds$selected == 1) ),] | |
numNotSelected <- length(which( (ds$user == u) & (ds$network == n) & (ds$eletype == "node") & (ds$selected != 1) )) | |
notSelected <- ds[which( (ds$user == u) & (ds$network == n) & (ds$eletype == "node") & (ds$selected != 1) ),][sample(1:numNotSelected, dim(selected)[1], replace = F),] | |
newds <- rbind(newds, selected, notSelected) | |
} | |
} | |
return( newds ) | |
} | |
else { | |
users <- unique(ds$user) | |
networks <- unique(ds$network) | |
newds <- c() | |
for (u in users) { | |
for (n in networks) { | |
selected <- ds[which( (ds$user == u) & (ds$network == n) & (ds$eletype == "edge") & (ds$selected == 1) ),] | |
numNotSelected <- length(which( (ds$user == u) & (ds$network == n) & (ds$eletype == "edge") & (ds$selected != 1) )) | |
notSelected <- ds[which( (ds$user == u) & (ds$network == n) & (ds$eletype == "edge") & (ds$selected != 1) ),][sample(1:numNotSelected, dim(selected)[1], replace = F),] | |
newds <- rbind(newds, selected, notSelected) | |
} | |
} | |
return( newds ) | |
} | |
} | |
expd.dat.nodes.balanced <- sampleBalancedData(expd.dat, "nodes") | |
expd.dat.edges.balanced <- sampleBalancedData(expd.dat, "edges") | |
# Sampling Idea | |
# I suppose I can have up to 6 nodes without having to use SMOTE | |
# I will put this on hold because I don't think I need to balance classes yet | |
#dbGetQuery(pilotdb, 'evaldata', '{ "page": {"$ne":"survey"}, "selected":0, "network":"rn2", "name" : {"$ne" : "NA"} }') | |
# Click Map | |
plot(expd.dat$xposition, expd.dat$yposition, xlim=c(0,max(expd.dat$xposition, na.rm=T)), ylim=c(0,max(expd.dat$yposition, na.rm=T)), col="gray" ) | |
points(expd.dat$xposition[which(expd.dat$selected == 1)], expd.dat$yposition[which(expd.dat$selected == 1)], col="red") | |
points(expd.dat$clickX, expd.dat$clickY, col="red") | |
# expd.nodes <- data.frame(expd.dat[which(!is.na(expd.dat$xposition)),]) | |
# expd.nodes <- expd.nodes[which(as.numeric(as.character(expd.nodes$selected)) <= 1),] | |
# expd.edges <- data.frame(expd.dat[which(is.na(expd.dat$xposition)),]) | |
# expd.edges <- expd.edges[which(as.numeric(as.character(expd.edges$selected)) <= 1),] | |
expd.edges <- expd.dat.edges.balanced | |
expd.nodes <- expd.dat.nodes.balanced | |
# Node Encodings Only Model / Selection | |
#expd.nodes.1 <- data.frame(expd.nodes[,c(3,5,10,14,19,24,35,42:44)]) | |
expd.nodes <- expd.nodes[-which(expd.nodes$question4 == "colorBlind"),] # Removing colorblind | |
expd.nodes <- expd.nodes[which(!is.na(expd.nodes$selected)),] # Removing NA | |
expd.nodes.1 <- data.frame(expd.nodes[,c(5,9,10,14,19,24,35,42:44)]) | |
expd.nodes.1$nodeshape <- as.factor(expd.nodes.1$nodeshape) | |
expd.nodes.1$network <- as.factor(expd.nodes.1$network) | |
# Should I be excluding those who answered as "colorblind"? I removed them for the analysis above | |
#expd.nodes.1$network <- as.factor(expd.nodes.1$network) | |
#expd.nodes.1$nodebackground <- as.factor(expd.nodes.1$nodebackground) | |
expd.nodes.1$selected <- as.factor(expd.nodes.1$selected) # This will make it classification | |
expd.nodes.1$selected <- as.numeric(as.character(expd.nodes.1$selected)) # This will make it regression | |
expd.nodes.1$eletype <- as.factor(expd.nodes.1$eletype) | |
expd.nodes.1$question1 <- as.factor(expd.nodes.1$question1) | |
expd.nodes.1$question2 <- as.factor(expd.nodes.1$question2) | |
expd.nodes.1$question3 <- as.factor(expd.nodes.1$question3) | |
expd.nodes.1$question4 <- as.factor(expd.nodes.1$question4) | |
expd.nodes.1$question5 <- as.factor(expd.nodes.1$question5) | |
# Creating a dataset with click location and node positions | |
expd.nodes.2 <- data.frame(expd.nodes[,c(5,9,10,14,19,24,35,42:44, 1, 2, 7, 8, 12, 28)]) | |
expd.nodes.2$clickX <- expd.nodes.2$clickX/expd.nodes.2$windowwidth | |
expd.nodes.2$clickY <- expd.nodes.2$clickY/expd.nodes.2$windowheight | |
expd.nodes.2$xposition <- expd.nodes.2$xposition/expd.nodes.2$windowwidth | |
expd.nodes.2$yposition <- expd.nodes.2$yposition/expd.nodes.2$windowheight | |
expd.nodes.2 <- expd.nodes.2[,1:14] | |
expd.nodes.2 <- expd.nodes.2[,c(-11,-12)] | |
expd.nodes.2$nodeshape <- as.factor(expd.nodes.2$nodeshape) | |
expd.nodes.2$network <- as.factor(expd.nodes.2$network) | |
# Creating a dataset to test the radius distance hypothesis with | |
expd.nodes.4 <- data.frame(expd.nodes[,c(5,9,10,14,19,24,35,42:44, 1, 2, 7, 8, 12, 28)]) | |
expd.nodes.2$clickX <- expd.nodes.2$clickX/expd.nodes.2$windowwidth | |
expd.nodes.2$clickY <- expd.nodes.2$clickY/expd.nodes.2$windowheight | |
expd.nodes.2$xposition <- expd.nodes.2$xposition/expd.nodes.2$windowwidth | |
expd.nodes.2$yposition <- expd.nodes.2$yposition/expd.nodes.2$windowheight | |
expd.nodes.2 <- expd.nodes.2[,1:14] | |
expd.nodes.2 <- expd.nodes.2[,c(-11,-12)] | |
expd.nodes.2$nodeshape <- as.factor(expd.nodes.2$nodeshape) | |
expd.nodes.2$network <- as.factor(expd.nodes.2$network) | |
## Downsampling is necessary if minority class is 15% or less | |
# Removing NA Values | |
# expd.nodes.1 <- expd.nodes.1[-which(is.na(expd.nodes.1$selected)),] | |
# Calculating minorty class presence | |
sum(expd.nodes.1$selected == 1) / length(expd.nodes.1$selected) | |
# Shows us that minority class is 16.9%, which is borderline! | |
expd.nodes.1c <- expd.nodes.1 | |
expd.nodes.1c$selected <- as.factor(as.character(expd.nodes.1c$selected)) | |
# Check for multicollinearily | |
library(rfUtilities) | |
#multi.collinear(expd.nodes.1[,c(-2)]) | |
multi.collinear(expd.nodes.1[,c(-1,-2)]) | |
# I could consider using "network" as strata below | |
#selectedPrevalence.nodes.1 <- sum(as.numeric(expd.nodes.1$selected))/length(as.numeric(expd.nodes.1$selected)) | |
#unselectedPrevalence.nodes.1 <- 100-sum(as.numeric(expd.nodes.1$selected))/length(as.numeric(expd.nodes.1$selected)) | |
tuneRF(x = expd.nodes.1[,c(-4)], y = expd.nodes.1$selected, importance=TRUE, proximity=TRUE, classwt = c(selectedPrevalence.nodes.1, unselectedPrevalence.nodes.1)) | |
#rf1.nodes.1 <- randomForest(selected ~ ., data=expd.nodes.1[,c(-3, -14)], importance=TRUE, proximity=TRUE, classwt = c(selectedPrevalence.nodes.1, unselectedPrevalence.nodes.1)) | |
#rf1.nodes.1 <- randomForest(selected ~ ., data=expd.nodes.1[,c(-3, -8:-14)], importance=TRUE, proximity=TRUE, classwt = c(selectedPrevalence.nodes.1, unselectedPrevalence.nodes.1)) | |
#rf1.nodes.1 <- randomForest(selected ~ ., data=expd.nodes.1[,c(-5, -6, -13)], importance=TRUE, proximity=TRUE, classwt = c(selectedPrevalence.nodes.1, unselectedPrevalence.nodes.1)) | |
#rf1.nodes.1 <- randomForest(selected ~ ., data=expd.nodes.1[,-7], importance=TRUE, proximity=TRUE, classwt = c(selectedPrevalence.nodes.1, unselectedPrevalence.nodes.1), keep.inbag = TRUE) | |
#rf1.nodes.1 <- randomForest(selected ~ ., data=expd.nodes.1, importance=TRUE, proximity=TRUE, classwt = c(selectedPrevalence.nodes.1, unselectedPrevalence.nodes.1), keep.inbag = TRUE) | |
rf1.nodes.1 <- randomForest(selected ~ ., data=expd.nodes.1[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.1c <- randomForest(as.factor(selected) ~ ., data=expd.nodes.1[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
#rf1.nodes.1c <- randomForest(selected ~ ., data=expd.nodes.1c[,-6], importance=TRUE, proximity=TRUE, classwt = c(selectedPrevalence.nodes.1, unselectedPrevalence.nodes.1), keep.inbag = TRUE, ntree=1500) | |
print(rf1.nodes.1) | |
rf1.nodes.1$importance | |
write.table(rf1.nodes.1$importance, file="~/Documents/Evaluation of Importance Experiment/node_importance.csv", quote = F, row.names = T, col.names = T, sep=",") | |
varImpPlot(rf1.nodes.1,type=2, main = "Node Variable Importance") | |
nlabs <- rev(c("Network", "Node Border Size", "Node Size", "Node Saturation", "Node Value", "Node Shape", "Node Hue", "Node Degree")) | |
jpeg(filename="~/Documents/Evaluation of Importance Experiment/nodeVarImp.jpg", res=300, height = 1000, width=1500) | |
varImpPlot(rf1.nodes.1,type=2, main = "Node Variable Importance", labels=nlabs) | |
dev.off() | |
unique(expd.dat$participantResponse[which(!is.na(expd.dat$participantResponse))]) | |
print(rf1.nodes.1c) | |
rf1.nodes.1c$importance | |
varImpPlot(rf1.nodes.1c,type=2) | |
# rf1.nodes.1 <- randomForest(selected ~ ., data=expd.nodes.1[,c(1,3,4,7,14,15,16)], importance=TRUE, proximity=TRUE, classwt = c(selectedPrevalence.nodes.1, unselectedPrevalence.nodes.1)) | |
# print(rf1.nodes.1) | |
# rf1.nodes.1$importance | |
# varImpPlot(rf1.nodes.1,type=2) | |
# http://stats.stackexchange.com/questions/144700/negative-r2-at-random-regression-forest | |
# http://stats.stackexchange.com/questions/21152/obtaining-knowledge-from-a-random-forest | |
abline(v = abs(min(rf1.nodes.1$importance[,4])), lty="longdash", lwd=2) | |
rf1.nodes.1.p <- classCenter(expd.nodes.1[,c(-4)], expd.nodes.1[,4], rf1.nodes.1$proximity) | |
write.table(rf1.nodes.1.p, file="~/Documents/Evaluation of Importance Experiment/node_prototype.csv", quote = F, row.names = T, col.names = T, sep=",") | |
# Node Encodings with Position | |
rf1.nodes.2 <- randomForest(selected ~ xposition + yposition, data=expd.nodes.2[,c(-6)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.2 <- randomForest(selected ~ xposition + yposition + nodeborderwidth + nodeheight + nodeValue + nodeSaturation, | |
data=expd.nodes.2[,c(-6)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.2 <- randomForest(selected ~ ., data=expd.nodes.2[,c(-6)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.2) # The negative variance means we would have been better off with a simpler model! | |
# Including X and Y position increased %Var Explained from 23.08 to 27.62 | |
# Node Encodings with Position and Radius | |
expd.nodes.2$radius_distance <- apply(expd.nodes.2, MAR=1, FUN = function(x){ | |
return( sqrt( (as.numeric(x[11]) - 0.5)^2 + (as.numeric(x[12]) - 0.5)^2 ) ) | |
}) | |
rf1.nodes.2wr <- randomForest(selected ~ ., data=expd.nodes.2[,c(-6)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.2wr) # The negative variance means we would have been better off with a simpler model! | |
# Including X and Y position and distance to from center increased %Var Explained from 23.08 to 27.53 | |
# Which is a bit lower than 27.62 from above. | |
varImpPlot(rf1.nodes.2wr) | |
# Shows that radius_distance is 4th most important, which is more than node degree. | |
# Node Encodings with Position and Network Motif Groups | |
motif_group_map <- list("1" = c(16, 24, 21), | |
"2" = c(3, 13), | |
"3" = c(26), | |
"4" = c(18), | |
"5" = c(7, 31, 30, 8, 29, 17, 14), | |
"6" = c(27, 33, 9, 28), | |
"7" = c(22, 12, 25, 32, 19, 1, 2, 6, 23, 15, 11, 34, 10, 4, 5, 20)) | |
# Sub-groupings within 7 | |
# 7.1 = 22 | |
# 7.2 = 12 | |
# 7.3 = 25 | |
# 7.4 = 32, 19 | |
# 7.5 = 1, 2, 6, 23 | |
# 7.6 = 15, 11, 34, 10, 4, 5, 20 | |
invertList = function(ll, prefix = '') { | |
nll <- list() | |
for (key in 1:max(as.numeric(names(ll)))) { | |
groupNumber <- key | |
for (value in ll[[key]]) { | |
nll[[ paste(prefix, value, sep='') ]] <- key | |
} | |
} | |
return(nll) | |
} | |
motif_group_maprn <- invertList(motif_group_map, "rn") | |
expd.nodes.3 <- expd.nodes.2 | |
motif_group <- c() | |
for (i in expd.nodes.3$network) { | |
print(motif_group_maprn[[i]]) | |
motif_group <- append(motif_group, motif_group_maprn[[i]]) | |
} | |
expd.nodes.3 <- cbind(expd.nodes.3, motif_group) | |
expd.nodes.3$motif_group <- as.factor(expd.nodes.3$motif_group) | |
rf1.nodes.3 <- randomForest(selected ~ ., data=expd.nodes.3[,c(-6)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.3) | |
varImpPlot(rf1.nodes.3) | |
# When omitting XY variables, 24.75% of variance is explained, which is less than the XY positions of nodes. | |
# When including XY variables, 28.63% of variance is explained, which is the best performing model thus far | |
multi.collinear(expd.nodes.3[,c(-1,-2,-13)]) # motif_group is a factor, and this function only uses numeric input | |
expd.nodes.4 <- expd.nodes.3 | |
expd.nodes.4$radius_distance <- apply(expd.nodes.3, MAR=1, FUN = function(x){ | |
return( sqrt( (as.numeric(x[11]) - 0.5)^2 + (as.numeric(x[12]) - 0.5)^2 ) ) | |
}) | |
rf1.nodes.4 <- randomForest(selected ~ ., data=expd.nodes.4[,c(-6)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.4) | |
varImpPlot(rf1.nodes.4) | |
rf1.nodes.5 <- randomForest(selected ~ ., data=expd.nodes.4[,c(-6, -11, -12, -13)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.5) | |
varImpPlot(rf1.nodes.5) | |
expd.nodes.5 <- expd.nodes.2 | |
clusteringCoefficientGlobal <- c() | |
for (i in expd.nodes.5$network) { | |
#print(analyzeNetworkGlobal(eval(parse(text=paste("net_",strsplit(i, "rn")[[1]][2],"_g",sep=''))))) | |
clusteringCoefficientGlobal <- append(clusteringCoefficientGlobal, analyzeNetworkGlobal(eval(parse(text=paste("net_",strsplit(i, "rn")[[1]][2],"_g",sep=''))), silent=TRUE)) | |
} | |
expd.nodes.5 <- cbind(expd.nodes.5, clusteringCoefficientGlobal) | |
# Model with only clustering coefficient added | |
rf1.nodes.6 <- randomForest(selected ~ ., data=expd.nodes.5[,c(-6,-11,-12,-13)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.6) | |
varImpPlot(rf1.nodes.6) | |
# Model with everything including clustering coefficient | |
rf1.nodes.7 <- randomForest(selected ~ ., data=expd.nodes.5[,c(-6)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.7) | |
varImpPlot(rf1.nodes.7) | |
nodeCentralityData <- c() | |
for (i in 1:dim(expd.dat)[1]) { | |
if (expd.dat[i,]$eletype == "node") { | |
networkAtHand <- which(expd.dat$network == expd.dat[i,]$network & expd.dat$user == expd.dat[i,]$user) | |
#expd.dat[which(expd.dat$network == expd.dat[i,]$network & expd.dat$user == expd.dat[i,]$user & expd.dat$eletype == "node"),] | |
#valid_nodes <- networkAtHand[1:4] | |
valid_nodes <- as.numeric(row.names(expd.dat[which(expd.dat$network == expd.dat[i,]$network & expd.dat$user == expd.dat[i,]$user & expd.dat$eletype == "node"),])) | |
a <- analyzeNetworkBetweenness(eval(parse(text=paste("net_",strsplit(expd.dat[i,]$network, "rn")[[1]][2],"_g",sep=''))))[which(i == valid_nodes)] | |
if (length(a) == 0) { | |
print("length zero") | |
print(i) | |
print(valid_nodes) | |
} | |
nodeCentralityData <- append(nodeCentralityData, a) | |
# for (a in analyzeNetworkBetweenness(eval(parse(text=paste("net_",strsplit(expd.dat[i,]$network, "rn")[[1]][2],"_g",sep='')))) ) { | |
# nodeCentralityData <- append(nodeCentralityData, a) | |
# } | |
} | |
else if (expd.dat[i,]$eletype == "edge") { | |
nodeCentralityData <- append(nodeCentralityData, NA) | |
} | |
else { | |
# edge | |
print("ERROR") | |
} | |
} | |
head( cbind(expd.dat,nodeCentralityData) ) | |
nodeCentralityData[as.numeric(row.names(expd.nodes.5))] | |
expd.nodes.6 <- cbind(expd.nodes.5, nodeCentralityData[as.numeric(row.names(expd.nodes.5))]) | |
names(expd.nodes.6) <- c(names(expd.nodes.6)[1:14], "BetweennessCentrality") | |
# Model with betweenness centrality only | |
rf1.nodes.8 <- randomForest(selected ~ ., data=expd.nodes.6[which(!is.na(expd.nodes.6[,15])),c(-5,-6,-11,-12,-13,-14)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.8) # 22.43% Variance Explained | |
varImpPlot(rf1.nodes.8) | |
# Network | |
# Node Border Width | |
# Node Size | |
# Node Color Saturation | |
# Node Color Value | |
# Node Shape | |
# Node Color Hue | |
# Node Betweenness Centrality | |
# Model with everything, including betweenness centrality | |
rf1.nodes.9 <- randomForest(selected ~ ., data=expd.nodes.6[which(!is.na(expd.nodes.6[,15])),c(-6)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.9) # 27.07% Variance Explained | |
varImpPlot(rf1.nodes.9) | |
# Network | |
# Node Border Width | |
# Y Position | |
# Disance from center of screen | |
# X Position | |
# Node Size | |
# Node Shape | |
# Node Color Saturation | |
# Node Color Value | |
# Node Color Hue | |
# Node Betweenness Centrality | |
# Node Degree | |
# Clustering Coefficient (Global) | |
# Analyze degree centrality | |
# | |
# Can confirm that degree centrality is identical to numConnectedNodes | |
# | |
nodeDegreeCentralityData <- c() | |
nodeDegreeClosenessData <- c() | |
nodeDegreeEigenvectorData <- c() | |
for (i in 1:dim(expd.dat)[1]) { | |
if (expd.dat[i,]$eletype == "node") { | |
networkAtHand <- which(expd.dat$network == expd.dat[i,]$network & expd.dat$user == expd.dat[i,]$user) | |
#expd.dat[which(expd.dat$network == expd.dat[i,]$network & expd.dat$user == expd.dat[i,]$user & expd.dat$eletype == "node"),] | |
#valid_nodes <- networkAtHand[1:4] | |
valid_nodes <- as.numeric(row.names(expd.dat[which(expd.dat$network == expd.dat[i,]$network & expd.dat$user == expd.dat[i,]$user & expd.dat$eletype == "node"),])) | |
a <- analyzeNetworkDegree(eval(parse(text=paste("net_",strsplit(expd.dat[i,]$network, "rn")[[1]][2],"_g",sep=''))))[which(i == valid_nodes)] | |
b <- analyzeNetworkCloseness(eval(parse(text=paste("net_",strsplit(expd.dat[i,]$network, "rn")[[1]][2],"_g",sep=''))))[which(i == valid_nodes)] | |
c <- analyzeNetworkEigenvector(eval(parse(text=paste("net_",strsplit(expd.dat[i,]$network, "rn")[[1]][2],"_g",sep=''))))[which(i == valid_nodes)] | |
if (length(a) == 0) { | |
print("length zero") | |
print(i) | |
print(valid_nodes) | |
} | |
if (length(b) == 0) { | |
print("length zero") | |
print(i) | |
print(valid_nodes) | |
} | |
if (length(c) == 0) { | |
print("length zero") | |
print(i) | |
print(valid_nodes) | |
} | |
nodeDegreeCentralityData <- append(nodeDegreeCentralityData, a) | |
nodeDegreeClosenessData <- append(nodeDegreeClosenessData, b) | |
nodeDegreeEigenvectorData <- append(nodeDegreeEigenvectorData, c) | |
} | |
else if (expd.dat[i,]$eletype == "edge") { | |
nodeDegreeCentralityData <- append(nodeDegreeCentralityData, NA) | |
nodeDegreeClosenessData <- append(nodeDegreeClosenessData, NA) | |
nodeDegreeEigenvectorData <- append(nodeDegreeEigenvectorData, NA) | |
} | |
else { | |
# edge | |
print("ERROR") | |
} | |
} | |
expd.nodes.6a <- cbind(expd.nodes.5, nodeDegreeCentralityData[as.numeric(row.names(expd.nodes.5))], | |
nodeDegreeClosenessData[as.numeric(row.names(expd.nodes.5))], | |
nodeDegreeEigenvectorData[as.numeric(row.names(expd.nodes.5))]) | |
names(expd.nodes.6a) <- c(names(expd.nodes.6a)[1:14], "DegreeCentrality", "ClosenessCentrality", "EigenvectorCentrality") | |
# # Model with degree centrality only | |
rf1.nodes.8a <- randomForest(selected ~ ., data=expd.nodes.6a[which(!is.na(expd.nodes.6a[,15])),c(-5, -6,-11,-12,-13,-14,-16,-17)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.8b <- randomForest(selected ~ ., data=expd.nodes.6a[which(!is.na(expd.nodes.6a[,16])),c(-5,-6,-11,-12,-13,-14,-15,-17)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.8c <- randomForest(selected ~ ., data=expd.nodes.6a[which(!is.na(expd.nodes.6a[,17])),c(-5,-6,-11,-12,-13,-14,-15,-16)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.8a) # 22.25% variance explained (degree centrality, which is equivalent to numConnectedNodes) | |
print(rf1.nodes.8b) # 22.53% variance explained (closeness centrality) | |
print(rf1.nodes.8c) # 22.21% variance explained (eigenvector centrality) | |
varImpPlot(rf1.nodes.8a) | |
# Variable Importances: | |
# Network | |
# Node Border Width | |
# Node Size | |
# Node Color Saturation | |
# Node Color Value | |
# Node Shape | |
# Node Color Hue | |
# Degree Centrality | |
varImpPlot(rf1.nodes.8b) | |
# Variable Importances: | |
# Network | |
# Node Border Width | |
# Node Size | |
# Node Color Saturation | |
# Node Color Value | |
# Node Shape | |
# Node Color Hue | |
# Closeness Centrality | |
varImpPlot(rf1.nodes.8c) | |
# Variable Importances: | |
# Network | |
# Node Border Width | |
# Eigenvector Centrality ### WOW, INTERESTING FINDING | |
# Node Size | |
# Node Color Saturation | |
# Node Color Value | |
# Node Shape | |
# Node Color Hue | |
multi.collinear(expd.nodes.6a[which(!is.na(expd.nodes.6a[,15])),c(-1,-2)]) | |
expd.nodes.7 <- cbind(expd.nodes.5, expd.dat$user[as.numeric(rownames(expd.nodes.1))]) | |
names(expd.nodes.7) <- c(names(expd.nodes.7)[1:14], "user") | |
# Model using User variable only - Can't make model due to too many levels of users | |
rf1.nodes.10 <- randomForest(selected ~ ., data=expd.nodes.7[,c(-6,-11,-12,-13,-14)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.10) | |
varImpPlot(rf1.nodes.10) | |
# "Default" Model without Node Degree included | |
rf1.nodes.11 <- randomForest(selected ~ ., data=expd.nodes.6[,c(-5,-6,-11,-12,-13,-14,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.11) | |
varImpPlot(rf1.nodes.11) | |
# Using communities to more formally evaluate what I call motif groups | |
compare(cluster_spinglass(net_16_g), cluster_spinglass(net_31_g)) | |
communityComparisons <- matrix(nrow = 34, ncol = 34) | |
communityComparisons_walktrap <- matrix(nrow = 34, ncol = 34) | |
communityComparisons_edge_betweenness <- matrix(nrow = 34, ncol = 34) | |
communityComparisons_infomap <- matrix(nrow = 34, ncol = 34) | |
communityComparisons_labelprop <- matrix(nrow = 34, ncol = 34) | |
for (i in 1:34) { | |
for (k in 1:34) { | |
communityComparisons[i,k] <- compare( cluster_spinglass(eval(parse(text=paste("net_",i,"_g",sep='')))), cluster_spinglass(eval(parse(text=paste("net_",k,"_g",sep='')))) ) | |
communityComparisons_walktrap[i,k] <- compare( cluster_walktrap(eval(parse(text=paste("net_",i,"_g",sep='')))), cluster_walktrap(eval(parse(text=paste("net_",k,"_g",sep='')))) ) | |
communityComparisons_edge_betweenness[i,k] <- compare( cluster_edge_betweenness(eval(parse(text=paste("net_",i,"_g",sep='')))), cluster_edge_betweenness(eval(parse(text=paste("net_",k,"_g",sep='')))) ) | |
communityComparisons_infomap[i,k] <- compare( cluster_infomap(eval(parse(text=paste("net_",i,"_g",sep='')))), cluster_infomap(eval(parse(text=paste("net_",k,"_g",sep='')))) ) | |
communityComparisons_labelprop[i,k] <- compare( cluster_label_prop(eval(parse(text=paste("net_",i,"_g",sep='')))), cluster_label_prop(eval(parse(text=paste("net_",k,"_g",sep='')))) ) | |
} | |
} | |
plot(hclust(dist(communityComparisons))) | |
plot(hclust(dist(communityComparisons_walktrap))) | |
plot(hclust(dist(communityComparisons_edge_betweenness))) | |
plot(hclust(dist(communityComparisons_infomap))) | |
plot(hclust(dist(communityComparisons_labelprop))) | |
communityGroupData <- c() | |
lookupCommunityGroups <- cutree(hclust(d = dist(communityComparisons)), k = 3) | |
hcresults <- hclust(d = dist(communityComparisons)) | |
for (i in 1:dim(expd.dat)[1]) { | |
if (expd.dat[i,]$eletype == "node") { | |
communityGroupData <- append(communityGroupData, lookupCommunityGroups[which(hcresults$order == strsplit(expd.dat[i,]$network, "rn")[[1]][2])] ) | |
} | |
else if (expd.dat[i,]$eletype == "edge") { | |
communityGroupData <- append(communityGroupData, NA) | |
} | |
else { | |
# edge | |
print("ERROR") | |
} | |
} | |
expd.nodes.8 <- cbind(expd.nodes.5, communityGroupData[as.numeric(row.names(expd.nodes.5))] ) | |
names(expd.nodes.8) <- c(names(expd.nodes.8)[1:14], "communityGroup") | |
# Model with community only | |
rf1.nodes.12 <- randomForest(selected ~ ., data=expd.nodes.8[,c(-5,-6,-11,-12,-13,-14)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.12) # 18.24% variance explained | |
varImpPlot(rf1.nodes.12) | |
# Network | |
# Node Border Width | |
# Node Size | |
# Node Color Saturation | |
# Node Coloe Value | |
# Node Shape | |
# Node Hue | |
# Community Group | |
# Model with everything, including community | |
rf1.nodes.13 <- randomForest(selected ~ ., data=expd.nodes.8[,c(-6)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.13) # 27.66% variance explained | |
varImpPlot(rf1.nodes.13) | |
# Network | |
# Node Border Width | |
# Y Position | |
# Distance from Center of screen | |
# X Position | |
# Node Size | |
# Node Shape | |
# Node Color Saturation | |
# Node Color Value | |
# Node Hue | |
# Node Degree | |
# Community Group | |
# Clustering Coefficient | |
# Model with XY Positions only | |
rf1.nodes.13a <- randomForest(selected ~ ., data=expd.nodes.8[,c(-5,-6,-13,-14,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.13a) # 26.58% variance explained | |
varImpPlot(rf1.nodes.13a) | |
# Network | |
# Y Position | |
# Node Border Width | |
# X Position | |
# Node Size | |
# Node Shape | |
# Node Color Saturation | |
# Node Coloe Value | |
# Node Hue | |
# Model with Distance from center of screen only | |
rf1.nodes.13b <- randomForest(selected ~ ., data=expd.nodes.8[,c(-5,-6,-11,-12,-14,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.13b) # 24.21% variance explained | |
varImpPlot(rf1.nodes.13b) | |
# Network | |
# Distance from Center of Screen | |
# Node Border Width | |
# Node Size | |
# Node Color Saturation | |
# Node Coloe Value | |
# Node Shape | |
# Node Hue | |
# Model with clustering coefficient only | |
rf1.nodes.13c <- randomForest(selected ~ ., data=expd.nodes.8[,c(-5,-6,-11,-12,-13,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.13c) # 18.4% variance explained | |
varImpPlot(rf1.nodes.13c) | |
# Network | |
# Node Border Width | |
# Node Size | |
# Node Color Saturation | |
# Node Coloe Value | |
# Node Shape | |
# Node Hue | |
# Clustering Coefficient | |
# "Default" Model without Network included, but including XY Positions | |
rf1.nodes.14 <- randomForest(selected ~ ., data=expd.nodes.6[,c(-5,-6,-13,-14,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.14) | |
varImpPlot(rf1.nodes.14) | |
# Encodings only model | |
rf1.nodes.15 <- randomForest(selected ~ ., data=expd.nodes.6[,c(-2,-5,-6,-11,-12,-13,-14,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.15) # 21.96% variance explained | |
varImpPlot(rf1.nodes.15) | |
# Importances: | |
# Node Border Width | |
# Node Size | |
# Node Color Saturation | |
# Node Color Value | |
# Node Shape | |
# Node Color Hue | |
# Encodings + Network only model | |
rf1.nodes.15a <- randomForest(selected ~ ., data=expd.nodes.6[,c(-5,-6,-11,-12,-13,-14,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.15a) #18.25% variance explained | |
varImpPlot(rf1.nodes.15a) | |
# Importances: | |
# Network | |
# Node Border Width | |
# Node Size | |
# Node Color Saturation | |
# Node Color Value | |
# Node Shape | |
# Node Color Hue | |
expd.nodes.9 <- cbind(expd.nodes.5, communityGroupData[as.numeric(row.names(expd.nodes.5))], nodeDegreeEigenvectorData[as.numeric(row.names(expd.nodes.5))] ) | |
names(expd.nodes.9) <- c(names(expd.nodes.9)[1:14], "communityGroup", "EigenvectorCentality") | |
# everything in model except numConnected | |
rf1.nodes.16 <- randomForest(selected ~ ., data=expd.nodes.9[which(!is.na(expd.nodes.9[,16])),c(-6)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.16b <- randomForest(selected ~ ., data=expd.nodes.9[which(!is.na(expd.nodes.9[,16])),c(-5,-6)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.16) # 26.92% variance explained (with the addition of numConnected) | |
print(rf1.nodes.16b) # 27.14% variance explained (without numConnected) | |
varImpPlot(rf1.nodes.16) | |
# Network | |
# Node Border Width | |
# Y Position | |
# Distance from Center of Screen | |
# Node Size | |
# X Position | |
# Node Shape | |
# Node Color Saturation | |
# Node Color Value | |
# Eigenvector Centrality | |
# Node Color Hue | |
# Number of Connected Nodes (Degree Centrality) | |
# Community Group | |
# Clustering Coefficient | |
varImpPlot(rf1.nodes.16b) | |
# Network | |
# Node Border Width | |
# Y Position | |
# Distance from Center of Screen | |
# Node Size | |
# X Position | |
# Node Shape | |
# Node Color Saturation | |
# Node Color Value | |
# Eigenvector Centrality | |
# Node Color Hue | |
# Community Group | |
# Clustering Coefficient | |
# Model with EigenCentrality and Distance from center of screen and Network | |
rf1.nodes.17 <- randomForest(selected ~ ., data=expd.nodes.9[which(!is.na(expd.nodes.9[,16])),c(-5,-6,-11,-12,-14,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.17) # 24.04 % Variance Explained | |
varImpPlot(rf1.nodes.17) | |
# Network | |
# Distance from Center of Screen | |
# Node Border Width | |
# Node Size | |
# Eigenvector Centrality | |
# Node Color Saturation | |
# Node Shape | |
# Node Color Value | |
# Node Color Hue | |
# Model with EigenCentrality and XY Position and Network | |
rf1.nodes.17a <- randomForest(selected ~ ., data=expd.nodes.9[which(!is.na(expd.nodes.9[,16])),c(-5,-6,-13,-14,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.17a1 <- randomForest(selected ~ ., data=expd.nodes.9[which( !is.na(expd.nodes.9[,16]) & !(expd.nodes.9$yposition > 1) ),c(-5,-6,-13,-14,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.17a) # 27.02% Variance Explained | |
print(rf1.nodes.17a1) # 26.7% Variance Explained | |
varImpPlot(rf1.nodes.17a) | |
# Network | |
# Y Position | |
# Node Border Width | |
# X Position | |
# Node Size | |
# Eigenvector Centrality | |
# Node Shape | |
# Node Color Saturation | |
# Node Color Value | |
# Node Color Hue | |
varImpPlot(rf1.nodes.17a1) | |
write.table(rf1.nodes.17a$importance, file="~/Documents/Evaluation of Importance Experiment/node_importance17a.csv", quote = F, row.names = T, col.names = T, sep=",") | |
nlabs.17a <- rev(c("Network", "Y Coordinate Position", "Node Border Size", "X Coordinate Position", "Node Size", "Eigenvector Centrality", "Node Shape", "Node Saturation", "Node Value", "Node Hue")) | |
jpeg(filename="~/Documents/Evaluation of Importance Experiment/nodeVarImp2.jpg", res=300, height = 1000, width=1500) | |
varImpPlot(rf1.nodes.17a,type=2, main = "Node Variable Importance", labels=nlabs.17a) | |
dev.off() | |
rf1.nodes.17a$importance[order(rf1.nodes.17a$importance[,2], decreasing = T),] | |
# Model with EigenCentrality and Distance from center of screen and clusteringCoefficient and Network | |
rf1.nodes.17b <- randomForest(selected ~ ., data=expd.nodes.9[which(!is.na(expd.nodes.9[,16])),c(-5,-6,-13,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.17b) # 26.82 % Variance Explained | |
varImpPlot(rf1.nodes.17b) | |
# Network | |
# Y Position | |
# Node Border Width | |
# X Position | |
# Node Size | |
# Eigenvector Centrality | |
# Node Shape | |
# Node Color Saturation | |
# Node Color Value | |
# Node Color Hue | |
# Clustering Coefficient | |
# Model with EigenCentrality and DistanceFromCenter and Network | |
rf1.nodes.17c <- randomForest(selected ~ ., data=expd.nodes.9[which(!is.na(expd.nodes.9[,16])),c(-5,-6,-11,-12,-14,-15)], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.nodes.17c) # 23.93% Variance Explained | |
varImpPlot(rf1.nodes.17c) | |
# Network | |
# DistanceFromCenter | |
# Node Border Width | |
# Node Size | |
# Eigenvector Centrality | |
# Node Shape | |
# Node Color Saturation | |
# Node Color Value | |
# Node Color Hue | |
# Node Encodings Only Model / Reaction Time? | |
# Trying to see if I can't calculate edge lengths | |
determineEdges <- function(networkIn) { | |
#return( is.na(networkIn$xposition) ) | |
return( networkIn$eletype == "edge" ) | |
} | |
determineNodes <- function(networkIn) { | |
# return( !is.na(networkIn$xposition) ) | |
return( networkIn$eletype == "node" ) | |
} | |
eucDist <- function(x1,y1,x2,y2) { | |
return( sqrt( (y2-y1)^2+(x2-x1)^2 ) ) | |
} | |
# Get networks | |
edgeLengthData <- c() | |
for (u in unique(expd.dat$user)) { | |
#subFrame <- expd.dat[which(expd.dat$user == u),] | |
for (n in unique(expd.dat$network)) { | |
#networkAtHand <- which(subFrame$network == n) | |
networkAtHand <- which(expd.dat$network == n & expd.dat$user == u) | |
if (length(networkAtHand) == 0) { | |
next | |
} | |
edgesInNet <- cbind(expd.dat[networkAtHand,][determineEdges(expd.dat[networkAtHand,]),]$elesource, | |
expd.dat[networkAtHand,][determineEdges(expd.dat[networkAtHand,]),]$eletarget, | |
expd.dat[networkAtHand,][determineEdges(expd.dat[networkAtHand,]),]$eleid) | |
edgeLengths <- apply(edgesInNet, MAR=1, FUN=function(row) { | |
eucDist(expd.dat[row[1],]$xposition / expd.dat[row[1],]$windowwidth, | |
expd.dat[row[1],]$yposition / expd.dat[row[1],]$windowheight, | |
expd.dat[row[2],]$xposition / expd.dat[row[2],]$windowwidth, | |
expd.dat[row[2],]$yposition / expd.dat[row[2],]$windowheight) | |
}) | |
# cat(u, n, '\n') | |
# print(edgeLengths) | |
for (e in 1:(sum(determineNodes(expd.dat[networkAtHand,])) + length(edgeLengths))) { | |
if (e %in% which(determineNodes(expd.dat[networkAtHand,]))) { | |
edgeLengthData <- rbind(edgeLengthData, c(u, n, e, NA)) | |
} | |
else { | |
edgeLengthData <- rbind(edgeLengthData, c(u, n, edgesInNet[e - sum(determineNodes(expd.dat[networkAtHand,])),3], edgeLengths[e - sum(determineNodes(expd.dat[networkAtHand,]))])) | |
} | |
} | |
} | |
} | |
edgeLengthData <- c() | |
for (i in 1:dim(expd.dat)[1]) { | |
if (expd.dat[i,]$eletype == "edge") { | |
#cat(expd.dat[i,]$network, expd.dat[i,]$user, '\n') | |
networkAtHand <- which(expd.dat$network == expd.dat[i,]$network & expd.dat$user == expd.dat[i,]$user) | |
sourceNode <- expd.dat[i,]$elesource | |
targetNode <- expd.dat[i,]$eletarget | |
edgeLength <- eucDist(expd.dat[networkAtHand[sourceNode],]$xposition / expd.dat[i,]$windowwidth, | |
expd.dat[networkAtHand[sourceNode],]$yposition / expd.dat[i,]$windowheight, | |
expd.dat[networkAtHand[targetNode],]$xposition / expd.dat[i,]$windowwidth, | |
expd.dat[networkAtHand[targetNode],]$yposition / expd.dat[i,]$windowheight) | |
#cat(expd.dat[i,]$network, expd.dat[i,]$user, edgeLength, '\n') | |
edgeLengthData <- cbind(edgeLengthData, edgeLength) | |
} | |
else if (expd.dat[i,]$eletype == "node") { | |
edgeLengthData <- cbind(edgeLengthData, NA) | |
} | |
else { | |
# node | |
print("ERROR") | |
} | |
} | |
dim(expd.dat) | |
dim(edgeLengthData) | |
# expd.dat.edgelength <- cbind(expd.dat, edgeLengthData[,3], edgeLengthData[,4]) | |
# names(expd.dat.edgelength) <- c(names(expd.dat.edgelength)[1:47], "SanityEleID", "edgeLength") | |
expd.dat.edgelength <- cbind(expd.dat, as.vector(edgeLengthData)) | |
names(expd.dat.edgelength) <- c(names(expd.dat.edgelength)[1:47], "edgeLength") | |
# Edge Encodings Only Model / Selection | |
###expd.dat.edges.balanced2 <- expd.dat.edgelength[which(expd.dat.edgelength$X_id %in% expd.dat.edges.balanced$X_id),] | |
# expd.dat.edges.balanced2 <- merge(expd.dat.edgelength, expd.dat.edges.balanced, by=c("X_id", "X_id"))[,1:49] | |
# names(expd.dat.edges.balanced2) <- names(expd.dat.edgelength) | |
# expd.edges.bak <- expd.edges | |
# expd.edges <- expd.edges.bak # run this to go back to normal***** | |
#expd.edges <- expd.dat.edgelength[which(expd.edges$X_id %in% expd.dat.edgelength$X_id),] # Refer to line 171 of the code | |
expd.dat.edges.balanced2 <- sampleBalancedData(expd.dat.edgelength, "edges") | |
expd.edges <- expd.dat.edges.balanced2 | |
expd.edges <- expd.edges[-which(expd.edges$question4 == "colorBlind"),] # Removing colorblind | |
expd.edges <- expd.edges[which(!is.na(expd.edges$selected)),] # Removing NA | |
#expd.edges.1 <- data.frame(expd.edges[,c(3, 14, 21, 23, 45:47)]) | |
# expd.edges.1 <- data.frame(expd.edges[,c(9, 14, 21, 23, 45:47)]) #for bak | |
expd.edges.1 <- data.frame(expd.edges[,c(9, 14, 21, 23, 45:47, 48)]) | |
expd.edges.1$linestyle <- as.factor(expd.edges.1$linestyle) | |
expd.edges.1$network <- as.factor(expd.edges.1$network) | |
## Downsampling is necessary if minority class is 15% or less | |
# Calculating minorty class presence | |
sum(expd.edges.1$selected == 1) / length(expd.edges.1$selected) | |
# Shows us that minority class is 16.9%, which is borderline! | |
#multi.collinear(expd.edges.1[,c(-4)]) | |
multi.collinear(expd.edges.1[,c(-1,-4)]) | |
rf1.edges.1 <- randomForest(selected ~ ., data=expd.edges.1, importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
print(rf1.edges.1) | |
rf1.edges.1$importance | |
write.table(rf1.edges.1$importance, file="~/Documents/Evaluation of Importance Experiment/edge_importance.csv", quote = F, row.names = T, col.names = T, sep=",") | |
varImpPlot(rf1.edges.1,type=2) | |
elabs <- rev(c("Edge Width", "Network", "Edge Saturation", "Edge Value", "Edge Length", "Edge Hue", "Edge Pattern")) | |
jpeg(filename="~/Documents/Evaluation of Importance Experiment/edgeVarImp.jpg", res=300, height = 1000, width=1500) | |
varImpPlot(rf1.edges.1,type=2, main = "Edge Variable Importance", labels=elabs) | |
dev.off() | |
rf1.edges.1.p <- classCenter(expd.edges.1[,c(-2)], expd.edges.1[,2], rf1.edges.1$proximity) | |
write.table(rf1.edges.1.p, file="~/Documents/Evaluation of Importance Experiment/edge_prototype.csv", quote = F, row.names = T, col.names = T, sep=",") | |
# Edge Encodings Only Model / Reaction Time? | |
# negative value means the mean error is larger than the variance of the response | |
# y. This could be because the predictor performs really poorly but also | |
# because of some calibration issue. | |
# Try to attach demographic information to the DF and see how that affects selection | |
# cbind(expd.both, surveydata) | |
# NOTE THAT THE TREE IS UNBALANCED RIGHT NOW, AND MUST BE SAMPLED | |
# BALANCED BEFORE RESULTS ARE RELIABLE | |
rf1.perf.nodes = performance( prediction(labels = expd.nodes.1$selected, predictions = rf1.nodes.1$predicted) ,"tpr","fpr") | |
rf1.perf.edges = performance( prediction(labels = expd.edges.1$selected, predictions = rf1.edges.1$predicted) ,"tpr","fpr") | |
rf1.perf.nodes.2 = performance( prediction(labels = expd.nodes.2$selected, predictions = rf1.nodes.2$predicted) ,"tpr","fpr") | |
#rf1.perf.edges.2 = performance( prediction(labels = expd.edges.2$selected, predictions = rf1.edges.2$predicted) ,"tpr","fpr") | |
rf1.perf.nodes.3 = performance( prediction(labels = expd.nodes.3$selected, predictions = rf1.nodes.3$predicted) ,"tpr","fpr") | |
#rf1.perf.edges.3 = performance( prediction(labels = expd.edges.1$selected, predictions = rf1.edges.1$predicted) ,"tpr","fpr") | |
rf1.perf.nodes.4 = performance( prediction(labels = expd.nodes.2$selected, predictions = rf1.nodes.2wr$predicted) ,"tpr","fpr") | |
rf1.perf.nodes.5 = performance( prediction(labels = expd.nodes.4$selected, predictions = rf1.nodes.4$predicted) ,"tpr","fpr") | |
rf1.perf.nodes.17a = performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17a$predicted) ,"tpr","fpr") | |
par(mfrow=c(1,2)) | |
#plot the curve | |
plot(rf1.perf.nodes,main="ROC Curve for Random Forest (Nodes)",col=2,lwd=2) | |
lines(unlist(rf1.perf.nodes@x.values),unlist(rf1.perf.nodes@y.values), col=4, lwd=2) | |
abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
par(mfrow=c(1,2)) | |
#plot the curve | |
plot(rf1.perf.nodes.17a,main="ROC Curve for Random Forest (Nodes)",col=2,lwd=2) | |
lines(unlist(rf1.perf.nodes.17a@x.values),unlist(rf1.perf.nodes.17a@y.values), col=4, lwd=2) | |
abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
# plot(rf1.perf.nodes.2,main="ROC Curve for Random Forest (Nodes)",col=2,lwd=2) | |
# lines(unlist(rf1.perf.nodes.2@x.values),unlist(rf1.perf.nodes.2@y.values), col=4, lwd=2) | |
# abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
# plot(rf1.perf.nodes.3,main="ROC Curve for Random Forest (Nodes)",col=2,lwd=2) | |
# lines(unlist(rf1.perf.nodes.3@x.values),unlist(rf1.perf.nodes.3@y.values), col=4, lwd=2) | |
# abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
# Originally in chapter | |
# jpeg(filename="~/Documents/Evaluation of Importance Experiment/nodesROC.jpg", res=300, height = 1500, width=1200) | |
# plot(rf1.perf.nodes,main="ROC Curve for Nodes (AUC = 0.78)",col=2,lwd=2) | |
# abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
# dev.off() | |
jpeg(filename="~/Documents/Evaluation of Importance Experiment/nodesROC2.jpg", res=300, height = 1500, width=1200) | |
plot(rf1.perf.nodes.17a,main="ROC Curve for Nodes (AUC = 0.80)",col=2,lwd=2) | |
abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
dev.off() | |
plot(rf1.perf.edges,main="ROC Curve for Random Forest (Edges)",col=2,lwd=2) | |
lines(unlist(rf1.perf.edges@x.values),unlist(rf1.perf.edges@y.values), col=4, lwd=2) | |
abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
#jpeg(filename="~/Documents/Evaluation of Importance Experiment/edgesROC.jpg", res=300, height = 1500, width=1200) | |
jpeg(filename="~/Documents/Evaluation of Importance Experiment/edgesROC2.jpg", res=300, height = 1500, width=1200) | |
plot(rf1.perf.edges,main="ROC Curve for Edges (AUC = 0.86)",col=2,lwd=2) | |
abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
dev.off() | |
#compute area under curve | |
auc.rf1.nodes <- performance( prediction(labels = expd.nodes.1$selected, predictions = rf1.nodes.1$predicted) ,"auc") | |
auc.rf1.nodes <- unlist(slot(auc.rf1.nodes, "y.values")) | |
minauc<-min(round(auc.rf1.nodes, digits = 2)) | |
maxauc<-max(round(auc.rf1.nodes, digits = 2)) | |
minauct <- paste(c("min(AUC) = "),minauc,sep="") | |
maxauct <- paste(c("max(AUC) = "),maxauc,sep="") | |
minauct | |
maxauct | |
auc.rf1.edges <- performance( prediction(labels = expd.edges.1$selected, predictions = rf1.edges.1$predicted) ,"auc") | |
auc.rf1.edges <- unlist(slot(auc.rf1.edges, "y.values")) | |
minauc<-min(round(auc.rf1.edges, digits = 2)) | |
maxauc<-max(round(auc.rf1.edges, digits = 2)) | |
minauct <- paste(c("min(AUC) = "),minauc,sep="") | |
maxauct <- paste(c("max(AUC) = "),maxauc,sep="") | |
minauct | |
maxauct | |
auc.rf1.nodes.2 <- performance( prediction(labels = expd.nodes.2$selected, predictions = rf1.nodes.2$predicted) ,"auc") | |
auc.rf1.nodes.2 <- unlist(slot(auc.rf1.nodes.2, "y.values")) | |
# > max(round(auc.rf1.nodes.2, digits = 2)) | |
# [1] 0.81 | |
auc.rf1.nodes.3 <- performance( prediction(labels = expd.nodes.3$selected, predictions = rf1.nodes.3$predicted) ,"auc") | |
auc.rf1.nodes.3 <- unlist(slot(auc.rf1.nodes.3, "y.values")) | |
# > max(round(auc.rf1.nodes.3, digits = 2)) | |
# [1] 0.81 | |
auc.rf1.nodes.4 <- performance( prediction(labels = expd.nodes.2$selected, predictions = rf1.nodes.2wr$predicted) ,"auc") | |
auc.rf1.nodes.4 <- unlist(slot(auc.rf1.nodes.4, "y.values")) | |
# > max(round(auc.rf1.nodes.4, digits = 2)) | |
# [1] 0.81 | |
# Including radius distance and XY lowers performance from 0.86 to 0.81. | |
auc.rf1.nodes.5 <- performance( prediction(labels = expd.nodes.4$selected, predictions = rf1.nodes.4$predicted) ,"auc") | |
auc.rf1.nodes.5 <- unlist(slot(auc.rf1.nodes.5, "y.values")) | |
# > max(round(auc.rf1.nodes.5, digits = 2)) | |
# [1] 0.81 | |
auc.rf1.nodes.6 <- performance( prediction(labels = expd.nodes.4$selected, predictions = rf1.nodes.5$predicted) ,"auc") | |
auc.rf1.nodes.6 <- unlist(slot(auc.rf1.nodes.6, "y.values")) | |
# > max(round(auc.rf1.nodes.5, digits = 2)) | |
# [1] 0.79 | |
auc.rf1.nodes.7 <- performance( prediction(labels = expd.nodes.5$selected, predictions = rf1.nodes.6$predicted) ,"auc") | |
auc.rf1.nodes.7 <- unlist(slot(auc.rf1.nodes.7, "y.values")) | |
# > max(round(auc.rf1.nodes.5, digits = 2)) | |
# [1] 0.77 | |
auc.rf1.nodes.8 <- performance( prediction(labels = expd.nodes.5$selected, predictions = rf1.nodes.7$predicted) ,"auc") | |
auc.rf1.nodes.8 <- unlist(slot(auc.rf1.nodes.8, "y.values")) | |
# > max(round(auc.rf1.nodes.8, digits = 2)) | |
# [1] 0.81 | |
auc.rf1.nodes.9 <- performance( prediction(labels = expd.nodes.6$selected[which(!is.na(expd.nodes.6[,15]))], predictions = rf1.nodes.8$predicted) ,"auc") | |
auc.rf1.nodes.9 <- unlist(slot(auc.rf1.nodes.9, "y.values")) | |
max(round(auc.rf1.nodes.9, digits = 2)) | |
# [1] 0.77 | |
auc.rf1.nodes.9a <- performance( prediction(labels = expd.nodes.6a$selected[which(!is.na(expd.nodes.6a[,15]))], predictions = rf1.nodes.8a$predicted) ,"auc") | |
auc.rf1.nodes.9a <- unlist(slot(auc.rf1.nodes.9a, "y.values")) | |
max(round(auc.rf1.nodes.9a, digits = 2)) | |
# [1] 0.77 | |
auc.rf1.nodes.9b <- performance( prediction(labels = expd.nodes.6a$selected[which(!is.na(expd.nodes.6a[,16]))], predictions = rf1.nodes.8b$predicted) ,"auc") | |
auc.rf1.nodes.9b <- unlist(slot(auc.rf1.nodes.9b, "y.values")) | |
max(round(auc.rf1.nodes.9b, digits = 2)) | |
# [1] 0.77 | |
auc.rf1.nodes.9c <- performance( prediction(labels = expd.nodes.6a$selected[which(!is.na(expd.nodes.6a[,17]))], predictions = rf1.nodes.8c$predicted) ,"auc") | |
auc.rf1.nodes.9c <- unlist(slot(auc.rf1.nodes.9c, "y.values")) | |
max(round(auc.rf1.nodes.9c, digits = 2)) | |
# [1] 0.77 | |
auc.rf1.nodes.10 <- performance( prediction(labels = expd.nodes.6$selected[which(!is.na(expd.nodes.6[,15]))], predictions = rf1.nodes.9$predicted) ,"auc") | |
auc.rf1.nodes.10 <- unlist(slot(auc.rf1.nodes.10, "y.values")) | |
# > max(round(auc.rf1.nodes.10, digits = 2)) | |
# [1] 0.80 | |
auc.rf1.nodes.11 <- performance( prediction(labels = expd.nodes.6$selected, predictions = rf1.nodes.11$predicted) ,"auc") | |
auc.rf1.nodes.11 <- unlist(slot(auc.rf1.nodes.11, "y.values")) | |
# > max(round(auc.rf1.nodes.11, digits = 2)) | |
# [1] 0.75 | |
auc.rf1.nodes.12 <- performance( prediction(labels = expd.nodes.8$selected, predictions = rf1.nodes.12$predicted) ,"auc") | |
auc.rf1.nodes.12 <- unlist(slot(auc.rf1.nodes.12, "y.values")) | |
max(round(auc.rf1.nodes.12, digits = 2)) | |
# [1] 0.75 | |
auc.rf1.nodes.13 <- performance( prediction(labels = expd.nodes.8$selected, predictions = rf1.nodes.13$predicted) ,"auc") | |
auc.rf1.nodes.13 <- unlist(slot(auc.rf1.nodes.13, "y.values")) | |
# > max(round(auc.rf1.nodes.13, digits = 2)) | |
# [1] 0.81 | |
auc.rf1.nodes.13a <- performance( prediction(labels = expd.nodes.8$selected, predictions = rf1.nodes.13a$predicted) ,"auc") | |
auc.rf1.nodes.13a <- unlist(slot(auc.rf1.nodes.13a, "y.values")) | |
max(round(auc.rf1.nodes.13a, digits = 2)) | |
# [1] 0.80 | |
auc.rf1.nodes.13b <- performance( prediction(labels = expd.nodes.8$selected, predictions = rf1.nodes.13b$predicted) ,"auc") | |
auc.rf1.nodes.13b <- unlist(slot(auc.rf1.nodes.13b, "y.values")) | |
max(round(auc.rf1.nodes.13b, digits = 2)) | |
# [1] 0.78 | |
auc.rf1.nodes.13c <- performance( prediction(labels = expd.nodes.8$selected, predictions = rf1.nodes.13c$predicted) ,"auc") | |
auc.rf1.nodes.13c <- unlist(slot(auc.rf1.nodes.13c, "y.values")) | |
max(round(auc.rf1.nodes.13c, digits = 2)) | |
# [1] 0.75 | |
auc.rf1.nodes.14 <- performance( prediction(labels = expd.nodes.6$selected, predictions = rf1.nodes.14$predicted) ,"auc") | |
auc.rf1.nodes.14 <- unlist(slot(auc.rf1.nodes.14, "y.values")) | |
# > max(round(auc.rf1.nodes.14, digits = 2)) | |
# [1] 0.80 | |
auc.rf1.nodes.15 <- performance( prediction(labels = expd.nodes.6$selected, predictions = rf1.nodes.15$predicted) ,"auc") | |
auc.rf1.nodes.15 <- unlist(slot(auc.rf1.nodes.15, "y.values")) | |
# > max(round(auc.rf1.nodes.15, digits = 2)) | |
# [1] 0.77 | |
auc.rf1.nodes.16 <- performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.16$predicted) ,"auc") | |
auc.rf1.nodes.16 <- unlist(slot(auc.rf1.nodes.16, "y.values")) | |
# > max(round(auc.rf1.nodes.16, digits = 2)) | |
# [1] 0.80 | |
auc.rf1.nodes.16 <- performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.16$predicted) ,"auc") | |
auc.rf1.nodes.16 <- unlist(slot(auc.rf1.nodes.16, "y.values")) | |
# > max(round(auc.rf1.nodes.16, digits = 2)) | |
# [1] 0.80 | |
auc.rf1.nodes.17 <- performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17$predicted) ,"auc") | |
auc.rf1.nodes.17 <- unlist(slot(auc.rf1.nodes.17, "y.values")) | |
max(round(auc.rf1.nodes.17, digits = 2)) | |
# [1] 0.78 (DistanceFromCenter + EigenCentrality) | |
auc.rf1.nodes.17a <- performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17a$predicted) ,"auc") | |
auc.rf1.nodes.17a <- unlist(slot(auc.rf1.nodes.17a, "y.values")) | |
max(round(auc.rf1.nodes.17a, digits = 2)) | |
# [1] 0.80 (XY + EigenCentrality) | |
auc.rf1.nodes.17a1 <- performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[, 16]) & !(expd.nodes.9$yposition > 1))], predictions = rf1.nodes.17a1$predicted) ,"auc") | |
auc.rf1.nodes.17a1 <- unlist(slot(auc.rf1.nodes.17a1, "y.values")) | |
max(round(auc.rf1.nodes.17a1, digits = 2)) | |
# [1] 0.80 (XY + EigenCentrality) | |
auc.rf1.nodes.17b <- performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17b$predicted) ,"auc") | |
auc.rf1.nodes.17b <- unlist(slot(auc.rf1.nodes.17b, "y.values")) | |
max(round(auc.rf1.nodes.17b, digits = 2)) | |
# [1] 0.80 (XY + EigenCentrality + ClusteringCoefficient) | |
auc.rf1.nodes.17c <- performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17c$predicted) ,"auc") | |
auc.rf1.nodes.17c <- unlist(slot(auc.rf1.nodes.17c, "y.values")) | |
max(round(auc.rf1.nodes.17c, digits = 2)) | |
# [1] 0.78 (DistanceFromCenter + EigenCentrality) | |
# Younden's Index (https://en.wikipedia.org/wiki/Youden%27s_J_statistic) | |
# Nodes | |
# TPR + TNR - 1 (http://stats.stackexchange.com/questions/29719/how-to-determine-best-cutoff-point-and-its-confidence-interval-using-roc-curve-i) | |
# tpr.nodes.1 <- unlist(slot(performance( prediction(labels = expd.nodes.1$selected, predictions = rf1.nodes.1$predicted) ,"tpr"), "y.values") ) | |
# tnr.nodes.1 <- unlist(slot(performance( prediction(labels = expd.nodes.1$selected, predictions = rf1.nodes.1$predicted) ,"tnr"), "y.values") ) | |
# yi.nodes.1 <- tpr.nodes.1 + tnr.nodes.1 - 1 | |
# tpr.nodes.1[which( (yi.nodes.1) == max(yi.nodes.1))] | |
# tnr.nodes.1[which( (yi.nodes.1) == max(yi.nodes.1))] | |
# | |
# unlist(slot(performance( prediction(labels = expd.nodes.1$selected, predictions = rf1.nodes.1$predicted) ,"err"), "y.values") )[which( (yi.nodes.1) == max(yi.nodes.1))[1]] | |
# unlist(slot(performance( prediction(labels = expd.nodes.1$selected, predictions = rf1.nodes.1$predicted) ,"ppv"), "y.values") )[which( (yi.nodes.1) == max(yi.nodes.1))[1]] | |
# unlist(slot(performance( prediction(labels = expd.nodes.1$selected, predictions = rf1.nodes.1$predicted) ,"npv"), "y.values") )[which( (yi.nodes.1) == max(yi.nodes.1))[1]] | |
# unlist(slot(performance( prediction(labels = expd.nodes.1$selected, predictions = rf1.nodes.1$predicted) ,"cal"), "y.values") )[which( (yi.nodes.1) == max(yi.nodes.1))[1]] | |
# unlist(slot(performance( prediction(labels = expd.nodes.1$selected, predictions = rf1.nodes.1$predicted) ,"acc"), "y.values") )[which( (yi.nodes.1) == max(yi.nodes.1))[1]] | |
# unlist(slot(performance( prediction(labels = expd.nodes.1$selected, predictions = rf1.nodes.1$predicted) ,"fpr"), "y.values") )[which( (yi.nodes.1) == max(yi.nodes.1))[1]] | |
# unlist(slot(performance( prediction(labels = expd.nodes.1$selected, predictions = rf1.nodes.1$predicted) ,"fnr"), "y.values") )[which( (yi.nodes.1) == max(yi.nodes.1))[1]] | |
tpr.nodes.17a <- unlist(slot(performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17a$predicted) ,"tpr"), "y.values") ) | |
tnr.nodes.17a <- unlist(slot(performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17a$predicted) ,"tnr"), "y.values") ) | |
yi.nodes.17a <- tpr.nodes.17a + tnr.nodes.17a - 1 | |
tpr.nodes.17a[which( (yi.nodes.17a) == max(yi.nodes.17a))] | |
tnr.nodes.17a[which( (yi.nodes.17a) == max(yi.nodes.17a))] | |
unlist(slot(performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17a$predicted) ,"err"), "y.values") )[which( (yi.nodes.17a) == max(yi.nodes.17a))[1]] | |
unlist(slot(performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17a$predicted) ,"ppv"), "y.values") )[which( (yi.nodes.17a) == max(yi.nodes.17a))[1]] | |
unlist(slot(performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17a$predicted) ,"npv"), "y.values") )[which( (yi.nodes.17a) == max(yi.nodes.17a))[1]] | |
unlist(slot(performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17a$predicted) ,"cal"), "y.values") )[which( (yi.nodes.17a) == max(yi.nodes.17a))[1]] | |
unlist(slot(performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17a$predicted) ,"acc"), "y.values") )[which( (yi.nodes.17a) == max(yi.nodes.17a))[1]] | |
unlist(slot(performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17a$predicted) ,"fpr"), "y.values") )[which( (yi.nodes.17a) == max(yi.nodes.17a))[1]] | |
unlist(slot(performance( prediction(labels = expd.nodes.9$selected[which(!is.na(expd.nodes.9[,16]))], predictions = rf1.nodes.17a$predicted) ,"fnr"), "y.values") )[which( (yi.nodes.17a) == max(yi.nodes.17a))[1]] | |
# Edges | |
# TPR + TNR - 1 | |
tpr.edges.1 <- unlist(slot(performance( prediction(labels = expd.edges.1$selected, predictions = rf1.edges.1$predicted) ,"tpr"), "y.values") ) | |
tnr.edges.1 <- unlist(slot(performance( prediction(labels = expd.edges.1$selected, predictions = rf1.edges.1$predicted) ,"tnr"), "y.values") ) | |
yi.edges.1 <- tpr.edges.1 + tnr.edges.1 - 1 | |
tpr.edges.1[which( (yi.edges.1) == max(yi.edges.1))] | |
tnr.edges.1[which( (yi.edges.1) == max(yi.edges.1))] | |
unlist(slot(performance( prediction(labels = expd.edges.1$selected, predictions = rf1.edges.1$predicted) ,"err"), "y.values") )[which( (yi.edges.1) == max(yi.edges.1))] | |
unlist(slot(performance( prediction(labels = expd.edges.1$selected, predictions = rf1.edges.1$predicted) ,"ppv"), "y.values") )[which( (yi.edges.1) == max(yi.edges.1))] | |
unlist(slot(performance( prediction(labels = expd.edges.1$selected, predictions = rf1.edges.1$predicted) ,"npv"), "y.values") )[which( (yi.edges.1) == max(yi.edges.1))] | |
unlist(slot(performance( prediction(labels = expd.edges.1$selected, predictions = rf1.edges.1$predicted) ,"cal"), "y.values") )[which( (yi.edges.1) == max(yi.edges.1))] | |
unlist(slot(performance( prediction(labels = expd.edges.1$selected, predictions = rf1.edges.1$predicted) ,"acc"), "y.values") )[which( (yi.edges.1) == max(yi.edges.1))] | |
# Forest Floor | |
library(forestFloor) | |
# Originally used | |
# ff = forestFloor( | |
# rf.fit = rf1.nodes.1, # mandatory | |
# X = expd.nodes.1, # mandatory | |
# calc_np = FALSE, # TRUE or FALSE both works, makes no difference | |
# binary_reg = FALSE # takes no effect here when rfo$type="regression" | |
# ) | |
ff.2 = forestFloor( | |
rf.fit = rf1.nodes.17a, # mandatory | |
X = expd.nodes.9[which(!is.na(expd.nodes.9[,16])),], # mandatory | |
calc_np = FALSE, # TRUE or FALSE both works, makes no difference | |
binary_reg = FALSE # takes no effect here when rfo$type="regression" | |
) | |
ff.2a = forestFloor( | |
rf.fit = rf1.nodes.17a1, # mandatory | |
X = expd.nodes.9[which( !is.na(expd.nodes.9[,16]) & !(expd.nodes.9$yposition > 1) ),], # mandatory | |
calc_np = FALSE, # TRUE or FALSE both works, makes no difference | |
binary_reg = FALSE # takes no effect here when rfo$type="regression" | |
) | |
ffe = forestFloor( | |
rf.fit = rf1.edges.1, # mandatory | |
X = expd.edges.1, # mandatory | |
calc_np = FALSE, # TRUE or FALSE both works, makes no difference | |
binary_reg = FALSE # takes no effect here when rfo$type="regression" | |
) | |
#plot partial functions of most important variables first | |
# Original code | |
# plot(ff, # forestFloor object | |
# plot_seq = 1:9, # optional sequence of features to plot | |
# orderByImportance=TRUE, # if TRUE index sequence by importance, else by X column | |
# col=ifelse(ff$Y, "red", "gray") | |
# ) | |
plot(ff.2, # forestFloor object | |
plot_seq = 1:16, # optional sequence of features to plot | |
orderByImportance=TRUE, # if TRUE index sequence by importance, else by X column | |
col=ifelse(ff.2$Y, "red", "gray") | |
) | |
plot(ff.2a, # forestFloor object | |
plot_seq = c(1:6,8:9), # optional sequence of features to plot | |
orderByImportance=TRUE, # if TRUE index sequence by importance, else by X column | |
col=ifelse(ff.2a$Y, "red", "gray") | |
) | |
# jpeg(filename="~/Documents/Evaluation of Importance Experiment/nodesPartial.jpg", res=300, height = 3000, width=3000) | |
# plot(ff, # forestFloor object | |
# plot_seq = 1:9, # optional sequence of features to plot | |
# orderByImportance=TRUE, # if TRUE index sequence by importance, else by X column | |
# col=ifelse(ff$Y, "red", "gray") | |
# ) | |
# dev.off() | |
jpeg(filename="~/Documents/Evaluation of Importance Experiment/nodesPartial2.jpg", res=300, height = 3000, width=3000) | |
plot(ff.2a, # forestFloor object | |
plot_seq = c(1:6,9), # optional sequence of features to plot | |
orderByImportance=TRUE, # if TRUE index sequence by importance, else by X column | |
col=ifelse(ff.2a$Y, "red", "gray") | |
) | |
dev.off() | |
plot(ffe, # forestFloor object | |
plot_seq = c(1:5,7), # optional sequence of features to plot | |
orderByImportance=TRUE, # if TRUE index sequence by importance, else by X column | |
col=ifelse(ffe$Y, "red", "gray") | |
) | |
# jpeg(filename="~/Documents/Evaluation of Importance Experiment/edgesPartial.jpg", res=300, height = 3000, width=3000) | |
# plot(ffe, # forestFloor object | |
# plot_seq = 1:9, # optional sequence of features to plot | |
# orderByImportance=TRUE, # if TRUE index sequence by importance, else by X column | |
# col=ifelse(ff$Y, "red", "gray") | |
# ) | |
# dev.off() | |
jpeg(filename="~/Documents/Evaluation of Importance Experiment/edgesPartial2.jpg", res=300, height = 3000, width=3000) | |
plot(ffe, # forestFloor object | |
plot_seq = c(1:3,7), # optional sequence of features to plot | |
orderByImportance=TRUE, # if TRUE index sequence by importance, else by X column | |
col=ifelse(ffe$Y, "red", "gray") | |
) | |
dev.off() | |
# Barplot of the selected networks | |
barplot(table(expd.nodes.1$network[(expd.nodes.1$selected == 1)]), las=2, horiz = T) | |
par(mfrow=c(3,3)) | |
for(i in 1:9) partialPlot(rf1.nodes.1,expd.nodes.1,x.var=eval(names(expd.nodes.1)[i])) | |
partialPlot(rf1.nodes.1, expd.nodes.1, nodeshape) | |
partialPlot(rf1.nodes.1, expd.nodes.1, nodeheight) | |
partialPlot(rf1.nodes.1, expd.nodes.1, nodeHue) | |
partialPlot(rf1.nodes.1, expd.nodes.1, nodeheight, rug=T, ylim=c(0,1)) | |
points(expd.nodes.1[,3], predict(rf1.nodes.1, expd.nodes.1[,-6]), col=ifelse(expd.nodes.1[,4] == 1, "red", "gray")) | |
partialPlot(rf1.nodes.1, expd.nodes.1, nodeborderwidth, rug=T, ylim=c(0,1)) | |
points(expd.nodes.1[,7], predict(rf1.nodes.1, expd.nodes.1[,-6]), col=ifelse(expd.nodes.1[,4] == 1, "red", "gray")) | |
partialPlot(rf1.nodes.1, expd.nodes.1, numConnected, rug=T, ylim=c(0,1)) | |
points(expd.nodes.1[,5], predict(rf1.nodes.1, expd.nodes.1[,-6]), col=ifelse(expd.nodes.1[,4] == 1, "red", "gray")) | |
par(mfrow=c(2,4)) | |
partialPlot(rf1.nodes.17a, expd.nodes.9[which(!is.na(expd.nodes.9[,16])),], nodeheight, main = "Node Size") | |
partialPlot(rf1.nodes.17a, expd.nodes.9[which(!is.na(expd.nodes.9[,16])),], nodeborderwidth, main = "Node Border Width") | |
partialPlot(rf1.nodes.17a, expd.nodes.9[which(!is.na(expd.nodes.9[,16])),], nodeHue, main = "Node Color Hue") | |
partialPlot(rf1.nodes.17a, expd.nodes.9[which(!is.na(expd.nodes.9[,16])),], nodeSaturation, main = "Node Color Saturation") | |
partialPlot(rf1.nodes.17a, expd.nodes.9[which(!is.na(expd.nodes.9[,16])),], nodeValue, main = "Node Color Value") | |
partialPlot(rf1.nodes.17a, expd.nodes.9[which(!is.na(expd.nodes.9[,16])),], xposition, main = "X Coordinate Position") | |
partialPlot(rf1.nodes.17a, expd.nodes.9[which(!is.na(expd.nodes.9[,16])),], yposition, main = "Y Coordinate Position") | |
partialPlot(rf1.nodes.17a, expd.nodes.9[which(!is.na(expd.nodes.9[,16])),], EigenvectorCentality, main = "Eigenvector Centrality") | |
dev.off() | |
library(rfPermute) | |
library(ggRandomForests) | |
##ggRandomForest | |
library(ggplot2) | |
library(RColorBrewer) | |
library(plot3D) | |
library(dplyr) | |
library(reshape) | |
library(reshape2) | |
library(randomForestSRC) | |
library(gridExtra) | |
theme_set(theme_bw()) | |
event.marks <- c(1,4) | |
event.labels <- c(FALSE, TRUE) | |
strCol <- brewer.pal(3, "Set1")[c(2,1,3)] | |
expd.nodes.1.melted <- melt(expd.nodes.1, id.vars=c("nodeshape", "selected")) | |
ggplot(expd.nodes.1.melted, aes(x=nodeshape, y=value, color=factor(selected)))+ | |
geom_point(alpha=.4)+ | |
geom_rug(data=expd.nodes.1.melted %>% filter(is.na(value)))+ | |
# labs(y="", x=nodeshape) + | |
scale_color_brewer(palette="Set2")+ | |
facet_wrap(~variable, scales="free_y", ncol=3) | |
rfsc_selected <- rfsrc(selected ~ ., data=expd.nodes.1) | |
#plot OOB against growth of forests | |
gg_e <- gg_error(rfsc_selected) | |
plot(gg_e) | |
# VIMP | |
plot(gg_vimp(rfsc_selected)) | |
# Minimal Depth | |
varsel_node <- var.select(rfsc_selected) | |
gg_md <- gg_minimal_depth(varsel_node) | |
plot(gg_md) | |
# Compare VIMP and Minimal depth | |
plot(gg_minimal_vimp(gg_md)) | |
# Variable Dependence (this can theoretically be generated by plotting the results from RF for each variable) | |
gg_v <- gg_variable(rfsc_selected) | |
xvar <- gg_md$topvars | |
plot(gg_v, xvar=xvar, panel=TRUE, | |
alpha=.4) | |
# labs(y=selected, x="") | |
# Partial Dependence (this is in the randomForest library and works there, so no need for this.) | |
partial_node <- plot.variable(rfsc_selected, xvar=gg_md$topvars, partial=TRUE, sorted=FALSE, show.plots=FALSE) | |
gg_p <- gg_partial(partial_node) | |
plot(gg_p, xvar=xvar, panel=TRUE) | |
interaction_nodes <- find.interaction(rfsc_selected) | |
plot(interaction_nodes, xvar=gg_md$topvars, panel=TRUE) | |
heatmap(interaction_nodes) | |
## Trying ICEbox | |
library(ICEbox) | |
nodes.ice1 <- ice(object = rf1.nodes.1, X = expd.nodes.1, y = expd.nodes.1$selected, predictor = "nodeheight", frac_to_build = .1) | |
nodes.ice2 <- ice(object = rf1.nodes.1, X = expd.nodes.1, y = expd.nodes.1$selected, predictor = "nodeHue", frac_to_build = .1) | |
nodes.ice3 <- ice(object = rf1.nodes.1, X = expd.nodes.1, y = expd.nodes.1$selected, predictor = "nodeborderwidth", frac_to_build = .1) | |
nodes.ice4 <- ice(object = rf1.nodes.1, X = expd.nodes.1, y = expd.nodes.1$selected, predictor = "numEdges", frac_to_build = .1) | |
#nodes.ice5 <- ice(object = rf1.nodes.1, X = expd.nodes.1, y = expd.nodes.1$selected, predictor = "nodeshape", frac_to_build = .1) #doesn't handle factors...I may be able to make a dummy variable? | |
nodes.ice6 <- ice(object = rf1.nodes.1, X = expd.nodes.1, y = expd.nodes.1$selected, predictor = "numConnected", frac_to_build = .1) | |
nodes.ice7 <- ice(object = rf1.nodes.1, X = expd.nodes.1, y = expd.nodes.1$selected, predictor = "nodeValue", frac_to_build = .1) | |
nodes.ice8 <- ice(object = rf1.nodes.1, X = expd.nodes.1, y = expd.nodes.1$selected, predictor = "nodeSaturation", frac_to_build = .1) | |
nodes.dice1 <- dice(nodes.ice1) | |
nodes.dice2 <- dice(nodes.ice2) | |
nodes.dice3 <- dice(nodes.ice3) | |
nodes.dice4 <- dice(nodes.ice4) | |
#nodes.dice5 <- dice(nodes.ice5) | |
nodes.dice6 <- dice(nodes.ice6) | |
nodes.dice7 <- dice(nodes.ice7) | |
nodes.dice8 <- dice(nodes.ice8) | |
# ICE = individual conditional expectation curves | |
dev.off() | |
par(mfrow=c(2,4)) | |
plot(nodes.ice1) | |
plot(nodes.ice2) | |
plot(nodes.ice3) | |
plot(nodes.ice4) | |
plot(nodes.ice6) | |
plot(nodes.ice7) | |
plot(nodes.ice8) | |
# DICE = Estimates the partial derivative function for each curve in an ice object | |
dev.off() | |
par(mfrow=c(2,4)) | |
plot(nodes.dice1) | |
plot(nodes.dice2) | |
plot(nodes.dice3) | |
plot(nodes.dice4) | |
plot(nodes.dice6) | |
plot(nodes.dice7) | |
plot(nodes.dice8) | |
# Interactions | |
library(plotmo) | |
plotmo(rf1.nodes.1c, type="prob") | |
# plotmo(rf1.nodes.1) | |
plotmo(rf1.nodes.17a) | |
plotmo(rf1.edges.1) | |
plotmo(rf1.nodes.1, pt.col = heat.colors(12)) | |
# jpeg(filename="~/Documents/Evaluation of Importance Experiment/nodesInteraction.jpg", res=300, height = 3000, width=3000) | |
# plotmo(rf1.nodes.1) | |
# dev.off() | |
jpeg(filename="~/Documents/Evaluation of Importance Experiment/nodesInteraction2.jpg", res=300, height = 3000, width=3000) | |
plotmo(rf1.nodes.17a) | |
dev.off() | |
# jpeg(filename="~/Documents/Evaluation of Importance Experiment/edgesInteraction.jpg", res=300, height = 3000, width=3000) | |
# plotmo(rf1.edges.1) | |
# dev.off() | |
jpeg(filename="~/Documents/Evaluation of Importance Experiment/edgesInteraction2.jpg", res=300, height = 3000, width=3000) | |
plotmo(rf1.edges.1) | |
dev.off() | |
# Saving the RF Models | |
saveRDS(rf1.nodes.1, file="~/Documents/Evaluation of Importance Experiment/rf1.nodes.1.rds") | |
saveRDS(rf1.edges.1, file="~/Documents/Evaluation of Importance Experiment/rf1.edges.1.rds") | |
# to save entire session, use save.image() | |
# save.image(file="~/Documents/Evaluation of Importance Experiment/Aim3.RData") | |
# Qualitative Responses | |
unique(expd.dat.nodes.balanced[!is.na(expd.dat.nodes.balanced$participantResponse),]$participantResponse) | |
unique(expd.dat.edges.balanced[!is.na(expd.dat.edges.balanced$participantResponse),]$participantResponse) | |
# Calculate participation drop off? | |
table(expd.dat$user) | |
# Webers Law | |
calcWebersForRows <- function(rowIndexStart, rowIndexEnd, c) { | |
nodeCombs <- c() | |
for (i in rowIndexStart:rowIndexEnd) { | |
for (k in i:rowIndexEnd) { | |
cat(i,k,expd.dat[i,c],expd.dat[k,c], | |
calcWeber(expd.dat[i,c],expd.dat[k,c]),'\n') | |
nodeCombs <- rbind(nodeCombs, | |
c(i,k,expd.dat[i,c],expd.dat[k,c],calcWeber(expd.dat[i,c],expd.dat[k,c]))) | |
} | |
} | |
colnames(nodeCombs) <- c("index1", "index2", "value1", "value2", "K") | |
return(nodeCombs) | |
} | |
calcWeber <- function(a,b) { | |
return( abs(a - b) / b ) | |
} | |
calcWebersForRows(1, 4, 35) | |
calcWebersForRows(8, 11, 42) | |
for (n in unique(expd.dat[which(expd.dat$eletype == "node"),9]) ) { | |
numeros <- which(expd.dat[which(expd.dat$eletype == "node"),9] == n) | |
cat(which(expd.dat[which(expd.dat$eletype == "node"),9] == n),'\n') | |
} | |
countContiguous <- function(numeros) { | |
contig <- list() | |
k = 1; | |
contig[[k]] <- vector() | |
for (i in length(numeros)) { | |
if ( numeros[i+1 || i] == numeros[i] + 1 ) { | |
contig[[k]] <- append(i, contig[[k]]) | |
} | |
else { | |
k = k + 1; | |
contig[[k]] <- append(i, contig[[k]]) | |
} | |
} | |
return( contig ) | |
} | |
# Count length of line | |
subNet <- expd.dat[1:7,c(7,8,13,33)] | |
for (i in which(is.na(subNet$xposition))) { | |
# subNet[subNet[i,]$elesource,]$xposition, subNet[subNet[i,]$elesource,]$yposition | |
# subNet[subNet[i,]$eletarget,]$xposition, subNet[subNet[i,]$eletarget,]$yposition | |
dd <- sqrt((subNet[subNet[i,]$elesource,]$xposition - subNet[subNet[i,]$eletarget,]$xposition)^2 + | |
(subNet[subNet[i,]$elesource,]$yposition - subNet[subNet[i,]$eletarget,]$yposition)^2) | |
cat(subNet[i,]$elesource, subNet[i,]$eletarget, dd,'\n') | |
} | |
# Attach an "edgeLength" column to expd.dat | |
edgeLength <- matrix(0, dim(expd.dat)[1]) | |
subNets <- unique(paste(expd.dat$user,'---',expd.dat$network,sep='')) | |
for (s in subNets) { | |
li <- strsplit(s, '---') | |
u <- li[[1]][1] | |
n <- li[[1]][2] | |
inds <- which(expd.dat$network == n & expd.dat$user == u) | |
subNet <- expd.dat[inds,c(7,8,13,33)] | |
tsn <- c() | |
for (i in which(is.na(subNet$xposition))) { | |
dd <- sqrt((subNet[subNet[i,]$elesource,]$xposition - subNet[subNet[i,]$eletarget,]$xposition)^2 + | |
(subNet[subNet[i,]$elesource,]$yposition - subNet[subNet[i,]$eletarget,]$yposition)^2) | |
cat(subNet[i,]$elesource, subNet[i,]$eletarget, dd,'\n') | |
tsn <- append(tsn, dd) | |
} | |
edgeLength[inds[is.na(expd.dat[inds,]$xposition)]] <- tsn | |
} | |
# Calculate Steven's Power Law | |
selnodesonly <- expd.dat[which((expd.dat$selected == 1) & (expd.dat$xposition != 0)),] #selected nodes only | |
selnodesonly <- selnodesonly[which(selnodesonly$nodeEncoding1 == "node border (bin)" | selnodesonly$nodeEncoding1 == "node border (quant)" | selnodesonly$nodeEncoding2 == "node border (bin)" | selnodesonly$nodeEncoding2 == "node border (quant)"),] | |
log(selnodesonly[,35]) | |
# Cannot be calculated because this experiment does not capture data | |
# about the perceived intensity of a data point, rather only the actual intensity | |
# used to visualize the data | |
# Node combinations | |
unique(cbind(expd.nodes$nodeEncoding1,expd.nodes$nodeEncoding2)) | |
# Edge combinations | |
unique(cbind(expd.edges$edgeEncoding1,expd.edges$edgeEncoding2)) | |
# Regression Models | |
glmn <- glm(selected ~ ., data = expd.nodes.1, family = "binomial") | |
glmn <- glm(selected ~ nodeheight + nodeborderwidth + nodeSaturation + nodeValue, data = expd.nodes.1, family = "binomial") | |
glmn <- glm(selected ~ nodeheight + nodeSaturation + nodeValue + nodeborderwidth + nodeshape + network + user, data = expd.dat, family = "binomial") | |
glmn <- glm(selected ~ nodeheight + nodeSaturation + nodeValue + nodeborderwidth + nodeshape, data = expd.nodes.1, family = "binomial") | |
glmn <- glm(selected ~ nodeheight + nodeSaturation + nodeValue + nodeborderwidth, data = expd.nodes.1, family = "binomial") | |
summary(glmn) | |
glme <- glm(selected ~ ., data = expd.edges.1, family = "binomial") | |
summary(glme) | |
library(pscl) | |
# Multinomial Logistic Regression Model | |
library(nnet) | |
mlrm <- multinom(selected ~ ., data = expd.nodes.1) | |
summary(mlrm) | |
# Mixed Effects Model | |
library(lme4) | |
lmn <- lm(selected ~ ., data = expd.nodes.1) | |
summary(lmn) | |
lme <- lm(selected ~ ., data = expd.edges.1) | |
summary(lme) | |
lmner <- lmer(selected ~ nodeheight + nodeSaturation + nodeValue + nodeborderwidth + (1 | user) + (1 | network), data = expd.dat) | |
lmner2 <- lmer(selected ~ nodeheight + nodeSaturation + nodeValue + nodeborderwidth + nodeHue + nodeshape + (1 | user) + (1 | network), data = expd.dat) | |
lmner3 <- lmer(selected ~ nodeheight + nodeSaturation + nodeValue + nodeborderwidth + nodeHue + nodeshape + network + (1 | user), data = expd.dat) | |
lmner4 <- lmer(selected ~ nodeheight + nodeSaturation + nodeValue + nodeborderwidth + nodeshape + network + (1 | user), data = expd.dat) | |
summary(lmner) | |
summary(lmner2) | |
summary(lmner3) | |
summary(lmner4) | |
glme <- glm(selected ~ ., data = expd.edges.1) | |
summary(glme) | |
library(popbio) | |
logi.hist.plot(expd.nodes.1$nodeheight, expd.nodes.1$selected, boxp = F, type = "hist", col="gray", xlabel = "node size") | |
logi.hist.plot(expd.nodes.1$nodeborderwidth, expd.nodes.1$selected, boxp = F, type = "hist", col="gray", xlabel = "node border width") | |
logi.hist.plot(expd.nodes.1$nodeValue, expd.nodes.1$selected, boxp = F, type = "hist", col="gray", xlabel = "node value") | |
logi.hist.plot(expd.nodes.1$nodeSaturation, expd.nodes.1$selected, boxp = F, type = "hist", col="gray", xlabel = "node saturation") | |
logi.hist.plot(expd.nodes.1$numConnected, expd.nodes.1$selected, boxp = F, type = "hist", col="gray", xlabel = "node degree") | |
logi.hist.plot(expd.edges.1$linewidth, expd.edges.1$selected, boxp = F, type = "hist", col="gray", xlabel = "edge width") | |
logi.hist.plot(expd.edges.1$edgeSaturation, expd.edges.1$selected, boxp = F, type = "hist", col="gray", xlabel = "edge saturation") | |
logi.hist.plot(expd.edges.1$edgeValue, expd.edges.1$selected, boxp = F, type = "hist", col="gray", xlabel = "edge value") | |
logi.hist.plot(expd.edges.1$edgeHue, expd.edges.1$selected, boxp = F, type = "hist", col="gray", xlabel = "edge hue") | |
plot(expd.edges.1$edgeValue, expd.edges.1$linewidth, col=ifelse(expd.edges.1$selected, "red", "gray")) | |
pairs(expd.edges.1[,-2], col=ifelse(expd.edges.1$selected, "red", "gray")) | |
plot( performance( prediction(labels = expd.nodes.1$selected, predictions = ifelse(predict(glmn, type = "response") >= 0.1, 1, 0)) ,"tpr","fpr"), main="ROC Curve for Logistic Regression (Nodes)") | |
useCoefs <- function(invec) { return( sum(glmn$coefficients*invec) ) } | |
useCoefsCalcProb <- function(odr) { return( exp(odr) / (exp(odr) + 1)) } | |
useCoefsCalcProb(useCoefs(c(1,27,0,1,0))) | |
pvec <- c() | |
for (i in 1:100) { | |
pvec <- append(pvec, useCoefsCalcProb(useCoefs(c(1,i,0,0,0)))) | |
} | |
plot(1:100, pvec, type="l", ylim=c(0,1), xlim=c(0,100), main="nodeheight vs prob") | |
pvec2 <- c() | |
for (i in 1:10) { | |
pvec2 <- append(pvec2, useCoefsCalcProb(useCoefs(c(1,0,i,0,0)))) | |
} | |
plot(1:10, pvec2, type="l", ylim=c(0,1), xlim=c(0,10), main="nodeborder vs prob") | |
pvec3 <- c() | |
for (i in (1:100/100)) { | |
pvec3 <- append(pvec3, useCoefsCalcProb(useCoefs(c(1,0,0,i,0)))) | |
} | |
plot(1:100, pvec3, type="l", ylim=c(0,1), xlim=c(0,100), main="nodesaturation vs prob") | |
pvec4 <- c() | |
for (i in (1:100/100)) { | |
pvec4 <- append(pvec4, useCoefsCalcProb(useCoefs(c(1,0,0,0,i)))) | |
} | |
plot(1:100, pvec4, type="l", ylim=c(0,1), xlim=c(0,100), main="nodevalue vs prob") | |
# Function for Cohen's D to determine effect size | |
cohens_d <- function(x, y) { | |
lx <- length(x)- 1 | |
ly <- length(y)- 1 | |
md <- abs(mean(x) - mean(y)) ## mean difference (numerator) | |
csd <- lx * var(x) + ly * var(y) | |
csd <- csd/(lx + ly) | |
csd <- sqrt(csd) ## common sd computation | |
cd <- md/csd ## cohen's d | |
} | |
# > res <- cohens_d(expd.dat$nodeheight[expd.dat$selected == 1], expd.dat$nodeheight[expd.dat$selected == 0]) | |
# > res | |
# [1] 0.02592582 | |
# > res <- cohens_d(expd.dat$nodeborderwidth[expd.dat$selected == 1], expd.dat$nodeborderwidth[expd.dat$selected == 0]) | |
# > res | |
# [1] 1.01351 | |
library(RColorBrewer) | |
g1 <- erdos.renyi.game(11, 1) | |
V(g1)$size <- c(sample(15:20, 10, replace=T), sample(21:30, 1, replace=T)) | |
# V(g1)$color <- brewer.pal(n = 11, name = "RdBu")[order(V(g1)$size)] | |
V(g1)$color <- c(rep(hsv(h = 0.2, s = 0.5, v = 0.8), 10), rep(hsv(h = 0.2, s = 1, v = 0.3), 1)) | |
plot(g1) | |
get.edgelist(g1) | |
# NLME | |
library(nlme) | |
plot(expd.nodes.1) | |
model1 = lm(selected ~ ., data=expd.nodes.1) | |
summary(model1) | |
model2 = lme(selected ~ ., data=expd.nodes.1, random= ~1|network) | |
summary(model2) | |
######################################### | |
col2rgb(htmlcolors) | |
black #000000 0,0,0 | |
silver #C0C0C0 192,192,192 | |
gray #808080 128,128,128 | |
white #FFFFFF 255,255,255 | |
maroon #800000 128,0,0 | |
red #FF0000 255,0,0 | |
purple #800080 128,0,128 | |
fuchsia #FF00FF 255,0,255 | |
green #008000 0,128,0 | |
lime #00FF00 0,255,0 | |
olive #808000 128,128,0 | |
yellow #FFFF00 255,255,0 | |
navy #000080 0,0,128 | |
blue #0000FF 0,0,255 | |
teal #008080 0,128,128 | |
aqua #00FFFF 0,255,255 | |
# https://www.w3.org/TR/css3-color/ | |
eucColor <- function(inhex) { | |
htmlcolornames <- c("black", "silver", "gray", "white", "maroon", "red", "purple", | |
"fuchsia", "green", "lime", "olive", "yellow", "navy", "blue", "teal", | |
"aqua") | |
htmlcolors <- c("#000000", | |
"#C0C0C0", | |
"#808080", | |
"#FFFFFF", | |
"#800000", | |
"#FF0000", | |
"#800080", | |
"#FF00FF", | |
"#008000", | |
"#00FF00", | |
"#808000", | |
"#FFFF00", | |
"#000080", | |
"#0000FF", | |
"#008080", | |
"#00FFFF") | |
qrgb <- col2rgb(inhex) | |
htem <- matrix(c((col2rgb(htmlcolors)[1,] - qrgb[1,])^2, | |
(col2rgb(htmlcolors)[2,] - qrgb[2,])^2, | |
(col2rgb(htmlcolors)[3,] - qrgb[3,])^2), 16, 3) | |
vals <- sqrt(apply(htem, FUN=sum, MAR=1)) | |
return( htmlcolornames[which(vals == min(vals))] ) | |
} | |
"#cadef1" "#dde8f8" "#eff3ff" "#c6dbef" | |
eucColor("#cadef1") | |
eucColor("#dde8f8") | |
eucColor("#eff3ff") | |
eucColor("#c6dbef") | |
eucColor("#999999") | |
sapply(expd.dat$nodebackground, FUN=eucColor) | |
sapply(expd.dat$linecolor, FUN=eucColor) | |
# RF UTILITIES | |
library(rfUtilities) | |
multi.collinear(expd.nodes.1[,2:7]) | |
# This shows that I can remove nodeheight from the model since it mirrors nodewidth | |
multi.collinear(expd.edges.1[,-3]) | |
# No multicollinearity | |
multi.collinear(expd.both[,c(-3, -7, -10, -15)]) | |
# In addition to nodeheight, windowheight, windowwidth, and nodeBrightness | |
# may be removed due to collinearity | |
# RF UTILITIES CLASS BALANCE | |
# https://cran.r-project.org/web/packages/rfUtilities/rfUtilities.pdf | |
rf.nodes.1.balanced <- rf.classBalance(ydata = expd.nodes.1[,4], xdata = expd.nodes.1[,c(2,3,5,6)]) | |
# Future Functions Below | |
clickmap <- function() { | |
#plot(as.numeric(as.character(expd$clickX)) / as.numeric(as.character(expd$windowWidth)), as.numeric(as.character(expd$clickY)) / as.numeric(as.character(expd$windowHeight))) | |
plot(as.numeric(as.character(expd$xpos[which(expd$selected == 0)])) / as.numeric(as.character(expd$windowWidth[which(expd$selected == 0)])), as.numeric(as.character(expd$ypos[which(expd$selected == 0)])) / as.numeric(as.character(expd$windowHeight[which(expd$selected == 0)])), col="gray", | |
xlab="Normalized X Coordinate Position", | |
ylab="Normalized Y Coordinate Position", | |
main="Click Map of Selected Nodes Versus Unselected") | |
points(as.numeric(as.character(expd$xpos[which(expd$selected == 1)])) / as.numeric(as.character(expd$windowWidth[which(expd$selected == 1)])), as.numeric(as.character(expd$ypos[which(expd$selected == 1)])) / as.numeric(as.character(expd$windowHeight[which(expd$selected == 1)])), col="red", pch=2) | |
} | |
# Basic Visualizations | |
clickmap() | |
barplot(table(expd[which(expd$selected == 1),3]), horiz = T, las=1) | |
barplot(table(paste(expd[which(expd$selected == 1),4],expd[which(expd$selected == 1),5])), horiz=T, las=1) | |
tpch <- rep(1,dim(expd)[1]) | |
tpch[which(expd.mod$selected == 1)] <- 1 | |
ts <- rep(1,dim(expd)[1]) | |
ts[which(expd.mod$selected == 1)] <- 1 | |
pairs(expd.mod[,2:4], col=as.character(expd$nodecolor), pch=tpch, cex=ts) | |
pairs(expd.mod[,2:4], col=ifelse(expd.mod$selected == 1, as.character(expd$nodecolor), "gray"), pch=tpch, cex=ts) | |
#euclidian distance from center | |
for (f in levels(expd[,1])) { | |
cat(f,'\n') | |
#expd[which(expd[,1] == f),12] <- calcDistanceFromCenterOfNetwork(f) | |
expd[which(expd[,1] == f),13] <- calcDistanceFromCenterOfNetworkDoubleEnc(f) | |
} | |
calcDistanceFromCenterOfNetworkDoubleEnc <- function(file) { | |
return( c(expd[which(expd[,1] == file),10] - mean(expd[which(expd[,1] == file),11]) + expd[which(expd[,1] == file),10] - mean(expd[which(expd[,1] == file),11]))^2 ) | |
} | |
# Add centrality data to data frame | |
expdwcent <- expd | |
expdwcent <- data.frame(cbind(expdwcent, | |
rep(centralization.betweenness(genemania.network.graph)$res, dim(expd)[1]), | |
rep(centralization.closeness(genemania.network.graph)$res, dim(expd)[1]), | |
rep(centralization.degree(genemania.network.graph)$res, dim(expd)[1]), | |
rep(centralization.evcent(genemania.network.graph)$vector, dim(expd)[1]) | |
)) | |
colnames(expdwcent) <- c("file","id", "name", "encoding1", "encoding2", "nodecolor", "nodeshape", "nodeborder", "nodesize", "xpos", "ypos", "selected", "clickX", "clickY", "windowHeight", "windowWidth", "betweenness", "closeness", "degree", "eigenvector") | |
colnames(expd) <- c("file","id", "name", "encoding1", "encoding2", "nodecolor", "nodeshape", "nodeborder", "nodesize", "xpos", "ypos", "selected","distCent") | |
expd[,13] <- as.numeric(as.character(expd[,13])) | |
expd <- as.data.frame(cbind(expd[,1:11],expd[,13],expd[,12])) | |
colnames(expd) <- c(colnames(expd)[1:11],"distCent","selected") | |
library(lme4) | |
expd.model1 <- lmer(as.numeric(selected) ~ as.numeric(nodeborder) + (1|name) + (1|file), data=expd) | |
expd.model2 <- lmer(as.numeric(selected) ~ as.numeric(nodesize) + (1|name) + (1|file), data=expd) | |
anova(expd.model1,expd.model2) | |
summary(expd.model1) | |
coef(expd.model1) | |
summary(lm(as.numeric(selected) ~ as.numeric(nodeborder) + log10(distCent) + as.numeric(nodesize) + name, data=expd)) | |
mod.coef <- coef(lmer(as.numeric(selected) ~ as.numeric(nodeborder) + as.numeric(nodesize) + nodecolor + as.numeric(xpos) + as.numeric(ypos) + (1|name) + (1|file) + (1|encoding1) + (1|encoding2), data=expd)) | |
mod.coef$name | |
heatmap(as.matrix(mod.coef$name), margins = c(10,10)) | |
# randomly sampling an equal number rows of zero and one selection values | |
rsexpd <- expd[c(c(sample(which(expd[,11] == 0), length(which(expd[,11] == 1)))),c(which(expd[,11] == 1))),] | |
coef(lmer(as.numeric(selected) ~ as.numeric(nodesize) + (1|name) + (1|file), data=rsexpd)) | |
coef(lmer(as.numeric(selected) ~ as.numeric(nodesize) + (1|name) + (1|encoding), data=rsexpd)) | |
summary(lmer(as.numeric(selected) ~ as.numeric(nodesize) + as.numeric(nodeborder) + log10(distCent) + (1|name) + (1|encoding), data=rsexpd)) | |
coef(lmer(as.numeric(selected) ~ as.numeric(nodesize) + as.numeric(nodeborder) + log10(distCent) + (1|name) + (1|encoding), data=rsexpd)) | |
# Let's use an RF | |
library(randomForest) | |
expd.mod <- expd[,c(1,3,6:12)] #until 13 if I want to include distCent | |
#rf1 <- randomForest(as.numeric(selected) ~ ., data=expd, importance=TRUE, proximity=TRUE) | |
rf1 <- randomForest(as.numeric(selected) ~ ., data=expd.mod, importance=TRUE, proximity=TRUE) | |
print(rf1) | |
rf1$importance | |
varImpPlot(rf1,type=2) | |
voodoo <- c() | |
for (i in 1:dim(expd)[1]) { | |
if (expd[i,4] == "#999") { | |
voodoo <- append(voodoo, t(col2rgb("#999999"))) | |
} | |
else { | |
voodoo <- append(voodoo, t(col2rgb(expd[i,4]))) | |
} | |
} | |
mycolors <- t(matrix(voodoo, 3, length(voodoo)/3)) | |
#r, g, b cols | |
expd.mod <- data.frame(cbind(expd[,3],mycolors[,1:3],expd[,7:18])) #until 13 if I want distCent | |
colnames(expd.mod) <- c("name", "R", "G", "B", colnames(expd)[7:18]) | |
rf2 <- randomForest(as.numeric(selected) ~ ., data=expd.mod, importance=TRUE, proximity=TRUE, do.trace = TRUE) | |
print(rf2) | |
rf2$importance | |
varImpPlot(rf2,type=2) | |
# unsupervised | |
expd.urf <- randomForest(expd.mod[, -11]) | |
MDSplot(expd.urf, expd$selected) | |
#regression | |
predict(rf2, expd.mod[sample(which(expd.mod[,11] == 1), 1),-11]) | |
predict(rf2, expd.mod[sample(which(expd.mod[,11] == 0), 1),-11]) | |
plot(rf2$predicted) | |
# optimizing mtry | |
tuneRF(x = expd.mod[,-11], y = expd.mod[,11], plot = T, doBest = T) | |
#trying to balance classes for RF | |
expd.mod.bal <- expd.mod[c(c(sample(which(expd.mod[,11] == 0), length(which(expd.mod[,11] == 1)))),c(which(expd.mod[,11] == 1))),] | |
tuneRF(x = expd.mod.bal[,-11], y = expd.mod.bal[,11], plot = T, doBest = T) | |
rf3 <- randomForest(as.numeric(selected) ~ ., data=expd.mod.bal, importance=TRUE, proximity=TRUE, do.trace = F, mtry=2) | |
print(rf3) | |
rf3$importance | |
varImpPlot(rf3,type=2) | |
rf4 <- randomForest(selected ~ ., data=expd.mod, importance=TRUE, proximity=TRUE, do.trace = F, mtry=2, strata = selected, sampsize = sum(expd.mod[,11] == 1)) | |
print(rf4) | |
rf4$importance | |
varImpPlot(rf3,type=2) | |
#compare balanced vs unbalanced | |
library(ROCR) | |
rf3.perf = performance( prediction(labels = expd.mod.bal$selected, predictions = rf3$predicted) ,"tpr","fpr") | |
rf4.perf = performance( prediction(labels = expd.mod$selected, predictions = rf4$predicted) ,"tpr","fpr") | |
#plot the curve | |
plot(rf4.perf,main="ROC Curve for Random Forest",col=2,lwd=2) | |
lines(unlist(rf3.perf@x.values),unlist(rf3.perf@y.values), col=4, lwd=2) | |
abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
#compute area under curve | |
auc.rf3 <- performance( prediction(labels = expd.mod.bal$selected, predictions = rf3$predicted) ,"auc") | |
auc.rf4 <- performance( prediction(labels = expd.mod$selected, predictions = rf4$predicted) ,"auc") | |
auc.rf3 <- unlist(slot(auc.rf3, "y.values")) | |
auc.rf4 <- unlist(slot(auc.rf4, "y.values")) | |
minauc<-min(round(auc.rf3, digits = 2)) | |
maxauc<-max(round(auc.rf3, digits = 2)) | |
minauct <- paste(c("min(AUC) = "),minauc,sep="") | |
maxauct <- paste(c("max(AUC) = "),maxauc,sep="") | |
minauct | |
maxauct | |
minauc<-min(round(auc.rf4, digits = 2)) | |
maxauc<-max(round(auc.rf4, digits = 2)) | |
minauct <- paste(c("min(AUC) = "),minauc,sep="") | |
maxauct <- paste(c("max(AUC) = "),maxauc,sep="") | |
minauct | |
maxauct | |
# Threshold that Neil provided | |
gthresh <- function(numNodes) { return( ceiling(dim(combn(1:numNodes, 2))[2]*(log(numNodes)/numNodes)) ) } | |
plot(10:300, unlist(lapply(10:300, FUN=gthresh)), type="l") | |
# More ideas for color analysis | |
t(rgb2hsv((col2rgb(expd.dat$nodebackground)))) | |
library(scatterplot3d) | |
tcolor <- rgb2hsv((col2rgb(expd.dat$nodebackground))) | |
scatterplot3d(tcolor[1,], tcolor[2,], tcolor[3,], color = expd.dat$nodebackground) | |
panel.hist <- function(x, ...) | |
{ | |
usr <- par("usr"); on.exit(par(usr)) | |
par(usr = c(usr[1:2], 0, 1.5) ) | |
h <- hist(x, plot = FALSE) | |
breaks <- h$breaks; nB <- length(breaks) | |
y <- h$counts; y <- y/max(y) | |
rect(breaks[-nB], 0, breaks[-1], y, ...) | |
} | |
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...) | |
{ | |
usr <- par("usr"); on.exit(par(usr)) | |
par(usr = c(0, 1, 0, 1)) | |
r <- abs(cor(x, y)) | |
txt <- format(c(r, 0.123456789), digits = digits)[1] | |
txt <- paste0(prefix, txt) | |
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt) | |
text(0.5, 0.5, txt, cex = cex.cor * r) | |
} | |
pairs(t(rgb2hsv((col2rgb(expd.dat$nodebackground)))), col = expd.dat$nodebackground, upper.panel=panel.cor,diag.panel=panel.hist) | |
pairs(t(rgb2hsv((col2rgb(expd.dat$nodebackground)))), col = expd.dat$nodebackground, upper.panel = NULL,diag.panel=panel.hist) | |
hist(tcolor[1,], main = "Distribution of Hues") | |
hist(tcolor[2,], main = "Distribution of Saturation") | |
hist(tcolor[3,], main = "Distribution of Values") | |
# library(jsonlite) | |
# tn <- as.data.frame(fromJSON(gsub("\'", "\"", "[{ 'data': { 'id': '1', 'name' : 'ENSG00000068793', 'dimension' : 'area', 'value' : '4.40646151205377' } },{ 'data': { 'id': '2', 'name' : 'ENSG00000162627', 'dimension' : 'area', 'value' : '5.38202560777306' } },{ 'data': { 'id': '3', 'name' : 'ENSG00000170266', 'dimension' : 'area', 'value' : '1.26156626101008' } },{ 'data': { 'id': '4', 'name' : 'ENSG00000175315', 'dimension' : 'area', 'value' : '4.40646151205377' } },{ 'data': { 'id': '5', 'source': '1', 'target': '2', 'dimension': 'weight', 'value':'0.000085'} },{ 'data': { 'id': '6', 'source': '1', 'target': '3', 'dimension': 'weight', 'value':'0.000037'} },{ 'data': { 'id': '7', 'source': '2', 'target': '3', 'dimension': 'weight', 'value':'0.000086'} },{ 'data': { 'id': '8', 'source': '3', 'target': '4', 'dimension': 'weight', 'value':'0.000099'} }]"))) | |
# nodeRows <- which(!is.na(tn[1:dim(tn)[1],]$name)) | |
# edgeRows <- which(is.na(tn[1:dim(tn)[1],]$name)) | |
# | |
# edgeData <- cbind(tn[edgeRows,]$source,tn[edgeRows,]$target, tn[edgeRows,]$value) | |
# colnames(edgeData) <- c("from", "to", "weight") | |
# | |
# nodeData <- cbind(tn[nodeRows,]$id, tn[nodeRows,]$value, tn[nodeRows,]$name) | |
# colnames(nodeData) <- c("id", "area", "name") | |
# | |
# igobj <- graph.data.frame(edgeData, directed = F, vertices = nodeData) | |
# for (v in 1:length(V(igobj))) { | |
# print(length(neighbors(igobj, v, mode=("in")))) | |
# } | |
# | |
### At Neil's request, creating single variable models and obtaining AUC to compare to RF AUC | |
library(pscl) | |
glm.nodes.numConnected <- glm(selected ~ numConnected, data=expd.nodes.2, family=binomial(link="logit")) | |
glm.nodes.network <- glm(selected ~ network, data=expd.nodes.2, family=binomial(link="logit")) | |
glm.nodes.nodeheight <- glm(selected ~ nodeheight, data=expd.nodes.2, family=binomial(link="logit")) | |
glm.nodes.nodeshape <- glm(selected ~ nodeshape, data=expd.nodes.2, family=binomial(link="logit")) | |
glm.nodes.nodeborderwidth <- glm(selected ~ nodeborderwidth, data=expd.nodes.2, family=binomial(link="logit")) | |
glm.nodes.nodeHue <- glm(selected ~ nodeHue, data=expd.nodes.2, family=binomial(link="logit")) | |
glm.nodes.nodeSaturation <- glm(selected ~ nodeSaturation, data=expd.nodes.2, family=binomial(link="logit")) | |
glm.nodes.nodeValue <- glm(selected ~ nodeValue, data=expd.nodes.2, family=binomial(link="logit")) | |
glm.nodes.xposition <- glm(selected ~ xposition, data=expd.nodes.2, family=binomial(link="logit")) | |
glm.nodes.yposition <- glm(selected ~ yposition, data=expd.nodes.2, family=binomial(link="logit")) | |
glm.nodes.radiusDistance <- glm(selected ~ radius_distance, data=expd.nodes.2, family=binomial(link="logit")) | |
glm.nodes.all <- glm(selected ~ ., data=expd.nodes.2[,-6], family=binomial(link="logit")) | |
summary(glm.nodes.numConnected) | |
summary(glm.nodes.network) # nonsensical | |
summary(glm.nodes.nodeshape) | |
summary(glm.nodes.nodeborderwidth) | |
summary(glm.nodes.nodeHue) | |
summary(glm.nodes.nodeValue) | |
summary(glm.nodes.nodeSaturation) | |
summary(glm.nodes.xposition) | |
summary(glm.nodes.yposition) | |
summary(glm.nodes.radiusDistance) | |
summary(glm.nodes.all) | |
# Probability that nodeSaturation is noticeable | |
# 1 / (1 + 2.718^(0.24852 - 0.74362*1) ) = 0.44 | |
# 1 / (1 + 2.718^(0.24852 - 0.74362*0) ) = 0.62 | |
singleVariableGlmToProbability <- function(glmmodel,inval) { | |
intercept <- glmmodel$coefficients[1] | |
coefficient <- glmmodel$coefficients[2] | |
return( 1 / (1 + exp(-intercept - coefficient*inval)) ) | |
} | |
# Let's train a model with 66% of data, and test with 33% | |
noderows.sample.1 <- sample(1:dim(expd.nodes.1)[1], round(dim(expd.nodes.1)[1]*.66), replace=F) | |
noderows.sample.1remaining <- which(1:dim(expd.nodes.1)[1] %in% noderows.sample.1) | |
noderows.sample.2 <- sample(1:dim(expd.nodes.1)[1], round(dim(expd.nodes.1)[1]*.66), replace=F) | |
noderows.sample.2remaining <- which(1:dim(expd.nodes.1)[1] %in% noderows.sample.1) | |
glm.nodes.numConnected.noderows.sample.1 <- glm(selected ~ numConnected, data=expd.nodes.1[noderows.sample.1,], family=binomial(link="logit")) | |
glm.nodes.numConnected.noderows.sample.1.predictions <- predict.glm(glm.nodes.numConnected.noderows.sample.1, expd.nodes.1[noderows.sample.1remaining,]) | |
perf.glm.nodes.numConnected <- performance( prediction(labels = expd.nodes.1$selected[noderows.sample.1remaining], predictions = glm.nodes.numConnected.noderows.sample.1.predictions) ,"tpr","fpr") | |
auc.glm.nodes.numConnected <- performance( prediction(labels = expd.nodes.1$selected[noderows.sample.1remaining], predictions = glm.nodes.numConnected.noderows.sample.1.predictions) ,"auc") | |
auc.glm.nodes.numConnected <- unlist(slot(auc.glm.nodes.numConnected, "y.values")) | |
plot(perf.glm.nodes.numConnected,main="ROC Curve for Random Forest (Nodes)",col=2,lwd=2) | |
abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
trainAndEvaluateModel <- function(dataset, trainingRows, testRows, singleVariable) { | |
glmmodel <- glm(selected ~ eval(parse(text=singleVariable)), data=dataset[trainingRows,], family=binomial(link="logit")) | |
print(glmmodel) | |
glmmodel.predictions <- predict.glm(glmmodel, dataset[testRows,]) | |
perf.glmmodel <- performance( prediction(labels = dataset$selected[testRows], predictions = glmmodel.predictions) ,"tpr","fpr") | |
auc.glmmodel <- performance( prediction(labels = dataset$selected[testRows], predictions = glmmodel.predictions) ,"auc") | |
auc.glmmodel <- unlist(slot(auc.glmmodel, "y.values")) | |
plot(perf.glmmodel,main=paste("ROC Curve for", singleVariable, deparse(substitute(dataset)), sep=" "),col=2,lwd=2) | |
#lines(unlist(perf.glm.nodes.numConnected@x.values),unlist(perf.glm.nodes.numConnected@y.values), col=4, lwd=2) | |
abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
print(auc.glmmodel) | |
} | |
trainAndEvaluateModelAllVariables <- function(dataset, trainingRows, testRows) { | |
glmmodel <- glm(selected ~ ., data=expd.nodes.1[trainingRows,-6], family=binomial(link="logit")) | |
print(glmmodel) | |
glmmodel.predictions <- predict.glm(glmmodel, expd.nodes.1[testRows,-6]) | |
perf.glmmodel <- performance( prediction(labels = expd.nodes.1$selected[testRows], predictions = glmmodel.predictions) ,"tpr","fpr") | |
auc.glmmodel <- performance( prediction(labels = expd.nodes.1$selected[testRows], predictions = glmmodel.predictions) ,"auc") | |
auc.glmmodel <- unlist(slot(auc.glmmodel, "y.values")) | |
plot(perf.glmmodel,main=paste("ROC Curve for", ".", deparse(substitute(dataset)), sep=" "),col=2,lwd=2) | |
#lines(unlist(perf.glm.nodes.numConnected@x.values),unlist(perf.glm.nodes.numConnected@y.values), col=4, lwd=2) | |
abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
print(auc.glmmodel) | |
} | |
#trainAndEvaluateModelAllVariables(expd.nodes.1, noderows.sample.1, noderows.sample.1remaining) | |
sampleDataAndEvaluate <- function(dataset, singleVariable, proportionToTrain) { | |
noderows.sampled <- sample(1:dim(dataset)[1], round(dim(dataset)[1]*proportionToTrain), replace=F) | |
noderows.remaining <- which(1:dim(dataset)[1] %in% noderows.sampled) | |
trainAndEvaluateModel(dataset, noderows.sampled, noderows.remaining, singleVariable) | |
} | |
sampleDataAndEvaluateAllVariables <- function(dataset, proportionToTrain) { | |
noderows.sampled <- sample(1:dim(dataset)[1], round(dim(dataset)[1]*proportionToTrain), replace=F) | |
noderows.remaining <- which(1:dim(dataset)[1] %in% noderows.sampled) | |
trainAndEvaluateModelAllVariables(dataset, noderows.sampled, noderows.remaining) | |
} | |
# sampleDataAndEvaluate(expd.nodes.1, "numConnected", 0.66) | |
# sampleDataAndEvaluate(expd.nodes.1, "nodeborderwidth", 0.66) | |
# sampleDataAndEvaluate(expd.nodes.1, "nodeheight", 0.66) | |
# sampleDataAndEvaluate(expd.nodes.1, "nodeValue", 0.66) | |
# sampleDataAndEvaluate(expd.nodes.1, "nodeSaturation", 0.66) | |
# sampleDataAndEvaluate(expd.nodes.1, "nodeHue", 0.66) | |
# sampleDataAndEvaluate(expd.nodes.1, "nodeshape", 0.66) | |
# sampleDataAndEvaluate(expd.nodes.1, "network", 0.66) | |
# sampleDataAndEvaluateAllVariables(expd.nodes.1, 0.66) | |
sampleDataAndEvaluate(expd.nodes.2, "numConnected", 0.66) | |
sampleDataAndEvaluate(expd.nodes.2, "nodeborderwidth", 0.66) | |
sampleDataAndEvaluate(expd.nodes.2, "nodeheight", 0.66) | |
sampleDataAndEvaluate(expd.nodes.2, "nodeValue", 0.66) | |
sampleDataAndEvaluate(expd.nodes.2, "nodeSaturation", 0.66) | |
sampleDataAndEvaluate(expd.nodes.2, "nodeHue", 0.66) | |
sampleDataAndEvaluate(expd.nodes.2, "nodeshape", 0.66) | |
sampleDataAndEvaluate(expd.nodes.2, "network", 0.66) | |
sampleDataAndEvaluate(expd.nodes.2, "xposition", 0.66) | |
sampleDataAndEvaluate(expd.nodes.2, "yposition", 0.66) | |
sampleDataAndEvaluate(expd.nodes.2, "radius_distance", 0.66) | |
sampleDataAndEvaluateAllVariables(expd.nodes.2, 0.66) | |
sampleDataAndEvaluate(expd.edges.1, "linewidth", 0.66) | |
sampleDataAndEvaluate(expd.edges.1, "edgeValue", 0.66) | |
sampleDataAndEvaluate(expd.edges.1, "edgeSaturation", 0.66) | |
sampleDataAndEvaluate(expd.edges.1, "edgeHue", 0.66) | |
sampleDataAndEvaluate(expd.edges.1, "linestyle", 0.66) | |
sampleDataAndEvaluate(expd.edges.1, "network", 0.66) | |
sampleDataAndEvaluate(expd.edges.1, "edgeLength", 0.66) | |
sampleDataAndEvaluateAllVariables(expd.edges.1, 0.66) | |
# And single variable RFs | |
rf1.nodes.numConnected <- randomForest(selected ~ numConnected, data=expd.nodes.2[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.nodeborderwidth <- randomForest(selected ~ nodeborderwidth, data=expd.nodes.2[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.nodeheight <- randomForest(selected ~ nodeheight, data=expd.nodes.2[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.nodeValue <- randomForest(selected ~ nodeValue, data=expd.nodes.2[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.nodeSaturation <- randomForest(selected ~ nodeSaturation, data=expd.nodes.2[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.nodeHue <- randomForest(selected ~ nodeHue, data=expd.nodes.2[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.nodeshape <- randomForest(selected ~ nodeshape, data=expd.nodes.2[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.network <- randomForest(selected ~ network, data=expd.nodes.2[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.xposition <- randomForest(selected ~ xposition, data=expd.nodes.2[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.yposition <- randomForest(selected ~ yposition, data=expd.nodes.2[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.nodes.raduisDistance <- randomForest(selected ~ radius_distance, data=expd.nodes.2[,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.edges.linewidth <- randomForest(selected ~ linewidth, data=expd.edges.1, importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.edges.linestyle <- randomForest(selected ~ linestyle, data=expd.edges.1, importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.edges.edgeValue <- randomForest(selected ~ edgeValue, data=expd.edges.1, importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.edges.edgeSaturation <- randomForest(selected ~ edgeSaturation, data=expd.edges.1, importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.edges.edgeHue <- randomForest(selected ~ edgeHue, data=expd.edges.1, importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.edges.network <- randomForest(selected ~ network, data=expd.edges.1, importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
rf1.edges.edgeLength <- randomForest(selected ~ edgeLength, data=expd.edges.1, importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
makeROCforSingleVariableRF <- function(inputModel, singleVariable, dataset) { | |
perf.model <- performance( prediction(labels = dataset$selected, predictions = inputModel$predicted) ,"tpr","fpr") | |
auc.nodes <- performance( prediction(labels = dataset$selected, predictions = inputModel$predicted) ,"auc") | |
auc.nodes <- unlist(slot(auc.nodes, "y.values")) | |
plot(perf.model,main=paste("ROC Curve for", singleVariable, "(Nodes)", sep=" "),col=2,lwd=2) | |
abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
print(min(auc.nodes)) | |
} | |
makeROCforSingleVariableRF(rf1.nodes.numConnected, "numConnected", expd.nodes.2) | |
makeROCforSingleVariableRF(rf1.nodes.nodeborderwidth, "nodeborderwidth", expd.nodes.2) | |
makeROCforSingleVariableRF(rf1.nodes.nodeheight, "nodeheight", expd.nodes.2) | |
makeROCforSingleVariableRF(rf1.nodes.nodeValue, "nodeValue", expd.nodes.2) | |
makeROCforSingleVariableRF(rf1.nodes.nodeSaturation, "nodeSaturation", expd.nodes.2) | |
makeROCforSingleVariableRF(rf1.nodes.nodeHue, "nodeHue", expd.nodes.2) | |
makeROCforSingleVariableRF(rf1.nodes.nodeshape, "nodeshape", expd.nodes.2) | |
makeROCforSingleVariableRF(rf1.nodes.network, "network", expd.nodes.2) | |
makeROCforSingleVariableRF(rf1.nodes.xposition, "xposition", expd.nodes.2) | |
makeROCforSingleVariableRF(rf1.nodes.yposition, "yposition", expd.nodes.2) | |
makeROCforSingleVariableRF(rf1.nodes.radiusDistance, "radius_distance", expd.nodes.2) | |
makeROCforSingleVariableRF(rf1.edges.network, "network", expd.edges.1) | |
makeROCforSingleVariableRF(rf1.edges.linewidth, "linewidth", expd.edges.1) | |
makeROCforSingleVariableRF(rf1.edges.edgeValue, "edgeValue", expd.edges.1) | |
makeROCforSingleVariableRF(rf1.edges.edgeSaturation, "edgeSaturation", expd.edges.1) | |
makeROCforSingleVariableRF(rf1.edges.edgeHue, "edgeHue", expd.edges.1) | |
makeROCforSingleVariableRF(rf1.edges.linestyle, "linestyle", expd.edges.1) | |
makeROCforSingleVariableRF(rf1.edges.edgeLength, "edgeLength", expd.edges.1) | |
# makeROCforSingleVariableRFByProportion <- function(singleVariable, dataset, proportion) { | |
# sampledRF <- sample(1:dim(dataset)[1], dim(dataset)[1]*proportion) | |
# remainingRF <- which(!(1:dim(dataset)[1] %in% sampledRF)) | |
# inputModel <- randomForest(selected ~ str(singleVariable), data=dataset[sampledRF,-6], importance=TRUE, proximity=TRUE, keep.inbag = TRUE) | |
# perf.model <- performance( prediction(labels = dataset$selected[sampledRF], predictions = inputModel$predicted[sampledRF]) ,"tpr","fpr") | |
# auc.nodes <- performance( prediction(labels = dataset$selected[sampledRF], predictions = inputModel$predicted[sampledRF]) ,"auc") | |
# auc.nodes <- unlist(slot(auc.nodes, "y.values")) | |
# | |
# plot(perf.model,main=paste("ROC Curve for", singleVariable, "(Nodes)", sep=" "),col=2,lwd=2) | |
# abline(a=0,b=1,lwd=2,lty=2,col="gray") | |
# print(min(auc.nodes)) | |
# } | |
# | |
# makeROCforSingleVariableRFByProportion("nodeheight", expd.nodes.2, 0.66) | |
# Using the newer models that include XY position don't perform better than RF | |
# > sampleDataAndEvaluate(expd.nodes.2, "xposition", 0.66) | |
# [1] 0.5014012 | |
# > sampleDataAndEvaluate(expd.nodes.2, "yposition", 0.66) | |
# [1] 0.5677483 | |
# > sampleDataAndEvaluateAllVariables(expd.nodes.2, 0.66) | |
# [1] 0.7436241 | |
# > sampleDataAndEvaluateAllVariables(expd.nodes.3, 0.66) | |
# [1] 0.7436092 | |
# consider making a sampleAndEvaluateRF function | |
# Neil wanted a prediction interval from the RF models, if possible | |
library(RFinfer) | |
head(rfPredVar(rf1.nodes.1, rf.data = expd.nodes.1, CI=TRUE, tree.type = "rf", prog.bar = TRUE), n=50) | |
library(randomForestCI) | |
head(randomForestInfJack(rf1.nodes.1, expd.nodes.1, calibrate = TRUE)) | |
# Code to analyze each of the 34 networks used in study | |
makeNetwork <- function(li) { | |
nnn <- c() | |
for (i in li) { | |
nnn <- rbind(nnn,i) | |
print(i) | |
} | |
return( graph_from_edgelist(nnn, directed=F) ) | |
} | |
net_1_g <- makeNetwork(list(c(1,2), c(1,3), c(2,4))) | |
plot(net_1_g) | |
net_2_g <- makeNetwork(list(c(1,2),c(1,3),c(2,4))) | |
plot(net_2_g) | |
net_3_g <- makeNetwork(list(c(1,2),c(1,3),c(1,4),c(3,4))) | |
plot(net_3_g) | |
analyzeNetwork(net_3_g) | |
analyzeNetworkGlobal(net_3_g) | |
net_4_g <- makeNetwork(list(c(1,3),c(2,3),c(2,4),c())) | |
plot(net_4_g) | |
net_5_g <- makeNetwork(list(c(1,2),c(2,4),c(3,4),c())) | |
plot(net_5_g) | |
net_6_g <- makeNetwork(list(c(1,3),c(1,4),c(2,4),c())) | |
plot(net_6_g) | |
net_7_g <- makeNetwork(list(c(1,2),c(1,3),c(3,4),c())) | |
plot(net_7_g) | |
net_8_g <- makeNetwork(list(c(1,3),c(1,4),c(2,3),c(3,4))) | |
plot(net_8_g) | |
analyzeNetwork(net_8_g) | |
analyzeNetworkGlobal(net_8_g) | |
net_9_g <- makeNetwork(list(c(1,3),c(1,4),c(2,4),c())) | |
plot(net_9_g) | |
net_10_g <- makeNetwork(list(c(1,4),c(2,3),c(2,4),c())) | |
plot(net_10_g) | |
net_11_g <- makeNetwork(list(c(1,2),c(2,4),c(3,4),c())) | |
plot(net_11_g) | |
net_12_g <- makeNetwork(list(c(1,3),c(2,3),c(2,4),c())) | |
plot(net_12_g) | |
net_13_g <- makeNetwork(list(c(1,2),c(1,4),c(2,3),c(2,4))) | |
plot(net_13_g) | |
analyzeNetwork(net_13_g) | |
analyzeNetworkGlobal(net_13_g) | |
net_14_g <- makeNetwork(list(c(1,2),c(1,4),c(2,4),c(3,4))) | |
plot(net_14_g) | |
analyzeNetwork(net_14_g) | |
analyzeNetworkGlobal(net_14_g) | |
net_15_g <- makeNetwork(list(c(1,3),c(1,4),c(2,3),c())) | |
plot(net_15_g) | |
net_16_g <- makeNetwork(list(c(1,3),c(1,4),c(2,3),c(2,4))) | |
plot(net_16_g) | |
analyzeNetwork(net_16_g) | |
analyzeNetworkGlobal(net_16_g) | |
net_17_g <- makeNetwork(list(c(1,3),c(2,3),c(2,4),c(3,4))) | |
plot(net_17_g) | |
analyzeNetwork(net_17_g) | |
analyzeNetworkGlobal(net_17_g) | |
net_18_g <- makeNetwork(list(c(1,2),c(1,3),c(1,4),c(2,3))) | |
plot(net_18_g) | |
analyzeNetwork(net_18_g) | |
analyzeNetworkGlobal(net_18_g) | |
net_19_g <- makeNetwork(list(c(1,3),c(1,4),c(2,3),c())) | |
plot(net_19_g) | |
net_20_g <- makeNetwork(list(c(1,4),c(2,3),c(3,4),c())) | |
plot(net_20_g) | |
net_21_g <- makeNetwork(list(c(1,2),c(1,4),c(2,3),c(3,4))) | |
plot(net_21_g) | |
net_22_g <- makeNetwork(list(c(1,2),c(2,4),c(3,4),c())) | |
plot(net_22_g) | |
net_23_g <- makeNetwork(list(c(1,2),c(1,3),c(2,4),c())) | |
plot(net_23_g) | |
net_24_g <- makeNetwork(list(c(1,2),c(1,3),c(2,4),c(3,4))) | |
plot(net_24_g) | |
analyzeNetwork(net_24_g) | |
analyzeNetworkGlobal(net_24_g) | |
net_25_g <- makeNetwork(list(c(1,2),c(1,3),c(2,4),c())) | |
plot(net_25_g) | |
net_26_g <- makeNetwork(list(c(1,3),c(1,4),c(2,3),c(2,4))) | |
plot(net_26_g) | |
analyzeNetwork(net_26_g) | |
analyzeNetworkGlobal(net_26_g) | |
net_27_g <- makeNetwork(list(c(1,2),c(2,4),c(3,4),c())) | |
plot(net_27_g) | |
net_28_g <- makeNetwork(list(c(1,2),c(1,3),c(3,4),c())) | |
plot(net_28_g) | |
net_29_g <- makeNetwork(list(c(1,2),c(2,3),c(2,4),c(3,4))) | |
plot(net_29_g) | |
analyzeNetwork(net_29_g) | |
analyzeNetworkGlobal(net_29_g) | |
net_30_g <- makeNetwork(list(c(1,4),c(2,3),c(2,4),c())) | |
plot(net_30_g) | |
net_31_g <- makeNetwork(list(c(1,2),c(1,3),c(1,4),c(2,3))) | |
plot(net_31_g) | |
analyzeNetwork(net_31_g) | |
analyzeNetworkGlobal(net_31_g) | |
net_32_g <- makeNetwork(list(c(1,3),c(1,4),c(2,4),c())) | |
plot(net_32_g) | |
analyzeNetwork(net_32_g) | |
net_33_g <- makeNetwork(list(c(1,3),c(1,4),c(2,3),c())) | |
plot(net_33_g) | |
analyzeNetwork(net_33_g) | |
net_34_g <- makeNetwork(list(c(1,4),c(2,3),c(2,4),c())) | |
plot(net_34_g) | |
analyzeNetwork(net_34_g) | |
analyzeNetworkGlobal(net_34_g) # 0 | |
analyzeNetwork <- function(net_1_g) { | |
plot(net_1_g) | |
# E(net_1_g)$Weight <- c(0.000022, 0.000068, 0.000012) | |
# plot(tnstr_g, edge.width = E(tnstr_g)$Weight*100000) | |
print(centralization.degree(net_1_g)$res) | |
print(centralization.betweenness(net_1_g)$res) | |
print(centralization.evcent(net_1_g)$vector) | |
#print(transitivity(net_1_g, type = "global")) | |
print(transitivity(net_1_g, type = "local", isolates = 'zero')) | |
} | |
analyzeNetworkBetweenness <- function(net_1_g) { | |
return(centralization.betweenness(net_1_g)$res) | |
} | |
analyzeNetworkDegree <- function(net_1_g) { | |
return(centralization.degree(net_1_g)$res) | |
} | |
analyzeNetworkCloseness <- function(net_1_g) { | |
return(centralization.closeness(net_1_g)$res) | |
} | |
analyzeNetworkEigenvector <- function(net_1_g) { | |
return(centralization.evcent(net_1_g)$vector) | |
} | |
analyzeNetworkGlobal <- function(net_1_g, silent=FALSE) { | |
plot(net_1_g) | |
if (!silent) { | |
print(transitivity(net_1_g, type = "global")) | |
} | |
else { | |
return(transitivity(net_1_g, type = "global")) | |
} | |
} | |
## Loading in an image and plotting random points | |
# Inspired from: http://stackoverflow.com/questions/12918367/in-r-how-to-plot-with-a-png-as-background | |
library(png) | |
library(jpeg) | |
generateRandomCoordinates <- function() { | |
return( c(sample(1:100 / 100, 1), sample(1:100 / 100, 1)) ) | |
} | |
# It would be nice to select the number of points to be plotted, or perhaps have two separate | |
# functions if that is faster to write | |
selectRandomPointsInImage <- function(imgURL) { | |
ima <- readJPEG(imgURL) | |
plot(0:1, 0:1, type='n', main="Two Random Points", xlab="x", ylab="y") | |
lim <- par() | |
rasterImage(ima, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4]) | |
tpoints <- t(as.matrix(generateRandomCoordinates())) | |
tpoints2 <- t(as.matrix(generateRandomCoordinates())) | |
print(tpoints) | |
print(tpoints2) | |
points(tpoints) | |
points(tpoints2) | |
} | |
selectRandomPointsInImage("/Users/nikhilgopal/Downloads/gkq482f1.jpg") | |
selectRandomNPointsInImage <- function(imgURL, numberOfPoints = 2) { | |
# Function handles pngs and jpegs | |
library(png) | |
library(jpeg) | |
if (numberOfPoints < 1) { | |
stop("Please enter a valid argument for number of points. It must be numeric and > 0") | |
} | |
# ima <- readJPEG(imgURL) | |
ima = tryCatch({ | |
readPNG(imgURL) | |
}, error = function(e) { | |
# Going to try JPEG now | |
readJPEG(imgURL) | |
}, finally = { | |
print("Please use a JPG or PNG") | |
}) | |
plot(0:1, 0:1, type='n', main="Two Random Points", xlab="x", ylab="y") | |
lim <- par() | |
rasterImage(ima, lim$usr[1], lim$usr[3], lim$usr[2], lim$usr[4]) | |
li = list() | |
for (l in 1:numberOfPoints) { | |
li[[l]] = t(as.matrix(generateRandomCoordinates())) | |
points(li[[l]]) | |
} | |
print(li) | |
} | |
selectRandomNPointsInImage("/Users/nikhilgopal/Downloads/gkq482f1.jpg", 2) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment