Skip to content

Instantly share code, notes, and snippets.

@Arnie97
Last active April 15, 2019 15:05
Show Gist options
  • Save Arnie97/1796bccfd145a4f87a9d3c2759ef9188 to your computer and use it in GitHub Desktop.
Save Arnie97/1796bccfd145a4f87a9d3c2759ef9188 to your computer and use it in GitHub Desktop.
#!/usr/bin/env Rscript
library('tools')
library('RColorBrewer')
library('ScottKnottESD')
suppressMessages(library('gdata'))
# Run the two-level Scott-Knott ESD analysis for one spreadsheet.
#
# The spreadsheet is read without a header, transposed, and its columns are
# cut into consecutive groups of `length(clusterers)` columns, one group per
# project. Each project is ranked with sk_esd(), the ranks are tie-averaged
# with normalizeRank(), and the per-project rank rows are handed to plotSK().
#
# Args:
#   file:       path of the spreadsheet passed to read.xls().
#   clusterers: a single comma-separated string of clusterer names; the
#               names become the column names of the rank matrix.
#
# Returns: whatever plotSK() returns.
doubleSK <- function(file, clusterers) {
  data <- read.xls(file, header = FALSE)
  data <- as.data.frame(t(data))

  # split the projects
  clusterers <- unlist(strsplit(clusterers, ','))
  n.clusterers <- length(clusterers)
  if (n.clusterers == 0) {
    stop('no clusterer names were given', call. = FALSE)
  }
  n.projects <- ncol(data) / n.clusterers
  print(paste(file, n.clusterers, 'clusterers in', n.projects, 'projects'))

  # Only complete groups of columns form a project; trailing columns that do
  # not fill a whole group are ignored, but no longer silently.
  n.whole <- ncol(data) %/% n.clusterers
  if (n.whole == 0) {
    stop(file, ': not enough columns for a single project', call. = FALSE)
  }
  if (ncol(data) %% n.clusterers != 0) {
    warning(file, ': column count is not a multiple of the number of ',
            'clusterers; trailing columns are ignored', call. = FALSE)
  }

  # Zero-based project offsets, one rank vector per project.
  ranks <- lapply(seq_len(n.whole) - 1, function(i) {
    beg <- n.clusterers * i + 1
    end <- n.clusterers * (i + 1)
    project <- data[beg:end]
    sk <- normalizeRank(sk_esd(project)$groups)
    # Restore the original column order using the numeric part of each name.
    # NOTE(review): presumably the names look like 'V<index>' -- confirm.
    sk[order(as.numeric(substring(names(sk), 2)))]
  })

  # Bind once instead of growing the matrix inside the loop.
  projects <- do.call(rbind, ranks)
  colnames(projects) <- clusterers
  rownames(projects) <- NULL
  plotSK(file, projects)
}
# Replace every run of consecutive equal values by the run's mean position.
#
# For a run occupying positions beg..end of `rank`, each element of the run
# becomes (beg + end) / 2, e.g. c(1, 1, 2, 3, 3, 3) -> c(1.5, 1.5, 3, 5, 5, 5).
# Only adjacent equal values form a run; names of `rank` are preserved.
#
# Args:
#   rank: an atomic vector of group labels.
#
# Returns: `rank` with its values replaced as described above; an empty
#   input is returned unchanged.
normalizeRank <- function(rank) {
  # An empty vector has no runs; the former 1:length(rank) loop failed here.
  if (length(rank) == 0) {
    return(rank)
  }
  runs <- rle(as.vector(rank))
  ends <- cumsum(runs$lengths)
  begs <- ends - runs$lengths + 1
  # `rank[] <-` overwrites the values while keeping the names attribute.
  rank[] <- rep((begs + ends) / 2, times = runs$lengths)
  rank
}
# Rank the clusterers across projects with sk_esd(), then report and plot.
#
# Args:
#   file:     path of the input file; its extension is stripped and the
#             remaining base name is reused for the '.csv', '.pdf' and
#             '.png' outputs.
#   projects: the rank matrix passed to sk_esd().
#
# Side effects: prints `projects` and the summary table, and writes
#   <base>.csv, <base>.pdf and <base>.png.
#
# Returns: NULL, invisibly.
plotSK <- function(file, projects) {
  print(projects)
  sk <- sk_esd(projects)
  file <- file_path_sans_ext(file)
  # Tag the base name so that `file + 'ext'` dispatches to `+.FileName`.
  class(file) <- c('FileName', class(file))

  # print the mean and standard deviation
  df <- as.data.frame(sk$m.inf)
  # NOTE(review): 'std' is column 3 of m.inf minus the mean; presumably that
  # column holds an upper bound (mean + sd) -- confirm against ScottKnottESD.
  values <- cbind(sk$groups, df$mean, df[3] - df$mean)
  colnames(values) <- c('group', 'mean', 'std')
  print(values)
  write.csv(values, file + 'csv')

  # define the color palette: one Dark2 color per group, in reverse order
  color.order <- c(4, 1, 2, 3, 6, 5, 7, 8, 1, 2, 3, 6, 5, 7, 8, 1, 2, 3, 6, 5, 7, 8)
  colors <- brewer.pal(8, 'Dark2')[color.order]
  colors <- rev(colors[seq_len(max(sk$groups))])
  draw <- function() {
    plot(sk, main = '', title = '', xlab = '', ylab = 'Rankings', las = 2,
         col = colors)
  }

  # specify the graph size (in inches)
  width <- 7
  height <- 3.5
  dpi <- 240

  # specify the font size (in points, i.e. 1/72 inches)
  text.size <- 12

  # plot in different formats; each device is closed even if drawing fails,
  # so the output is flushed and devices do not pile up across files
  pdf(file + 'pdf', width, height, pointsize = text.size)
  tryCatch(draw(), finally = dev.off())
  png(file + 'png', width, height, pointsize = text.size, units = 'in', res = dpi)
  tryCatch(draw(), finally = dev.off())
  invisible(NULL)
}
# S3 `+` method for 'FileName' objects: `base + ext` joins the two operands
# with a dot and yields a plain character vector, e.g. 'report' + 'csv'
# gives 'report.csv'.
`+.FileName` <- function(self, ext) {
  dotted <- paste(self, ext, sep = '.')
  dotted
}
# Command line: <clusterers> <file> [<file> ...]
# The first argument is the comma-separated list of clusterer names; every
# following argument is a spreadsheet processed by doubleSK().
argv <- commandArgs(trailingOnly = TRUE)
if (length(argv) < 2) {
  stop('usage: Rscript <script> <clusterers> <file> [<file> ...]',
       call. = FALSE)
}
clusterers <- argv[1]
# argv[-1] stays correct for any argument count, whereas 2:length(argv)
# counts downwards when fewer than two arguments are given.
files <- argv[-1]
for (file in files) {
  doubleSK(file, clusterers)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment