phlippieb/bsr-scripts.R

## bsr-scripts.R
plot.full <- function (datafile, ...)
{
	data <- read.table( datafile, quote="\"" );
	dataframe <- data.frame(x=1:length(rowMeans(data)), y=rowMeans(data));
	plot(dataframe, type='l');

	b <- bsr2 (dataframe$y, dataframe$x, 100);
	slope <- diff( b$fit$model[,2] ) / diff ( predict(b$fit) );

	plot.2pwla(dataframe$x, dataframe$y, cont=FALSE, add=TRUE, type='l', col='red', ...);
	legend ( 800, 100, legend=paste( "m0:  ", slope[1], "\nm1:  ", slope[length(slope)-2], sep="" ));
}

plot.plain <- function (datafile, ...)
{
	data <- read.table( datafile, quote="\"" );
	plot( rowMeans(data), type='l', ...);
}

plot.la <- function (datafile, ...)
{
	data <- read.table( datafile, quote="\"" );
	dataframe <- data.frame(x=1:length(rowMeans(data)), y=rowMeans(data));
	plot.2pwla(dataframe$x, dataframe$y, type='l', cont=FALSE, ...);
}

plot.full.tofile <- function (datafile, outfile=null, ...)
{
	if (is.null(outfile)) {
		outfile <- infile;
	}
	jpeg(outfile, width=1000, height=1000);
	plot.full(datafile, ...);
	dev.off();
}

#	param opsize.popsize.dimension:
#		a string containing the concatenation of:
#		- the population size
#		- the problem function name
#		- the number of dimensions of the problem
#	param ...
#		additonal parameters for the plotting function...
#	note: expects data to be in "./finalrdata/"
plot.perproblem <- function (popsize.problem.dimension, ...) {
	path <- "finalrdata/";
	algorithms <- c("cpso.", "spso.", "gbest.", "lbest.", "gcpso.");

	maxDivVal <- 0;
	for (a in algorithms ) {
		fullFilePath <- paste ( path, a, popsize.problem.dimension, ".div", sep='' );
		if ( file.exists( fullFilePath ) ) {
			maxDivVal <- max( maxDivVal, max(rowMeans(read.table(fullFilePath))) );
		}
	}

	mustAdd <- FALSE;
	linecolour <- 0;
	rainbowSize  <- 6;
	legendColors <- c();
	legendWidths <- c();
	legendTypes  <- c();
	legendTexts  <- c();

	for (a in algorithms ) {
		linecolour <- linecolour + 1;
		fullFilePath <- paste ( path, a, popsize.problem.dimension, ".div", sep='' );
		if ( file.exists( fullFilePath ) ) {
			plot.la ( fullFilePath, add=mustAdd, ylim=c(0,maxDivVal), col=rainbow(rainbowSize)[linecolour], ... );
			mustAdd <- TRUE;
			legendColors <- c(legendColors, rainbow(rainbowSize)[linecolour]);
			legendWidths <- c(legendWidths, 2.5);
			legendTypes  <- c(legendTypes, 1);
			legendTexts  <- c(legendTexts, a);
		}
	}
	if (mustAdd) {
		text(120, maxDivVal-40, popsize.problem.dimension)
		legend (100, maxDivVal - 50, legendTexts, lty=legendTypes, lwd=legendWidths, col=legendColors);
	}
}


# param b: the bsr object
# returns: the slope of the left/first linear approximation
bsr.slope1 <- function (b) {
	return (predict(b$fit)[2] - predict(b$fit)[1]);
}

# param b: the bsr object
# returns: the slope of the right/second linear approximation
bsr.slope2 <- function (b) {
	return (predict(b$fit)[200] - predict(b$fit)[199]);
}

# param b: the bsr object
# param i: the x-value to find the slope at (between i and i+1)
# return: the slope between i and i+1.
bsr.slopeAt <- function (b, i) {
	return (predict(b$fit)[i+1] - predict(b$fit)[i]);
}

# param b: the bsr object
# returns: the coordinates of the point where the two linear approximations meet
bsr.breakpt <- function (b) {
	for ( i in 1:(length(b$fit$model[,2])-3) ) {
		if ( round(bsr.slopeAt(b, i),10) != round(bsr.slopeAt(b, i+1),10) ) {
			return (c(i+1, predict(b$fit)[i+1]));
		}
	}
	return (c(-1,-1));
}


# one-size-fits-all plot method to plot pwla, using below functions.
# note:
#	does not work on just a bunch of data (took me a month to learn)
#	you need a dataframe with an x and y, or else you can generate an x for the function:
#	method 1 (recommended):
#		mydataframe <- data.frame( x=1:1001, y=mydata)
#			where mydata's length is 1001. You'll get an error if the sizes don't match.
#		plot.2pwla(mydataframe$x, mydataframe$y)
#	method 2:
#		plot.2pwla(x=1:1001, y=mydata)
#			More compact, error-prone, not tested yet.
plot.2pwla <- function(x, y, res=100, ...) {
	plot.bsr(bsr2(x, y, res), ...)
}


# updated of above method to auto-calculate length
#	the number of candidate breakpoints is equal to the length of y.
plot2.2pwla <- function (y, ...) {
	plot.bsr(bsr2(1:length(y), y, length(y)), cont=FALSE, ...)
}


# Broken stick regression.
# note:
#	the 'b' argument is a vector of candidate values for the join point
#	eg:
#		b <- seq(lo, hi, length=100),
#		where lo and hi are suitably chosen bounds on the location of the join point
bsr <- function(x,y,b) {
	if(length(b)==1) {
		 x1 <- ifelse(x<=b,0,x-b)
		 temp <- lm(y~x+x1)
		 ss <- sum(resid(temp)^2)
		 rslt <- list(fit=temp,ss=ss,b=b)
		 class(rslt) <- "bsr"
		 return(rslt)
	}
	temp <- list()
	for(v in b) {
		 x1 <- ifelse(x<=v,0,x-v)
		 fff <- lm(y~x+x1)
		 temp[[paste(v)]] <- sum(resid(fff)^2)
	}
	ss <- unname(unlist(temp))
	v <- b[which.min(ss)]
	x1 <- ifelse(x<=v,0,x-v)
	rslt <- list(fit=lm(y~x+x1),ss=unname(unlist(temp)),
		      b=b,ss.min=min(ss),b.min=v)
	class(rslt) <- "bsr"
	rslt
}


# Broken stick regression.
# note:
#	same as above (bsr), except range of candidate values is automatically entire range of x
#	instead of passing b, then, the passed res (resolution) determines how many candidate values to consider.
bsr2 <- function(x,y,res) {
	b <- seq(min(x), max(x), length=res)
	if(length(b)==1) {
		 x1 <- ifelse(x<=b,0,x-b)
		 temp <- lm(y~x+x1)
		 ss <- sum(resid(temp)^2)
		 rslt <- list(fit=temp,ss=ss,b=b)
		 class(rslt) <- "bsr"
		 return(rslt)
	}
	temp <- list()
	for(v in b) {
		 x1 <- ifelse(x<=v,0,x-v)
		 fff <- lm(y~x+x1)
		 temp[[paste(v)]] <- sum(resid(fff)^2)
	}
	ss <- unname(unlist(temp))
	v <- b[which.min(ss)]
	x1 <- ifelse(x<=v,0,x-v)
	rslt <- list(fit=lm(y~x+x1),ss=unname(unlist(temp)),
		      b=b,ss.min=min(ss),b.min=v)
	class(rslt) <- "bsr"
	rslt
}


# Plot method for broken stick regressions.
# note:
#	pass function call to bsr as x. (at least it works!)
plot.bsr <- function(x,cont=FALSE,add=FALSE,rr=NULL,npts=101,...) {
	fit <- x
	if(cont) {
		 if(is.null(rr)) {
		         x <- fit$fit$model$x
		         rr <- range(x)
		 }
		 xx <- seq(rr[1],rr[2],length=npts)
		 b  <- fit$b.min
		 if(is.null(b)) b <- fit$b
		 x1 <- ifelse(xx<=b,0,xx-b)
		 yh <- predict(fit$fit,newdata=data.frame(x=xx,x1=x1))
	} else {
		 xx <- fit$fit$model[,2]
		 yh <- predict(fit$fit)
	}
	if(add) lines(x=xx,y=yh,...) else plot(x=xx,y=yh,...)
}
	plot.full <- function (datafile, ...)
	{
	data <- read.table( datafile, quote="\"" );
	dataframe <- data.frame(x=1:length(rowMeans(data)), y=rowMeans(data));
	plot(dataframe, type='l');

	b <- bsr2 (dataframe$y, dataframe$x, 100);
	slope <- diff( b$fit$model[,2] ) / diff ( predict(b$fit) );

	plot.2pwla(dataframe$x, dataframe$y, cont=FALSE, add=TRUE, type='l', col='red', ...);
	legend ( 800, 100, legend=paste( "m0: ", slope[1], "\nm1: ", slope[length(slope)-2], sep="" ));
	}

	plot.plain <- function (datafile, ...)
	{
	data <- read.table( datafile, quote="\"" );
	plot( rowMeans(data), type='l', ...);
	}

	plot.la <- function (datafile, ...)
	{
	data <- read.table( datafile, quote="\"" );
	dataframe <- data.frame(x=1:length(rowMeans(data)), y=rowMeans(data));
	plot.2pwla(dataframe$x, dataframe$y, type='l', cont=FALSE, ...);
	}

	plot.full.tofile <- function (datafile, outfile=null, ...)
	{
	if (is.null(outfile)) {
	outfile <- infile;
	}
	jpeg(outfile, width=1000, height=1000);
	plot.full(datafile, ...);
	dev.off();
	}

	# param opsize.popsize.dimension:
	# a string containing the concatenation of:
	# - the population size
	# - the problem function name
	# - the number of dimensions of the problem
	# param ...
	# additonal parameters for the plotting function...
	# note: expects data to be in "./finalrdata/"
	plot.perproblem <- function (popsize.problem.dimension, ...) {
	path <- "finalrdata/";
	algorithms <- c("cpso.", "spso.", "gbest.", "lbest.", "gcpso.");

	maxDivVal <- 0;
	for (a in algorithms ) {
	fullFilePath <- paste ( path, a, popsize.problem.dimension, ".div", sep='' );
	if ( file.exists( fullFilePath ) ) {
	maxDivVal <- max( maxDivVal, max(rowMeans(read.table(fullFilePath))) );
	}
	}

	mustAdd <- FALSE;
	linecolour <- 0;
	rainbowSize <- 6;
	legendColors <- c();
	legendWidths <- c();
	legendTypes <- c();
	legendTexts <- c();

	for (a in algorithms ) {
	linecolour <- linecolour + 1;
	fullFilePath <- paste ( path, a, popsize.problem.dimension, ".div", sep='' );
	if ( file.exists( fullFilePath ) ) {
	plot.la ( fullFilePath, add=mustAdd, ylim=c(0,maxDivVal), col=rainbow(rainbowSize)[linecolour], ... );
	mustAdd <- TRUE;
	legendColors <- c(legendColors, rainbow(rainbowSize)[linecolour]);
	legendWidths <- c(legendWidths, 2.5);
	legendTypes <- c(legendTypes, 1);
	legendTexts <- c(legendTexts, a);
	}
	}
	if (mustAdd) {
	text(120, maxDivVal-40, popsize.problem.dimension)
	legend (100, maxDivVal - 50, legendTexts, lty=legendTypes, lwd=legendWidths, col=legendColors);
	}
	}


	# param b: the bsr object
	# returns: the slope of the left/first linear approximation
	bsr.slope1 <- function (b) {
	return (predict(b$fit)[2] - predict(b$fit)[1]);
	}

	# param b: the bsr object
	# returns: the slope of the right/second linear approximation
	bsr.slope2 <- function (b) {
	return (predict(b$fit)[200] - predict(b$fit)[199]);
	}

	# param b: the bsr object
	# param i: the x-value to find the slope at (between i and i+1)
	# return: the slope between i and i+1.
	bsr.slopeAt <- function (b, i) {
	return (predict(b$fit)[i+1] - predict(b$fit)[i]);
	}

	# param b: the bsr object
	# returns: the coordinates of the point where the two linear approximations meet
	bsr.breakpt <- function (b) {
	for ( i in 1:(length(b$fit$model[,2])-3) ) {
	if ( round(bsr.slopeAt(b, i),10) != round(bsr.slopeAt(b, i+1),10) ) {
	return (c(i+1, predict(b$fit)[i+1]));
	}
	}
	return (c(-1,-1));
	}


	# one-size-fits-all plot method to plot pwla, using below functions.
	# note:
	# does not work on just a bunch of data (took me a month to learn)
	# you need a dataframe with an x and y, or else you can generate an x for the function:
	# method 1 (recommended):
	# mydataframe <- data.frame( x=1:1001, y=mydata)
	# where mydata's length is 1001. You'll get an error if the sizes don't match.
	# plot.2pwla(mydataframe$x, mydataframe$y)
	# method 2:
	# plot.2pwla(x=1:1001, y=mydata)
	# More compact, error-prone, not tested yet.
	plot.2pwla <- function(x, y, res=100, ...) {
	plot.bsr(bsr2(x, y, res), ...)
	}


	# updated of above method to auto-calculate length
	# the number of candidate breakpoints is equal to the length of y.
	plot2.2pwla <- function (y, ...) {
	plot.bsr(bsr2(1:length(y), y, length(y)), cont=FALSE, ...)
	}


	# Broken stick regression.
	# note:
	# the 'b' argument is a vector of candidate values for the join point
	# eg:
	# b <- seq(lo, hi, length=100),
	# where lo and hi are suitably chosen bounds on the location of the join point
	bsr <- function(x,y,b) {
	if(length(b)==1) {
	x1 <- ifelse(x<=b,0,x-b)
	temp <- lm(y~x+x1)
	ss <- sum(resid(temp)^2)
	rslt <- list(fit=temp,ss=ss,b=b)
	class(rslt) <- "bsr"
	return(rslt)
	}
	temp <- list()
	for(v in b) {
	x1 <- ifelse(x<=v,0,x-v)
	fff <- lm(y~x+x1)
	temp[[paste(v)]] <- sum(resid(fff)^2)
	}
	ss <- unname(unlist(temp))
	v <- b[which.min(ss)]
	x1 <- ifelse(x<=v,0,x-v)
	rslt <- list(fit=lm(y~x+x1),ss=unname(unlist(temp)),
	b=b,ss.min=min(ss),b.min=v)
	class(rslt) <- "bsr"
	rslt
	}


	# Broken stick regression.
	# note:
	# same as above (bsr), except range of candidate values is automatically entire range of x
	# instead of passing b, then, the passed res (resolution) determines how many candidate values to consider.
	bsr2 <- function(x,y,res) {
	b <- seq(min(x), max(x), length=res)
	if(length(b)==1) {
	x1 <- ifelse(x<=b,0,x-b)
	temp <- lm(y~x+x1)
	ss <- sum(resid(temp)^2)
	rslt <- list(fit=temp,ss=ss,b=b)
	class(rslt) <- "bsr"
	return(rslt)
	}
	temp <- list()
	for(v in b) {
	x1 <- ifelse(x<=v,0,x-v)
	fff <- lm(y~x+x1)
	temp[[paste(v)]] <- sum(resid(fff)^2)
	}
	ss <- unname(unlist(temp))
	v <- b[which.min(ss)]
	x1 <- ifelse(x<=v,0,x-v)
	rslt <- list(fit=lm(y~x+x1),ss=unname(unlist(temp)),
	b=b,ss.min=min(ss),b.min=v)
	class(rslt) <- "bsr"
	rslt
	}


	# Plot method for broken stick regressions.
	# note:
	# pass function call to bsr as x. (at least it works!)
	plot.bsr <- function(x,cont=FALSE,add=FALSE,rr=NULL,npts=101,...) {
	fit <- x
	if(cont) {
	if(is.null(rr)) {
	x <- fit$fit$model$x
	rr <- range(x)
	}
	xx <- seq(rr[1],rr[2],length=npts)
	b <- fit$b.min
	if(is.null(b)) b <- fit$b
	x1 <- ifelse(xx<=b,0,xx-b)
	yh <- predict(fit$fit,newdata=data.frame(x=xx,x1=x1))
	} else {
	xx <- fit$fit$model[,2]
	yh <- predict(fit$fit)
	}
	if(add) lines(x=xx,y=yh,...) else plot(x=xx,y=yh,...)
	}