scbrown86/MapCurves.R

## MapCurves.R
# Code from https://www.uva.nl/en/profile/l/o/e.e.vanloon/e.e.vanloon.html

# Implementation of the MapCurves algorithm (DOI 10.1007/s10109-006-0025-x)

mapcurves <- function(A, B, plotGOF = TRUE) {
  ## R implementation of the mapcurves goodness of fit measure
  ## (Hargrove et al. 20006, see full reference below) for comparing
  ## two categorical maps
  ##
  ## usage:
  ##
  ##  out <- mapcurves(mapA,mapB)
  ##
  ## where
  ##   mapA = a matrix or vector with integers, representing a categorical map
  ##   mapB = a matrix or vector with integers, representing a categorical map
  ##   out = a list with the (named) fields
  ##       out$GOF = the mapcurves goodness of fit value (GOF)
  ##       out$Refmap = the map to be used as reference map ('A' or 'B')
  ##       out$GOFtable = the GOF for each pare of classes in maps A and B
  ##       out$mGOF_A2B =
  ##       out$mGOF_B2A = maximum GOF when using B as reference map
  ##       out$BMC_A2B = best matching class (BMC)and corresponding maximum GOF
  ##                       when using map A as reference (data frame with three columns)
  ##       out$BMC_B2A = BMC and corresponding maximum GOF when using map B as reference
  ##                       (data frame with three columns)
  ##
  ## additional requirements:
  ##    - maps A and B should be matrices or vectors of equal size
  ##    - missing values should be coded with NA and are disregarded
  ##       in the comparison
  ##
  ## This implementation is based on the description of the Mapcurves algorithm
  ## by Hargrove et al. in the following paper:
  ##
  ## William W. Hargrove, Forrest M. Hoffman and Paul F. Hessburg (2006)
  ##   Mapcurves: a quantitative method for comparing categorical maps.
  ##   J Geograph Syst, 8, 187–208. DOI 10.1007/s10109-006-0025-x
  ## Emiel van Loon, May 2011
  ## University of Amsterdam
  ## http://staf.science.uva.nl/~vanloon/
  A <- as.vector(A) # required for the unique function
  B <- as.vector(B)
  # identify all unique values in maps A and B,
  # the sort function automatically removes the NA values that
  # are still reported by unique.
  a <- sort(unique(A))
  b <- sort(unique(B))
  nra <- length(a)
  nrb <- length(b)
  tC <- matrix(data = NA, nrow = nra, ncol = nrb)
  rC <- matrix(data = NA, nrow = nra, ncol = nrb)
  for (i in 1:nra) {
    for (j in 1:nrb) {
      tC[i, j] <- sum((A == a[i]) & (B == b[j]), na.rm = TRUE)
    }
  }
  Sa <- rowSums(tC)
  Sb <- colSums(tC)
  for (i in 1:nra) {
    for (j in 1:nrb) {
      rC[i, j] <- (tC[i, j]^2) / (Sa[i] * Sb[j])
    }
  }
  rSa <- rowSums(rC)
  rSb <- colSums(rC)
  sSa <- c(0, sort(rSa), 1)
  sSb <- c(0, sort(rSb), 1)
  # percent of categories larger than this GOF value
  PCLa <- c(1, seq(1, 0, length.out = nra), 0)
  PCLb <- c(1, seq(1, 0, length.out = nrb), 0)
  if (plotGOF) {
    dev.new()
    plot(sSa, PCLa,
      type = "b", pch = 1, col = "blue", lty = 1,
      xlab = "GOF score", ylab = "% of map classes < GOF score"
    )
    lines(sSb, PCLb, type = "b", pch = 1, col = "red", lty = 1)
    # title(main='Mapcurves diagram')
    legend(list(x = 0.7, y = 0.98),
      legend = c("A as reference", "B as reference"),
      col = c("blue", "red"), pch = c(1, 1), lty = c(1, 1)
    )
  }
  # calculate surface under curve, when using
  # respectively map A and map B
  # calculation uses trapezium rule, which is
  # exact for piece-wise linear as in this case
  GOFa <- 0
  for (i in 1:nra) {
    GOFa <- GOFa + (sSa[i + 1] - sSa[i]) * PCLa[i + 1] +
      0.5 * (sSa[i + 1] - sSa[i]) * (PCLa[i] - PCLa[i + 1])
  }
  GOFb <- 0
  for (i in 1:nrb) {
    GOFb <- GOFb + (sSb[i + 1] - sSb[i]) * PCLb[i + 1] +
      0.5 * (sSb[i + 1] - sSb[i]) * (PCLb[i] - PCLb[i + 1])
  }
  # prepare data for output
  if (GOFa >= GOFb) {
    Refmap <- "A"
    GOF <- GOFa
  }
  if (GOFa < GOFb) {
    Refmap <- "B"
    GOF <- GOFb
  }
  GOFtable <- round(rC, 3)
  rownames(GOFtable) <- a
  colnames(GOFtable) <- b
  vb <- vector(mode = "integer", length = nra)
  mg <- vector(mode = "double", length = nra)
  BMC_A2B <- data.frame(A = a, B = vb, mGOF = mg)
  va <- vector(mode = "integer", length = nrb)
  mg <- vector(mode = "double", length = nrb)
  BMC_B2A <- data.frame(A = va, B = b, mGOF = mg)
  for (i in 1:nra) {
    BMC_A2B$mGOF[i] <- round(max(rC[i, ]), 3)
    BMC_A2B$B[i] <- b[ which.max(rC[i, ]) ]
  }
  for (j in 1:nrb) {
    BMC_B2A$mGOF[j] <- round(max(rC[, j]), 3)
    BMC_B2A$A[j] <- a[ which.max(rC[, j]) ]
  }
  return(list(
    GOF = GOF, Refmap = Refmap, GOFtable = GOFtable,
    BMC_A2B = BMC_A2B, BMC_B2A = BMC_B2A
  ))
}

# TEST #
# A <- floor(matrix(runif(100,0,9), 10))
# B <- floor(matrix(runif(100,0,9), 10))
# C1 <- mapcurves(A, B)
	# Code from https://www.uva.nl/en/profile/l/o/e.e.vanloon/e.e.vanloon.html

	# Implementation of the MapCurves algorithm (DOI 10.1007/s10109-006-0025-x)

	mapcurves <- function(A, B, plotGOF = TRUE) {
	## R implementation of the mapcurves goodness of fit measure
	## (Hargrove et al. 20006, see full reference below) for comparing
	## two categorical maps
	##
	## usage:
	##
	## out <- mapcurves(mapA,mapB)
	##
	## where
	## mapA = a matrix or vector with integers, representing a categorical map
	## mapB = a matrix or vector with integers, representing a categorical map
	## out = a list with the (named) fields
	## out$GOF = the mapcurves goodness of fit value (GOF)
	## out$Refmap = the map to be used as reference map ('A' or 'B')
	## out$GOFtable = the GOF for each pare of classes in maps A and B
	## out$mGOF_A2B =
	## out$mGOF_B2A = maximum GOF when using B as reference map
	## out$BMC_A2B = best matching class (BMC)and corresponding maximum GOF
	## when using map A as reference (data frame with three columns)
	## out$BMC_B2A = BMC and corresponding maximum GOF when using map B as reference
	## (data frame with three columns)
	##
	## additional requirements:
	## - maps A and B should be matrices or vectors of equal size
	## - missing values should be coded with NA and are disregarded
	## in the comparison
	##
	## This implementation is based on the description of the Mapcurves algorithm
	## by Hargrove et al. in the following paper:
	##
	## William W. Hargrove, Forrest M. Hoffman and Paul F. Hessburg (2006)
	## Mapcurves: a quantitative method for comparing categorical maps.
	## J Geograph Syst, 8, 187–208. DOI 10.1007/s10109-006-0025-x
	## Emiel van Loon, May 2011
	## University of Amsterdam
	## http://staf.science.uva.nl/~vanloon/
	A <- as.vector(A) # required for the unique function
	B <- as.vector(B)
	# identify all unique values in maps A and B,
	# the sort function automatically removes the NA values that
	# are still reported by unique.
	a <- sort(unique(A))
	b <- sort(unique(B))
	nra <- length(a)
	nrb <- length(b)
	tC <- matrix(data = NA, nrow = nra, ncol = nrb)
	rC <- matrix(data = NA, nrow = nra, ncol = nrb)
	for (i in 1:nra) {
	for (j in 1:nrb) {
	tC[i, j] <- sum((A == a[i]) & (B == b[j]), na.rm = TRUE)
	}
	}
	Sa <- rowSums(tC)
	Sb <- colSums(tC)
	for (i in 1:nra) {
	for (j in 1:nrb) {
	rC[i, j] <- (tC[i, j]^2) / (Sa[i] * Sb[j])
	}
	}
	rSa <- rowSums(rC)
	rSb <- colSums(rC)
	sSa <- c(0, sort(rSa), 1)
	sSb <- c(0, sort(rSb), 1)
	# percent of categories larger than this GOF value
	PCLa <- c(1, seq(1, 0, length.out = nra), 0)
	PCLb <- c(1, seq(1, 0, length.out = nrb), 0)
	if (plotGOF) {
	dev.new()
	plot(sSa, PCLa,
	type = "b", pch = 1, col = "blue", lty = 1,
	xlab = "GOF score", ylab = "% of map classes < GOF score"
	)
	lines(sSb, PCLb, type = "b", pch = 1, col = "red", lty = 1)
	# title(main='Mapcurves diagram')
	legend(list(x = 0.7, y = 0.98),
	legend = c("A as reference", "B as reference"),
	col = c("blue", "red"), pch = c(1, 1), lty = c(1, 1)
	)
	}
	# calculate surface under curve, when using
	# respectively map A and map B
	# calculation uses trapezium rule, which is
	# exact for piece-wise linear as in this case
	GOFa <- 0
	for (i in 1:nra) {
	GOFa <- GOFa + (sSa[i + 1] - sSa[i]) * PCLa[i + 1] +
	0.5 * (sSa[i + 1] - sSa[i]) * (PCLa[i] - PCLa[i + 1])
	}
	GOFb <- 0
	for (i in 1:nrb) {
	GOFb <- GOFb + (sSb[i + 1] - sSb[i]) * PCLb[i + 1] +
	0.5 * (sSb[i + 1] - sSb[i]) * (PCLb[i] - PCLb[i + 1])
	}
	# prepare data for output
	if (GOFa >= GOFb) {
	Refmap <- "A"
	GOF <- GOFa
	}
	if (GOFa < GOFb) {
	Refmap <- "B"
	GOF <- GOFb
	}
	GOFtable <- round(rC, 3)
	rownames(GOFtable) <- a
	colnames(GOFtable) <- b
	vb <- vector(mode = "integer", length = nra)
	mg <- vector(mode = "double", length = nra)
	BMC_A2B <- data.frame(A = a, B = vb, mGOF = mg)
	va <- vector(mode = "integer", length = nrb)
	mg <- vector(mode = "double", length = nrb)
	BMC_B2A <- data.frame(A = va, B = b, mGOF = mg)
	for (i in 1:nra) {
	BMC_A2B$mGOF[i] <- round(max(rC[i, ]), 3)
	BMC_A2B$B[i] <- b[ which.max(rC[i, ]) ]
	}
	for (j in 1:nrb) {
	BMC_B2A$mGOF[j] <- round(max(rC[, j]), 3)
	BMC_B2A$A[j] <- a[ which.max(rC[, j]) ]
	}
	return(list(
	GOF = GOF, Refmap = Refmap, GOFtable = GOFtable,
	BMC_A2B = BMC_A2B, BMC_B2A = BMC_B2A
	))
	}

	# TEST #
	# A <- floor(matrix(runif(100,0,9), 10))
	# B <- floor(matrix(runif(100,0,9), 10))
	# C1 <- mapcurves(A, B)