idblr/Lee_L.R

## Lee_L.R
# ----------------------------------------------------- #
# Calculate Lee's L statistic
#
# Created by: Ian Buller, Ph.D., M.A. (GitHub: @idblr)
# Created on: December 06, 2020
#
# Notes:
# A) Based on code for Bivariate LISA by @rafapereirabr https://gist.github.com/rafapereirabr/5348193abf779625f5e8c5090776a228
# ----------------------------------------------------- #

# Load packages
loadedPackages <- c("boot", "ggplot2", "grDevices", "magrittr", "Matrix", "sf", "spData", "spdep", "stringr", "utils")
suppressMessages(invisible(lapply(loadedPackages, require, character.only = TRUE)))

# ----------------------------------------------------- #
# Load data
utils::data(boston, package = "spData")

# Variables to use in the correlation
X <- boston.c$CMEDV # median values of owner-occupied housing in USD 1000
Y <- boston.c$CRIM # per capita crime

# ----------------------------------------------------- #
# Program a function
## Permutations for Lee's L statistic
## Modification of the lee.mc() function within the {spdep} package
## Saves 'localL' output instead of 'L' output
simula_lee <- function(x, y, listw, nsim = nsim, zero.policy = NULL, na.action = na.fail) {

  if (deparse(substitute(na.action)) == "na.pass")
    stop ("na.pass not permitted")
  na.act <- attr(na.action(cbind(x, y)), "na.action")
  x[na.act] <- NA
  y[na.act] <- NA
  x <- na.action(x)
  y <- na.action(y)
  if (!is.null(na.act)) {
    subset <- !(1:length(listw$neighbours) %in% na.act)
    listw <- subset(listw, subset, zero.policy = zero.policy)
  }
  n <- length(listw$neighbours)
  if ((n != length(x)) | (n != length(y)))
    stop ("objects of different length")
  gamres <- suppressWarnings(nsim > gamma(n + 1))
  if (gamres)
    stop ("nsim too large for this number of observations")
  if (nsim < 1)
    stop ("nsim too small")
  xy <- data.frame(x, y)
  S2 <- sum((unlist(lapply(listw$weights, sum)))^2)

  lee_boot <- function(var, i, ...) {
    return(spdep::lee(x = var[i, 1], y = var[i, 2], ...)$localL)
  }

  res <- boot::boot(xy, statistic = lee_boot, R = nsim, sim = "permutation",
                    listw = listw, n = n, S2 = S2, zero.policy = zero.policy)
}

# ----------------------------------------------------- #
# Adjacency Matrix
lw <- spdep::nb2listw(boston.soi, style = "B", zero.policy = T)
W  <- as(lw, "symmetricMatrix")
W  <- as.matrix(W / Matrix::rowSums(W))
W[which(is.na(W))] <- 0

# ----------------------------------------------------- #
# Calculate the index and its simulated distribution
# for global and local values

# Global Lee's L
spdep::lee.test(x = X,
                y = Y,
                listw = lw,
                zero.policy = TRUE,
                alternative = "two.sided",
                na.action = na.omit)

# Local Lee's L values
m <- spdep::lee(x = X,
                y = Y,
                listw = lw,
                n = length(X),
                zero.policy = TRUE,
                NAOK = TRUE)

# Local Lee's L simulations
local_sims <- simula_lee(x = X,
                         y = Y,
                         listw = lw,
                         nsim = 10000,
                         zero.policy = TRUE,
                         na.action = na.omit)

m_i <- m[[2]]  # local values

# Identify the significant values
alpha <- 0.05  # for a 95% confidence interval
probs <- c(alpha/2, 1-alpha/2)
intervals <- t(apply(t(local_sims[[2]]), 1, function(x) quantile(x, probs = probs)))
sig <- ( m_i < intervals[ , 1] ) | ( m_i > intervals[ , 2] )

# ----------------------------------------------------- #
# Prepare for plotting
map_sf <- sf::st_as_sf(data.frame(boston.utm), coords = c("x", "y"))
map_sf$sig <- sig

# Identify the Lee's L clusters
Xp <- scale(X)[ , 1]
Yp <- scale(Y)[ , 1]

patterns <- as.character(interaction(Xp > 0, W %*% Yp > 0))
patterns <- patterns %>%
  stringr::str_replace_all("TRUE","High") %>%
  stringr::str_replace_all("FALSE","Low")
patterns[map_sf$sig == 0] <- "Not significant"
map_sf$patterns <- patterns

# Rename Lee's L clusters
map_sf$patterns2 <- factor(map_sf$patterns,
                           levels = c("High.High", "High.Low", "Low.High", "Low.Low", "Not significant"),
                           labels = c("High value - High crime", "High value - Low crime", "Low value - High crime","Low value - Low crime", "Not significant"))

# ----------------------------------------------------- #
# Plot
grDevices::png("Lee_L_clusters.png", height = 800, width = 800)
ggplot2::ggplot() +
  ggplot2::geom_sf(data = map_sf, ggplot2::aes(color = patterns2)) +
  ggplot2::scale_color_manual(values = c("red", "pink", "light blue", "dark blue", "grey80")) +
  ggplot2::guides(color = ggplot2::guide_legend(title = "Lee's L clusters")) +
  ggplot2::theme_minimal()
grDevices::dev.off()

## Lee_L_clusters.png

      
    Raw
  

              Lee_L_clusters.png
	# ----------------------------------------------------- #
	# Calculate Lee's L statistic
	#
	# Created by: Ian Buller, Ph.D., M.A. (GitHub: @idblr)
	# Created on: December 06, 2020
	#
	# Notes:
	# A) Based on code for Bivariate LISA by @rafapereirabr https://gist.github.com/rafapereirabr/5348193abf779625f5e8c5090776a228
	# ----------------------------------------------------- #

	# Load packages
	loadedPackages <- c("boot", "ggplot2", "grDevices", "magrittr", "Matrix", "sf", "spData", "spdep", "stringr", "utils")
	suppressMessages(invisible(lapply(loadedPackages, require, character.only = TRUE)))

	# ----------------------------------------------------- #
	# Load data
	utils::data(boston, package = "spData")

	# Variables to use in the correlation
	X <- boston.c$CMEDV # median values of owner-occupied housing in USD 1000
	Y <- boston.c$CRIM # per capita crime

	# ----------------------------------------------------- #
	# Program a function
	## Permutations for Lee's L statistic
	## Modification of the lee.mc() function within the {spdep} package
	## Saves 'localL' output instead of 'L' output
	simula_lee <- function(x, y, listw, nsim = nsim, zero.policy = NULL, na.action = na.fail) {

	if (deparse(substitute(na.action)) == "na.pass")
	stop ("na.pass not permitted")
	na.act <- attr(na.action(cbind(x, y)), "na.action")
	x[na.act] <- NA
	y[na.act] <- NA
	x <- na.action(x)
	y <- na.action(y)
	if (!is.null(na.act)) {
	subset <- !(1:length(listw$neighbours) %in% na.act)
	listw <- subset(listw, subset, zero.policy = zero.policy)
	}
	n <- length(listw$neighbours)
	if ((n != length(x)) \| (n != length(y)))
	stop ("objects of different length")
	gamres <- suppressWarnings(nsim > gamma(n + 1))
	if (gamres)
	stop ("nsim too large for this number of observations")
	if (nsim < 1)
	stop ("nsim too small")
	xy <- data.frame(x, y)
	S2 <- sum((unlist(lapply(listw$weights, sum)))^2)

	lee_boot <- function(var, i, ...) {
	return(spdep::lee(x = var[i, 1], y = var[i, 2], ...)$localL)
	}

	res <- boot::boot(xy, statistic = lee_boot, R = nsim, sim = "permutation",
	listw = listw, n = n, S2 = S2, zero.policy = zero.policy)
	}

	# ----------------------------------------------------- #
	# Adjacency Matrix
	lw <- spdep::nb2listw(boston.soi, style = "B", zero.policy = T)
	W <- as(lw, "symmetricMatrix")
	W <- as.matrix(W / Matrix::rowSums(W))
	W[which(is.na(W))] <- 0

	# ----------------------------------------------------- #
	# Calculate the index and its simulated distribution
	# for global and local values

	# Global Lee's L
	spdep::lee.test(x = X,
	y = Y,
	listw = lw,
	zero.policy = TRUE,
	alternative = "two.sided",
	na.action = na.omit)

	# Local Lee's L values
	m <- spdep::lee(x = X,
	y = Y,
	listw = lw,
	n = length(X),
	zero.policy = TRUE,
	NAOK = TRUE)

	# Local Lee's L simulations
	local_sims <- simula_lee(x = X,
	y = Y,
	listw = lw,
	nsim = 10000,
	zero.policy = TRUE,
	na.action = na.omit)

	m_i <- m[[2]] # local values

	# Identify the significant values
	alpha <- 0.05 # for a 95% confidence interval
	probs <- c(alpha/2, 1-alpha/2)
	intervals <- t(apply(t(local_sims[[2]]), 1, function(x) quantile(x, probs = probs)))
	sig <- ( m_i < intervals[ , 1] ) \| ( m_i > intervals[ , 2] )

	# ----------------------------------------------------- #
	# Prepare for plotting
	map_sf <- sf::st_as_sf(data.frame(boston.utm), coords = c("x", "y"))
	map_sf$sig <- sig

	# Identify the Lee's L clusters
	Xp <- scale(X)[ , 1]
	Yp <- scale(Y)[ , 1]

	patterns <- as.character(interaction(Xp > 0, W %*% Yp > 0))
	patterns <- patterns %>%
	stringr::str_replace_all("TRUE","High") %>%
	stringr::str_replace_all("FALSE","Low")
	patterns[map_sf$sig == 0] <- "Not significant"
	map_sf$patterns <- patterns

	# Rename Lee's L clusters
	map_sf$patterns2 <- factor(map_sf$patterns,
	levels = c("High.High", "High.Low", "Low.High", "Low.Low", "Not significant"),
	labels = c("High value - High crime", "High value - Low crime", "Low value - High crime","Low value - Low crime", "Not significant"))

	# ----------------------------------------------------- #
	# Plot
	grDevices::png("Lee_L_clusters.png", height = 800, width = 800)
	ggplot2::ggplot() +
	ggplot2::geom_sf(data = map_sf, ggplot2::aes(color = patterns2)) +
	ggplot2::scale_color_manual(values = c("red", "pink", "light blue", "dark blue", "grey80")) +
	ggplot2::guides(color = ggplot2::guide_legend(title = "Lee's L clusters")) +
	ggplot2::theme_minimal()
	grDevices::dev.off()