statguy/diamina.R

## diamina.R
library(spdep)
library(INLA)

# Take a subset of the data.
diamina <- read.csv("~/Downloads/diamina_meadows_125000_sample.csv", sep=";", dec=",")
diamina <- subset(diamina, Xcoord>230000 & Ycoord>6910000)
diamina$habitable <- as.factor(diamina$habitable)
diamina$id <- 1:nrow(diamina)


# Continuos spatial model:

# Without scaling the coordinates we might end up in problems with INLA.
coords.scale <- 1e-6
locations <- as.matrix(diamina[,c("Xcoord","Ycoord")])
locations[,1] <- (locations[,1] + 100000) * 10
locations <- locations * coords.scale
# Construct an estimation mesh.
# The estimates are computed at the node locations and estimates at the observation locations
# are interpolated from those.
mesh <- inla.mesh.2d(locations, max.edge=c(0.005,2), cutoff=0.001)
mesh$n
plot(mesh)
points(locations, col="red", pch='.')

# INLA doesn't support categorical variables for this model, so we need the following trick.
model <- meadows ~ -1 + intercept + habitable2 + habitable3 + sqrt(dfield) + sqrt(driver) + twi + f(s, model=spde)
covar <- as.data.frame(model.matrix(~-1 + habitable + dfield + driver + twi, diamina)[,-1])

# Some data manipulation for INLA.
spde <- inla.spde2.matern(mesh)
index <- inla.spde.make.index("s", spde$n.spde)
A <- inla.spde.make.A(mesh, loc=locations)
stack.obs <- inla.stack(data=list(meadows=diamina$meadows),
                        A=list(A, 1),
                        effects=list(c(index, list(intercept=1)), covar=covar),
                        tag="obs")

result <- inla(model,
               data=inla.stack.data(stack.obs, spde=spde),
               family="binomial",
               control.predictor=list(A=inla.stack.A(stack.obs), compute=TRUE),
               verbose=TRUE)
summary(result)

# Compare observed and predicted values.
index.obs <- inla.stack.index(stack.obs, "obs")$data
cbind(obs=diamina$meadows, pred=round(result$summary.fitted.values$mean[index.obs]))

# Posterior spatial scale.
spde.result <- inla.spde.result(result, "s", spde)
plot(spde.result$marginals.range.nominal[[1]] / coords.scale, type="l")


# Discrete spatial model (Besag-York-Mollié):

diamina <- read.csv("~/Downloads/diamina_meadows_125000_sample.csv", sep=";", dec=",")
diamina <- subset(diamina, Xcoord>230000 & Ycoord>6910000)
diamina$habitable <- as.factor(diamina$habitable)
diamina$id <- 1:nrow(diamina)


# Find neighbours with the k-nearest neighbour algorithm with k=20 (randomly picked).
# The neighbours are marked in a matrix (with indexes i,j) so that i,j = 1 if points i and j are
# neighbours, otherwise i,j = 0.
locations <- as.matrix(diamina[,c("Xcoord","Ycoord")])
knn <- knearneigh(locations, k=20, longlat=FALSE, RANN=FALSE)
nb <- knn2nb(knn)
graph.file <- tempfile()
graph <- nb2INLA(graph.file, nb)

model <- meadows ~ habitable + sqrt(dfield) + sqrt(driver) + twi + f(id, model="bym", graph=graph.file)
result <- inla(model, family="binomial", data=diamina, control.predictor=list(compute=TRUE), verbose=TRUE)
summary(result)

# Compare observed and predicted values.
cbind(obs=diamina$meadows, pred=round(result$summary.fitted.values$mean))
	library(spdep)
	library(INLA)

	# Take a subset of the data.
	diamina <- read.csv("~/Downloads/diamina_meadows_125000_sample.csv", sep=";", dec=",")
	diamina <- subset(diamina, Xcoord>230000 & Ycoord>6910000)
	diamina$habitable <- as.factor(diamina$habitable)
	diamina$id <- 1:nrow(diamina)


	# Continuos spatial model:

	# Without scaling the coordinates we might end up in problems with INLA.
	coords.scale <- 1e-6
	locations <- as.matrix(diamina[,c("Xcoord","Ycoord")])
	locations[,1] <- (locations[,1] + 100000) * 10
	locations <- locations * coords.scale
	# Construct an estimation mesh.
	# The estimates are computed at the node locations and estimates at the observation locations
	# are interpolated from those.
	mesh <- inla.mesh.2d(locations, max.edge=c(0.005,2), cutoff=0.001)
	mesh$n
	plot(mesh)
	points(locations, col="red", pch='.')

	# INLA doesn't support categorical variables for this model, so we need the following trick.
	model <- meadows ~ -1 + intercept + habitable2 + habitable3 + sqrt(dfield) + sqrt(driver) + twi + f(s, model=spde)
	covar <- as.data.frame(model.matrix(~-1 + habitable + dfield + driver + twi, diamina)[,-1])

	# Some data manipulation for INLA.
	spde <- inla.spde2.matern(mesh)
	index <- inla.spde.make.index("s", spde$n.spde)
	A <- inla.spde.make.A(mesh, loc=locations)
	stack.obs <- inla.stack(data=list(meadows=diamina$meadows),
	A=list(A, 1),
	effects=list(c(index, list(intercept=1)), covar=covar),
	tag="obs")

	result <- inla(model,
	data=inla.stack.data(stack.obs, spde=spde),
	family="binomial",
	control.predictor=list(A=inla.stack.A(stack.obs), compute=TRUE),
	verbose=TRUE)
	summary(result)

	# Compare observed and predicted values.
	index.obs <- inla.stack.index(stack.obs, "obs")$data
	cbind(obs=diamina$meadows, pred=round(result$summary.fitted.values$mean[index.obs]))

	# Posterior spatial scale.
	spde.result <- inla.spde.result(result, "s", spde)
	plot(spde.result$marginals.range.nominal[[1]] / coords.scale, type="l")



	# Discrete spatial model (Besag-York-Mollié):

	diamina <- read.csv("~/Downloads/diamina_meadows_125000_sample.csv", sep=";", dec=",")
	diamina <- subset(diamina, Xcoord>230000 & Ycoord>6910000)
	diamina$habitable <- as.factor(diamina$habitable)
	diamina$id <- 1:nrow(diamina)


	# Find neighbours with the k-nearest neighbour algorithm with k=20 (randomly picked).
	# The neighbours are marked in a matrix (with indexes i,j) so that i,j = 1 if points i and j are
	# neighbours, otherwise i,j = 0.
	locations <- as.matrix(diamina[,c("Xcoord","Ycoord")])
	knn <- knearneigh(locations, k=20, longlat=FALSE, RANN=FALSE)
	nb <- knn2nb(knn)
	graph.file <- tempfile()
	graph <- nb2INLA(graph.file, nb)

	model <- meadows ~ habitable + sqrt(dfield) + sqrt(driver) + twi + f(id, model="bym", graph=graph.file)
	result <- inla(model, family="binomial", data=diamina, control.predictor=list(compute=TRUE), verbose=TRUE)
	summary(result)

	# Compare observed and predicted values.
	cbind(obs=diamina$meadows, pred=round(result$summary.fitted.values$mean))