geografif/Landcover_classification.R

## Landcover_classification.R
## Modified from Babak Mohammadi's script

## Set working folder so you don't have to type the whole address when loading file
setwd("D:/MSc/LU/NGEN08_Satellite_RS/Exercise/3_Lunds_kommun/Module_3/Classification")
#getwd()

## Uncomment to install required packages below
#install.packages("RStoolbox")
#install.packages("monmlp")
#install.packages("readxl") #read excel spreadsheet instead of csv
#install.packages("dplyr") #for stratified random sample selection and splitting (train & test)

## Load packages
library(dplyr)
library(sp)
library(rgdal)
library(RStoolbox)
library(caret)
library(e1071)
library(raster)
library(monmlp)
library(readxl)


###################
### PREPARATION ###
###################

## Load image
sentinel10m <- stack("Image/S2_20160502_10m_SWEREF99_.tif")
sentinel20m <- stack("Image/S2_20160502_20m_SWEREF99.tif")
names(sentinel10m) <- c('B2', 'B3', 'B4', 'B8')
names(sentinel20m) <- c('B5', 'B6', 'B7', 'B8A', 'B11', 'B12')

sentinel <- stack(sentinel10m, sentinel20m)
reorderband <- c("B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B11", "B12")
sentinel <- sentinel[[reorderband]]
print(sentinel)
plotRGB(sentinel, r = "B4", g = "B3", b = "B2", scale = 100, stretch = "lin")
#writeRaster(sentinel, "Image/sentinel.tif", format="GTiff", overwrite=TRUE)
#hist(sentinel, main="Distribution of elevation values", col="purple", breaks=20)

## Import Lunds Kommun adm boundary
LK = shapefile("Adm/Lunds_kommun.shp")
LK_extent = extent(LK)
plot(LK, fill=NA, border="yellow", add=T)
#myraster.crop <- crop(DEM, e, snap="out", filename="myoutput.tif", overwrite=TRUE)
#Mince <- mask(Mince, mask = LK)

## Import sample spreadsheet as data frame. Skip to Line 58 if sample already splitted
sample = read_excel("Sample/LK2022fielddata.xlsx")

## Uncomment below to merge grassland (7) and cropland (6) as one class (6)
#sample$CLASSNUM[sample$CLASSNUM == 7] <- 6

## Select random rows from whole field sample data frame based on class
## Skip to Line 58 if sample already splitted outside
set.seed(1)
trdata <- sample %>%
  group_by("CLASSNUM") %>%
  slice_sample(prop = .60)

## Create tsdata by selecting sample rows that are not in trdata based on ID
tsdata = anti_join(sample, trdata, by=c("ID"="ID"))

## Uncomment below to import already splitted sheets
#trdata = read_excel("Sample/LK2022fielddata_SAIFUL.xlsx", sheet = "Train_data")
#tsdata = read_excel("Sample/LK2022fielddata_SAIFUL.xlsx", sheet = "Test_data")

## Check sample frequency of each class
print(table(trdata$CLASSLABEL))
print(table(tsdata$CLASSLABEL))

## Convert dataframe of trdata & tsdata and export as point shapefiles
crs.geo1 = CRS("+proj=utm +zone=33 +ellps=GRS80 +units=m +no_defs")

coordinates(trdata) = ~ SWEREF99TMEAST+SWEREF99TMNORTH
proj4string(trdata) = crs.geo1
plot(trdata, pch = 20, col = "yellow", add=T)
writeOGR(trdata, "Sample/Train.shp", "filename", driver = "ESRI Shapefile")

coordinates(tsdata) = ~ SWEREF99TMEAST+SWEREF99TMNORTH
proj4string(tsdata) = crs.geo1
plot(tsdata, pch = 20, col = "red", add=T)
writeOGR(tsdata, "Sample/Test.shp", "filename", driver = "ESRI Shapefile")

## Import back exported point shapefiles
Trainshapefile = shapefile("Sample/Train.shp")
Testshapefile = shapefile("Sample/Test.shp")
proj4string(Trainshapefile) = crs.geo1
proj4string(Testshapefile) = crs.geo1


##########################
### SPECTRAL SIGNATURE ###
##########################

## Extract value across bands at pixels correspond to training sample points
df <- extract(sentinel, Trainshapefile)
df <- round(df)

## Group values based on class
ms <- matrix(NA, nrow = length(unique(Trainshapefile$CLASSNUM)), ncol = nlayers(sentinel))
for (i in unique(Trainshapefile$CLASSNUM)){
  x <- df[Trainshapefile$CLASSNUM==i,]
  ms[i,] <- colMeans(x)
}
print(ms)

## Specify plot settings
rownames(ms) <- c('Urban', 'Water', 'Bare ground', 'Deciduous forest', 'Coniferous forest', 'Cropland', 'Grassland')
colnames(ms) <- names(sentinel)
mycolor <- c('red', 'blue', 'burlywood', 'lightgreen', 'darkgreen', 'yellow', 'brown')

dev.off(dev.list()["RStudioGD"])

plot(1, ylim=c(300, 4000), xlim=c(1,10), xlab="Bands", ylab="Reflectance (x10000)", xaxt='n')
axis(1, at=1:10, lab=colnames(ms))
for (i in 1:nrow(ms)){
  lines(ms[i,], type="o", lwd=3, lty=1, col=mycolor[i])
}
title(main="Spectral Profile", font.main=2)
legend("topleft", rownames(ms), cex=0.8, col=mycolor, lty=1,lwd=3, bty="n")


######################
### cLASSIFICATION ###
######################

proj4string(sentinel) = crs.geo1

## Based on the spectral signatures, should you use all bands? Are they provide significant difference?
## Subset if not. More bands -> more processing time
names(sentinel)
selection = c('B2','B3','B4','B8')
sentinel = subset(sentinel, selection) #Re-run Line 34 to recreate full Sentinel bands
print(sentinel)

## Should the class merged?
Trainshapefile$CLASSNUM[Trainshapefile$CLASSNUM == 7] <- 6 #Rerun line 79 to reset
Testshapefile$CLASSNUM[Testshapefile$CLASSNUM == 7] <- 6

## Running machine learning classification (change model to = "rf", "monmlp", "svmRadial")
RF_model= superClass(sentinel , trainData=Trainshapefile, responseCol = "CLASSNUM",
                     model = "rf",mode = "classification", predict = TRUE) #trainPartition=0.6

monmlp_model= superClass(sentinel , trainData=Trainshapefile, responseCol = "CLASSNUM",
                         model = "monmlp",mode = "classification", predict = TRUE)

SVM_model= superClass(sentinel , trainData=Trainshapefile, responseCol = "CLASSNUM",
                      model = "svmRadial",mode = "classification", predict = TRUE)

plot(RF_model$map, main="Random Forest")
plot(monmlp_model$map, main="Multilayer Perceptron")
plot(SVM_model$map, main="Support Vector Machine")

## Export classified raster files
writeRaster(RF_model$map, "ClassOut/RF4_7.tif", format="GTiff", overwrite=TRUE)
writeRaster(monmlp_model$map, "ClassOut/monmlp10_6.tif", format="GTiff", overwrite=TRUE)
writeRaster(SVM_model$map, "ClassOut/SVM10_6.tif", format="GTiff", overwrite=TRUE)

## Import Minimum Distance classification resulted from other software
Mindis = stack("SNAP/Mindis_10b_6c.tif")
Mindis = Mindis$Mindis_10b_6c.1
Mindis=Mindis+1 #Adjust pixel value to match CLASSNUM
plot(Mindis, main="Minimum Distance")
#Mindis[Mindis<1] <- NA


###########################
### ACCURACY ASSESSMENT ###
###########################

## Set validation samples againts LC class at corresponding pixel for creating confusion matrix
Testval = as.factor(Testshapefile$CLASSNUM)

## RF
RFval = as.factor(extract(RF_model$map, Testshapefile))
#Auto confmat
confusionMatrix(RFval, Testval, positive=NULL, dnn= c("Prediction", "Reference"))
#Manual confmat, works when prediction & reference's level matches
RF_CM = table("pred"=RFval, "ref"=Testval)
RF_UA = diag(RF_CM) / rowSums(RF_CM) * 100
RF_PA = diag(RF_CM) / colSums(RF_CM) * 100
RF_OA = sum(diag(RF_CM)/sum(RF_CM)) * 100
print(RF_CM)
print(RF_UA)
print(RF_PA)
print(RF_OA)

## Multi-layer perceptron
monmlpval = as.factor(extract(monmlp_model$map, Testshapefile))
confusionMatrix(monmlpval, Testval, positive=NULL, dnn= c("Prediction", "Reference"))
monmlp_CM = table("pred"=monmlpvalcm, "ref"=Testval)
#monmlp_UA = diag(monmlp_CM) / rowSums(monmlp_CM) * 100
#monmlp_PA = diag(monmlp_CM) / colSums(monmlp_CM) * 100
#monmlp_OA = sum(diag(monmlp_CM)/sum(monmlp_CM)) * 100
print(monmlp_CM)
#print(monmlp_UA)
#print(monmlp_PA)
#print(monmlp_OA)
#In-case level does not match, use below
monmlpval <- validateMap(monmlp_model$map, valData = Testshapefile, responseCol = "CLASSNUM",
                         classMapping = monmlp_model$classMapping)
monmlpvalResults<-monmlpval$performance$overall
list(monmlpvalResults)


## SVM
SVMval = as.factor(extract(SVM_model$map, Testshapefile))
confusionMatrix(SVMval, Testval, positive=NULL, dnn= c("Prediction", "Reference"))
SVM_CM = table("pred"=SVMval, "ref"=Testval)
SVM_UA = diag(SVM_CM) / rowSums(SVM_CM) * 100
SVM_PA = diag(SVM_CM) / colSums(SVM_CM) * 100
SVM_OA = sum(diag(SVM_CM)/sum(SVM_CM)) * 100
print(SVM_CM)
print(SVM_UA)
print(SVM_PA)
#print(SVM_OA)
SVMval <- validateMap(SVM_model$map, valData = Testshapefile, responseCol = "CLASSNUM",
                         classMapping = SVM_model$classMapping)
SVMvalResults<-SVMval$performance$overall
list(SVMvalResults)


## Mindis
Mindisval = as.factor(extract(Mindis, Testshapefile))
confusionMatrix(Mindisval, Testval, positive=NULL, dnn= c("Prediction", "Reference"))
Mindis_CM = table("pred"=Mindisval, "ref"=Testval)
Mindis_UA = diag(Mindis_CM) / rowSums(Mindis_CM) * 100
Mindis_PA = diag(Mindis_CM) / colSums(Mindis_CM) * 100
Mindis_OA = sum(diag(Mindis_CM)/sum(Mindis_CM)) * 100
print(Mindis_CM)
print(Mindis_UA)
print(Mindis_PA)
print(Mindis_OA)

## Manually calculate Kappa coeff.
kappa <- function(m) {
  N <- sum(m)
  No <- sum(diag(m))
  Ne <- 1 / N * sum(colSums(m) * rowSums(m))
  return( (No - Ne) / (N - Ne) )
}

kappacoef_RF = kappa(RF_CM)
kappacoef_monmlp = kappa(monmlp_CM)
kappacoef_SVM = kappa(SVM_CM)
kappacoef_Mindis = kappa(Mindis_CM)

print(kappacoef_RF)
print(kappacoef_monmlp)
print(kappacoef_SVM)
print(kappacoef_Mindis)

rffff <- extract(RFF, Testshapefile)
	## Modified from Babak Mohammadi's script

	## Set working folder so you don't have to type the whole address when loading file
	setwd("D:/MSc/LU/NGEN08_Satellite_RS/Exercise/3_Lunds_kommun/Module_3/Classification")
	#getwd()

	## Uncomment to install required packages below
	#install.packages("RStoolbox")
	#install.packages("monmlp")
	#install.packages("readxl") #read excel spreadsheet instead of csv
	#install.packages("dplyr") #for stratified random sample selection and splitting (train & test)

	## Load packages
	library(dplyr)
	library(sp)
	library(rgdal)
	library(RStoolbox)
	library(caret)
	library(e1071)
	library(raster)
	library(monmlp)
	library(readxl)



	###################
	### PREPARATION ###
	###################

	## Load image
	sentinel10m <- stack("Image/S2_20160502_10m_SWEREF99_.tif")
	sentinel20m <- stack("Image/S2_20160502_20m_SWEREF99.tif")
	names(sentinel10m) <- c('B2', 'B3', 'B4', 'B8')
	names(sentinel20m) <- c('B5', 'B6', 'B7', 'B8A', 'B11', 'B12')

	sentinel <- stack(sentinel10m, sentinel20m)
	reorderband <- c("B2", "B3", "B4", "B5", "B6", "B7", "B8", "B8A", "B11", "B12")
	sentinel <- sentinel[[reorderband]]
	print(sentinel)
	plotRGB(sentinel, r = "B4", g = "B3", b = "B2", scale = 100, stretch = "lin")
	#writeRaster(sentinel, "Image/sentinel.tif", format="GTiff", overwrite=TRUE)
	#hist(sentinel, main="Distribution of elevation values", col="purple", breaks=20)

	## Import Lunds Kommun adm boundary
	LK = shapefile("Adm/Lunds_kommun.shp")
	LK_extent = extent(LK)
	plot(LK, fill=NA, border="yellow", add=T)
	#myraster.crop <- crop(DEM, e, snap="out", filename="myoutput.tif", overwrite=TRUE)
	#Mince <- mask(Mince, mask = LK)

	## Import sample spreadsheet as data frame. Skip to Line 58 if sample already splitted
	sample = read_excel("Sample/LK2022fielddata.xlsx")

	## Uncomment below to merge grassland (7) and cropland (6) as one class (6)
	#sample$CLASSNUM[sample$CLASSNUM == 7] <- 6

	## Select random rows from whole field sample data frame based on class
	## Skip to Line 58 if sample already splitted outside
	set.seed(1)
	trdata <- sample %>%
	group_by("CLASSNUM") %>%
	slice_sample(prop = .60)

	## Create tsdata by selecting sample rows that are not in trdata based on ID
	tsdata = anti_join(sample, trdata, by=c("ID"="ID"))

	## Uncomment below to import already splitted sheets
	#trdata = read_excel("Sample/LK2022fielddata_SAIFUL.xlsx", sheet = "Train_data")
	#tsdata = read_excel("Sample/LK2022fielddata_SAIFUL.xlsx", sheet = "Test_data")

	## Check sample frequency of each class
	print(table(trdata$CLASSLABEL))
	print(table(tsdata$CLASSLABEL))

	## Convert dataframe of trdata & tsdata and export as point shapefiles
	crs.geo1 = CRS("+proj=utm +zone=33 +ellps=GRS80 +units=m +no_defs")

	coordinates(trdata) = ~ SWEREF99TMEAST+SWEREF99TMNORTH
	proj4string(trdata) = crs.geo1
	plot(trdata, pch = 20, col = "yellow", add=T)
	writeOGR(trdata, "Sample/Train.shp", "filename", driver = "ESRI Shapefile")

	coordinates(tsdata) = ~ SWEREF99TMEAST+SWEREF99TMNORTH
	proj4string(tsdata) = crs.geo1
	plot(tsdata, pch = 20, col = "red", add=T)
	writeOGR(tsdata, "Sample/Test.shp", "filename", driver = "ESRI Shapefile")

	## Import back exported point shapefiles
	Trainshapefile = shapefile("Sample/Train.shp")
	Testshapefile = shapefile("Sample/Test.shp")
	proj4string(Trainshapefile) = crs.geo1
	proj4string(Testshapefile) = crs.geo1



	##########################
	### SPECTRAL SIGNATURE ###
	##########################

	## Extract value across bands at pixels correspond to training sample points
	df <- extract(sentinel, Trainshapefile)
	df <- round(df)

	## Group values based on class
	ms <- matrix(NA, nrow = length(unique(Trainshapefile$CLASSNUM)), ncol = nlayers(sentinel))
	for (i in unique(Trainshapefile$CLASSNUM)){
	x <- df[Trainshapefile$CLASSNUM==i,]
	ms[i,] <- colMeans(x)
	}
	print(ms)

	## Specify plot settings
	rownames(ms) <- c('Urban', 'Water', 'Bare ground', 'Deciduous forest', 'Coniferous forest', 'Cropland', 'Grassland')
	colnames(ms) <- names(sentinel)
	mycolor <- c('red', 'blue', 'burlywood', 'lightgreen', 'darkgreen', 'yellow', 'brown')

	dev.off(dev.list()["RStudioGD"])

	plot(1, ylim=c(300, 4000), xlim=c(1,10), xlab="Bands", ylab="Reflectance (x10000)", xaxt='n')
	axis(1, at=1:10, lab=colnames(ms))
	for (i in 1:nrow(ms)){
	lines(ms[i,], type="o", lwd=3, lty=1, col=mycolor[i])
	}
	title(main="Spectral Profile", font.main=2)
	legend("topleft", rownames(ms), cex=0.8, col=mycolor, lty=1,lwd=3, bty="n")



	######################
	### cLASSIFICATION ###
	######################

	proj4string(sentinel) = crs.geo1

	## Based on the spectral signatures, should you use all bands? Are they provide significant difference?
	## Subset if not. More bands -> more processing time
	names(sentinel)
	selection = c('B2','B3','B4','B8')
	sentinel = subset(sentinel, selection) #Re-run Line 34 to recreate full Sentinel bands
	print(sentinel)

	## Should the class merged?
	Trainshapefile$CLASSNUM[Trainshapefile$CLASSNUM == 7] <- 6 #Rerun line 79 to reset
	Testshapefile$CLASSNUM[Testshapefile$CLASSNUM == 7] <- 6

	## Running machine learning classification (change model to = "rf", "monmlp", "svmRadial")
	RF_model= superClass(sentinel , trainData=Trainshapefile, responseCol = "CLASSNUM",
	model = "rf",mode = "classification", predict = TRUE) #trainPartition=0.6

	monmlp_model= superClass(sentinel , trainData=Trainshapefile, responseCol = "CLASSNUM",
	model = "monmlp",mode = "classification", predict = TRUE)

	SVM_model= superClass(sentinel , trainData=Trainshapefile, responseCol = "CLASSNUM",
	model = "svmRadial",mode = "classification", predict = TRUE)

	plot(RF_model$map, main="Random Forest")
	plot(monmlp_model$map, main="Multilayer Perceptron")
	plot(SVM_model$map, main="Support Vector Machine")

	## Export classified raster files
	writeRaster(RF_model$map, "ClassOut/RF4_7.tif", format="GTiff", overwrite=TRUE)
	writeRaster(monmlp_model$map, "ClassOut/monmlp10_6.tif", format="GTiff", overwrite=TRUE)
	writeRaster(SVM_model$map, "ClassOut/SVM10_6.tif", format="GTiff", overwrite=TRUE)

	## Import Minimum Distance classification resulted from other software
	Mindis = stack("SNAP/Mindis_10b_6c.tif")
	Mindis = Mindis$Mindis_10b_6c.1
	Mindis=Mindis+1 #Adjust pixel value to match CLASSNUM
	plot(Mindis, main="Minimum Distance")
	#Mindis[Mindis<1] <- NA



	###########################
	### ACCURACY ASSESSMENT ###
	###########################

	## Set validation samples againts LC class at corresponding pixel for creating confusion matrix
	Testval = as.factor(Testshapefile$CLASSNUM)

	## RF
	RFval = as.factor(extract(RF_model$map, Testshapefile))
	#Auto confmat
	confusionMatrix(RFval, Testval, positive=NULL, dnn= c("Prediction", "Reference"))
	#Manual confmat, works when prediction & reference's level matches
	RF_CM = table("pred"=RFval, "ref"=Testval)
	RF_UA = diag(RF_CM) / rowSums(RF_CM) * 100
	RF_PA = diag(RF_CM) / colSums(RF_CM) * 100
	RF_OA = sum(diag(RF_CM)/sum(RF_CM)) * 100
	print(RF_CM)
	print(RF_UA)
	print(RF_PA)
	print(RF_OA)

	## Multi-layer perceptron
	monmlpval = as.factor(extract(monmlp_model$map, Testshapefile))
	confusionMatrix(monmlpval, Testval, positive=NULL, dnn= c("Prediction", "Reference"))
	monmlp_CM = table("pred"=monmlpvalcm, "ref"=Testval)
	#monmlp_UA = diag(monmlp_CM) / rowSums(monmlp_CM) * 100
	#monmlp_PA = diag(monmlp_CM) / colSums(monmlp_CM) * 100
	#monmlp_OA = sum(diag(monmlp_CM)/sum(monmlp_CM)) * 100
	print(monmlp_CM)
	#print(monmlp_UA)
	#print(monmlp_PA)
	#print(monmlp_OA)
	#In-case level does not match, use below
	monmlpval <- validateMap(monmlp_model$map, valData = Testshapefile, responseCol = "CLASSNUM",
	classMapping = monmlp_model$classMapping)
	monmlpvalResults<-monmlpval$performance$overall
	list(monmlpvalResults)


	## SVM
	SVMval = as.factor(extract(SVM_model$map, Testshapefile))
	confusionMatrix(SVMval, Testval, positive=NULL, dnn= c("Prediction", "Reference"))
	SVM_CM = table("pred"=SVMval, "ref"=Testval)
	SVM_UA = diag(SVM_CM) / rowSums(SVM_CM) * 100
	SVM_PA = diag(SVM_CM) / colSums(SVM_CM) * 100
	SVM_OA = sum(diag(SVM_CM)/sum(SVM_CM)) * 100
	print(SVM_CM)
	print(SVM_UA)
	print(SVM_PA)
	#print(SVM_OA)
	SVMval <- validateMap(SVM_model$map, valData = Testshapefile, responseCol = "CLASSNUM",
	classMapping = SVM_model$classMapping)
	SVMvalResults<-SVMval$performance$overall
	list(SVMvalResults)


	## Mindis
	Mindisval = as.factor(extract(Mindis, Testshapefile))
	confusionMatrix(Mindisval, Testval, positive=NULL, dnn= c("Prediction", "Reference"))
	Mindis_CM = table("pred"=Mindisval, "ref"=Testval)
	Mindis_UA = diag(Mindis_CM) / rowSums(Mindis_CM) * 100
	Mindis_PA = diag(Mindis_CM) / colSums(Mindis_CM) * 100
	Mindis_OA = sum(diag(Mindis_CM)/sum(Mindis_CM)) * 100
	print(Mindis_CM)
	print(Mindis_UA)
	print(Mindis_PA)
	print(Mindis_OA)

	## Manually calculate Kappa coeff.
	kappa <- function(m) {
	N <- sum(m)
	No <- sum(diag(m))
	Ne <- 1 / N * sum(colSums(m) * rowSums(m))
	return( (No - Ne) / (N - Ne) )
	}

	kappacoef_RF = kappa(RF_CM)
	kappacoef_monmlp = kappa(monmlp_CM)
	kappacoef_SVM = kappa(SVM_CM)
	kappacoef_Mindis = kappa(Mindis_CM)

	print(kappacoef_RF)
	print(kappacoef_monmlp)
	print(kappacoef_SVM)
	print(kappacoef_Mindis)

	rffff <- extract(RFF, Testshapefile)