jbants/RemoteSensingImageClassification.py

## RemoteSensingImageClassification.py
# ----------------------------------------------------------------------
# Remote Sensing Image Classification Workflow for Landsat data with soft
# voting on a SVM and Gradient Boosting classifier. Outlier in the
# training data are flagged through an Isolation Forest algorithm.
# Feature Selection is done by a Recursive Feature Elimination method.
# The results are classification and classification probability raster
# images in TIF format.
#
# Written by Dimo Dimov, MapTailor, 2017
# ----------------------------------------------------------------------
# Prerequisites: Installation of Numpy, Scipy, Scikit-Image, Scikit-Learn

import skimage.io as io
import numpy as np
from sklearn.feature_selection import RFE
from sklearn.ensemble import GradientBoostingClassifier, IsolationForest
from sklearn.externals import joblib
from sklearn.model_selection  import train_test_split
import numpy.ma as ma
import os, shutil

def Classification():
	raster = path_to_tiff + ".tif"
	samples = path_to_tiff + "_samples.tif"
	tfw_old = path_to_tiff + ".tfw"

	classification = rootdir + "Classification\\" + pathrow + "_" + year + ".tif"

	# read Landsat data as TIF
	img_ds = io.imread(raster)
	img = np.array(img_ds, dtype='uint16')

	# read training samples as TIF with same dimensions as the Landsat image
	roi_ds = io.imread(samples)
	roi = np.array(roi_ds, dtype='uint8')

	labels = np.unique(roi[roi > 0])
	print('The training data include {n} classes: {classes}'.format(n=labels.size, classes=labels))

	X = img[roi > 0, :]
	Y = roi[roi > 0]

	# splitting of training & test data in 80% - 20% for outlier analysis
	X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=None)
	iso = IsolationForest(n_estimators = 500,
				max_samples = 1.0,
				contamination = 0.001,
				max_features = 1.0).fit(X_train)

	# Outliers are flagged and labeled as "-1"

	X_train_outlier = iso.predict(X_train)
	mask = ma.masked_equal(X_train_outlier, -1)
	Y_mask = mask * Y_train

	# further splitting of new training data, cleaned from outliers in 80% - 20%
	X_train_new, X_test_new, Y_train_new, Y_test_new = train_test_split(X_train, Y_mask, train_size=0.8, test_size=0.2, random_state=None)

	gbt = GradientBoostingClassifier(n_estimators = 100,
					min_samples_leaf = 1,
					min_samples_split = 10,
					max_depth = 10,
					max_features = 'sqrt',
					learning_rate = 0.1,
					subsample = 0.8,
					random_state = None,
					warm_start = True).fit(X_train_new, Y_train_new)

	svm = SVC(C = 1.0,
		kernel = 'rbf',
		gamma = 1.0,
		tol = 0.005,
		probability = True,
		class_weight = 'balanced',
		cache_size = 8000,
		decision_function_shape = 'ovr',
		max_iter = 1000).fit(X_train_new, Y_train_new)

	# Voting classifier for Gradient Boosting and SVM
	clf = VotingClassifier(estimators=[('gbt', gbt), ('svm', svm)], voting = 'soft', weights = [1,1]).fit(X_train, Y_train)

	# Feature Importances of the Gradient Boosting classifier
	print gbt.feature_importances_

	# Feature Selection method, e.g. 10 features/bands
	selector = RFE(clf, 10, step=1, verbose=0).fit(X_train_new, Y_train_new)
	print selector.score(X_test_new, Y_test_new), raster

	# save the classification model
	model = rootdir + "MODEL\\" + pathrow + "_" + year + ".pkl"
	joblib.dump(selector, model)

	# call the classification model
	clf = joblib.load(model)

	# reshaping of the array with 10 features/bands
	new_arr = (img.shape[0] * img.shape[1], img.shape[2])
	new_img = img[:, :, :10].reshape(new_arr)

	row = img.shape[0]
	col = img.shape[1]

	# calculating classification probability, e.g. in this case with 7 classes
	class_estimation = clf.predict_proba(new_img)

	for n in range(0,7):
		idx = str(n + 1)
		class_prob = prob[:, n]
		class_prob = class_prob.reshape((row, col))

		probability = rootdir + "Probability\\" + pathrow + "_" + year + "_" + idx + ".tif"
		io.imsave(probability, class_prob)

		tfw_new = probability.split(".tif")[0] + ".tfw"
		shutil.copy(tfw_old, tfw_new)

	# saving the classification results
	class_prediction = clf.predict(new_img)
	class_prediction = class_prediction.reshape(img[:, :, 0].shape)

	io.imsave(classification, class_prediction)

	tfw_new = classification.split(".tif")[0] + ".tfw"
	shutil.copy(tfw_old, tfw_new)
	# ----------------------------------------------------------------------
	# Remote Sensing Image Classification Workflow for Landsat data with soft
	# voting on a SVM and Gradient Boosting classifier. Outlier in the
	# training data are flagged through an Isolation Forest algorithm.
	# Feature Selection is done by a Recursive Feature Elimination method.
	# The results are classification and classification probability raster
	# images in TIF format.
	#
	# Written by Dimo Dimov, MapTailor, 2017
	# ----------------------------------------------------------------------
	# Prerequisites: Installation of Numpy, Scipy, Scikit-Image, Scikit-Learn

	import skimage.io as io
	import numpy as np
	from sklearn.feature_selection import RFE
	from sklearn.ensemble import GradientBoostingClassifier, IsolationForest
	from sklearn.externals import joblib
	from sklearn.model_selection import train_test_split
	import numpy.ma as ma
	import os, shutil

	def Classification():
	raster = path_to_tiff + ".tif"
	samples = path_to_tiff + "_samples.tif"
	tfw_old = path_to_tiff + ".tfw"

	classification = rootdir + "Classification\\" + pathrow + "_" + year + ".tif"

	# read Landsat data as TIF
	img_ds = io.imread(raster)
	img = np.array(img_ds, dtype='uint16')

	# read training samples as TIF with same dimensions as the Landsat image
	roi_ds = io.imread(samples)
	roi = np.array(roi_ds, dtype='uint8')

	labels = np.unique(roi[roi > 0])
	print('The training data include {n} classes: {classes}'.format(n=labels.size, classes=labels))

	X = img[roi > 0, :]
	Y = roi[roi > 0]

	# splitting of training & test data in 80% - 20% for outlier analysis
	X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=None)
	iso = IsolationForest(n_estimators = 500,
	max_samples = 1.0,
	contamination = 0.001,
	max_features = 1.0).fit(X_train)

	# Outliers are flagged and labeled as "-1"

	X_train_outlier = iso.predict(X_train)
	mask = ma.masked_equal(X_train_outlier, -1)
	Y_mask = mask * Y_train

	# further splitting of new training data, cleaned from outliers in 80% - 20%
	X_train_new, X_test_new, Y_train_new, Y_test_new = train_test_split(X_train, Y_mask, train_size=0.8, test_size=0.2, random_state=None)

	gbt = GradientBoostingClassifier(n_estimators = 100,
	min_samples_leaf = 1,
	min_samples_split = 10,
	max_depth = 10,
	max_features = 'sqrt',
	learning_rate = 0.1,
	subsample = 0.8,
	random_state = None,
	warm_start = True).fit(X_train_new, Y_train_new)

	svm = SVC(C = 1.0,
	kernel = 'rbf',
	gamma = 1.0,
	tol = 0.005,
	probability = True,
	class_weight = 'balanced',
	cache_size = 8000,
	decision_function_shape = 'ovr',
	max_iter = 1000).fit(X_train_new, Y_train_new)

	# Voting classifier for Gradient Boosting and SVM
	clf = VotingClassifier(estimators=[('gbt', gbt), ('svm', svm)], voting = 'soft', weights = [1,1]).fit(X_train, Y_train)

	# Feature Importances of the Gradient Boosting classifier
	print gbt.feature_importances_

	# Feature Selection method, e.g. 10 features/bands
	selector = RFE(clf, 10, step=1, verbose=0).fit(X_train_new, Y_train_new)
	print selector.score(X_test_new, Y_test_new), raster

	# save the classification model
	model = rootdir + "MODEL\\" + pathrow + "_" + year + ".pkl"
	joblib.dump(selector, model)

	# call the classification model
	clf = joblib.load(model)

	# reshaping of the array with 10 features/bands
	new_arr = (img.shape[0] * img.shape[1], img.shape[2])
	new_img = img[:, :, :10].reshape(new_arr)

	row = img.shape[0]
	col = img.shape[1]

	# calculating classification probability, e.g. in this case with 7 classes
	class_estimation = clf.predict_proba(new_img)

	for n in range(0,7):
	idx = str(n + 1)
	class_prob = prob[:, n]
	class_prob = class_prob.reshape((row, col))

	probability = rootdir + "Probability\\" + pathrow + "_" + year + "_" + idx + ".tif"
	io.imsave(probability, class_prob)

	tfw_new = probability.split(".tif")[0] + ".tfw"
	shutil.copy(tfw_old, tfw_new)

	# saving the classification results
	class_prediction = clf.predict(new_img)
	class_prediction = class_prediction.reshape(img[:, :, 0].shape)

	io.imsave(classification, class_prediction)

	tfw_new = classification.split(".tif")[0] + ".tfw"
	shutil.copy(tfw_old, tfw_new)