Skip to content

Instantly share code, notes, and snippets.

@jbants
Created March 25, 2018 22:44
Show Gist options
  • Save jbants/a92a139984505b6cdce39f137d1ee668 to your computer and use it in GitHub Desktop.
Save jbants/a92a139984505b6cdce39f137d1ee668 to your computer and use it in GitHub Desktop.
Remote Sensing Image Classification with Python and Scikit-Learn
# ----------------------------------------------------------------------
# Remote Sensing Image Classification Workflow for Landsat data with soft
# voting on a SVM and Gradient Boosting classifier. Outlier in the
# training data are flagged through an Isolation Forest algorithm.
# Feature Selection is done by a Recursive Feature Elimination method.
# The results are classification and classification probability raster
# images in TIF format.
#
# Written by Dimo Dimov, MapTailor, 2017
# ----------------------------------------------------------------------
# Prerequisites: Installation of Numpy, Scipy, Scikit-Image, Scikit-Learn
import skimage.io as io
import numpy as np
from sklearn.feature_selection import RFE
from sklearn.ensemble import GradientBoostingClassifier, IsolationForest
from sklearn.externals import joblib
from sklearn.model_selection import train_test_split
import numpy.ma as ma
import os, shutil
def Classification():
raster = path_to_tiff + ".tif"
samples = path_to_tiff + "_samples.tif"
tfw_old = path_to_tiff + ".tfw"
classification = rootdir + "Classification\\" + pathrow + "_" + year + ".tif"
# read Landsat data as TIF
img_ds = io.imread(raster)
img = np.array(img_ds, dtype='uint16')
# read training samples as TIF with same dimensions as the Landsat image
roi_ds = io.imread(samples)
roi = np.array(roi_ds, dtype='uint8')
labels = np.unique(roi[roi > 0])
print('The training data include {n} classes: {classes}'.format(n=labels.size, classes=labels))
X = img[roi > 0, :]
Y = roi[roi > 0]
# splitting of training & test data in 80% - 20% for outlier analysis
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=None)
iso = IsolationForest(n_estimators = 500,
max_samples = 1.0,
contamination = 0.001,
max_features = 1.0).fit(X_train)
# Outliers are flagged and labeled as "-1"
X_train_outlier = iso.predict(X_train)
mask = ma.masked_equal(X_train_outlier, -1)
Y_mask = mask * Y_train
# further splitting of new training data, cleaned from outliers in 80% - 20%
X_train_new, X_test_new, Y_train_new, Y_test_new = train_test_split(X_train, Y_mask, train_size=0.8, test_size=0.2, random_state=None)
gbt = GradientBoostingClassifier(n_estimators = 100,
min_samples_leaf = 1,
min_samples_split = 10,
max_depth = 10,
max_features = 'sqrt',
learning_rate = 0.1,
subsample = 0.8,
random_state = None,
warm_start = True).fit(X_train_new, Y_train_new)
svm = SVC(C = 1.0,
kernel = 'rbf',
gamma = 1.0,
tol = 0.005,
probability = True,
class_weight = 'balanced',
cache_size = 8000,
decision_function_shape = 'ovr',
max_iter = 1000).fit(X_train_new, Y_train_new)
# Voting classifier for Gradient Boosting and SVM
clf = VotingClassifier(estimators=[('gbt', gbt), ('svm', svm)], voting = 'soft', weights = [1,1]).fit(X_train, Y_train)
# Feature Importances of the Gradient Boosting classifier
print gbt.feature_importances_
# Feature Selection method, e.g. 10 features/bands
selector = RFE(clf, 10, step=1, verbose=0).fit(X_train_new, Y_train_new)
print selector.score(X_test_new, Y_test_new), raster
# save the classification model
model = rootdir + "MODEL\\" + pathrow + "_" + year + ".pkl"
joblib.dump(selector, model)
# call the classification model
clf = joblib.load(model)
# reshaping of the array with 10 features/bands
new_arr = (img.shape[0] * img.shape[1], img.shape[2])
new_img = img[:, :, :10].reshape(new_arr)
row = img.shape[0]
col = img.shape[1]
# calculating classification probability, e.g. in this case with 7 classes
class_estimation = clf.predict_proba(new_img)
for n in range(0,7):
idx = str(n + 1)
class_prob = prob[:, n]
class_prob = class_prob.reshape((row, col))
probability = rootdir + "Probability\\" + pathrow + "_" + year + "_" + idx + ".tif"
io.imsave(probability, class_prob)
tfw_new = probability.split(".tif")[0] + ".tfw"
shutil.copy(tfw_old, tfw_new)
# saving the classification results
class_prediction = clf.predict(new_img)
class_prediction = class_prediction.reshape(img[:, :, 0].shape)
io.imsave(classification, class_prediction)
tfw_new = classification.split(".tif")[0] + ".tfw"
shutil.copy(tfw_old, tfw_new)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment