Created
March 25, 2018 22:44
-
-
Save jbants/a92a139984505b6cdce39f137d1ee668 to your computer and use it in GitHub Desktop.
Remote Sensing Image Classification with Python and Scikit-Learn
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ---------------------------------------------------------------------- | |
# Remote Sensing Image Classification Workflow for Landsat data with soft | |
# voting on a SVM and Gradient Boosting classifier. Outlier in the | |
# training data are flagged through an Isolation Forest algorithm. | |
# Feature Selection is done by a Recursive Feature Elimination method. | |
# The results are classification and classification probability raster | |
# images in TIF format. | |
# | |
# Written by Dimo Dimov, MapTailor, 2017 | |
# ---------------------------------------------------------------------- | |
# Prerequisites: Installation of Numpy, Scipy, Scikit-Image, Scikit-Learn | |
import skimage.io as io | |
import numpy as np | |
from sklearn.feature_selection import RFE | |
from sklearn.ensemble import GradientBoostingClassifier, IsolationForest | |
from sklearn.externals import joblib | |
from sklearn.model_selection import train_test_split | |
import numpy.ma as ma | |
import os, shutil | |
def Classification(): | |
raster = path_to_tiff + ".tif" | |
samples = path_to_tiff + "_samples.tif" | |
tfw_old = path_to_tiff + ".tfw" | |
classification = rootdir + "Classification\\" + pathrow + "_" + year + ".tif" | |
# read Landsat data as TIF | |
img_ds = io.imread(raster) | |
img = np.array(img_ds, dtype='uint16') | |
# read training samples as TIF with same dimensions as the Landsat image | |
roi_ds = io.imread(samples) | |
roi = np.array(roi_ds, dtype='uint8') | |
labels = np.unique(roi[roi > 0]) | |
print('The training data include {n} classes: {classes}'.format(n=labels.size, classes=labels)) | |
X = img[roi > 0, :] | |
Y = roi[roi > 0] | |
# splitting of training & test data in 80% - 20% for outlier analysis | |
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, train_size=0.8, test_size=0.2, random_state=None) | |
iso = IsolationForest(n_estimators = 500, | |
max_samples = 1.0, | |
contamination = 0.001, | |
max_features = 1.0).fit(X_train) | |
# Outliers are flagged and labeled as "-1" | |
X_train_outlier = iso.predict(X_train) | |
mask = ma.masked_equal(X_train_outlier, -1) | |
Y_mask = mask * Y_train | |
# further splitting of new training data, cleaned from outliers in 80% - 20% | |
X_train_new, X_test_new, Y_train_new, Y_test_new = train_test_split(X_train, Y_mask, train_size=0.8, test_size=0.2, random_state=None) | |
gbt = GradientBoostingClassifier(n_estimators = 100, | |
min_samples_leaf = 1, | |
min_samples_split = 10, | |
max_depth = 10, | |
max_features = 'sqrt', | |
learning_rate = 0.1, | |
subsample = 0.8, | |
random_state = None, | |
warm_start = True).fit(X_train_new, Y_train_new) | |
svm = SVC(C = 1.0, | |
kernel = 'rbf', | |
gamma = 1.0, | |
tol = 0.005, | |
probability = True, | |
class_weight = 'balanced', | |
cache_size = 8000, | |
decision_function_shape = 'ovr', | |
max_iter = 1000).fit(X_train_new, Y_train_new) | |
# Voting classifier for Gradient Boosting and SVM | |
clf = VotingClassifier(estimators=[('gbt', gbt), ('svm', svm)], voting = 'soft', weights = [1,1]).fit(X_train, Y_train) | |
# Feature Importances of the Gradient Boosting classifier | |
print gbt.feature_importances_ | |
# Feature Selection method, e.g. 10 features/bands | |
selector = RFE(clf, 10, step=1, verbose=0).fit(X_train_new, Y_train_new) | |
print selector.score(X_test_new, Y_test_new), raster | |
# save the classification model | |
model = rootdir + "MODEL\\" + pathrow + "_" + year + ".pkl" | |
joblib.dump(selector, model) | |
# call the classification model | |
clf = joblib.load(model) | |
# reshaping of the array with 10 features/bands | |
new_arr = (img.shape[0] * img.shape[1], img.shape[2]) | |
new_img = img[:, :, :10].reshape(new_arr) | |
row = img.shape[0] | |
col = img.shape[1] | |
# calculating classification probability, e.g. in this case with 7 classes | |
class_estimation = clf.predict_proba(new_img) | |
for n in range(0,7): | |
idx = str(n + 1) | |
class_prob = prob[:, n] | |
class_prob = class_prob.reshape((row, col)) | |
probability = rootdir + "Probability\\" + pathrow + "_" + year + "_" + idx + ".tif" | |
io.imsave(probability, class_prob) | |
tfw_new = probability.split(".tif")[0] + ".tfw" | |
shutil.copy(tfw_old, tfw_new) | |
# saving the classification results | |
class_prediction = clf.predict(new_img) | |
class_prediction = class_prediction.reshape(img[:, :, 0].shape) | |
io.imsave(classification, class_prediction) | |
tfw_new = classification.split(".tif")[0] + ".tfw" | |
shutil.copy(tfw_old, tfw_new) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment