Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
# python input.jpeg output.tif
# n-band image -> PCA -> n-band TIFF image
# with lots of hackety assumptions
# (e.g., output is same type as input)
from sys import argv
import rasterio as rio
import numpy as np
from sklearn import decomposition
with[1]) as src:
meta = src.meta
pixels =
# pixels[pixels == meta['nodata']] = 0.0
pixels = pixels.astype(np.float32)
#pixels = np.nan_to_num(pixels)
#pixels[np.isinf(pixels)] = 0.0
dtype = meta["dtype"]
count = meta["count"]
def normalize(n):
m = np.nanmean(n)
s = np.nanstd(n)
return (n - m) / s
# Todo: make this cleaner:
pixels = np.dstack([normalize(c).ravel() for c in pixels])[0]
pca = decomposition.PCA(
n_components=count, whiten=True
) # The whitening is redundant, right?
for band in range(len(pca.components_)):
weights = ", ".join(["{0:.4g}".format(x) for x in pca.components_[band]])
bidx = band + 1
var = pca.explained_variance_ratio_[band]
print(f"Band {bidx} will hold {var:.4g} of variance with weights:\n{weights}")
out = pca.transform(pixels)
# This is the messy reverse of the messy ravel above:
xyz = np.array([out[:, c].reshape(meta["height"], meta["width"]) for c in range(count)])
if dtype == 'uint8':
dtype = np.uint16
typemax = np.finfo(dtype).max
typemax = np.iinfo(dtype).max
# Scale each band separately to fill out the data type.
# (You either really want this or really don't want this.)
xyz = np.array([((c - np.amin(c)) / (np.amax(c) - np.amin(c))) * typemax for c in xyz])
xyz = xyz.astype(dtype)
# 'transform': meta['affine'],
"driver": "GTiff",
"photometric": "none",
"dtype": dtype,
with[2], "w", **meta) as dst:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.