Last active January 7, 2022 01:31
Random Affine Crop in the style of Albumentations for a Rasterio Dataset with minimal dependencies
# Utility functions for managing 3x3 matrices for cv2.warpAffine in pure numpy
import numpy as np
def identity():
    '''Return a new 3x3 float64 identity matrix (the "do nothing" transform).'''
    return np.identity(3, dtype=np.float64)
def affine(A=None, t=None):
    '''
    Build a 3x3 affine matrix.

    A : optional 2x2 linear part, written to the top-left corner
    t : optional 2-element translation, written to the last column
    Any part that is not given is left as in the identity matrix.
    '''
    mat = identity()
    # The two writes touch disjoint cells, so their order does not matter.
    if t is not None:
        mat[:2, 2] = t
    if A is not None:
        mat[:2, :2] = A
    return mat
def rotate(theta):
    '''Rotate counter-clockwise by `theta` (radians; fed to np.cos/np.sin).'''
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    linear = [[cos_t, sin_t],
              [-sin_t, cos_t]]
    return affine(A=linear)
def rotate_around(rx, ry, theta):
    '''
    Rotate counter-clockwise around (rx, ry)
    Rotating around (0, 0) in opencv is actually rotating around the centre
    of the first pixel.

    Built as three steps, applied in the listed order: move (rx, ry) to the
    origin, rotate by `theta` (radians), then move the origin back.
    '''
    return concatenate([
        translate(-rx, -ry),
        # Without this step the two translations simply cancel out.
        rotate(theta),
        translate(rx, ry),
    ])
def scale(sx, sy=None):
    '''Scale by `sx` along x and `sy` along y; `sy` falls back to `sx`.'''
    sy = sx if sy is None else sy
    diagonal = [[sx, 0], [0, sy]]
    return affine(A=diagonal)
def scale_around(srx, sry, sx, sy=None):
    '''
    Scale around a different registration point

    Built as three steps, applied in the listed order: move (srx, sry) to the
    origin, scale by (sx, sy), then move the origin back. `sy` is passed
    through to `scale()` unchanged, including when it is None.
    '''
    return concatenate([
        translate(-srx, -sry),
        scale(sx, sy),
        translate(srx, sry),
    ])
def translate(tx, ty):
    '''Translate by `tx` along x and `ty` along y.'''
    offset = [tx, ty]
    return affine(t=offset)
def concatenate(matrices):
    '''
    Compose a sequence of 3x3 matrices into a single matrix.

    The product is accumulated from the last entry to the first, i.e. the
    result is `matrices[-1] @ ... @ matrices[0]`. An empty sequence gives the
    identity matrix.
    '''
    combined = identity()
    for step in reversed(matrices):
        combined = combined @ step
    return combined
def homogeneous(coords):
    '''
    Takes 2D coords [N, ..., {x, y}] to homogeneous coords [N, ..., {x, y, 1}]

    `coords` may be any array-like (nested lists/tuples or an ndarray); a
    column of ones is appended along the last axis.
    '''
    # Plain lists have no `.shape`, so normalise to an ndarray first.
    coords = np.asarray(coords)
    ones = np.ones((*coords.shape[:-1], 1))
    return np.concatenate([coords, ones], axis=-1)
def unhomogeneous(coords):
    ''' Inverse of homogeneous(): divide by the last component and drop it. '''
    # Keep the trailing axis on the divisor so it broadcasts over x and y.
    w = coords[..., 2:]
    normalised = coords / w
    return normalised[..., :2]
def transform(mat, coords):
    ''' Batch transform `coords` shaped [..., 2] by the 3x3 matrix `mat`. '''
    points = np.asarray(coords)
    # Row-vector form: p @ mat.T is the same as (mat @ p) for each point p.
    moved = homogeneous(points) @ mat.T
    return unhomogeneous(moved)
import decimal
import math
import cv2
import numpy as np
import albumentations as A
import spatial
import mat3
def safe_int(v, atol=7):
    '''
    Simply casting to int truncates 0.999999999999 to 0.
    This accounts for floating point imprecisions.

    `v` is first rounded to `atol` decimal places (despite the name, `atol`
    is a digit count, not an absolute tolerance) and only then truncated
    towards zero by `int()`.
    '''
    rounded = round(decimal.Decimal(v), atol)
    return int(rounded)
class RandomAffineCrop(A.DualTransform):
    '''
    Random affine crop in the style of an Albumentations transform.

    Applies translate, scale and rotate operations (the composition itself is
    delegated to `spatial.get_pixel_transform`).
    This is represented as a params dictionary:
        {
            'translate': (dx, dy),
            'scale': (srx, sry, scale),
            'rotate': (rx, ry, angle)
        }
    Where:
    - dx, dy are normalised to IMAGE dimensions
    - srx, sry are normalised to CROP dimensions
    - rx, ry are normalised to CROP dimensions
    - angle is in radians

    You can generate a dictionary like this with random values by using
    self.get_params*() functions

    Then theoretically `apply()` crops the resulting image at crop_size.
    In reality, it only loads the minimum image data from disk.

    You may provide the random distribution for each operation as a callable
    that takes no parameters.

    Parameters
    (in the below descriptions `f` refers to `float`)
    crop_size : int or (int, int)
        Size of crop in pixels
    translate : (f, f) or callable, opt
        if 2-tuple, interpreted as uniform range for both dimensions
        if callable, must return (dx, dy)
        Stored (wrapped as a sampler) in `self.translate_dist`.
    scale : f or (f, f, f) or callable, opt
        if float, interpreted as symmetric range limit.
            e.g. scale=2 means, select scale between 0.5 and 2, with equal
            chance to sample <1 as to sample >1
            The scale registration point defaults to crop centre
        if 3-tuple, interpreted as (srx, sry, symmetric range limit)
            e.g. (0.5, 0.5, 2)
        (both above examples perform the same sampling)
        if callable, must return (srx, sry, scale)
        Stored (wrapped as a sampler) in `self.scale_dist`.
    rotate : (f, f) or (f, f, (f,f)) or callable, opt
        if 2-tuple interpreted as angle (radians) range.
            e.g. (-math.pi/2, math.pi/2)
            the rotation centre defaults to the centre of the crop
        if 3-tuple interpreted as (rx, ry, angle (radians) range)
            e.g. (0.5, 0.5, (-math.pi/2, math.pi/2))
        (both above examples perform the same sampling)
        if callable, must return (rx, ry, angle)
        Stored (wrapped as a sampler) in `self.rotate_dist`.
    '''

    def __init__(
        self,
        crop_size,
        translate=None,
        scale=None,
        rotate=None,
        always_apply: bool = False, p: float = 0.5
    ):
        # NOTE(review): the parameter list was truncated in the source. The
        # names `crop_size`, `translate`, `scale` and `rotate` are taken from
        # their use in the body; the `None` defaults are presumed (the usage
        # example constructs the object with only `crop_size`) - confirm
        # against `spatial.*_sampler_fnc`.
        super().__init__(always_apply, p)
        if isinstance(crop_size, int):
            # A single int means a square crop.
            crop_size = (crop_size, crop_size)
        self.crop_size = crop_size
        self.translate_dist = spatial.translate_sampler_fnc(translate)
        self.scale_dist = spatial.scale_sampler_fnc(scale)
        self.rotate_dist = spatial.rotate_sampler_fnc(rotate)

    def get_params(self):
        '''
        Gets a set of random affine parameters sampled from the distributions
        this object was initialised with.
        '''
        return {
            'translate': self.translate_dist(),
            'scale': self.scale_dist(),
            'rotate': self.rotate_dist(),
        }

    def get_params_safe(self, image_size, max_attempts=100):
        '''
        Gets a set of params sampled uniformly from provided ranges, while
        ensuring crop is within bounds of the image

        image_size : (width, height), as unpacked by `check_sample_inside`
        max_attempts : number of rejection-sampling tries before giving up

        Raises RuntimeError if no sample fits within `max_attempts` tries.
        '''
        # TODO: How can you make this safe, efficiently?
        #       Central square can be sampled normally.
        #       Triangles on the sides: trigonometry to determine how much are
        #       valid crop locations
        #       Select between by area
        # In practice, for large images, this is very unlikely to occur more than once
        for _ in range(max_attempts):
            params = self.get_params()
            M = self.get_pixel_transform(image_size, **params)
            if self.check_sample_inside(M, image_size):
                return params
        raise RuntimeError(
            'Couldn\'t sample params for crop within image. '
            'Check image and crop sizes'
        )

    def check_sample_inside(self, M, image_size):
        ''' Returns true if this will sample wholly within the image '''
        iw, ih = image_size
        xlo, ylo, xhi, yhi = self.get_crop_bounds(M)
        return xlo >= 0 and ylo >= 0 and xhi < iw and yhi < ih

    def get_pixel_transform(self, img_size, translate=(0, 0), scale=(0, 0, 1), rotate=(0, 0, 0)):
        ''' Thin wrapper over `spatial.get_pixel_transform` using this crop size. '''
        return spatial.get_pixel_transform(self.crop_size, translate, scale, rotate, img_size)

    def get_crop_pixel_transform(self, M):
        ''' Thin wrapper over `spatial.get_crop_pixel_transform` using this crop size. '''
        return spatial.get_crop_pixel_transform(M, self.crop_size)

    def get_crop_bounds(self, M):
        ''' Thin wrapper over `spatial.get_crop_bounds` using this crop size. '''
        return spatial.get_crop_bounds(M, self.crop_size)

    def fetch(self, img, interpolate, M=None, **params):
        '''
        Fetches the crop from `img` via `spatial.fetch`.
        If `M` is not given it is built from `params` and the image size.
        '''
        # shape[1::-1] reverses the first two shape entries: (rows, cols) -> (cols, rows)
        img_size = img.shape[1::-1]
        if M is None:
            M = self.get_pixel_transform(img_size, **params)
        return spatial.fetch(img, M, self.crop_size, interpolate)

    def apply(self, img, rows=None, cols=None, **params):
        ''' Crop an image, using linear interpolation. '''
        return self.fetch(img, interpolate=cv2.INTER_LINEAR, **params)

    def apply_to_mask(self, img, rows=None, cols=None, **params):
        ''' Crop a mask, using nearest-neighbour interpolation. '''
        return self.fetch(img, interpolate=cv2.INTER_NEAREST, **params)

    def apply_to_bbox(self, bbox, rows, cols, M=None, **params):
        '''
        Applies the transform to the bbox.

        Note: takes axis-aligned coordinates, returns axis-aligned coordinates.
        Thus, if there's a rotation, the final axis-aligned corners are the
        minimum axis-aligned box to cover the rotated corners and the area
        gets larger.
        i.e. rotating 45 degrees and then rotating -45 degrees won't give
        you the same bbox.
        '''
        if M is None:
            M = self.get_pixel_transform((cols, rows), **params)
        xlo, ylo, xhi, yhi = bbox

        # Translate to pixel values
        pxlo = xlo * cols
        pylo = ylo * rows
        pxhi = xhi * cols
        pyhi = yhi * rows

        # Apply transform and re-align corners with axis
        initial_corners = [(pxlo, pylo), (pxhi, pylo), (pxlo, pyhi), (pxhi, pyhi)]
        corners = mat3.transform(M, initial_corners)
        npxlo, npylo = corners.min(axis=0)
        npxhi, npyhi = corners.max(axis=0)

        # Normalise to crop size
        nxlo = npxlo / self.crop_size[0]
        nylo = npylo / self.crop_size[1]
        nxhi = npxhi / self.crop_size[0]
        nyhi = npyhi / self.crop_size[1]
        return nxlo, nylo, nxhi, nyhi

    def apply_to_keypoint(self, keypoint, rows, cols, M=None, **params):
        '''
        Keypoints can actually be a vector; they start somewhere and point somewhere else.

        `keypoint` is unpacked as (x, y, angle, scale) and the result is
        returned in the same form.
        '''
        if M is None:
            M = self.get_pixel_transform((cols, rows), **params)

        # Get point and vector
        x, y, angle, scale = keypoint
        vx = math.cos(angle)*scale
        vy = -math.sin(angle)*scale

        # Transform point and vector in pixel space
        points = [[x, y], [x+vx, y+vy]]
        # New x,y denoted nx,ny.
        (nx, ny), (nevx, nevy) = mat3.transform(M, points)
        # Vector is relative to nx, ny
        nvx = nevx-nx
        nvy = nevy-ny
        nscale = math.sqrt(nvx**2 + nvy**2)
        nangle = math.atan2(nvy, nvx)
        # Albumentations follows the convention that a positive rotation
        # is counter-clockwise for the angle of the keypoint
        nangle = -nangle

        # Return vector in polar form
        # To match with `albumentations.Rotate`, we take the int part.
        return safe_int(nx), safe_int(ny), nangle, nscale
Minimal usage example:

import rasterio

from random_affine_crop import RandomAffineCrop

rac = RandomAffineCrop(crop_size=(224, 224))
tif = rasterio.open('some.tif')
crop = rac.apply(tif, **rac.get_params())

Minimal functional usage example:

import rasterio

import spatial

crop_size = (224, 224)
tif = rasterio.open('some.tif')
sampler = spatial.affine_sampler_fnc()
M = spatial.get_pixel_transform(crop_size, img_size=tif.shape, **sampler())
crop = spatial.fetch(tif, M, crop_size)

Note: `spatial` has no dependency on Albumentations.

