jcupitt/smartcrop.py

## smartcrop.py
#!/usr/bin/python

# smartcrop with libvips, based very roughly on
# https://github.com/jwagner/smartcrop.js

import sys

import gi
gi.require_version('Vips', '8.0')
from gi.repository import Vips

# the image to crop is the first command-line argument ... exit with a usage
# message instead of an IndexError traceback when it is missing
if len(sys.argv) < 2:
    sys.exit('usage: %s IMAGE' % sys.argv[0])

image = Vips.Image.new_from_file(sys.argv[1])

# options

# step crops by this much ... we downsample the feature images by this as well,
# so larger values make the search much quicker
step = 8

# write an image to a memory buffer ... can give a speedup if you know an
# image is small and will be reused a lot
def memoryise(image):
    """Write image into a freshly created memory image and return that copy."""
    in_memory = Vips.Image.new_memory()
    image.write(in_memory)
    return in_memory

# sobel filter ... use this to find edges
# 3x3 sobel kernel, plus the same kernel turned through 90 degrees
sobel_hmask = Vips.Image.new_from_array([
    [1, 2, 1],
    [0, 0, 0],
    [-1, -2, -1],
])
sobel_vmask = sobel_hmask.rot90()

def sobel_filter(image):
    """Return the summed absolute responses of image to both sobel masks."""
    response_h = image.conv(sobel_hmask).abs()
    response_v = image.conv(sobel_vmask).abs()
    return response_h + response_v

# areas of skin colour ... threshold in LCh
def skin_filter(image):
    """Mask the pixels of image whose LCh value lies inside the skin-tone box.

    A pixel passes only when all of its bands are strictly above [5, 5, 0]
    and strictly below [80, 30, 80].
    """
    lch = image.colourspace(Vips.Interpretation.LCH)
    above_floor = lch > [5, 5, 0]
    below_ceiling = lch < [80, 30, 80]
    return (above_floor & below_ceiling).bandand()

# areas high in saturation
def saturation_filter(image):
    """Mask the pixels of image whose second LCh band (chroma) exceeds 30."""
    lch = image.colourspace(Vips.Interpretation.LCH)
    chroma = lch[1]
    return chroma > 30

# feature-detect with an M x N box ... we look for a feature in the centre and
# no feature at the edges, ie. weights of -0.1, 1, -0.1, in thirds.
def _thirds_mask(size):
    """Build the one-dimensional centre-weighted mask for a box of size pixels.

    The mask is three equal runs weighted -0.1, 1, -0.1 and is scaled by the
    run length. The run length is clamped to at least 1 so that boxes smaller
    than 3 pixels still give a usable (non-empty, non-zero-scale) mask.
    """
    third = max(1, int(size / 3))
    weights = third * [-0.1] + third * [1] + third * [-0.1]
    return Vips.Image.new_from_array(weights, scale=third)


def feature_detect(image, M, N):
    """Score each pixel of image as the centre of an M x N feature box.

    image is convolved separately with a mask built from M and with a mask
    built from N that has been rotated by 90 degrees; the two responses are
    summed. High values mean a strong feature in the middle third of the box
    and little feature in the outer thirds.

    M and N are the box width and height in pixels of image. Returns the
    summed response image.
    """
    # single formatted argument so the output is the same on python 2 and 3
    print('feature-detect M = %s , N = %s' % (M, N))

    hmask = _thirds_mask(M)
    vmask = _thirds_mask(N).rot90()

    return image.conv(hmask) + image.conv(vmask)

# make three base feature images
# downsample by step, the resolution we search for crops at
tiny = memoryise(image.shrink(step, step))
edges = sobel_filter(tiny[1])
skin = skin_filter(tiny)
saturation = saturation_filter(tiny)

# weight the three factors to make a score image
score = edges + 0.5 * skin + 0.5 * saturation

# put a 1 pixel black line around the score image ... we are going to blur at
# various scales and we don't want high edge values to be extended
score = score.embed(1, 1, score.width + 2, score.height + 2, extend = "black")

score = memoryise(score)

# save the intermediate images so they can be inspected
score.write_to_file("score.v")
edges.write_to_file("edges.v")
skin.write_to_file("skin.v")
saturation.write_to_file("saturation.v")

# now feature-detect the score image at various scales and search for
# maxima ... they will give the position of the centre of the crops at
# that scale

# default to a square crop
crop_width = min(image.width, image.height)
crop_height = min(image.width, image.height)

best_score = 0

# fall back to the whole image when no scale scores above zero (a completely
# flat image, say) ... without these the report at the end is a NameError
best_x = 0
best_y = 0
best_w = image.width
best_h = image.height

# search from small crops to large ones .. from 1/2 minimum dimension up
for scale in range(5, 11):
    # floor division, so sizes stay integers on python 3 as well as python 2
    width = (crop_width * scale) // 10
    height = (crop_height * scale) // 10

    # search for the brightest rectangle
    features = feature_detect(score, width // step, height // step)
    v, x, y = features.maxpos()

    print('width = %s height = %s' % (width, height))
    print('x = %s y = %s' % (x * step, y * step))
    print('score = %s' % v)

    if v > best_score:
        print('new best score %s' % v)

        best_score = v
        # x, y are in score-image pixels: take off the 1 pixel border, scale
        # back up to full resolution, then move from crop centre to top-left
        best_x = (x - 1) * step - width // 2
        best_y = (y - 1) * step - height // 2
        best_w = width
        best_h = height

        # clip against image edges
        if best_x < 0:
            best_w += best_x
            best_x = 0
        if best_y < 0:
            best_h += best_y
            best_y = 0
        if best_x + best_w > image.width:
            best_w = image.width - best_x
        if best_y + best_h > image.height:
            best_h = image.height - best_y

        features.write_to_file("features.v")

print('best crop:')
print('x = %s y = %s' % (best_x, best_y))
print('w = %s h = %s' % (best_w, best_h))
	#!/usr/bin/python

	# smartcrop with libvips, based very roughly on
	# https://github.com/jwagner/smartcrop.js

	import sys

	import gi
	gi.require_version('Vips', '8.0')
	from gi.repository import Vips

	# the image to crop is the first command-line argument ... exit with a usage
	# message instead of an IndexError traceback when it is missing
	if len(sys.argv) < 2:
	    sys.exit('usage: %s IMAGE' % sys.argv[0])

	image = Vips.Image.new_from_file(sys.argv[1])

	# options

	# step crops by this much ... we downsample the feature images by this as well,
	# so larger values make the search much quicker
	step = 8

	# write an image to a memory buffer ... can give a speedup if you know an
	# image is small and will be reused a lot
	def memoryise(image):
	    """Write image into a freshly created memory image and return that copy."""
	    # body re-indented: the pasted copy had lost its block indentation
	    in_memory = Vips.Image.new_memory()
	    image.write(in_memory)
	    return in_memory

	# sobel filter ... use this to find edges
	# 3x3 sobel kernel, plus the same kernel turned through 90 degrees
	sobel_hmask = Vips.Image.new_from_array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])
	sobel_vmask = sobel_hmask.rot90()
	def sobel_filter(image):
	    """Return the summed absolute responses of image to both sobel masks."""
	    # body re-indented: the pasted copy had lost its block indentation
	    response_h = image.conv(sobel_hmask).abs()
	    response_v = image.conv(sobel_vmask).abs()
	    return response_h + response_v

	# areas of skin colour ... threshold in LCh
	def skin_filter(image):
	    """Mask the pixels of image whose LCh value lies inside the skin-tone box.

	    A pixel passes only when all of its bands are strictly above [5, 5, 0]
	    and strictly below [80, 30, 80].
	    """
	    # body re-indented: the pasted copy had lost its block indentation
	    lch = image.colourspace(Vips.Interpretation.LCH)
	    above_floor = lch > [5, 5, 0]
	    below_ceiling = lch < [80, 30, 80]
	    return (above_floor & below_ceiling).bandand()

	# areas high in saturation
	def saturation_filter(image):
	    """Mask the pixels of image whose second LCh band (chroma) exceeds 30."""
	    # body re-indented: the pasted copy had lost its block indentation
	    lch = image.colourspace(Vips.Interpretation.LCH)
	    chroma = lch[1]
	    return chroma > 30

	# feature-detect with an M x N box ... we look for a feature in the centre and
	# no feature at the edges, ie. weights of -0.1, 1, -0.1, in thirds.
	def _thirds_mask(size):
	    """Build the one-dimensional centre-weighted mask for a box of size pixels.

	    The mask is three equal runs weighted -0.1, 1, -0.1 and is scaled by the
	    run length. The run length is clamped to at least 1 so that boxes smaller
	    than 3 pixels still give a usable (non-empty, non-zero-scale) mask.
	    """
	    third = max(1, int(size / 3))
	    weights = third * [-0.1] + third * [1] + third * [-0.1]
	    return Vips.Image.new_from_array(weights, scale=third)

	def feature_detect(image, M, N):
	    """Score each pixel of image as the centre of an M x N feature box.

	    image is convolved separately with a mask built from M and with a mask
	    built from N that has been rotated by 90 degrees; the two responses are
	    summed. High values mean a strong feature in the middle third of the box
	    and little feature in the outer thirds.

	    M and N are the box width and height in pixels of image. Returns the
	    summed response image.
	    """
	    # single formatted argument so the output is the same on python 2 and 3
	    print('feature-detect M = %s , N = %s' % (M, N))

	    hmask = _thirds_mask(M)
	    vmask = _thirds_mask(N).rot90()

	    return image.conv(hmask) + image.conv(vmask)

	# make three base feature images
	# downsample by step, the resolution we search for crops at
	tiny = memoryise(image.shrink(step, step))
	edges = sobel_filter(tiny[1])
	skin = skin_filter(tiny)
	saturation = saturation_filter(tiny)

	# weight the three factors to make a score image
	score = edges + 0.5 * skin + 0.5 * saturation

	# put a 1 pixel black line around the score image ... we are going to blur at
	# various scales and we don't want high edge values to be extended
	score = score.embed(1, 1, score.width + 2, score.height + 2, extend = "black")

	score = memoryise(score)

	# save the intermediate images so they can be inspected
	score.write_to_file("score.v")
	edges.write_to_file("edges.v")
	skin.write_to_file("skin.v")
	saturation.write_to_file("saturation.v")

	# now feature-detect the score image at various scales and search for
	# maxima ... they will give the position of the centre of the crops at
	# that scale

	# default to a square crop
	crop_width = min(image.width, image.height)
	crop_height = min(image.width, image.height)

	best_score = 0

	# fall back to the whole image when no scale scores above zero (a completely
	# flat image, say) ... without these the report at the end is a NameError
	best_x = 0
	best_y = 0
	best_w = image.width
	best_h = image.height

	# search from small crops to large ones .. from 1/2 minimum dimension up
	for scale in range(5, 11):
	    # floor division, so sizes stay integers on python 3 as well as python 2
	    width = (crop_width * scale) // 10
	    height = (crop_height * scale) // 10

	    # search for the brightest rectangle
	    features = feature_detect(score, width // step, height // step)
	    v, x, y = features.maxpos()

	    print('width = %s height = %s' % (width, height))
	    print('x = %s y = %s' % (x * step, y * step))
	    print('score = %s' % v)

	    if v > best_score:
	        print('new best score %s' % v)

	        best_score = v
	        # x, y are in score-image pixels: take off the 1 pixel border, scale
	        # back up to full resolution, then move from crop centre to top-left
	        best_x = (x - 1) * step - width // 2
	        best_y = (y - 1) * step - height // 2
	        best_w = width
	        best_h = height

	        # clip against image edges
	        if best_x < 0:
	            best_w += best_x
	            best_x = 0
	        if best_y < 0:
	            best_h += best_y
	            best_y = 0
	        if best_x + best_w > image.width:
	            best_w = image.width - best_x
	        if best_y + best_h > image.height:
	            best_h = image.height - best_y

	        features.write_to_file("features.v")

	print('best crop:')
	print('x = %s y = %s' % (best_x, best_y))
	print('w = %s h = %s' % (best_w, best_h))