Skip to content

Instantly share code, notes, and snippets.

@SheikSadi
Last active November 16, 2022 16:19
Show Gist options
  • Save SheikSadi/e107c42f88a67c4113e7ca587dc3e3ce to your computer and use it in GitHub Desktop.
Save SheikSadi/e107c42f88a67c4113e7ca587dc3e3ce to your computer and use it in GitHub Desktop.
Descent from Hilltop algorithm for cropping images based on saliency maps
import numpy as np
from skimage.feature import peak_local_max
from scipy.cluster.vq import kmeans
def get_centroids(array2d, maximum_gap=0.2, peak_theshold = 0.5):
    """Cluster the local maxima of a 2-D map into at most four centroids.

    Local maxima are located with ``peak_local_max`` and grouped with
    k-means for k = 1, 2, 3, 4. The first clustering whose mean distortion
    is below ``array2d.shape[0] * maximum_gap`` is returned.

    Args:
        array2d: 2-D array to search for peaks (presumably a saliency map;
            confirm against the caller).
        maximum_gap: Largest accepted k-means distortion, expressed as a
            fraction of the number of rows in ``array2d``.
        peak_theshold: Forwarded to ``peak_local_max`` as ``threshold_rel``.
            (The spelling is kept because it is part of the public
            signature.)

    Returns:
        ``np.uint32`` array of shape ``(k, 2)`` holding the centroid
        coordinates in the same axis order as ``peak_local_max`` returns
        them. Fractional centroid positions are truncated by the cast.
        An empty ``(0, 2)`` array is returned when no peak is found. When no
        clustering is tight enough, the finest clustering that was tried is
        returned instead of ``None`` so callers can always iterate over the
        result.
    """
    # Peak detection does not depend on k, so run it once instead of once
    # per candidate cluster count.
    peaks = peak_local_max(array2d, threshold_rel=peak_theshold).astype(float)
    if len(peaks) == 0:
        # kmeans cannot run without observations.
        return np.empty((0, 2), dtype=np.uint32)

    maximum_distortion = array2d.shape[0] * maximum_gap
    # Never request more clusters than there are peaks; kmeans cannot draw
    # k distinct initial centroids from fewer than k observations.
    largest_k = min(4, len(peaks))
    k_peaks = peaks
    for _k in range(1, largest_k + 1):
        k_peaks, distortion = kmeans(peaks, _k)
        if distortion < maximum_distortion:
            break
    return k_peaks.astype(np.uint32)
def descend_from_hilltop(array2d, cent_ij, alpha=1, beta=0.5, asp_ratio=1.44):
    """Pick the best-scoring window centred on ``cent_ij``.

    Starting from a small window around the centroid, the window is grown
    symmetrically in fixed steps of 5% of the image height (at least one
    pixel) until it would leave the image. Each candidate is scored as
    ``tanh(density ** alpha) * attention ** beta`` where ``attention`` is the
    share of the map's total value inside the window and ``density`` is that
    share divided by the share of the image area the window covers. The
    window with the highest score wins.

    Args:
        array2d: 2-D array of per-pixel weights (presumably a saliency map;
            confirm against the caller).
        cent_ij: ``(row, column)`` of the window centre. Python numbers and
            NumPy scalars of any dtype are accepted.
        alpha: Exponent applied to the density term.
        beta: Exponent applied to the attention term.
        asp_ratio: Ratio of the window's horizontal half-extent to its
            vertical half-extent.

    Returns:
        ``(start_x, start_y, finish_x, finish_y)`` as Python ints, where x is
        the column index and y is the row index. Both corners are inclusive.
        If even the smallest window does not fit inside the image, that
        smallest window clamped to the image bounds is returned.
    """
    # Convert to Python floats first: unsigned NumPy scalars (for example
    # the uint32 centroids produced by get_centroids) can wrap around on
    # subtraction instead of going negative, which would defeat the bounds
    # check below.
    cent_i, cent_j = (float(c) for c in cent_ij)
    image_h, image_w = array2d.shape
    # Growth step: 5% of the image height. Clamp to one pixel so images
    # shorter than 20 rows still grow the window and the loop terminates.
    step = max(1, int(image_h * 0.05))
    total_area = image_h * image_w
    total_attention = array2d.sum()

    attentions = []
    areas = []
    coords = []
    pad = step
    while True:
        half_width = asp_ratio * pad
        start_i = int(cent_i - pad)
        start_j = int(cent_j - half_width)
        finish_i = int(cent_i + pad)
        finish_j = int(cent_j + half_width)
        if start_i < 0 or finish_i >= image_h or start_j < 0 or finish_j >= image_w:
            break
        # Sum over the inclusive window so the attention and the area below
        # describe exactly the same set of pixels.
        attentions.append(array2d[start_i:finish_i + 1, start_j:finish_j + 1].sum())
        areas.append((finish_i - start_i + 1) * (finish_j - start_j + 1))
        coords.append((start_i, start_j, finish_i, finish_j))
        pad += step

    if not coords:
        # The centroid is too close to the border for any window to fit;
        # fall back to the smallest window clamped to the image.
        half_width = asp_ratio * step
        start_i = min(max(int(cent_i - step), 0), image_h - 1)
        start_j = min(max(int(cent_j - half_width), 0), image_w - 1)
        finish_i = min(max(int(cent_i + step), 0), image_h - 1)
        finish_j = min(max(int(cent_j + half_width), 0), image_w - 1)
    elif total_attention == 0:
        # Every score would be NaN (0 / 0); keep the smallest window, which
        # is what argmax over an all-NaN array selects, without the warnings.
        start_i, start_j, finish_i, finish_j = coords[0]
    else:
        attention_factors = np.asarray(attentions, dtype=float) / total_attention
        area_factors = np.asarray(areas, dtype=float) / total_area
        densities = attention_factors / area_factors
        scores = np.tanh(densities ** alpha) * (attention_factors ** beta)
        start_i, start_j, finish_i, finish_j = coords[int(np.argmax(scores))]

    # Image convention: x runs along columns (j), y along rows (i).
    start_x, start_y, finish_x, finish_y = start_j, start_i, finish_j, finish_i
    return start_x, start_y, finish_x, finish_y
@SheikSadi
Copy link
Author

Usage

Directory structure

current directory
├── smart_cropping.py
├── samples
│   └── image.jpg
├── maps
│   └── image.jpg
└── cmaps
    └── image.jpg

Code

# Usage example: show the original image, its colour-mapped saliency overlay,
# and the crop boxes proposed by descend_from_hilltop side by side.
import cv2
import matplotlib.pyplot as plt
from smart_cropping import get_centroids, descend_from_hilltop

fname = "image.jpg"
img_gray = cv2.imread(f"maps/{fname}", cv2.IMREAD_GRAYSCALE)
overlaid = cv2.imread(f"cmaps/{fname}", cv2.IMREAD_COLOR)
original_img = cv2.imread(f"samples/{fname}", cv2.IMREAD_COLOR)

# cv2.imread returns None instead of raising when a file cannot be read;
# fail here with a clear message rather than later inside NumPy indexing.
for folder, image in (("maps", img_gray), ("cmaps", overlaid), ("samples", original_img)):
    if image is None:
        raise FileNotFoundError(f"Could not read {folder}/{fname}")

fig, (ax0, ax1, ax2) = plt.subplots(1, 3, figsize=(20, 22))
# OpenCV loads images as BGR; reverse the channel axis to get RGB for matplotlib.
ax0.imshow(original_img[:, :, ::-1])
ax0.axis("off")

ax1.imshow(overlaid[:, :, ::-1])
ax1.axis("off")

# Draw every box on a copy so the unboxed original stays available.
boxed = original_img.copy()
for cent_ij in get_centroids(img_gray):
    start_x, start_y, finish_x, finish_y = descend_from_hilltop(img_gray, cent_ij, alpha=1.5, beta=0.5)
    cv2.rectangle(boxed, (start_x, start_y), (finish_x, finish_y), (255, 0, 0), 3)

# Show the result once, after all boxes are drawn, instead of stacking one
# image artist per centroid; the panel is also filled when no box was found.
ax2.imshow(boxed[:, :, ::-1])
ax2.axis("off")

Result

image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment