Last active
March 3, 2024 00:10
-
-
Save bmad4ever/022c72f26eb5b9721d4a75b4407f018e to your computer and use it in GitHub Desktop.
parallelizing the "sequential", Alexei A. Efros and Willian T. Freeman's image quilting algorithm
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from multiprocessing.shared_memory import SharedMemory | |
import cv2 as cv | |
from .utils.generate import * # from https://github.com/rohitrango/Image-Quilting-for-Texture-Synthesis | |
# -- NOTE -- ___________________________________________________________________________________________________________ | |
# | |
# This implementation improves vanilla (a.k.a. sequential) image quilting speed via parallelization. | |
# | |
# The following is a reference to the source implementation contained in the quilting folder: | |
# https://github.com/rohitrango/Image-Quilting-for-Texture-Synthesis | |
# | |
# And the original algorithm paper: | |
# https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/papers/efros-siggraph01.pdf | |
# | |
# Conceptually, parallelization is done by creating a horizontal and vertical stripes in the shape of a cross | |
# dividing the image into 4 sections that can be generated independently. | |
# Additionally, each section's rows can also be "generated in parallel" ( clarifications below in 2.2 ). | |
# | |
# The operations are as follows: | |
# 1. | |
# 2 stripes for each direction are generated, all in parallel (if enough cores available). | |
# 1 vertical and 1 horizontal stripes are created with inverted images, in order to re-use the original | |
# implementation methods without any additional modifications. | |
# | |
# 2.1 | |
# The 4 separate sections are generated at the same time, using the required inversions, | |
# and then flipped at the end so that all can be stitched together | |
# | |
# 2.2 | |
# An OPTIONAL parallelization step is implemented, but improvement is not significant. | |
# ( also, can be worse in small generations, with a low number of patches or small source image ) | |
# While generating each section, instead of generating each row sequentially, multiple rows "can run in parallel" in | |
# alternating fashion. The modulo of the row number is run by the job with that number, e.g.: if using two jobs, | |
# one runs the odd rows and another the even rows. | |
# Note that one row section CAN ONLY be computed ONCE THE ADJACENT SECTION IN THE PRIOR ROW HAS BEEN COMPUTED. | |
# The algorithm is still sequential in nature, this is not some "modern" variant of the algorithm, | |
# but it does not require a row to be completely filled in order to compute some of the next row's patches. | |
# | |
# 3. | |
# Stitching is straightforward, and only needs to take into account the removal | |
# of the shared components belonging to the initially generated stripes. | |
# | |
# region methods extracted from the source of the referenced implementation for re-usability | |
def fill_column(image, initial_block, overlap, nH, tolerance): | |
blocksize = initial_block.shape[0] | |
textureMap = np.zeros( | |
((blocksize + nH * (blocksize - overlap)), blocksize, image.shape[2])) | |
textureMap[:blocksize, :blocksize, :] = initial_block | |
for i, blkIdx in enumerate(range((blocksize - overlap), textureMap.shape[0] - overlap, (blocksize - overlap))): | |
refBlock = textureMap[(blkIdx - blocksize + overlap):(blkIdx + overlap), :blocksize] | |
patchBlock = findPatchVertical(refBlock, image, blocksize, overlap, tolerance) | |
minCutPatch = getMinCutPatchVertical(refBlock, patchBlock, blocksize, overlap) | |
textureMap[blkIdx:(blkIdx + blocksize), :blocksize] = minCutPatch | |
return textureMap | |
def fill_row(image, initial_block, overlap, nW, tolerance): | |
blocksize = initial_block.shape[0] | |
textureMap = np.zeros( | |
(blocksize, (blocksize + nW * (blocksize - overlap)), image.shape[2])) | |
textureMap[:blocksize, :blocksize, :] = initial_block | |
for i, blkIdx in enumerate(range((blocksize - overlap), textureMap.shape[1] - overlap, (blocksize - overlap))): | |
refBlock = textureMap[:blocksize, (blkIdx - blocksize + overlap):(blkIdx + overlap)] | |
patchBlock = findPatchHorizontal(refBlock, image, blocksize, overlap, tolerance) | |
minCutPatch = getMinCutPatchHorizontal(refBlock, patchBlock, blocksize, overlap) | |
textureMap[:blocksize, (blkIdx):(blkIdx + blocksize)] = minCutPatch | |
return textureMap | |
def fill_quad(nH, nW, blocksize, overlap, textureMap, image, tolerance): | |
for i in range(1, nH + 1): | |
for j in range(1, nW + 1): | |
blkIndexI = i * (blocksize - overlap) | |
blkIndexJ = j * (blocksize - overlap) | |
refBlockLeft = textureMap[(blkIndexI):(blkIndexI + blocksize), | |
(blkIndexJ - blocksize + overlap):(blkIndexJ + overlap)] | |
refBlockTop = textureMap[(blkIndexI - blocksize + overlap):(blkIndexI + overlap), | |
(blkIndexJ):(blkIndexJ + blocksize)] | |
patchBlock = findPatchBoth(refBlockLeft, refBlockTop, image, blocksize, overlap, tolerance) | |
minCutPatch = getMinCutPatchBoth(refBlockLeft, refBlockTop, patchBlock, blocksize, overlap) | |
textureMap[(blkIndexI):(blkIndexI + blocksize), (blkIndexJ):(blkIndexJ + blocksize)] = minCutPatch | |
print("{} out of {} rows complete...".format(i + 1, nH + 1)) | |
return textureMap | |
# endregion | |
def generateTextureMap_p(image, blocksize, overlap, outH, outW, tolerance, nps): | |
""" | |
@param nps: number of parallel stripes; tells how many jobs to use for each of the 4 sections. | |
""" | |
from joblib import Parallel, delayed | |
# Starting index and block | |
H, W = image.shape[:2] | |
randH = np.random.randint(H - blocksize) | |
randW = np.random.randint(W - blocksize) | |
startBlock = image[randH:randH + blocksize, randW:randW + blocksize] | |
# horizontal inverted | |
hi_startBlock = cv.flip(startBlock, 1) | |
hi_image = cv.flip(image, 1) # in retrospective, should have used np; but other nodes use cv, so makes no diff. | |
# vertical inverted | |
vi_startBlock = cv.flip(startBlock, 0) | |
vi_image = cv.flip(image, 0) | |
# note: inverted both ways is computed later when filling the top-left quadrant/section | |
# auxiliary variables | |
quad_row_width = ceil(outW / 2 + blocksize / 2) | |
quad_column_height = ceil(outH / 2 + blocksize / 2) | |
qW = int(ceil((quad_row_width - blocksize) * 1.0 / (blocksize - overlap))) | |
qH = int(ceil((quad_column_height - blocksize) * 1.0 / (blocksize - overlap))) | |
# generate 2 vertical strips and 2 horizontal strips that will split the generated canvas in half | |
# the center, where the stripes connect, shares the same tile | |
args = [ | |
(image, startBlock, overlap, qW, tolerance), | |
(hi_image, hi_startBlock, overlap, qW, tolerance), | |
(image, startBlock, overlap, qH, tolerance), | |
(vi_image, vi_startBlock, overlap, qH, tolerance) | |
] | |
funcs = [fill_row, fill_row, fill_column, fill_column] | |
stripes = Parallel(n_jobs=4, backend="loky", timeout=None)( | |
delayed(funcs[i])(*args[i]) for i in range(4)) | |
hs, his, vs, vis = stripes | |
# generate the 4 sections (quadrants) | |
args = [ | |
(vis, his, hi_image, qH, qW, overlap, tolerance, nps), | |
(vis, hs, vi_image, qH, qW, overlap, tolerance, nps), | |
(vs, hs, image, qH, qW, overlap, tolerance, nps), | |
(vs, his, hi_image, qH, qW, overlap, tolerance, nps) | |
] | |
funcs = [quad1, quad2, quad3, quad4] | |
quads = Parallel(n_jobs=4, backend="loky", timeout=None)( | |
delayed(funcs[i])(*args[i]) for i in range(4)) | |
q1, q2, q3, q4 = quads | |
texture = np.zeros((q1.shape[0] * 2 - blocksize, q1.shape[1] * 2 - blocksize, image.shape[2])) | |
bmo = blocksize - overlap | |
texture[:q1.shape[0] - bmo, :q1.shape[1] - bmo] = q1[:q1.shape[0] - bmo, :q1.shape[1] - bmo] | |
texture[:q1.shape[0] - bmo, q1.shape[1] - bmo:] = q2[:q1.shape[0] - bmo, overlap:] | |
texture[q1.shape[0] - bmo:, :q1.shape[1] - bmo] = q4[overlap:, :q1.shape[1] - bmo] | |
texture[q1.shape[0] - bmo:, q1.shape[1] - bmo:] = q3[overlap:, overlap:] | |
return texture | |
def quad1(vis, his, hi_image, nH, nW, overlap, tolerance, p_strips): | |
""" | |
:param his: horizontal inverted stripe | |
:param vis: vertical inverted stripe | |
""" | |
vi_hi_s = cv.flip(his, 0) # vertical inversion of the horizontal inverted stripe | |
hi_vi_s = cv.flip(vis, 1) | |
vhi_image = cv.flip(hi_image, 0) | |
if p_strips > 1: | |
size = vis.shape[0] * his.shape[1] * hi_image.shape[2] * hi_image.dtype.itemsize | |
shm_text = SharedMemory(create=True, size=size) | |
texture = np.ndarray((vis.shape[0], his.shape[1], hi_image.shape[2]), dtype=hi_image.dtype, buffer=shm_text.buf) | |
else: | |
texture = np.zeros((vis.shape[0], his.shape[1], hi_image.shape[2])) | |
texture[:vi_hi_s.shape[0], :vi_hi_s.shape[1]] = vi_hi_s[:, :] | |
texture[vi_hi_s.shape[0]:hi_vi_s.shape[0], :hi_vi_s.shape[1]] = hi_vi_s[vi_hi_s.shape[0]:, :] | |
if p_strips > 1: | |
fill_quad_ps(nH, nW, vi_hi_s.shape[0], overlap, shm_text.name, vhi_image, tolerance, p_strips) | |
else: | |
texture = fill_quad(nH, nW, vi_hi_s.shape[0], overlap, texture, vhi_image, tolerance) | |
texture = cv.flip(texture, -1) | |
if p_strips > 1: | |
shm_text.close() | |
shm_text.unlink() | |
return texture | |
def quad2(vis, hs, vi_image, nH, nW, overlap, tolerance, p_strips): | |
vi_hs = cv.flip(hs, 0) | |
if p_strips > 1: | |
size = vis.shape[0] * hs.shape[1] * vi_image.shape[2] * vi_image.dtype.itemsize | |
shm_text = SharedMemory(create=True, size=size) | |
texture = np.ndarray((vis.shape[0], hs.shape[1], vi_image.shape[2]), dtype=vi_image.dtype, buffer=shm_text.buf) | |
else: | |
texture = np.zeros((vis.shape[0], hs.shape[1], vi_image.shape[2])) | |
texture[:hs.shape[0], :hs.shape[1]] = vi_hs[:, :] | |
texture[hs.shape[0]:vis.shape[0], :vis.shape[1]] = vis[hs.shape[0]:, :] | |
if p_strips > 1: | |
fill_quad_ps(nH, nW, hs.shape[0], overlap, shm_text.name, vi_image, tolerance, p_strips) | |
else: | |
texture = fill_quad(nH, nW, hs.shape[0], overlap, texture, vi_image, tolerance) | |
texture = cv.flip(texture, 0) | |
if p_strips > 1: | |
shm_text.close() | |
shm_text.unlink() | |
return texture | |
def quad4(vs, his, hi_image, nH, nW, overlap, tolerance, p_strips): | |
hi_vs = cv.flip(vs, 1) | |
if p_strips > 1: | |
size = vs.shape[0] * his.shape[1] * hi_image.shape[2] * hi_image.dtype.itemsize | |
shm_text = SharedMemory(create=True, size=size) | |
texture = np.ndarray((vs.shape[0], his.shape[1], hi_image.shape[2]), dtype=hi_image.dtype, buffer=shm_text.buf) | |
else: | |
texture = np.zeros((vs.shape[0], his.shape[1], hi_image.shape[2])) | |
texture[:his.shape[0], :his.shape[1]] = his[:, :] | |
texture[his.shape[0]:vs.shape[0], :vs.shape[1]] = hi_vs[his.shape[0]:, :] | |
if p_strips > 1: | |
fill_quad_ps(nH, nW, his.shape[0], overlap, shm_text.name, hi_image, tolerance, p_strips) | |
else: | |
texture = fill_quad(nH, nW, his.shape[0], overlap, texture, hi_image, tolerance) | |
texture = cv.flip(texture, 1) | |
if p_strips > 1: | |
shm_text.close() | |
shm_text.unlink() | |
return texture | |
def quad3(vs, hs, image, nH, nW, overlap, tolerance, p_strips): | |
if p_strips > 1: | |
size = vs.shape[0] * hs.shape[1] * image.shape[2] * image.dtype.itemsize | |
shm_text = SharedMemory(create=True, size=size) | |
texture = np.ndarray((vs.shape[0], hs.shape[1], image.shape[2]), dtype=image.dtype, buffer=shm_text.buf) | |
else: | |
texture = np.zeros((vs.shape[0], hs.shape[1], image.shape[2])) | |
texture[:hs.shape[0], :hs.shape[1]] = hs[:, :] | |
texture[hs.shape[0]:vs.shape[0], :vs.shape[1]] = vs[hs.shape[0]:, :] | |
if p_strips > 1: | |
fill_quad_ps(nH, nW, vs.shape[1], overlap, shm_text.name, image, tolerance, p_strips) | |
else: | |
return fill_quad(nH, nW, vs.shape[1], overlap, texture, image, tolerance) | |
texture = texture.copy() | |
if p_strips > 1: | |
shm_text.close() | |
shm_text.unlink() | |
return texture | |
def fill_quad_ps(nH, nW, blocksize, overlap, texture_shared_mem_name, image, tolerance, total_procs): | |
from joblib import Parallel, delayed | |
bmo = blocksize - overlap | |
b_o = ceil(blocksize / (blocksize - overlap)) | |
# note : ShareableList seems to have problems even though there are no concurrent writes to the same position... | |
# setup shared array to store & check each job row & column | |
size = 2 * total_procs * np.int32(1).itemsize | |
shm_coord = SharedMemory(create=True, size=size) | |
np_coord = np.ndarray((2 * total_procs,), dtype=np.int32, buffer=shm_coord.buf) | |
for ip in range(total_procs): | |
np_coord[2 * ip] = 1 + ip | |
np_coord[2 * ip + 1] = 1 | |
def fill_rows(pid, coord_shared_list_name, texture_shm_name): | |
shm_coord_ref = SharedMemory(name=coord_shared_list_name) | |
coord_list = np.ndarray((2 * total_procs,), dtype=np.int32, buffer=shm_coord_ref.buf) | |
prior_proc_base_index = (pid + total_procs - 1) % total_procs | |
shm_texture = SharedMemory(name=texture_shm_name) | |
texture = np.ndarray((blocksize + nH * bmo, blocksize + nW * bmo, image.shape[2]), dtype=image.dtype, | |
buffer=shm_texture.buf) | |
for i in range(1 + pid, nH + 1, total_procs): | |
coord_list[pid * 2 + 0] = i | |
for j in range(1, nW + 1): | |
coord_list[pid * 2 + 1] = j | |
# if previous row hasn't processed the adjacent section yet wait for it to advance. | |
# -1 is used to shortcircuit this check when the job on the prior row has completed all rows. | |
while -1 < coord_list[prior_proc_base_index * 2 + 0] < i and coord_list[ | |
prior_proc_base_index * 2 + 1] - b_o <= j: | |
pass | |
# The same as source implementation ( similar to fill_quad ) | |
blkIndexI = i * (blocksize - overlap) | |
blkIndexJ = j * (blocksize - overlap) | |
refBlockLeft = texture[(blkIndexI):(blkIndexI + blocksize), | |
(blkIndexJ - blocksize + overlap):(blkIndexJ + overlap)] | |
refBlockTop = texture[(blkIndexI - blocksize + overlap):(blkIndexI + overlap), | |
(blkIndexJ):(blkIndexJ + blocksize)] | |
patchBlock = findPatchBoth(refBlockLeft, refBlockTop, image, blocksize, overlap, tolerance) | |
minCutPatch = getMinCutPatchBoth(refBlockLeft, refBlockTop, patchBlock, blocksize, overlap) | |
texture[(blkIndexI):(blkIndexI + blocksize), (blkIndexJ):(blkIndexJ + blocksize)] = minCutPatch | |
coord_list[pid * 2 + 0] = -1 | |
Parallel(n_jobs=total_procs, backend="loky", timeout=None)( | |
delayed(fill_rows)(i, shm_coord.name, texture_shared_mem_name) for i in range(total_procs)) | |
shm_coord.close() | |
shm_coord.unlink() | |
return |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment