hav4ik/locally_adaptive_binarization.py

## locally_adaptive_binarization.py
# These are probably the only important parameters in the
# whole pipeline (steps 0 through 3).
# Passed to block_image_process() as `block_size`: it is both the stride of
# the processing grid and the half-width of the window around each grid point.
BLOCK_SIZE = 40
# Margin used by adaptive_median_threshold(): a pixel less than DELTA above
# the local median is classified as background.
DELTA = 25

# Do the necessary noise cleaning and any other preprocessing.
# I only apply a simple median blur (and invert the image) here,
# but you can optionally add more steps.
def preprocess(image):
    """Denoise *image* and return its negative.

    A median blur with aperture size 3 is applied first, then every pixel
    value ``v`` is replaced by ``255 - v``.

    Args:
        image: Single-channel image as accepted by ``cv2.medianBlur``
            (presumably 8-bit, given the ``255 - v`` inversion -- confirm
            against callers).

    Returns:
        The blurred, inverted image as a new array.
    """
    denoised = cv2.medianBlur(image, 3)
    negative = 255 - denoised
    return negative

# Again, this step is fully optional and you can even keep
# the body empty. I just did some opening. The algorithm is
# pretty robust, so this stuff won't affect much.
def postprocess(image):
    """Clean up a binarized image with a morphological opening.

    Args:
        image: Image to clean, as accepted by ``cv2.morphologyEx``.

    Returns:
        The result of opening *image* with a 3x3 all-ones kernel.
    """
    structuring_element = np.ones((3, 3), dtype=np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, structuring_element)

# Just a helper function that generates box coordinates
def get_block_index(image_shape, yx, block_size):
    """Build fancy-index arrays for the window centred on *yx*.

    The window spans ``[c - block_size, c + block_size)`` along each axis,
    clipped to the image bounds.

    Args:
        image_shape: Shape of the image; entries 0 and 1 are used as the
            row and column limits.
        yx: ``(row, col)`` centre of the window.
        block_size: Half-width of the window.

    Returns:
        A tuple ``(rows, cols)`` of integer index arrays produced by
        ``np.meshgrid(y, x)``. Because of meshgrid's default ``'xy'``
        indexing both arrays have shape ``(len(x), len(y))``, so
        ``image[idx]`` is the *transposed* window, while
        ``out[idx] = values`` writes back consistently.
    """
    y = np.arange(max(0, yx[0] - block_size), min(image_shape[0], yx[0] + block_size))
    x = np.arange(max(0, yx[1] - block_size), min(image_shape[1], yx[1] + block_size))
    # np.meshgrid returns a list on NumPy < 2.0, and NumPy >= 1.23 treats a
    # list of index arrays as ONE array index instead of a per-axis index.
    # Returning a tuple makes ``image[idx]`` mean the same on every version.
    return tuple(np.meshgrid(y, x))

# Here is where the trick begins. We perform binarization from the
# median value locally (the img_in is actually a slice of the image).
# Here, following assumptions are held:
#   1.  The majority of pixels in the slice is background
#   2.  The median value of the intensity histogram probably
#       belongs to the background. We allow a soft margin DELTA
#       to account for any irregularities.
#   3.  We need to keep everything other than the background.
#
# We also do simple morphological operations here. It was just
# something that I empirically found to be "useful", but I assume
# this is pretty robust across different datasets.
def adaptive_median_threshold(img_in):
    """Binarize one image window relative to its own median.

    Pixels whose value is less than ``DELTA`` above the window's median are
    set to 255; all other pixels are set to 0. The 0-valued regions are
    then grown by two iterations of a 3x3 dilation.

    Args:
        img_in: Image window to binarize (presumably 8-bit, since the
            inversion uses ``255 - x`` -- confirm against callers).

    Returns:
        Array with the same shape and dtype as *img_in* holding 0 or 255.
    """
    local_median = np.median(img_in)
    near_background = (img_in - local_median) < DELTA

    binary = np.zeros_like(img_in)
    binary[near_background] = 255

    # Dilation grows bright pixels, so invert first to grow the 0-valued
    # regions, then invert back to restore the original polarity.
    structuring_element = np.ones((3, 3), np.uint8)
    grown = cv2.dilate(255 - binary, structuring_element, iterations=2)
    return 255 - grown

# This function just divides the image into local regions (blocks)
# and applies the `adaptive_median_threshold(...)` function to each
# of the regions.
def block_image_process(image, block_size):
    """Binarize *image* window by window with ``adaptive_median_threshold``.

    Grid points are visited every *block_size* pixels along both axes. The
    window around each grid point comes from ``get_block_index`` and extends
    up to *block_size* pixels on each side, so neighbouring windows overlap.
    Where they overlap, the window processed later overwrites the earlier
    result.

    Args:
        image: 2-D image to binarize; it is only read, never modified.
        block_size: Grid stride and window half-width.

    Returns:
        A new array with the same shape and dtype as *image*.
    """
    out_image = np.zeros_like(image)
    for row in range(0, image.shape[0], block_size):
        for col in range(0, image.shape[1], block_size):
            # The index must be a tuple: NumPy >= 1.23 treats a list of
            # index arrays (what np.meshgrid returns before NumPy 2.0) as a
            # single array index rather than one index per axis.
            block_idx = tuple(get_block_index(image.shape, (row, col), block_size))
            out_image[block_idx] = adaptive_median_threshold(image[block_idx])
    return out_image

# This function invokes the whole pipeline of Step 2.
def process_image(img):
    """Run the full binarization pipeline on one image.

    Steps: grayscale conversion, ``preprocess``, block-wise thresholding
    with ``block_image_process`` using ``BLOCK_SIZE``, then ``postprocess``.

    Args:
        img: BGR colour image, or an image that is already single-channel
            (2-D), which is used as-is.

    Returns:
        The binarized image returned by ``postprocess``.
    """
    # cv2.cvtColor(..., COLOR_BGR2GRAY) rejects single-channel input, so
    # skip the conversion for images that are already 2-D.
    if getattr(img, "ndim", None) == 2:
        image_in = img
    else:
        image_in = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    image_in = preprocess(image_in)
    image_out = block_image_process(image_in, BLOCK_SIZE)
    image_out = postprocess(image_out)
    return image_out
	# These are probably the only important parameters in the
	# whole pipeline (steps 0 through 3).
	# Passed to block_image_process() as `block_size`: it is both the stride of
	# the processing grid and the half-width of the window around each grid point.
	BLOCK_SIZE = 40
	# Margin used by adaptive_median_threshold(): a pixel less than DELTA above
	# the local median is classified as background.
	DELTA = 25

	# Do the necessary noise cleaning and any other preprocessing.
	# I only apply a simple median blur (and invert the image) here,
	# but you can optionally add more steps.
	def preprocess(image):
	    """Denoise *image* and return its negative.

	    A median blur with aperture size 3 is applied first, then every pixel
	    value ``v`` is replaced by ``255 - v``.

	    Args:
	        image: Single-channel image as accepted by ``cv2.medianBlur``
	            (presumably 8-bit, given the ``255 - v`` inversion -- confirm
	            against callers).

	    Returns:
	        The blurred, inverted image as a new array.
	    """
	    denoised = cv2.medianBlur(image, 3)
	    return 255 - denoised

	# Again, this step is fully optional and you can even keep
	# the body empty. I just did some opening. The algorithm is
	# pretty robust, so this stuff won't affect much.
	def postprocess(image):
	    """Clean up a binarized image with a morphological opening.

	    Args:
	        image: Image to clean, as accepted by ``cv2.morphologyEx``.

	    Returns:
	        The result of opening *image* with a 3x3 all-ones kernel.
	    """
	    kernel = np.ones((3, 3), np.uint8)
	    return cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel)

	# Just a helper function that generates box coordinates
	def get_block_index(image_shape, yx, block_size):
	    """Build fancy-index arrays for the window centred on *yx*.

	    The window spans ``[c - block_size, c + block_size)`` along each axis,
	    clipped to the image bounds.

	    Args:
	        image_shape: Shape of the image; entries 0 and 1 are used as the
	            row and column limits.
	        yx: ``(row, col)`` centre of the window.
	        block_size: Half-width of the window.

	    Returns:
	        A tuple ``(rows, cols)`` of integer index arrays produced by
	        ``np.meshgrid(y, x)``. Because of meshgrid's default ``'xy'``
	        indexing both arrays have shape ``(len(x), len(y))``, so
	        ``image[idx]`` is the *transposed* window, while
	        ``out[idx] = values`` writes back consistently.
	    """
	    y = np.arange(max(0, yx[0] - block_size), min(image_shape[0], yx[0] + block_size))
	    x = np.arange(max(0, yx[1] - block_size), min(image_shape[1], yx[1] + block_size))
	    # np.meshgrid returns a list on NumPy < 2.0, and NumPy >= 1.23 treats a
	    # list of index arrays as ONE array index instead of a per-axis index.
	    # Returning a tuple makes ``image[idx]`` mean the same on every version.
	    return tuple(np.meshgrid(y, x))

	# Here is where the trick begins. We perform binarization from the
	# median value locally (the img_in is actually a slice of the image).
	# Here, following assumptions are held:
	# 1. The majority of pixels in the slice is background
	# 2. The median value of the intensity histogram probably
	# belongs to the background. We allow a soft margin DELTA
	# to account for any irregularities.
	# 3. We need to keep everything other than the background.
	#
	# We also do simple morphological operations here. It was just
	# something that I empirically found to be "useful", but I assume
	# this is pretty robust across different datasets.
	def adaptive_median_threshold(img_in):
	    """Binarize one image window relative to its own median.

	    Pixels whose value is less than ``DELTA`` above the window's median are
	    set to 255; all other pixels are set to 0. The 0-valued regions are
	    then grown by two iterations of a 3x3 dilation.

	    Args:
	        img_in: Image window to binarize (presumably 8-bit, since the
	            inversion uses ``255 - x`` -- confirm against callers).

	    Returns:
	        Array with the same shape and dtype as *img_in* holding 0 or 255.
	    """
	    med = np.median(img_in)
	    img_out = np.zeros_like(img_in)
	    img_out[img_in - med < DELTA] = 255
	    # Dilation grows bright pixels, so invert first to grow the 0-valued
	    # regions, then invert back to restore the original polarity.
	    kernel = np.ones((3, 3), np.uint8)
	    img_out = 255 - cv2.dilate(255 - img_out, kernel, iterations=2)
	    return img_out

	# This function just divides the image into local regions (blocks)
	# and applies the `adaptive_median_threshold(...)` function to each
	# of the regions.
	def block_image_process(image, block_size):
	    """Binarize *image* window by window with ``adaptive_median_threshold``.

	    Grid points are visited every *block_size* pixels along both axes. The
	    window around each grid point comes from ``get_block_index`` and extends
	    up to *block_size* pixels on each side, so neighbouring windows overlap.
	    Where they overlap, the window processed later overwrites the earlier
	    result.

	    Args:
	        image: 2-D image to binarize; it is only read, never modified.
	        block_size: Grid stride and window half-width.

	    Returns:
	        A new array with the same shape and dtype as *image*.
	    """
	    out_image = np.zeros_like(image)
	    for row in range(0, image.shape[0], block_size):
	        for col in range(0, image.shape[1], block_size):
	            # The index must be a tuple: NumPy >= 1.23 treats a list of
	            # index arrays (what np.meshgrid returns before NumPy 2.0) as
	            # a single array index rather than one index per axis.
	            block_idx = tuple(get_block_index(image.shape, (row, col), block_size))
	            out_image[block_idx] = adaptive_median_threshold(image[block_idx])
	    return out_image

	# This function invokes the whole pipeline of Step 2.
	def process_image(img):
	    """Run the full binarization pipeline on one image.

	    Steps: grayscale conversion, ``preprocess``, block-wise thresholding
	    with ``block_image_process`` using ``BLOCK_SIZE``, then ``postprocess``.

	    Args:
	        img: BGR colour image, or an image that is already single-channel
	            (2-D), which is used as-is.

	    Returns:
	        The binarized image returned by ``postprocess``.
	    """
	    # cv2.cvtColor(..., COLOR_BGR2GRAY) rejects single-channel input, so
	    # skip the conversion for images that are already 2-D.
	    if getattr(img, "ndim", None) == 2:
	        image_in = img
	    else:
	        image_in = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
	    image_in = preprocess(image_in)
	    image_out = block_image_process(image_in, BLOCK_SIZE)
	    image_out = postprocess(image_out)
	    return image_out