Created
June 30, 2020 20:05
-
-
Save hav4ik/429a1927d08c7168436bfc0f87c68a1e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# These are probably the only important parameters in the | |
# whole pipeline (steps 0 through 3). | |
# Handed to block_image_process() as its block_size: the step between
# block anchors and the half-width of each block window, in pixels.
BLOCK_SIZE = 40
# Soft margin above the local median: a pixel whose value exceeds the
# block median by less than DELTA is marked 255 by
# adaptive_median_threshold(); all other pixels are marked 0.
DELTA = 25
# Do the necessary noise cleaning and any other preparation.
# I just do a simple median blur and invert the image here, but
# you can optionally add more steps.
def preprocess(image):
    """Denoise the image and invert its intensities.

    A median blur with aperture size 3 is applied first; the result is
    then subtracted from 255. Any extra clean-up steps belong here.

    Args:
        image: Image accepted by ``cv2.medianBlur``.

    Returns:
        ``255 - blurred``, where ``blurred`` is the median-filtered input.
    """
    denoised = cv2.medianBlur(image, 3)
    inverted = 255 - denoised
    return inverted
# Again, this step is fully optional and you can even keep | |
# the body empty. I just did some opening. The algorithm is | |
# pretty robust, so this stuff won't affect much. | |
def postprocess(image):
    """Clean up the binarized image with a morphological opening.

    The opening uses a 3x3 all-ones ``uint8`` structuring element. This
    step is optional; the body could simply return ``image`` unchanged.

    Args:
        image: Image accepted by ``cv2.morphologyEx``.

    Returns:
        The opened image.
    """
    structuring_element = np.ones((3, 3), dtype=np.uint8)
    return cv2.morphologyEx(image, cv2.MORPH_OPEN, structuring_element)
# Just a helper function that generates box coordinates | |
def get_block_index(image_shape, yx, block_size):
    """Build index arrays for a window around ``yx``, clipped to the image.

    Args:
        image_shape: Shape of the image; only the first two entries
            (number of rows, number of columns) are read.
        yx: ``(row, column)`` anchor of the window.
        block_size: Half-width of the window. On each axis the window
            covers ``[anchor - block_size, anchor + block_size)``,
            clipped to ``[0, size)``.

    Returns:
        A 2-tuple of integer index arrays ``(rows, cols)`` as produced by
        ``np.meshgrid(y, x)``. With meshgrid's default ``'xy'`` layout both
        arrays have shape ``(len(x), len(y))``, so ``image[result]`` yields
        the window with its two axes swapped; assigning back through the
        same index puts values in their original positions.
    """
    row, col = yx[0], yx[1]
    height, width = image_shape[0], image_shape[1]
    y = np.arange(max(0, row - block_size), min(height, row + block_size))
    x = np.arange(max(0, col - block_size), min(width, col + block_size))
    # np.meshgrid returns a *list* on NumPy < 2.0, and since NumPy 1.23 a
    # list of arrays is no longer interpreted as a multidimensional index.
    # Always hand back a tuple so ``image[result]`` selects the 2-D window.
    return tuple(np.meshgrid(y, x))
# Here is where the trick begins. We binarize locally around the
# median value (img_in is actually a slice of the image).
# The following assumptions hold:
# 1. The majority of pixels in the slice are background.
# 2. The median value of the intensity histogram probably
# belongs to the background. We allow a soft margin DELTA
# to account for any irregularities.
# 3. We need to keep everything other than the background.
# | |
# We also do simple morphological operations here. It was just | |
# something that I empirically found to be "useful", but I assume | |
# this is pretty robust across different datasets. | |
def adaptive_median_threshold(img_in, delta=None):
    """Binarize an image slice relative to its own median intensity.

    Pixels whose value exceeds the slice median by less than ``delta``
    (which includes every pixel at or below the median) are set to 255;
    all remaining pixels stay 0. The 0-valued regions are then grown by
    two iterations of a 3x3 dilation, performed on the inverted mask.

    Args:
        img_in: Image slice to binarize.
        delta: Margin above the median that still gets marked 255.
            When ``None`` (the default) the module-level ``DELTA`` is
            used, matching the previous single-argument behaviour.

    Returns:
        An array with the same shape and dtype as ``img_in``, as produced
        by ``np.zeros_like`` followed by the steps above.
    """
    if delta is None:
        # Resolved at call time so later changes to the global still apply.
        delta = DELTA
    med = np.median(img_in)
    img_out = np.zeros_like(img_in)
    img_out[img_in - med < delta] = 255
    kernel = np.ones((3, 3), np.uint8)
    # Dilating the inverted mask and inverting back grows the 0 regions.
    img_out = 255 - cv2.dilate(255 - img_out, kernel, iterations=2)
    return img_out
# This function just divides the image into local regions (blocks)
# and applies the `adaptive_median_threshold(...)` function to each
# of the regions.
def block_image_process(image, block_size):
    """Threshold the image block by block and assemble the result.

    Block anchors are visited every ``block_size`` pixels along both
    axes. For each anchor, ``get_block_index`` supplies the index of the
    surrounding window, ``adaptive_median_threshold`` is applied to that
    window, and the result is written into the output at the same index.
    Where windows overlap, the window processed later overwrites the
    earlier values.

    Args:
        image: Input image; ``image.shape[0]`` and ``image.shape[1]`` are
            iterated as rows and columns.
        block_size: Step between anchors, also forwarded to
            ``get_block_index``.

    Returns:
        A new array shaped like ``image`` holding the per-block results.
    """
    out_image = np.zeros_like(image)
    n_rows, n_cols = image.shape[0], image.shape[1]
    for row in range(0, n_rows, block_size):
        for col in range(0, n_cols, block_size):
            # Coerce to a tuple: NumPy >= 1.23 does not treat a list of
            # index arrays (what np.meshgrid returns before NumPy 2.0)
            # as a multidimensional index.
            block_idx = tuple(
                get_block_index(image.shape, (row, col), block_size)
            )
            out_image[block_idx] = adaptive_median_threshold(image[block_idx])
    return out_image
# This function invokes the whole pipeline of Step 2. | |
def process_image(img):
    """Run the full binarization pipeline on a BGR image.

    Steps, in order: convert to grayscale, ``preprocess``,
    ``block_image_process`` with the module-level ``BLOCK_SIZE``, and
    ``postprocess``.

    Args:
        img: Image accepted by ``cv2.cvtColor`` with
            ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The output of ``postprocess`` on the block-thresholded image.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    prepared = preprocess(gray)
    binarized = block_image_process(prepared, BLOCK_SIZE)
    return postprocess(binarized)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment