Skip to content

Instantly share code, notes, and snippets.

@nirajpandkar
Last active October 26, 2023 21:17
Show Gist options
  • Save nirajpandkar/887ac6e95db3920382718095dc82e582 to your computer and use it in GitHub Desktop.
Save nirajpandkar/887ac6e95db3920382718095dc82e582 to your computer and use it in GitHub Desktop.
"""
This script removes stray lines from images using image-processing techniques from the OpenCV library.
Credit where it's due: Simon Mourier wrote the original script in C#
(https://stackoverflow.com/a/45563349/4411757), which I have ported to Python. Tested with Python 3.5 and
opencv-python==3.4.2.17. Post your suggestions/improvements in the comments. Cheers!
"""
def clean_image(img, lower=(0, 0, 0), upper=(120, 130, 130)):
    """Remove stray lines from an image and return a thresholded result.

    Pixels whose channels all lie within ``[lower, upper]`` are treated as
    line pixels. They are inpainted away, the result is dilated and smoothed,
    and the final image is binarised with Otsu's threshold.

    Args:
        img: Image array as returned by ``cv2.imread``. The pipeline converts
            with ``cv2.COLOR_BGR2GRAY``, so a 3-channel BGR image is assumed.
        lower: Per-channel lower bound of the line colour range.
        upper: Per-channel upper bound of the line colour range.

    Returns:
        The single-channel image produced by ``cv2.threshold`` with
        ``cv2.THRESH_OTSU`` and a maximum value of 255.

    Raises:
        ValueError: If ``img`` is ``None`` (``cv2.imread`` returns ``None``
            instead of raising when a file cannot be read).
    """
    if img is None:
        raise ValueError(
            "img is None; cv2.imread() returns None when the image cannot be read"
        )

    lower_bound = np.array(lower, dtype="uint8")
    upper_bound = np.array(upper, dtype="uint8")

    # Binary mask (0 or 255) of the pixels that fall inside the line colour range.
    line_mask = cv2.inRange(img, lower_bound, upper_bound)

    # Dilate the lines a bit because aliasing may have filtered their borders
    # too much during masking. The binary mask itself is dilated, rather than
    # a grayscale copy of the masked image: black (all-zero) line pixels would
    # otherwise be zero in that copy and silently drop out of the inpaint mask.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    inpaint_mask = cv2.dilate(line_mask, kernel, iterations=3)

    # Reference: https://docs.opencv.org/trunk/df/d3d/tutorial_py_inpainting.html
    # Repaint every masked pixel from its surroundings.
    restored = cv2.inpaint(img, inpaint_mask, 3, cv2.INPAINT_TELEA)

    # After inpainting there can be gaps/separation within individual
    # digits/letters, so dilate again so that each character forms a single
    # connected blob (useful for later contour detection).
    thickened = cv2.dilate(restored, kernel, iterations=2)

    # Reference for blurring and bilateral filtering:
    # https://docs.opencv.org/3.1.0/d4/d13/tutorial_py_filtering.html
    blurred = cv2.GaussianBlur(thickened, (5, 5), 0)
    smoothed = cv2.bilateralFilter(blurred, 5, 75, 75)
    gray = cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)

    # Otsu picks the threshold automatically; pixels above it become 255 and
    # the rest become 0.
    # Reference: https://docs.opencv.org/3.4.0/d7/d4d/tutorial_py_thresholding.html
    return cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU)[1]
# Example usage: read the image with cv2.imread() and pass it to clean_image().
# if __name__ == '__main__':
#     img = cv2.imread("<path to captcha image>")
#     img = clean_image(img)
#     cv2.imshow("Final Output", img)
#     cv2.waitKey(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment