Skip to content

Instantly share code, notes, and snippets.

@anantgupta129
Created June 6, 2022 06:01
Show Gist options
  • Save anantgupta129/e8befdc1b475e6eb70dddcfa522e699f to your computer and use it in GitHub Desktop.
Save anantgupta129/e8befdc1b475e6eb70dddcfa522e699f to your computer and use it in GitHub Desktop.
Image preprocessing for improving OCR
import os
from typing import Tuple
import cv2
import numpy as np
def image_binarization(image: np.ndarray) -> np.ndarray:
    """Return a black-and-white version of *image* using Otsu thresholding.

    Inputs with more than two dimensions are converted from BGR to
    grayscale first. The grayscale image is then passed through
    ``noise_removal`` before the threshold is applied.

    Args:
        image: Input image; either 2-D (already grayscale) or a
            multi-channel array that ``cv2.COLOR_BGR2GRAY`` accepts.

    Returns:
        The thresholded image produced by ``cv2.threshold``.
    """
    # Only multi-channel inputs need the colour -> gray conversion.
    gray = image if image.ndim <= 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    flattened = noise_removal(gray)
    # cv2.threshold returns (threshold_used, image); only the image is needed.
    _, out_binary = cv2.threshold(flattened, 0, 255, cv2.THRESH_OTSU)
    return out_binary
def noise_removal(image: np.ndarray) -> np.ndarray:
    """Divide a grayscale image by a dilated copy of itself.

    The image is dilated with an 8x8 rectangular structuring element and
    the (grayscale) input is divided by that result, scaled by 255.

    Args:
        image: Input image; arrays with more than two dimensions are
            converted from BGR to grayscale first.

    Returns:
        The result of ``cv2.divide(gray, dilated, scale=255)``.
    """
    gray = image
    if gray.ndim > 2:  # colour input: collapse to a single channel
        gray = cv2.cvtColor(gray, cv2.COLOR_BGR2GRAY)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (8, 8))
    # Dilation serves as the background estimate the image is divided by.
    background = cv2.morphologyEx(gray, cv2.MORPH_DILATE, kernel)
    return cv2.divide(gray, background, scale=255)
def denoising(image: np.ndarray) -> np.ndarray:
    """Denoise a colour image with OpenCV's non-local-means filter.

    Args:
        image: Colour image passed straight to
            ``cv2.fastNlMeansDenoisingColored``
            (presumably 8-bit, 3-channel BGR -- confirm against callers).

    Returns:
        The denoised image returned by OpenCV.
    """
    return cv2.fastNlMeansDenoisingColored(
        image,
        None,  # let OpenCV allocate the destination
        h=10,
        hColor=10,
        templateWindowSize=7,
        searchWindowSize=21,
    )
def unsharp_mask(
    image: np.ndarray,
    kernel_size: Tuple[int, int] = (5, 5),
    sigma: float = 1.0,
    amount: float = 1.0,
    threshold: int = 0,
) -> np.ndarray:
    """Sharpen *image* with an unsharp mask.

    The result is ``(amount + 1) * image - amount * blurred`` where
    ``blurred`` is a Gaussian blur of the input, clipped to ``[0, 255]``,
    rounded and converted to ``uint8``.

    Args:
        image: Image to sharpen.
        kernel_size: Gaussian kernel size passed to ``cv2.GaussianBlur``.
        sigma: Gaussian sigma passed to ``cv2.GaussianBlur``.
        amount: Sharpening strength; ``0`` reproduces the input.
        threshold: When greater than zero, pixels whose absolute difference
            from the blurred image is below this value keep their original
            value instead of the sharpened one.

    Returns:
        The sharpened image as a ``uint8`` array.
    """
    blurred = cv2.GaussianBlur(image, kernel_size, sigma)

    # Work in floating point: subtracting unsigned 8-bit arrays wraps
    # around (e.g. 10 - 12 == 254), which would corrupt the contrast mask.
    image_f = image.astype(np.float64)
    blurred_f = blurred.astype(np.float64)

    sharpened = float(amount + 1) * image_f - float(amount) * blurred_f
    sharpened = np.clip(sharpened, 0, 255).round().astype(np.uint8)

    if threshold > 0:
        # Leave low-contrast (flat) regions untouched so noise there is
        # not amplified.
        low_contrast_mask = np.absolute(image_f - blurred_f) < threshold
        np.copyto(sharpened, image, where=low_contrast_mask)
    return sharpened
# --- Example usage -------------------------------------------------------
# Reads three sample images from ./examples, runs each preprocessing step,
# and writes the results to the current directory.
# NOTE: per the OpenCV docs, cv2.imread returns None (instead of raising)
# when a file cannot be read, so a missing example file surfaces as an
# error inside the processing function below.

# Binarization and background normalisation both start from the same input.
image = cv2.imread(os.path.join("examples", "image.png"))
im_bin = image_binarization(image)
out_gray = noise_removal(image)

# Non-local-means denoising of a speckled image.
speckle = cv2.imread(os.path.join("examples", "speckle.png"))
speckle = denoising(speckle)

# Unsharp masking of a blurry image (default parameters).
blurry = cv2.imread(os.path.join("examples", "blurry.png"))
blurry = unsharp_mask(blurry)

cv2.imwrite("im_bin.png", im_bin)
cv2.imwrite("out_gray.png", out_gray)
cv2.imwrite("un_speckle.png", speckle)
cv2.imwrite("un_blurry.png", blurry)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment