# uzl/cropping_char.py

## cropping_char.py
from pathlib import Path
from pprint import pprint
import cv2
import numpy as np
import matplotlib.pyplot as plt
import concurrent
from tqdm import tqdm

# Just for visualization. By default it is inactive
def display_img(img_list):
    """Show every image in ``img_list`` on one matplotlib figure, four per row.

    Args:
        img_list: Sequence of image arrays. Arrays whose ``shape`` has two
            entries are drawn with the gray colormap; all others are handed
            to ``imshow`` unchanged.
    """
    n_cols = 4
    # Ceiling division gives exactly as many rows as the images need. The
    # former ``len // 4 + 1`` reserved an extra, empty row whenever the image
    # count was a multiple of four.
    n_rows = max(1, (len(img_list) + n_cols - 1) // n_cols)
    fig = plt.figure(figsize=(12, 8))
    for position, image in enumerate(img_list, start=1):
        ax = fig.add_subplot(n_rows, n_cols, position)
        if len(image.shape) == 2:
            ax.imshow(image, cmap='gray')
        else:
            ax.imshow(image)

    plt.show()


# Track the filename if there is any error during cropping
def write_error_log(file_name):
    """Append ``file_name`` as a single line to ``cropping_error_log.txt``.

    The log is opened in append mode, so earlier entries are kept. The file
    name is relative, i.e. it is resolved against the current working
    directory.

    Args:
        file_name: Value to record; it is converted with ``str()``.
    """
    with open('cropping_error_log.txt', mode='a') as log_file:
        log_file.writelines((str(file_name), '\n'))

# cropping text part
def crop_important_region(img_src_path, is_display=False):
    """Crop the largest contour region of an image and return it as 128x128.

    The image is read as grayscale, smoothed with a bilateral filter,
    binarized with an inverted adaptive threshold and dilated. The bounding
    box of the contour with the largest area is cropped from the grayscale
    image, centred on a white square canvas whose side equals the longer
    side of the box, and resized to 128x128.

    Args:
        img_src_path: Path of the image file (``str`` or path-like).
        is_display: When true, the intermediate images are shown with
            ``display_img``.

    Returns:
        The 128x128 ``uint8`` grayscale image.

    Raises:
        ValueError: If the image cannot be read or no contour is found.
    """
    # cv2.imread reports failure by returning None instead of raising
    img = cv2.imread(str(img_src_path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise ValueError(f'Could not read image: {img_src_path}')

    # apply bilateralFilter to clean background noise preserving edge sharpness
    smooth_img = cv2.bilateralFilter(img, 9, 75, 75)
    # binarize image (inverse binary is needed for contour detection)
    binary_img = cv2.adaptiveThreshold(
        smooth_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV, 11, 2)
    # dilate so that neighbouring strokes merge into one thick region
    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
    dialated_img = cv2.dilate(binary_img, kernel, iterations=11)

    # OpenCV 3.x returns (image, contours, hierarchy) while other versions
    # return (contours, hierarchy); the contours are always second to last.
    contours = cv2.findContours(
        dialated_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        raise ValueError(f'No contour found in image: {img_src_path}')

    # bounding box of the contour with the largest area
    largest_contour = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest_contour)
    # crop only the text region
    cropped_image = img[y:y+h, x:x+w]

    # White square canvas with the crop in its centre. The offsets depend
    # only on the crop size, so the same crop is placed identically no matter
    # where it sat in the source image.
    larg_side = max(w, h)
    square_img = np.full((larg_side, larg_side), 255, dtype=np.uint8)
    left = (larg_side - w) // 2
    top = (larg_side - h) // 2
    square_img[top:top+h, left:left+w] = cropped_image

    resized_128 = cv2.resize(square_img, (128, 128))

    if is_display:
        # the colour copy exists only to draw the green bounding box on it
        new_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        cv2.rectangle(new_img, (x, y), (x+w, y+h), (0, 255, 0), 2)
        display_img([img, smooth_img, binary_img, dialated_img, new_img,
                     cropped_image, resized_128])
    return resized_128


# ------------------------------------------------------------------------------------------
#-------------------------------------------------------------------------------------------
# collect path of all images
train_data_path = Path.cwd()/'dataset'/'bengali_ai_dataset'/'test'
iamge_path_list = []
for extension in ['*.png', '*.jpg']:
    # rglob searches the directory tree recursively
    iamge_path_list.extend(train_data_path.rglob(extension))
print(f'Found {len(iamge_path_list)} images')

# create mapping of filename (without extension) to image path
filename_to_path_mapping = dict()
for path in iamge_path_list:
    filename = path.stem
    # explicit raise instead of ``assert`` so the check survives ``python -O``
    if filename in filename_to_path_mapping:
        raise AssertionError('Deuplicate file name.')
    filename_to_path_mapping[filename] = path


# every image is written to resized_128/<name of its parent directory>/<file name>
output_dir = Path.cwd()/'dataset'/ 'resized_128'
for filename, path in tqdm(filename_to_path_mapping.items()):
    try:
        img_128 = crop_important_region(str(path), is_display=False)
        target_path = output_dir / path.parent.name
        target_path.mkdir(parents=True, exist_ok=True)
        target_path = target_path / path.name
        # cv2.imwrite reports failure through its return value, not an exception
        if not cv2.imwrite(str(target_path), img_128):
            write_error_log(str(path))
    except Exception:
        # Log the failing file and keep going. ``Exception`` (rather than a
        # bare ``except``) lets Ctrl-C / SystemExit stop the run.
        write_error_log(str(path))
	from pathlib import Path
	from pprint import pprint
	import cv2
	import numpy as np
	import matplotlib.pyplot as plt
	import concurrent
	from tqdm import tqdm

	# Just for visualization. By default it is inactive
	def display_img(img_list):
	    """Show every image in ``img_list`` on one matplotlib figure, four per row.

	    Args:
	        img_list: Sequence of image arrays. Arrays whose ``shape`` has two
	            entries are drawn with the gray colormap; all others are handed
	            to ``imshow`` unchanged.
	    """
	    n_cols = 4
	    # Ceiling division gives exactly as many rows as the images need. The
	    # former ``len // 4 + 1`` reserved an extra, empty row whenever the image
	    # count was a multiple of four.
	    n_rows = max(1, (len(img_list) + n_cols - 1) // n_cols)
	    fig = plt.figure(figsize=(12, 8))
	    for position, image in enumerate(img_list, start=1):
	        ax = fig.add_subplot(n_rows, n_cols, position)
	        if len(image.shape) == 2:
	            ax.imshow(image, cmap='gray')
	        else:
	            ax.imshow(image)

	    plt.show()


	# Track the filename if there is any error during cropping
	def write_error_log(file_name):
	    """Append ``file_name`` as a single line to ``cropping_error_log.txt``.

	    The log is opened in append mode, so earlier entries are kept. The file
	    name is relative, i.e. it is resolved against the current working
	    directory.

	    Args:
	        file_name: Value to record; it is converted with ``str()``.
	    """
	    with open('cropping_error_log.txt', 'a') as fp:
	        fp.write(str(file_name))
	        fp.write('\n')

	# cropping text part
	def crop_important_region(img_src_path, is_display=False):
	    """Crop the largest contour region of an image and return it as 128x128.

	    The image is read as grayscale, smoothed with a bilateral filter,
	    binarized with an inverted adaptive threshold and dilated. The bounding
	    box of the contour with the largest area is cropped from the grayscale
	    image, centred on a white square canvas whose side equals the longer
	    side of the box, and resized to 128x128.

	    Args:
	        img_src_path: Path of the image file (``str`` or path-like).
	        is_display: When true, the intermediate images are shown with
	            ``display_img``.

	    Returns:
	        The 128x128 ``uint8`` grayscale image.

	    Raises:
	        ValueError: If the image cannot be read or no contour is found.
	    """
	    # cv2.imread reports failure by returning None instead of raising
	    img = cv2.imread(str(img_src_path), cv2.IMREAD_GRAYSCALE)
	    if img is None:
	        raise ValueError(f'Could not read image: {img_src_path}')

	    # apply bilateralFilter to clean background noise preserving edge sharpness
	    smooth_img = cv2.bilateralFilter(img, 9, 75, 75)
	    # binarize image (inverse binary is needed for contour detection)
	    binary_img = cv2.adaptiveThreshold(
	        smooth_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	        cv2.THRESH_BINARY_INV, 11, 2)
	    # dilate so that neighbouring strokes merge into one thick region
	    kernel = cv2.getStructuringElement(cv2.MORPH_CROSS, (3, 3))
	    dialated_img = cv2.dilate(binary_img, kernel, iterations=11)

	    # OpenCV 3.x returns (image, contours, hierarchy) while other versions
	    # return (contours, hierarchy); the contours are always second to last.
	    contours = cv2.findContours(
	        dialated_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
	    if len(contours) == 0:
	        raise ValueError(f'No contour found in image: {img_src_path}')

	    # bounding box of the contour with the largest area
	    largest_contour = max(contours, key=cv2.contourArea)
	    x, y, w, h = cv2.boundingRect(largest_contour)
	    # crop only the text region
	    cropped_image = img[y:y+h, x:x+w]

	    # White square canvas with the crop in its centre. The offsets depend
	    # only on the crop size, so the same crop is placed identically no matter
	    # where it sat in the source image.
	    larg_side = max(w, h)
	    square_img = np.full((larg_side, larg_side), 255, dtype=np.uint8)
	    left = (larg_side - w) // 2
	    top = (larg_side - h) // 2
	    square_img[top:top+h, left:left+w] = cropped_image

	    resized_128 = cv2.resize(square_img, (128, 128))

	    if is_display:
	        # the colour copy exists only to draw the green bounding box on it
	        new_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
	        cv2.rectangle(new_img, (x, y), (x+w, y+h), (0, 255, 0), 2)
	        display_img([img, smooth_img, binary_img, dialated_img, new_img,
	                     cropped_image, resized_128])
	    return resized_128


	# ------------------------------------------------------------------------------------------
	#-------------------------------------------------------------------------------------------
	# collect path of all images
	train_data_path = Path.cwd()/'dataset'/'bengali_ai_dataset'/'test'
	iamge_path_list = []
	# The patterns need the leading ``*`` wildcard; a bare '.png' would only
	# match files that are literally named ".png".
	for extension in ['*.png', '*.jpg']:
	    # rglob searches the directory tree recursively
	    iamge_path_list.extend(train_data_path.rglob(extension))
	print(f'Found {len(iamge_path_list)} images')

	# create mapping of filename (without extension) to image path
	filename_to_path_mapping = dict()
	for path in iamge_path_list:
	    filename = path.stem
	    # explicit raise instead of ``assert`` so the check survives ``python -O``
	    if filename in filename_to_path_mapping:
	        raise AssertionError('Deuplicate file name.')
	    filename_to_path_mapping[filename] = path


	# every image is written to resized_128/<name of its parent directory>/<file name>
	output_dir = Path.cwd()/'dataset'/ 'resized_128'
	for filename, path in tqdm(filename_to_path_mapping.items()):
	    try:
	        img_128 = crop_important_region(str(path), is_display=False)
	        target_path = output_dir / path.parent.name
	        target_path.mkdir(parents=True, exist_ok=True)
	        target_path = target_path / path.name
	        # cv2.imwrite reports failure through its return value, not an exception
	        if not cv2.imwrite(str(target_path), img_128):
	            write_error_log(str(path))
	    except Exception:
	        # Log the failing file and keep going. ``Exception`` (rather than a
	        # bare ``except``) lets Ctrl-C / SystemExit stop the run.
	        write_error_log(str(path))