# NicoLivesey/baseline_ocr.py

## baseline_ocr.py
def remove_clear(img, degree):
    """Binarize an image ahead of the text-detection step.

    The image is reduced to grayscale, thresholded with a Gaussian adaptive
    threshold (block size 65, constant 11, max value 255) and then cleaned
    with a 3x3 rectangular morphological opening.

    Args:
        img: Input image. A 2-D array is treated as already grayscale and
            used as is; anything else is converted with
            ``cv2.COLOR_BGR2GRAY``.
        degree: Unused. It was the diameter argument of a bilateral filter
            that is no longer applied; kept so existing callers keep
            working.

    Returns:
        The thresholded and opened single-channel image.
    """
    # COLOR_BGR2GRAY expects a multi-channel image, so skip the conversion
    # when the caller already passed a 2-D (grayscale) array.
    if getattr(img, "ndim", None) != 2:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    img = cv2.adaptiveThreshold(
        img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 65, 11
    )

    # Morphological opening with a 3x3 rectangular kernel.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    return cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

def detect_text(img, thresh):
    """Keep only the likely text regions of a thresholded image.

    Candidate regions are located on the morphological gradient of the
    grayscale image: the gradient is binarized with Otsu's method, closed
    with a horizontal 11x1 kernel, and the external contours of the result
    are filtered by fill ratio and bounding-box size.

    Args:
        img: Source image. A 2-D array is treated as already grayscale;
            anything else is converted with ``cv2.COLOR_BGR2GRAY``.
        thresh: Thresholded single-channel image with the same height and
            width as ``img`` (``image_to_string`` passes the output of
            ``remove_clear``).

    Returns:
        A ``(res, score)`` tuple. ``res`` equals ``thresh`` inside the
        retained boxes and 255 everywhere else. ``score`` is the fraction
        of image pixels covered by the retained boxes.
    """
    if getattr(img, "ndim", None) == 2:
        gray = img
    else:
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    grad_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    grad = cv2.morphologyEx(gray, cv2.MORPH_GRADIENT, grad_kernel)

    _, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 1))
    connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, close_kernel)

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x and 4.x; the contour list is always the
    # second-to-last element.
    contours = cv2.findContours(
        connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )[-2]

    mask = np.zeros(bw.shape, dtype=np.uint8)
    final_mask = np.zeros(gray.shape, dtype=np.uint8)

    for idx, contour in enumerate(contours):
        x, y, w, h = cv2.boundingRect(contour)
        # Clear the box first so previously drawn contours do not inflate
        # this contour's fill ratio.
        mask[y:y+h, x:x+w] = 0
        cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
        r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)

        # NOTE(review): box width is compared against shape[0] (rows) and
        # box height against shape[1] (columns); kept as in the original,
        # confirm this is intended.
        if r > 0.3 and w > gray.shape[0]/70 and h > gray.shape[1]/100 and h < gray.shape[1]/15:
            # NOTE(review): the last row and column of the box are left
            # out, as in the original.
            final_mask[y:(y+h-1), x:(x+w-1)] = 255

    # Foreground: thresholded pixels inside the retained boxes, 0 elsewhere.
    fg = cv2.bitwise_and(thresh, thresh, mask=final_mask)

    # Background: 255 outside the retained boxes, 0 inside.
    bk = cv2.bitwise_not(final_mask)

    # Combine foreground + background.
    res = cv2.bitwise_or(fg, bk)

    # Float denominator so the ratio is not truncated by integer division.
    score = (final_mask == 255).sum() / float(final_mask.size)

    return res, score

def image_to_string(image):
    """Preprocess an image and return the text Tesseract reads from it.

    The image is binarized with ``remove_clear``, reduced to its detected
    text regions with ``detect_text``, and the result is passed to
    ``pytesseract.image_to_string``.

    Args:
        image: Image forwarded to ``remove_clear`` and ``detect_text``.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the
        preprocessed image.
    """
    # NOTE(review): recent Tesseract releases spell the engine-mode flag
    # ``--oem``; confirm ``-oem`` is accepted by the deployed version.
    tesseract_config = "-c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz:. -l fra -oem 1"

    binarized = remove_clear(image, 150)

    # detect_text also returns a coverage score, which is not used here.
    text_only, _ = detect_text(image, binarized)

    return pytesseract.image_to_string(text_only, config=tesseract_config)
	def remove_clear(img, degree):
		"""Binarize an image ahead of the text-detection step.

		The image is reduced to grayscale, thresholded with a Gaussian
		adaptive threshold (block size 65, constant 11, max value 255) and
		then cleaned with a 3x3 rectangular morphological opening.

		Args:
			img: Input image. A 2-D array is treated as already grayscale
				and used as is; anything else is converted with
				``cv2.COLOR_BGR2GRAY``.
			degree: Unused. It was the diameter argument of a bilateral
				filter that is no longer applied; kept so existing callers
				keep working.

		Returns:
			The thresholded and opened single-channel image.
		"""
		# COLOR_BGR2GRAY expects a multi-channel image, so skip the
		# conversion when the caller already passed a 2-D array.
		if getattr(img, "ndim", None) != 2:
			img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

		img = cv2.adaptiveThreshold(
			img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 65, 11
		)

		# Morphological opening with a 3x3 rectangular kernel.
		kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
		return cv2.morphologyEx(img, cv2.MORPH_OPEN, kernel)

	def detect_text(img, thresh):
		"""Keep only the likely text regions of a thresholded image.

		Candidate regions are located on the morphological gradient of the
		grayscale image: the gradient is binarized with Otsu's method,
		closed with a horizontal 11x1 kernel, and the external contours of
		the result are filtered by fill ratio and bounding-box size.

		Args:
			img: Source image. A 2-D array is treated as already grayscale;
				anything else is converted with ``cv2.COLOR_BGR2GRAY``.
			thresh: Thresholded single-channel image with the same height
				and width as ``img``.

		Returns:
			A ``(res, score)`` tuple. ``res`` equals ``thresh`` inside the
			retained boxes and 255 everywhere else. ``score`` is the
			fraction of image pixels covered by the retained boxes.
		"""
		if getattr(img, "ndim", None) == 2:
			gray = img
		else:
			gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

		grad_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
		grad = cv2.morphologyEx(gray, cv2.MORPH_GRADIENT, grad_kernel)

		_, bw = cv2.threshold(grad, 0.0, 255.0, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

		close_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (11, 1))
		connected = cv2.morphologyEx(bw, cv2.MORPH_CLOSE, close_kernel)

		# findContours returns (image, contours, hierarchy) on OpenCV 3.x
		# but (contours, hierarchy) on 2.x and 4.x; the contour list is
		# always the second-to-last element.
		contours = cv2.findContours(
			connected.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
		)[-2]

		mask = np.zeros(bw.shape, dtype=np.uint8)
		final_mask = np.zeros(gray.shape, dtype=np.uint8)

		for idx, contour in enumerate(contours):
			x, y, w, h = cv2.boundingRect(contour)
			# Clear the box first so previously drawn contours do not
			# inflate this contour's fill ratio.
			mask[y:y+h, x:x+w] = 0
			cv2.drawContours(mask, contours, idx, (255, 255, 255), -1)
			r = float(cv2.countNonZero(mask[y:y+h, x:x+w])) / (w * h)

			# NOTE(review): box width is compared against shape[0] (rows)
			# and box height against shape[1] (columns); kept as in the
			# original, confirm this is intended.
			if r > 0.3 and w > gray.shape[0]/70 and h > gray.shape[1]/100 and h < gray.shape[1]/15:
				# NOTE(review): the last row and column of the box are
				# left out, as in the original.
				final_mask[y:(y+h-1), x:(x+w-1)] = 255

		# Foreground: thresholded pixels inside the retained boxes.
		fg = cv2.bitwise_and(thresh, thresh, mask=final_mask)

		# Background: 255 outside the retained boxes, 0 inside.
		bk = cv2.bitwise_not(final_mask)

		# Combine foreground + background.
		res = cv2.bitwise_or(fg, bk)

		# Float denominator so the ratio is not truncated by integer
		# division.
		score = (final_mask == 255).sum() / float(final_mask.size)

		return res, score

	def image_to_string(image):
		"""Preprocess an image and return the text Tesseract reads from it.

		The image is binarized with ``remove_clear``, reduced to its
		detected text regions with ``detect_text``, and the result is
		passed to ``pytesseract.image_to_string``.

		Args:
			image: Image forwarded to ``remove_clear`` and ``detect_text``.

		Returns:
			Whatever ``pytesseract.image_to_string`` returns for the
			preprocessed image.
		"""
		thresh = remove_clear(image, 150)

		# detect_text also returns a coverage score, which is not used here.
		final, _ = detect_text(image, thresh)

		# Recognize text with tesseract for python.
		# NOTE(review): recent Tesseract releases spell the engine-mode
		# flag ``--oem``; confirm ``-oem`` is accepted by the deployed
		# version.
		result = pytesseract.image_to_string(final, config = "-c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz:. -l fra -oem 1")

		return result