Created
March 3, 2019 16:57
-
-
Save risicle/b9548ce4b5552b1542e39c475cc5f8e9 to your computer and use it in GitHub Desktop.
Collection of utility functions to implement rudimentary template-matching based OCR using OpenCV, intended for use with large, uniform blocks of monospaced, non-language-correlated text (effectively resulting in a "character grid") where a 100.0% accuracy is required. This could be useful if one, for example, found themselves needing to recover a hardcopy-printed backup of an OpenPGP key.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Collection of utility functions to implement rudimentary template-matching based OCR using OpenCV, | |
intended for use with large, uniform blocks of monospaced, non-language-correlated text (effectively | |
resulting in a "character grid") where a 100.0% accuracy is required. This could be useful if one, for | |
example, found themselves needing to recover a hardcopy-printed backup of an OpenPGP key. | |
Modern general-purpose OCR software doesn't tend to do that well with these, relying heavily on | |
language-context-based guessing, often trying to be too clever in layout auto-detection, all the while | |
being extremely tricky to configure precisely enough to allow us to impart our existing knowledge | |
about the constraints of the target text (expected alphabet, line length, expected font...). | |
""" | |
from itertools import chain, product | |
import cv2 | |
import numpy as np | |
# | |
# following numpy convention, coordinates and dimensions are specified in (y, x) order. | |
# | |
default_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" | |
default_template_dims = (36, 24,) | |
default_search_padding = (4, 4,) | |
def get_perspective_transform( | |
px_coords, | |
grid_dims, | |
): | |
""" | |
Return a perspective transform matrix which will transform from character-grid coordinates | |
to pixel coordinates of an image. | |
:param px_coords: sequence of pixel coordinates defining the four corners of the character grid | |
in the image - ordered top left, top right, bottom left, bottom right | |
:param grid_dims: dimensions of character-grid | |
""" | |
return cv2.getPerspectiveTransform( | |
np.float32(tuple(product(*((0, a) for a in grid_dims)))), | |
np.float32(px_coords), | |
) | |
def get_char_representatives( | |
initial_lines, | |
grid_dims, | |
alphabet=default_alphabet, | |
): | |
""" | |
Given some initial_lines of a character grid, will generate a sequence of sequences of | |
character-grid coordinates of positions that can be used as representatives of a particular | |
character for comparisons. | |
:param initial_lines: the (known) initial lines of this character grid, the more the better. at | |
least enough will be needed so that each character in the alphabet has | |
at least one example appearance. | |
:param grid_dims: dimensions of character-grid | |
""" | |
if any(len(line) != grid_dims[1] for line in initial_lines[:-1]): | |
raise ValueError("All non-final initial_lines must be of length grid_dims[1]") | |
representatives = tuple([] for _ in alphabet) | |
for y, line in enumerate(initial_lines): | |
for x, char in enumerate(line): | |
representatives[alphabet.index(char)].append((y, x)) | |
for i, rprs in enumerate(representatives): | |
if not rprs: | |
raise ValueError(f"Alphabet character {alphabet.index(i)!r} missing from initial_lines") | |
return tuple(tuple(rprs) for rprs in representatives) | |
def get_correspondences( | |
img, | |
representatives, | |
pmat, | |
grid_dims, | |
template_dims=default_template_dims, | |
search_padding=default_search_padding, | |
method=cv2.TM_SQDIFF_NORMED, | |
score_finalizer=np.amin, | |
): | |
""" | |
Build an array of shape (len(representatives), grid_dims[0], grid_dims[1]) denoting the "scores" | |
of correspondence between each character/grid-position combination. | |
:param img: two-dimensional numpy array of monochrome image | |
:param representatives: character-grid coordinates of representatives for each character in | |
alphabet, as returned by ``get_char_representatives`` | |
:param pmat: perspective transform matrix from character-grid space to pixel space, as | |
returned by ``get_perspective_transform`` | |
:param grid_dims: dimensions of character-grid | |
""" | |
correspondences = np.zeros((len(representatives), *grid_dims), "float32") | |
# pre-allocate matchTemplate result array for heavy reuse | |
match_result = np.zeros(((2*search_padding[0])+1, (2*search_padding[1])+1, 1,), "float32") | |
for gridpos in product(range(grid_dims[0]), range(grid_dims[1])): | |
pxpos = np.int32(cv2.perspectiveTransform(np.float32(((gridpos,),)), pmat)[0][0]) | |
sample = img[ | |
pxpos[0]-search_padding[0]:pxpos[0]+template_dims[0]+search_padding[0], | |
pxpos[1]-search_padding[1]:pxpos[1]+template_dims[1]+search_padding[0], | |
0, | |
] | |
for char_i, char_rprs in enumerate(representatives): | |
finalized_results = [] | |
for t_gridpos in char_rprs: | |
t_pxpos = np.int32(cv2.perspectiveTransform(np.float32(((t_gridpos,),)), pmat)[0][0]) | |
template = img[ | |
t_pxpos[0]:t_pxpos[0]+template_dims[0], | |
t_pxpos[1]:t_pxpos[1]+template_dims[1], | |
0, | |
] | |
cv2.matchTemplate( | |
sample, | |
template, | |
method, | |
match_result, | |
) | |
finalized_results.append(score_finalizer(match_result)) | |
correspondences[(char_i, *gridpos,)] = score_finalizer(finalized_results) | |
return correspondences | |
def get_lines_from_correspondences_simple( | |
correspondences, | |
alphabet=default_alphabet, | |
selector=np.argmin, | |
): | |
""" | |
Return probable lines of target text guessed using extremely naive "best score wins" | |
:param correspondences: numpy array of character correspondences, as returned by | |
``get_correspondences`` | |
""" | |
return tuple( | |
"".join( | |
alphabet[selector(correspondences[:, y, x])] | |
for x in range(correspondences.shape[2]) | |
) for y in range(correspondences.shape[1]) | |
) | |
def get_contentions(correspondences, score_inverse=True): | |
""" | |
Return sequence of grid-coordinates, ordered by how small the score difference was in determining | |
the top match in ``correspondences``. | |
""" | |
return sorted( | |
(abs(sc[1]-sc[0]), gridpos) | |
for gridpos, sc in ( | |
(gridpos, sorted(correspondences[:,gridpos[0],gridpos[1]], reverse=(not score_inverse))) | |
for gridpos in product( | |
range(correspondences.shape[1]), | |
range(correspondences.shape[2]), | |
) | |
) | |
) | |
def occurrence_comparison( | |
img, | |
lines, | |
pmat, | |
alphabet=default_alphabet, | |
template_dims=default_template_dims, | |
search_padding=default_search_padding, | |
method=cv2.TM_SQDIFF_NORMED, | |
score_finalizer=np.amin, | |
): | |
""" | |
Compares each occurence of a character in the guessed output to all other occurrences. This output | |
can be used to try to identify the "odd ones out" in a character family, those presumably being | |
among the ones with the most dissimilarity. | |
:param img: two-dimensional numpy array of monochrome image | |
:param lines: sequence of strings of guessed contents of character grid | |
:param pmat: perspective transform matrix from character-grid space to pixel space, as returned | |
by ``get_perspective_transform`` | |
:returns: sequence of sequences of grid-position combinations and their respective similarity | |
scores, each entry being (score, grid_position0, grid_position1). grid-position | |
combinations are de-duplicated. | |
""" | |
char_locs = tuple([] for _ in alphabet) | |
similarities = tuple([] for _ in alphabet) | |
# pre-allocate matchTemplate result array for heavy reuse | |
match_result = np.zeros(((2*search_padding[0])+1, (2*search_padding[1])+1, 1,), "float32") | |
for y, line in enumerate(lines): | |
for x, char in enumerate(line): | |
char_i = alphabet.index(char) | |
pxpos = np.int32(cv2.perspectiveTransform(np.float32((((y, x,),),)), pmat)[0][0]) | |
sample = img[ | |
pxpos[0]-search_padding[0]:pxpos[0]+template_dims[0]+search_padding[0], | |
pxpos[1]-search_padding[1]:pxpos[1]+template_dims[1]+search_padding[0], | |
0, | |
] | |
new_similarities = [] | |
for t_gridpos in char_locs[char_i]: | |
t_pxpos = np.int32(cv2.perspectiveTransform(np.float32(((t_gridpos,),)), pmat)[0][0]) | |
template = img[ | |
t_pxpos[0]:t_pxpos[0]+template_dims[0], | |
t_pxpos[1]:t_pxpos[1]+template_dims[1], | |
0, | |
] | |
cv2.matchTemplate( | |
sample, | |
template, | |
method, | |
match_result, | |
) | |
new_similarities.append((score_finalizer(match_result), t_gridpos, (y, x),)) | |
similarities[char_i].extend(new_similarities) | |
char_locs[char_i].append((y, x)) | |
return similarities | |
def get_char_atlas( | |
img, | |
lines, | |
pmat, | |
alphabet=default_alphabet, | |
template_dims=default_template_dims, | |
search_padding=default_search_padding, | |
): | |
""" | |
Generates a "character atlas" image of all character samples in an image, grouped by character. | |
This allows "odd ones out" to be more easily identified by eye. | |
:param img: two-dimensional numpy array of monochrome image | |
:param lines: sequence of strings of guessed contents of character grid | |
:param pmat: perspective transform matrix from character-grid space to pixel space, as returned | |
by ``get_perspective_transform`` | |
:returns: tuple of output image followed by an index of character positions each sample in | |
output represents | |
""" | |
char_locs = tuple([] for _ in alphabet) | |
for y, line in enumerate(lines): | |
for x, char in enumerate(line): | |
char_locs[alphabet.index(char)].append((y, x)) | |
sample_dims = (template_dims[0] + (search_padding[0] * 2), template_dims[1] + (search_padding[1] * 2),) | |
atlas_width = max(len(locs) for locs in char_locs) * sample_dims[1] | |
atlas = np.zeros((sample_dims[0] * len(alphabet), atlas_width,), "uint8") | |
for atlas_grid_y, locs in enumerate(char_locs): | |
for atlas_grid_x, loc in enumerate(locs): | |
pxpos = np.int32(cv2.perspectiveTransform(np.float32(((loc,),)), pmat)[0][0]) | |
sample = img[ | |
pxpos[0]-search_padding[0]:pxpos[0]+template_dims[0]+search_padding[0], | |
pxpos[1]-search_padding[1]:pxpos[1]+template_dims[1]+search_padding[0], | |
0, | |
] | |
atlas[ | |
atlas_grid_y * sample_dims[0]:(atlas_grid_y * sample_dims[0]) + sample.shape[0], | |
atlas_grid_x * sample_dims[1]:(atlas_grid_x * sample_dims[1]) + sample.shape[1], | |
] = sample | |
return atlas, char_locs |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment