@risicle
Created March 3, 2019 16:57
"""
Collection of utility functions to implement rudimentary template-matching based OCR using OpenCV,
intended for use with large, uniform blocks of monospaced, non-language-correlated text (effectively
resulting in a "character grid") where 100.0% accuracy is required. This could be useful if one, for
example, found themselves needing to recover a hardcopy-printed backup of an OpenPGP key.
Modern general-purpose OCR software doesn't tend to do that well with these, relying heavily on
language-context-based guessing, often trying to be too clever in layout auto-detection, all the while
being extremely tricky to configure precisely enough to allow us to impart our existing knowledge
about the constraints of the target text (expected alphabet, line length, expected font...).
"""
from itertools import chain, product
import cv2
import numpy as np
#
# following numpy convention, coordinates and dimensions are specified in (y, x) order.
#
default_alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
default_template_dims = (36, 24,)
default_search_padding = (4, 4,)

def get_perspective_transform(
    px_coords,
    grid_dims,
):
    """
    Return a perspective transform matrix which will transform from character-grid coordinates
    to pixel coordinates of an image.

    :param px_coords: sequence of pixel coordinates defining the four corners of the character grid
                      in the image - ordered top left, top right, bottom left, bottom right
    :param grid_dims: dimensions of character-grid
    """
    return cv2.getPerspectiveTransform(
        np.float32(tuple(product(*((0, a) for a in grid_dims)))),
        np.float32(px_coords),
    )
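
# A minimal usage sketch: the corner pixel coordinates and grid dimensions below are
# hypothetical, standing in for values measured from a real scan (note the (y, x)
# ordering used throughout):
#
#   pmat = get_perspective_transform(
#       px_coords=((120, 80), (118, 1890), (2350, 85), (2348, 1895)),
#       grid_dims=(40, 64),  # 40 lines of 64 characters
#   )
#   # pmat now maps any character-grid coordinate to its pixel position, e.g.
#   # cv2.perspectiveTransform(np.float32((((0, 0),),)), pmat)[0][0] ~ (120, 80)
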
def get_char_representatives(
    initial_lines,
    grid_dims,
    alphabet=default_alphabet,
):
    """
    Given some initial_lines of a character grid, will generate a sequence of sequences of
    character-grid coordinates of positions that can be used as representatives of a particular
    character for comparisons.

    :param initial_lines: the (known) initial lines of this character grid, the more the better. at
                          least enough will be needed so that each character in the alphabet has
                          at least one example appearance.
    :param grid_dims: dimensions of character-grid
    """
    if any(len(line) != grid_dims[1] for line in initial_lines[:-1]):
        raise ValueError("All non-final initial_lines must be of length grid_dims[1]")

    representatives = tuple([] for _ in alphabet)
    for y, line in enumerate(initial_lines):
        for x, char in enumerate(line):
            representatives[alphabet.index(char)].append((y, x))

    for i, rprs in enumerate(representatives):
        if not rprs:
            raise ValueError(f"Alphabet character {alphabet[i]!r} missing from initial_lines")

    return tuple(tuple(rprs) for rprs in representatives)
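
# Usage sketch: the initial lines must be known verbatim (e.g. transcribed by hand)
# and must between them contain every character of the alphabet at least once. The
# line contents here are purely illustrative placeholders:
#
#   representatives = get_char_representatives(
#       initial_lines=(
#           "mQENBFZ6...",  # first known line, grid_dims[1] characters long
#           "7Hq0Pv1s...",  # second known line, and so on
#       ),
#       grid_dims=(40, 64),
#   )
#   # representatives[0] is then the tuple of (y, x) grid positions at which
#   # alphabet[0] ("A") is known to appear
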
def get_correspondences(
    img,
    representatives,
    pmat,
    grid_dims,
    template_dims=default_template_dims,
    search_padding=default_search_padding,
    method=cv2.TM_SQDIFF_NORMED,
    score_finalizer=np.amin,
):
    """
    Build an array of shape (len(representatives), grid_dims[0], grid_dims[1]) denoting the "scores"
    of correspondence between each character/grid-position combination.

    :param img: three-dimensional numpy array of image (only channel 0 is used)
    :param representatives: character-grid coordinates of representatives for each character in
                            alphabet, as returned by ``get_char_representatives``
    :param pmat: perspective transform matrix from character-grid space to pixel space, as
                 returned by ``get_perspective_transform``
    :param grid_dims: dimensions of character-grid
    :param template_dims: pixel dimensions of the template cut around each representative
    :param search_padding: pixels of slack allowed around each grid position when matching
    :param method: ``cv2.matchTemplate`` comparison method
    :param score_finalizer: function reducing a collection of match scores to a single score
    """
    correspondences = np.zeros((len(representatives), *grid_dims), "float32")
    # pre-allocate matchTemplate result array for heavy reuse
    match_result = np.zeros(((2*search_padding[0])+1, (2*search_padding[1])+1, 1,), "float32")

    for gridpos in product(range(grid_dims[0]), range(grid_dims[1])):
        pxpos = np.int32(cv2.perspectiveTransform(np.float32(((gridpos,),)), pmat)[0][0])
        sample = img[
            pxpos[0]-search_padding[0]:pxpos[0]+template_dims[0]+search_padding[0],
            pxpos[1]-search_padding[1]:pxpos[1]+template_dims[1]+search_padding[1],
            0,
        ]
        for char_i, char_rprs in enumerate(representatives):
            finalized_results = []
            for t_gridpos in char_rprs:
                t_pxpos = np.int32(cv2.perspectiveTransform(np.float32(((t_gridpos,),)), pmat)[0][0])
                template = img[
                    t_pxpos[0]:t_pxpos[0]+template_dims[0],
                    t_pxpos[1]:t_pxpos[1]+template_dims[1],
                    0,
                ]
                cv2.matchTemplate(
                    sample,
                    template,
                    method,
                    match_result,
                )
                finalized_results.append(score_finalizer(match_result))
            correspondences[(char_i, *gridpos,)] = score_finalizer(finalized_results)

    return correspondences
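
# Usage sketch, with a hypothetical filename (cv2.imread returns a 3-channel image,
# matching the img[..., 0] indexing above):
#
#   img = cv2.imread("key_backup_scan.png")
#   correspondences = get_correspondences(img, representatives, pmat, (40, 64))
#   # correspondences[i, y, x] is the finalized match score between the sample at
#   # grid position (y, x) and the templates for alphabet[i]; with the default
#   # cv2.TM_SQDIFF_NORMED method, lower means a better match
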
def get_lines_from_correspondences_simple(
    correspondences,
    alphabet=default_alphabet,
    selector=np.argmin,
):
    """
    Return probable lines of target text guessed using extremely naive "best score wins"

    :param correspondences: numpy array of character correspondences, as returned by
                            ``get_correspondences``
    """
    return tuple(
        "".join(
            alphabet[selector(correspondences[:, y, x])]
            for x in range(correspondences.shape[2])
        ) for y in range(correspondences.shape[1])
    )
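
# Usage sketch, continuing from the above:
#
#   lines = get_lines_from_correspondences_simple(correspondences)
#   print("\n".join(lines))
#
# Note that the default selector np.argmin suits score-inverse methods such as
# cv2.TM_SQDIFF_NORMED; a method where higher scores are better would want np.argmax.
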
def get_contentions(correspondences, score_inverse=True):
    """
    Return sequence of grid-coordinates, ordered by how small the score difference was in determining
    the top match in ``correspondences`` (smallest difference, i.e. most contended, first).

    :param score_inverse: whether a smaller score denotes a better match (as is the case for
                          e.g. ``cv2.TM_SQDIFF_NORMED``)
    """
    return sorted(
        (abs(sc[1]-sc[0]), gridpos)
        for gridpos, sc in (
            (gridpos, sorted(correspondences[:, gridpos[0], gridpos[1]], reverse=(not score_inverse)))
            for gridpos in product(
                range(correspondences.shape[1]),
                range(correspondences.shape[2]),
            )
        )
    )
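
# Usage sketch: the most contended grid positions are the ones worth double-checking
# by eye first when chasing 100% accuracy:
#
#   for score_diff, (y, x) in get_contentions(correspondences)[:20]:
#       print(f"check line {y}, column {x} (winning margin {score_diff})")
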
def occurrence_comparison(
    img,
    lines,
    pmat,
    alphabet=default_alphabet,
    template_dims=default_template_dims,
    search_padding=default_search_padding,
    method=cv2.TM_SQDIFF_NORMED,
    score_finalizer=np.amin,
):
    """
    Compares each occurrence of a character in the guessed output to all other occurrences. This
    output can be used to try to identify the "odd ones out" in a character family, those presumably
    being among the ones with the most dissimilarity.

    :param img: three-dimensional numpy array of image (only channel 0 is used)
    :param lines: sequence of strings of guessed contents of character grid
    :param pmat: perspective transform matrix from character-grid space to pixel space, as returned
                 by ``get_perspective_transform``
    :returns: sequence of sequences of grid-position combinations and their respective similarity
              scores, each entry being (score, grid_position0, grid_position1). grid-position
              combinations are de-duplicated.
    """
    char_locs = tuple([] for _ in alphabet)
    similarities = tuple([] for _ in alphabet)
    # pre-allocate matchTemplate result array for heavy reuse
    match_result = np.zeros(((2*search_padding[0])+1, (2*search_padding[1])+1, 1,), "float32")

    for y, line in enumerate(lines):
        for x, char in enumerate(line):
            char_i = alphabet.index(char)
            pxpos = np.int32(cv2.perspectiveTransform(np.float32((((y, x,),),)), pmat)[0][0])
            sample = img[
                pxpos[0]-search_padding[0]:pxpos[0]+template_dims[0]+search_padding[0],
                pxpos[1]-search_padding[1]:pxpos[1]+template_dims[1]+search_padding[1],
                0,
            ]
            new_similarities = []
            for t_gridpos in char_locs[char_i]:
                t_pxpos = np.int32(cv2.perspectiveTransform(np.float32(((t_gridpos,),)), pmat)[0][0])
                template = img[
                    t_pxpos[0]:t_pxpos[0]+template_dims[0],
                    t_pxpos[1]:t_pxpos[1]+template_dims[1],
                    0,
                ]
                cv2.matchTemplate(
                    sample,
                    template,
                    method,
                    match_result,
                )
                new_similarities.append((score_finalizer(match_result), t_gridpos, (y, x),))
            similarities[char_i].extend(new_similarities)
            char_locs[char_i].append((y, x))

    return similarities
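
# Usage sketch: sorting each character family's similarity entries worst-first
# surfaces the samples most likely to have been misrecognized (with the default
# score-inverse method, a larger score means less similar):
#
#   similarities = occurrence_comparison(img, lines, pmat)
#   for char, sims in zip(default_alphabet, similarities):
#       for score, pos_a, pos_b in sorted(sims, reverse=True)[:3]:
#           print(f"{char!r}: {pos_a} vs {pos_b} scored {score}")
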
def get_char_atlas(
    img,
    lines,
    pmat,
    alphabet=default_alphabet,
    template_dims=default_template_dims,
    search_padding=default_search_padding,
):
    """
    Generates a "character atlas" image of all character samples in an image, grouped by character.
    This allows "odd ones out" to be more easily identified by eye.

    :param img: three-dimensional numpy array of image (only channel 0 is used)
    :param lines: sequence of strings of guessed contents of character grid
    :param pmat: perspective transform matrix from character-grid space to pixel space, as returned
                 by ``get_perspective_transform``
    :returns: tuple of output image followed by an index of character positions each sample in
              output represents
    """
    char_locs = tuple([] for _ in alphabet)
    for y, line in enumerate(lines):
        for x, char in enumerate(line):
            char_locs[alphabet.index(char)].append((y, x))

    sample_dims = (template_dims[0] + (search_padding[0] * 2), template_dims[1] + (search_padding[1] * 2),)
    atlas_width = max(len(locs) for locs in char_locs) * sample_dims[1]
    atlas = np.zeros((sample_dims[0] * len(alphabet), atlas_width,), "uint8")

    for atlas_grid_y, locs in enumerate(char_locs):
        for atlas_grid_x, loc in enumerate(locs):
            pxpos = np.int32(cv2.perspectiveTransform(np.float32(((loc,),)), pmat)[0][0])
            sample = img[
                pxpos[0]-search_padding[0]:pxpos[0]+template_dims[0]+search_padding[0],
                pxpos[1]-search_padding[1]:pxpos[1]+template_dims[1]+search_padding[1],
                0,
            ]
            atlas[
                atlas_grid_y * sample_dims[0]:(atlas_grid_y * sample_dims[0]) + sample.shape[0],
                atlas_grid_x * sample_dims[1]:(atlas_grid_x * sample_dims[1]) + sample.shape[1],
            ] = sample

    return atlas, char_locs
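
# End-to-end sketch of the intended workflow. All concrete values here (filename,
# corner coordinates, grid dimensions, known_initial_lines) are hypothetical
# placeholders, not values from a real document:
#
#   if __name__ == "__main__":
#       img = cv2.imread("key_backup_scan.png")
#       grid_dims = (40, 64)
#       pmat = get_perspective_transform(
#           ((120, 80), (118, 1890), (2350, 85), (2348, 1895)), grid_dims,
#       )
#       representatives = get_char_representatives(known_initial_lines, grid_dims)
#       correspondences = get_correspondences(img, representatives, pmat, grid_dims)
#       lines = get_lines_from_correspondences_simple(correspondences)
#
#       # write out the per-character atlas for manual inspection of "odd ones out"
#       atlas, char_locs = get_char_atlas(img, lines, pmat)
#       cv2.imwrite("atlas.png", atlas)
#
#       print("\n".join(lines))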