Skip to content

Instantly share code, notes, and snippets.

@mahmoudimus
Forked from flaviut/draw_box_file_data.py
Created May 29, 2023 22:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mahmoudimus/a4a489e058644cfe577ed09d022537d3 to your computer and use it in GitHub Desktop.
Save mahmoudimus/a4a489e058644cfe577ed09d022537d3 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Script to draw bounding boxes and text from a Tesseract box file.
The script takes an image TIFF_FILE, draws the text and bounding boxes
of the corresponding BOX_FILE and saves the resulting OUT_FILE.
Tesseract box file columns:
<symbol> <left> <bottom> <right> <top> <page>
"""
import pandas as pd
from PIL import Image, ImageDraw, ImageFont
from csv import reader
# TrueType font file and point size passed to ImageFont.truetype when
# writing the recognised text next to each box.
FONT = '/usr/share/fonts/truetype/dejavu/DejaVuSansMono.ttf'
FONT_SIZE = 26
# Input image and its Tesseract box file, as used by main().
TIFF_FILE = 'val/val_invoice.tiff'
BOX_FILE = 'val/val_invoice.box'
# Path the annotated image is saved to by draw_tesseract_box().
OUT_FILE = 'val/val_invoice_ocr_deu.tiff'
def draw_tesseract_box(image_path, box_path, out_path=None):
    """Draw the boxes and text of a Tesseract box file onto an image.

    Args:
        image_path: Path of the image to annotate.
        box_path: Path of the Tesseract box file belonging to the image.
        out_path: Path the annotated image is saved to. When omitted, the
            module-level ``OUT_FILE`` is used.
    """
    if out_path is None:
        out_path = OUT_FILE
    # The context manager releases the file handle Pillow keeps open for
    # the source image, even if drawing or saving fails.
    with Image.open(image_path) as source:
        image_height = source.height
        df_text = get_tesseract_coords(box_path)
        annotated = source
        for _, row in df_text.iterrows():
            # Tabs are removed from the symbol text before it is drawn.
            symbol = row['symbol'].replace('\t', '')
            pillow_coords = convert_coords_to_pillow(row, image_height)
            annotated = draw_bb_text(annotated, pillow_coords, symbol)
        annotated.save(out_path)
def get_tesseract_coords(box_path):
    """Read a Tesseract box file and merge the symbols sharing one box.

    Every non-blank line is expected to look like
    ``<symbol> <left> <bottom> <right> <top> <page>``. The values are kept
    as strings.

    Args:
        box_path: Path of the UTF-8 encoded box file.

    Returns:
        A pandas DataFrame with the columns ``left``, ``bottom``,
        ``right``, ``top``, ``page`` and ``symbol``, holding one row per
        distinct box; ``symbol`` is the concatenation, in file order, of
        all symbols that share that box.

    Raises:
        ValueError: If a non-blank line has fewer than six fields.
    """
    names = ['symbol', 'left', 'bottom', 'right', 'top', 'page']
    group_keys = names[1:]
    box_data = []
    with open(box_path, encoding='utf-8') as box_file:
        for line_no, raw_line in enumerate(box_file, start=1):
            line = raw_line.rstrip('\r\n')
            if not line.strip():
                continue
            # Split from the right so the symbol keeps whatever precedes
            # the five numeric fields. This covers the space symbol
            # (written as a space followed by the delimiter) and the
            # double quote, which a default csv reader would treat as
            # the start of a quoted field.
            fields = line.rsplit(' ', len(group_keys))
            if len(fields) != len(names):
                raise ValueError(
                    'Malformed box file line %d in %s: %r'
                    % (line_no, box_path, line)
                )
            box_data.append(fields)
    if not box_data:
        # Nothing to group; return an empty frame with the usual columns.
        return pd.DataFrame(columns=group_keys + ['symbol'])
    df = pd.DataFrame.from_records(box_data, columns=names)
    df_text = df.groupby(group_keys)['symbol'].agg(''.join)
    return df_text.reset_index(drop=False)
# Fonts already loaded, keyed by (path, size), so the font file is read
# once rather than once per box.
_FONT_CACHE = {}


def _get_font(path, size):
    """Return the TrueType font for (path, size), loading it on first use."""
    key = (path, size)
    try:
        return _FONT_CACHE[key]
    except KeyError:
        font = _FONT_CACHE[key] = ImageFont.truetype(path, size)
        return font


def draw_bb_text(image, coords, text):
    """Draw one bounding box and its text onto an image.

    Args:
        image: Pillow image to draw on; it is modified in place.
        coords: Mapping with the keys ``left``, ``top``, ``right`` and
            ``bottom`` describing the box.
        text: Text written next to the box; it is also printed to stdout.

    Returns:
        The same image object that was passed in.
    """
    draw = ImageDraw.Draw(image)
    shape = [
        (coords['left'], coords['top']),
        (coords['right'], coords['bottom']),
    ]
    draw.rectangle(xy=shape, outline="green")
    # The label starts at the box's left edge; its y position is the top
    # edge shifted by the signed difference (top - bottom).
    text_coord = (
        coords['left'],
        coords['top'] + (coords['top'] - coords['bottom']),
    )
    draw.text(text_coord, text, font=_get_font(FONT, FONT_SIZE), fill="red")
    print(text)
    return image
def convert_coords_to_pillow(coords_tess, image_height):
    """Convert Tesseract box-file coordinates to Pillow coordinates.

    The horizontal values are only cast to ``int``. The vertical values are
    cast to ``int`` and subtracted from ``image_height``, which flips the
    y axis.

    Args:
        coords_tess: Mapping with the keys ``left``, ``top``, ``right`` and
            ``bottom``; each value must be accepted by ``int()``.
        image_height: Height of the image the box belongs to.

    Returns:
        A dict with the integer keys ``left``, ``top``, ``right`` and
        ``bottom``.
    """
    return {
        'left': int(coords_tess['left']),
        'top': image_height - int(coords_tess['top']),
        'right': int(coords_tess['right']),
        'bottom': image_height - int(coords_tess['bottom']),
    }
def main():
    """Annotate TIFF_FILE with the boxes and text read from BOX_FILE."""
    draw_tesseract_box(image_path=TIFF_FILE, box_path=BOX_FILE)


if __name__ == '__main__':
    main()
# Run Tesseract's lstmtraining, continuing from lstm_model/deu.lstm and
# writing to output/fine_tuned, with --max_iterations set to 400.
/usr/local/bin/lstmtraining \
--model_output output/fine_tuned \
--continue_from lstm_model/deu.lstm \
--traineddata tesseract/tessdata/best/deu.traineddata \
--train_listfile train/deu.training_files.txt \
--eval_listfile eval/deu.training_files.txt \
--max_iterations 400
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment