Skip to content

Instantly share code, notes, and snippets.

@nekiee13
Created November 30, 2023 22:10
Show Gist options
  • Save nekiee13/2fdc9f404b194abb58f2246eb5891a62 to your computer and use it in GitHub Desktop.
Save nekiee13/2fdc9f404b194abb58f2246eb5891a62 to your computer and use it in GitHub Desktop.
Unexpected Layoutparser output
import os
import numpy as np
import pandas as pd
import json
from PIL import Image
from matplotlib import pyplot as plt
import pytesseract
from layoutparser.models.detectron2.layoutmodel import Detectron2LayoutModel
from layoutparser.elements import Layout, TextBlock, Rectangle
from layoutparser.file_utils import is_torch_cuda_available #, PathManager
import warnings
from typing import Union
# Initialize the model from local paths: Detectron2LayoutModel weights & configuration.
# The active paths are raw strings so every backslash is literal; the previous
# mix of "\\" with "\m" / "\P" relied on invalid escape sequences being passed
# through unchanged, which newer Python versions warn about.
#PubLayNet - mask_rcnn_R_50_FPN_3x
#config_path = "D:\\PDF\\vLayout\\xPrj\\models\\PubLayNet\\mask_rcnn_R_50_FPN_3x\\config.yml"
#model_path = "D:\\PDF\\vLayout\\xPrj\\models\\PubLayNet\\mask_rcnn_R_50_FPN_3x\\model_final.pth"
#PubLayNet - mask_rcnn_X_101_32x8d_FPN_3x
config_path = r"D:\PDF\vLayout\xPrj\models\PubLayNet\mask_rcnn_X_101_32x8d_FPN_3x\config.yaml"
model_path = r"D:\PDF\vLayout\xPrj\models\PubLayNet\mask_rcnn_X_101_32x8d_FPN_3x\model_final.pth"
#PrimaLayout - mask_rcnn_R_50_FPN_3x
#config_path = r"D:\PDF\vLayout\xPrj\models\PrimaLayout\mask_rcnn_R_50_FPN_3x\config.yaml"
#model_path = r"D:\PDF\vLayout\xPrj\models\PrimaLayout\mask_rcnn_R_50_FPN_3x\model_final.pth"
# NOTE(review): no label_map / extra_config is passed here. Confirm against the
# layoutparser documentation whether block.type then holds numeric class ids
# instead of names such as "Text" or "Table".
model = Detectron2LayoutModel(config_path=config_path, model_path=model_path)

# Directories
input_dir = r"D:\PDF\vLayout\xPrj\DocsIn"
output_dir = r"D:\PDF\vLayout\xPrj\DocsOut"
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(output_dir, exist_ok=True)
# Visualization function
def draw_box(image, layout, show_element_type=True, show_element_id=True, box_width=2, color_map=None):
    """Show *image* with an outline, and optionally a label, for every layout block.

    Args:
        image: Image data accepted by ``plt.imshow``.
        layout: Iterable of blocks exposing ``type`` and ``coordinates``; the
            first four coordinate values are used as x1, y1, x2, y2.
        show_element_type: Include the block type in the label.
        show_element_id: Include the block's position in *layout* in the label.
        box_width: Line width of the outline.
        color_map: Optional mapping from block type to edge colour; block
            types missing from the mapping are drawn in red.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # A None default avoids sharing one mutable dict across calls.
    colors = {} if color_map is None else color_map

    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    axes = plt.gca()

    for idx, block in enumerate(layout):
        x1, y1, x2, y2 = block.coordinates[:4]
        axes.add_patch(plt.Rectangle(
            (x1, y1),
            x2 - x1,
            y2 - y1,
            fill=False,
            edgecolor=colors.get(block.type, 'red'),
            linewidth=box_width,
        ))

        # Build the label from exactly the parts that were requested, so that
        # show_element_type=False with show_element_id=True shows only the id.
        label_parts = []
        if show_element_type:
            label_parts.append(str(block.type))
        if show_element_id:
            label_parts.append(str(idx))
        if label_parts:
            plt.text(x1, y1, " ".join(label_parts), fontsize=12, bbox=dict(facecolor='yellow', alpha=0.5))

    plt.axis('off')
    plt.show()
# OCR function
def perform_ocr(image, layout):
    """Run Tesseract OCR on the image region covered by each layout block.

    Args:
        image: Array-like image indexed as ``image[rows, cols]`` and exposing
            ``shape`` (height first, then width).
        layout: Iterable of blocks exposing ``type`` and a four-value
            ``coordinates`` sequence (x1, y1, x2, y2).

    Returns:
        A list with one dict per block, in layout order, with the keys
        ``'block_type'`` and ``'text'``. Blocks whose region is empty after
        clamping to the image get an empty string as text.
    """
    height, width = image.shape[:2]
    ocr_data = []
    for block in layout:
        x1, y1, x2, y2 = map(int, block.coordinates)
        # Clamp to the image: a negative index would wrap around in slicing
        # and silently crop the wrong region.
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, width), min(y2, height)

        if x2 <= x1 or y2 <= y1:
            # Nothing to recognise; avoid handing a zero-area crop to Tesseract.
            text = ""
        else:
            text = pytesseract.image_to_string(image[y1:y2, x1:x2])
        ocr_data.append({'block_type': block.type, 'text': text})
    return ocr_data
# Save results
def save_results(ocr_data, output_dir, base_filename):
    """Write OCR results to ``<base_filename>_OCRexport.csv`` and ``.json``.

    Args:
        ocr_data: List of dicts, one per block (as built by the OCR step with
            the keys ``'block_type'`` and ``'text'``).
        output_dir: Directory that receives both files; created if missing.
        base_filename: File name stem used for both output files.
    """
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    csv_path = os.path.join(output_dir, f"{base_filename}_OCRexport.csv")
    json_path = os.path.join(output_dir, f"{base_filename}_OCRexport.json")

    # Save to CSV. An empty result would otherwise yield a CSV without a
    # header row, so fall back to the known column names in that case.
    if ocr_data:
        frame = pd.DataFrame(ocr_data)
    else:
        frame = pd.DataFrame(columns=['block_type', 'text'])
    frame.to_csv(csv_path, index=False)

    # Save to JSON. The explicit encoding keeps the file independent of the
    # platform's default code page.
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(ocr_data, f)
# Process each image: detect the layout, OCR every block, export the results
# and show the annotated page. A failure on one file is reported and the loop
# moves on to the next file.
for filename in os.listdir(input_dir):
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')):
        continue
    try:
        image_path = os.path.join(input_dir, filename)
        # The context manager closes the file handle once the pixels are
        # copied. Converting to RGB first gives the model and the OCR step a
        # 3-channel array even for RGBA, palette (GIF) or grayscale inputs.
        with Image.open(image_path) as image:
            processed_image = np.array(image.convert("RGB"))
        layout = model.detect(processed_image)
        ocr_data = perform_ocr(processed_image, layout)
        base_filename = os.path.splitext(filename)[0]
        save_results(ocr_data, output_dir, base_filename)
        draw_box(processed_image, layout)
    except Exception as e:
        # Top-level boundary: keep the batch running, but say which file failed.
        print(f"Error processing {filename}: {e}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment