# nekiee13/Layout.py

## Layout.py
import os
import numpy as np
import pandas as pd
import json
from PIL import Image
from matplotlib import pyplot as plt
import pytesseract
from layoutparser.models.detectron2.layoutmodel import Detectron2LayoutModel
from layoutparser.elements import Layout, TextBlock, Rectangle
from layoutparser.file_utils import is_torch_cuda_available #, PathManager
import warnings
from typing import Union

# Initialize the model from local files: Detectron2LayoutModel is given a
# configuration file and a weights file. Exactly one config_path/model_path
# pair below is active; the commented pairs are alternative model choices.

# PubLayNet - mask_rcnn_R_50_FPN_3x
#config_path = "D:\\PDF\\vLayout\\xPrj\\models\\PubLayNet\\mask_rcnn_R_50_FPN_3x\\config.yml"
#model_path = "D:\\PDF\\vLayout\\xPrj\\models\\PubLayNet\\mask_rcnn_R_50_FPN_3x\\model_final.pth"

# PubLayNet - mask_rcnn_X_101_32x8d_FPN_3x
config_path = "D:\\PDF\\vLayout\\xPrj\\models\\PubLayNet\\mask_rcnn_X_101_32x8d_FPN_3x\\config.yaml"
# Every backslash is escaped here: the unescaped "\m" and "\P" this path used
# to contain are invalid escape sequences, which Python only tolerates with a
# warning. The resulting string value is unchanged.
model_path = "D:\\PDF\\vLayout\\xPrj\\models\\PubLayNet\\mask_rcnn_X_101_32x8d_FPN_3x\\model_final.pth"

# PrimaLayout - mask_rcnn_R_50_FPN_3x
#config_path = r"D:\PDF\vLayout\xPrj\models\PrimaLayout\mask_rcnn_R_50_FPN_3x\config.yaml"
#model_path = r"D:\PDF\vLayout\xPrj\models\PrimaLayout\mask_rcnn_R_50_FPN_3x\model_final.pth"

model = Detectron2LayoutModel(config_path=config_path, model_path=model_path)

# Directories: images are read from input_dir, exports are written to output_dir.
input_dir = "D:\\PDF\\vLayout\\xPrj\\DocsIn"
output_dir = "D:\\PDF\\vLayout\\xPrj\\DocsOut"

# exist_ok=True creates the directory only when it is missing, without the
# window between a separate existence check and the creation call.
os.makedirs(output_dir, exist_ok=True)

# Visualization function
def draw_box(image, layout, show_element_type=True, show_element_id=True, box_width=2, color_map=None):
    """Display ``image`` with every block of ``layout`` outlined and labelled.

    Args:
        image: Image data handed to ``plt.imshow``.
        layout: Iterable of blocks. Each block must expose ``coordinates`` as
            ``(x1, y1, x2, y2)`` and a ``type`` attribute.
        show_element_type: Include the block's ``type`` in its label.
        show_element_id: Include the block's position within ``layout`` in
            its label.
        box_width: Line width of the outline rectangles.
        color_map: Optional mapping from ``block.type`` to an edge colour.
            Types missing from the mapping are drawn in red.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # A None default avoids sharing one mutable dict between calls.
    colors = {} if color_map is None else color_map

    plt.figure(figsize=(10, 10))
    plt.imshow(image)
    axes = plt.gca()

    for idx, block in enumerate(layout):
        x1, y1, x2, y2 = block.coordinates
        axes.add_patch(plt.Rectangle(
            (x1, y1),
            x2 - x1,
            y2 - y1,
            fill=False,
            edgecolor=colors.get(block.type, 'red'),
            linewidth=box_width,
        ))

        # Build the label from exactly the parts that were requested, so that
        # show_element_type=False really hides the type even when the id is
        # shown.
        label_parts = []
        if show_element_type:
            label_parts.append(str(block.type))
        if show_element_id:
            label_parts.append(str(idx))
        if label_parts:
            plt.text(x1, y1, " ".join(label_parts), fontsize=12, bbox=dict(facecolor='yellow', alpha=0.5))

    plt.axis('off')
    plt.show()

# OCR function
def perform_ocr(image, layout):
    """Run Tesseract OCR on the region of ``image`` covered by each block.

    Args:
        image: Array indexed as ``image[row, column]`` whose ``shape`` starts
            with ``(height, width)``.
        layout: Iterable of blocks. Each block must expose ``coordinates`` as
            ``(x1, y1, x2, y2)`` and a ``type`` attribute.

    Returns:
        A list with one dict per block, in layout order, holding the keys
        ``'block_type'`` and ``'text'``. A block that covers no pixels of the
        image gets an empty ``'text'``.
    """
    height, width = image.shape[:2]
    ocr_data = []
    for block in layout:
        x1, y1, x2, y2 = map(int, block.coordinates)

        # Clamp the box to the image. A negative value used directly as a
        # slice index would count from the opposite edge and crop the wrong
        # region.
        x1, x2 = max(0, min(x1, width)), max(0, min(x2, width))
        y1, y2 = max(0, min(y1, height)), max(0, min(y2, height))

        if x2 <= x1 or y2 <= y1:
            # Nothing to read. Keep the entry so results stay aligned with
            # the layout instead of failing the whole page on an empty crop.
            text = ''
        else:
            text = pytesseract.image_to_string(image[y1:y2, x1:x2])

        ocr_data.append({'block_type': block.type, 'text': text})
    return ocr_data

# Save results
def save_results(ocr_data, output_dir, base_filename):
    """Write ``ocr_data`` to a CSV file and a JSON file in ``output_dir``.

    Both files are named ``<base_filename>_OCRexport`` with the extensions
    ``.csv`` and ``.json`` respectively.

    Args:
        ocr_data: Records to export; passed to ``pd.DataFrame`` for the CSV
            and to ``json.dump`` for the JSON file.
        output_dir: Directory that receives both files.
        base_filename: File name stem placed before ``_OCRexport``.
    """
    export_stem = f"{base_filename}_OCRexport"
    csv_target = os.path.join(output_dir, export_stem + ".csv")
    json_target = os.path.join(output_dir, export_stem + ".json")

    # Tabular export, without the DataFrame index column.
    table = pd.DataFrame(ocr_data)
    table.to_csv(csv_target, index=False)

    # JSON export of the same records.
    with open(json_target, 'w') as json_file:
        json.dump(ocr_data, json_file)

# Process each image
for filename in os.listdir(input_dir):
    # Only files with a recognised image extension are processed.
    if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif')):
        continue
    try:
        image_path = os.path.join(input_dir, filename)
        # The context manager releases the file handle once the pixels have
        # been copied into the array. convert("RGB") turns palette, greyscale
        # and RGBA images into three-channel data, so every file yields an
        # array of the same layout.
        # NOTE(review): the detection model presumably expects three-channel
        # input when handed an array - confirm against the layoutparser docs.
        with Image.open(image_path) as image:
            processed_image = np.array(image.convert("RGB"))
        layout = model.detect(processed_image)
        ocr_data = perform_ocr(processed_image, layout)
        base_filename = os.path.splitext(filename)[0]
        save_results(ocr_data, output_dir, base_filename)
        draw_box(processed_image, layout)
    except Exception as e:
        # Report the failure and carry on with the next file, so one bad
        # image does not stop the whole batch.
        print(f"Error processing {filename}: {e}")
	# NOTE: a second, indentation-stripped copy of this entire script (imports,
	# model setup, draw_box, perform_ocr, save_results and the processing loop)
	# was removed from this point. It repeated the code above line for line,
	# made the module unparsable, and would have run the pipeline twice.