# Su-s/PyTessBaseAPI.py

## PyTessBaseAPI.py
import io
from PIL import Image
from fpdf import FPDF
from wand.image import Image as wi
import cv2
import numpy as np
import tesserocr as tr
import os


def _render_pages(source, resolution):
    """Rasterize every page of the PDF at *source* into JPEG bytes.

    Args:
        source: Path of the PDF file to read.
        resolution: Rendering resolution handed to wand.

    Returns:
        A list with one JPEG blob (``bytes``) per page, in document order.
    """
    blobs = []
    # The with-blocks close each wand image as soon as its blob is extracted.
    with wi(filename=source, resolution=resolution) as document:
        with document.convert('jpeg') as pages:
            for page in pages.sequence:
                with wi(image=page) as single:
                    blobs.append(single.make_blob('jpeg'))
    return blobs


def _outline_text_lines(api, page):
    """Return a BGR array of *page* with a green box around each text line.

    Args:
        api: An initialised ``tesserocr.PyTessBaseAPI`` instance.
        page: The PIL image of a single page.

    Returns:
        A numpy array in OpenCV (BGR) channel order with the boxes drawn.
    """
    api.SetImage(page)
    components = api.GetComponentImages(tr.RIL.TEXTLINE, True)
    # PIL hands over RGB (or grey/CMYK) data while cv2.imwrite expects BGR;
    # normalise to RGB first so the conversion is valid for every image mode.
    canvas = cv2.cvtColor(np.array(page.convert('RGB')), cv2.COLOR_RGB2BGR)
    for _, box, _, _ in components:
        left, top = box['x'], box['y']
        right, bottom = left + box['w'], top + box['h']
        cv2.rectangle(canvas, (left, top), (right, bottom), color=(0, 255, 0))
    return canvas


def annotate_pdf(source="abc.pdf", destination="processed.pdf",
                 resolution=300):
    """Write a copy of *source* whose pages show detected text-line boxes.

    Each page is rasterized, its text lines are located with Tesseract,
    a rectangle is drawn around every line, and the annotated pages are
    assembled into a new PDF in the original page order.

    Intermediate PNG files live in a private temporary directory, so no
    other ``.png`` file is ever embedded or deleted, and the intermediates
    are removed even when a step fails.

    Args:
        source: Path of the PDF to analyse.
        destination: Path of the PDF to create.
        resolution: Rasterization resolution for the input pages.

    Raises:
        IOError: If OpenCV reports that an annotated page was not written.
    """
    import tempfile  # only needed here; keeps the file-level imports as-is

    page_blobs = _render_pages(source, resolution)
    with tempfile.TemporaryDirectory() as workdir, tr.PyTessBaseAPI() as api:
        report = FPDF()
        for index, blob in enumerate(page_blobs):
            with Image.open(io.BytesIO(blob)) as page:
                canvas = _outline_text_lines(api, page)
            png_path = os.path.join(workdir, 'result' + str(index) + '.png')
            # cv2.imwrite signals failure through its return value.
            if not cv2.imwrite(png_path, canvas):
                raise IOError('could not write ' + png_path)
            report.add_page()
            report.set_font('Arial', 'B', 16)
            report.image(png_path, 3, 3, 204)
        # Emit the PDF while the temporary PNG files still exist.
        report.output(destination)


def main():
    """Run the annotation with the default paths, printing any error."""
    try:
        annotate_pdf()
    except Exception as e:
        # Top-level boundary of the script: report the failure and stop.
        print(e)


if __name__ == "__main__":
    main()