Skip to content

Instantly share code, notes, and snippets.

@Wuvist
Created December 25, 2023 15:23
Show Gist options
  • Save Wuvist/2fd232d0417954d5141f50bd3d319c27 to your computer and use it in GitHub Desktop.
Save Wuvist/2fd232d0417954d5141f50bd3d319c27 to your computer and use it in GitHub Desktop.
Python Script to process scanned book pages
from PIL import Image, ImageOps
import cv2
import numpy as np
import pytesseract
# Tell pytesseract where the Tesseract executable lives (a Windows install
# path; adjust it when running on another machine).
pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files\\Tesseract-OCR\\tesseract.exe'
# Target page size in pixels: any page dimension larger than this is
# centre-cropped down to it by the main loop before deskewing.
crop_w = 1310
crop_h = 2020
# Radians-to-degrees conversion
def DegreeTrans(theta):
    """Convert the angle ``theta`` from radians to degrees."""
    return theta / np.pi * 180
# Rotate an image counter-clockwise by `degree` degrees, keeping its size
def rotateImage(src, degree):
    """Rotate ``src`` about its centre and return the result.

    Args:
        src: Image array; only its first two dimensions (height, width)
            are inspected.
        degree: Rotation angle in degrees, handed to
            ``cv2.getRotationMatrix2D`` with a scale factor of 1.

    Returns:
        An image of the same width and height as ``src``; areas not covered
        by the rotated source are filled with white.
    """
    height, width = src.shape[:2]
    # The centre of the image is the pivot of the rotation.
    centre = (width / 2.0, height / 2.0)
    # 2x3 affine matrix describing the rotation.
    matrix = cv2.getRotationMatrix2D(centre, degree, 1)
    # Apply the transform, painting the uncovered border white.
    return cv2.warpAffine(src, matrix, (width, height),
                          borderValue=(255, 255, 255))
# Estimate the skew angle of a page with a Hough line transform
def CalcDegree(srcImage):
    """Estimate how far a scanned page is skewed, in degrees.

    Edges are extracted with Canny, straight lines are detected with the
    standard Hough transform, and the ``theta`` values of the nearly
    horizontal lines are averaged.

    Args:
        srcImage: Image array that ``cv2.COLOR_BGR2GRAY`` can convert to
            grayscale.

    Returns:
        The mean angle of the selected lines, converted to degrees, minus
        90. Returns ``0`` when no line is detected or none of the detected
        lines is close to horizontal.
    """
    gray = cv2.cvtColor(srcImage, cv2.COLOR_BGR2GRAY)
    # apertureSize has to be given by keyword: in the Python binding the
    # fourth positional parameter of cv2.Canny is the `edges` output array.
    edges = cv2.Canny(gray, 50, 200, apertureSize=3)
    # The 4th argument is the accumulator threshold: a larger value keeps
    # only stronger lines. Original author's note: if it is too small, some
    # pages (e.g. with vertical lines) cannot be corrected.
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 300)
    if lines is None:
        return 0

    theta_total = 0.0
    counts = 0
    # Each detected line is a (rho, theta) pair.
    for rho, theta in lines.reshape(-1, 2):
        a = np.cos(theta)
        y0 = np.sin(theta) * rho
        # y-coordinates of two points on the line, 1000 units either side
        # of the point (x0, y0) closest to the origin.
        y1 = int(round(y0 + 1000 * a))
        y2 = int(round(y0 - 1000 * a))
        # Keep only lines that are almost horizontal.
        if abs(y1 - y2) < 100:
            # Accumulate as a Python float so the mean is not limited to
            # the precision of the array's element type.
            theta_total += float(theta)
            counts += 1
    if counts == 0:
        return 0
    # Averaging over all selected lines gives a steadier rotation angle.
    return DegreeTrans(theta_total / counts) - 90
def save(image, fname):
    """Deskew a page, save it as a grayscale image and OCR it into a PDF.

    Args:
        image: PIL image of the page.
        fname: Path of the output image. The PDF returned by Tesseract is
            written to ``fname + '.pdf'``.
    """
    # Normalise the mode first: a grayscale, palette, CMYK or RGBA page
    # would otherwise produce an array that the BGR-to-gray conversion in
    # CalcDegree cannot handle.
    rgb = np.array(image.convert('RGB'))
    # PIL delivers channels in RGB order, whereas CalcDegree converts from
    # BGR, so hand it a channel-swapped copy.
    degree = CalcDegree(cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR))
    print(fname, degree)
    rotated = rotateImage(rgb, degree)
    Image.fromarray(rotated).convert('L').save(fname)
    # OCR the image that was just written to disk.
    pdf = pytesseract.image_to_pdf_or_hocr(fname, extension='pdf')
    with open(fname + '.pdf', 'w+b') as f:
        f.write(pdf)
def _crop_box(w, h):
    """Return the centred crop box for a ``w`` x ``h`` page, or ``None``.

    A dimension is trimmed symmetrically only when it exceeds ``crop_w`` /
    ``crop_h``; otherwise it is kept in full. ``None`` means neither
    dimension exceeds its limit, so no crop is needed.
    """
    if w <= crop_w and h <= crop_h:
        return None
    left = (w - crop_w) / 2 if w > crop_w else 0
    top = (h - crop_h) / 2 if h > crop_h else 0
    return (left, top, w - left, h - top)


# Process the scanned pages 1.jpg .. 263.jpg into book_<n>.jpg (+ .pdf).
for i in range(1, 264):
    fname = str(i) + ".jpg"
    out_fname = "book_" + str(i) + ".jpg"
    # Close the source file promptly; convert() returns an independent
    # in-memory image. RGB mode keeps the white (255, 255, 255) border fill
    # below valid whatever mode the scan was stored in.
    with Image.open(fname) as im:
        page = im.convert("RGB")
    box = _crop_box(*page.size)
    if box is None:
        # Page is already within the target size: report it and process as is.
        print(fname)
        save(page, out_fname)
    else:
        # Crop to the target size, then add a 20 px white margin.
        framed = ImageOps.expand(
            page.crop(box), border=20, fill=(255, 255, 255))
        save(framed, out_fname)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment