Skip to content

Instantly share code, notes, and snippets.

@justinvh
Created April 8, 2014 04:31
Show Gist options
  • Save justinvh/10091078 to your computer and use it in GitHub Desktop.
Save justinvh/10091078 to your computer and use it in GitHub Desktop.
fft rotate for binarized text
import sys
import numpy as np
from PIL import Image
def estimate_skew_angle(data, floor_ratio=0.25, peak_ratio=0.90):
    """Estimate a rotation angle, in radians, from the 2D FFT of ``data``.

    The centred magnitude spectrum is thresholded, log-scaled so the peak
    maps to 255, and the bounding box of the brightest points is measured.
    The angle is ``arctan2(column_span, row_span)``; because only spans are
    used, the result always lies in ``[0, pi/2]``.

    Args:
        data: 2D array holding the binarized image.
        floor_ratio: Spectrum magnitudes below this fraction of the peak
            magnitude are zeroed before scaling.
        peak_ratio: Points whose log-scaled value exceeds this fraction of
            the scaled maximum are treated as peaks.

    Returns:
        The angle as a Python float; ``0.0`` when the spectrum is entirely
        zero or when only a single peak point is found.
    """
    # 2D FFT, determine the max peak
    spectrum = np.fft.fftshift(np.fft.fft2(data))
    magnitude = np.abs(spectrum)
    max_peak = np.max(magnitude)
    if max_peak == 0:
        # An all-zero spectrum has no peaks, and log(1 + 0) below would
        # make the scale factor a division by zero.
        return 0.0

    # Threshold the lower part of the peak. The comparison is done on the
    # magnitude: comparing the complex values themselves would order them by
    # real part and discard strong peaks whose real part is negative.
    magnitude[magnitude < (max_peak * floor_ratio)] = 0

    # Log-scale the data
    c = 255.0 / np.log(1 + max_peak)
    log_data = c * np.log(1 + magnitude)

    # Find the points close to the max peak of the scaled image
    max_scaled_peak = np.max(log_data)
    rows, cols = np.where(log_data > (max_scaled_peak * peak_ratio))

    # Determine the angle from the extent of the high-peak points.
    col_span = np.max(cols) - np.min(cols)
    row_span = np.max(rows) - np.min(rows)
    # arctan2 equals arctan(col_span / row_span) for positive spans but stays
    # defined when row_span is 0 (pi/2) and when both spans are 0 (0.0).
    return float(np.arctan2(col_span, row_span))


def rotate_about_center(data, theta):
    """Rotate ``data`` by ``theta`` radians about its centre.

    Every source pixel ``(x, y)`` (``x`` is the row index, ``y`` the column
    index) is forward-mapped to its rotated position, rounded to the nearest
    pixel. Pixels that land outside the image are dropped; destination
    pixels that nothing maps onto stay 0.

    Args:
        data: 2D array to rotate.
        theta: Rotation angle in radians.

    Returns:
        A new float array with the same shape as ``data``.
    """
    n_rows, n_cols = data.shape
    cx, cy = n_rows / 2.0, n_cols / 2.0
    # Plain Python floats keep the per-pixel arithmetic cheap.
    cos_theta = float(np.cos(theta))
    sin_theta = float(np.sin(theta))

    new_image = np.zeros(data.shape)
    for x in range(n_rows):
        dx = x - cx
        for y in range(n_cols):
            dy = y - cy
            # Array indices must be integers, so round to the nearest pixel.
            xp = int(round(cx + dx * cos_theta - dy * sin_theta))
            yp = int(round(cy + dx * sin_theta + dy * cos_theta))
            # Valid indices stop at size - 1, hence the strict upper bound.
            if 0 <= xp < n_rows and 0 <= yp < n_cols:
                new_image[xp, yp] = data[x, y]
    return new_image


def main(argv=None):
    """Show the input image, then show it rotated by the estimated angle.

    Args:
        argv: Argument list; defaults to ``sys.argv``. When it holds exactly
            one argument after the program name, that argument is the image
            path; otherwise ``'text.png'`` is used.
    """
    argv = sys.argv if argv is None else argv
    binarized_text = argv[1] if len(argv) == 2 else 'text.png'

    # Binarized (1-bit image)
    data = np.array(Image.open(binarized_text))
    Image.fromarray(np.uint8(data * 255)).show()

    theta = estimate_skew_angle(data)
    new_image = rotate_about_center(data, theta)

    # Since it is a binarized image, multiply by 255
    Image.fromarray(np.uint8(new_image) * 255).show()


if __name__ == "__main__":
    main()
@mbartoli
Copy link

great work, thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment