Skip to content

Instantly share code, notes, and snippets.

@mamrehn
Created December 31, 2017 01:55
Show Gist options
  • Save mamrehn/4f100eb7d5673554e2abd1a027f84a7f to your computer and use it in GitHub Desktop.
Save mamrehn/4f100eb7d5673554e2abd1a027f84a7f to your computer and use it in GitHub Desktop.
Extract all images from a pdf file and enhance text in those images.
import sys
from pathlib import Path
import numpy as np
from skimage.io import imread, imsave
from skimage.color import gray2rgb
def to_rgba(img_):
    """Return ``img_`` as a four-channel (RGBA) image.

    Args:
        img_: Image array, either 2-D (grayscale) or 3-D with the channels
            on the last axis.

    Returns:
        ``img_`` itself (no copy) when it already has four channels.
        Otherwise a new ``float64`` array of shape ``(rows, cols, 4)`` whose
        first three channels hold the input values and whose alpha channel
        is filled with ones.

    Raises:
        ValueError: If ``img_`` is neither 2-D nor 3-D.
    """
    if img_.ndim == 3:
        if img_.shape[2] == 4:
            return img_
        pixels = img_
    elif img_.ndim == 2:
        # Add a trailing axis so the gray values broadcast over R, G and B.
        # Done with NumPy so both branches produce the same dtype and alpha.
        pixels = img_[:, :, np.newaxis]
    else:
        raise ValueError(
            f'expected a 2-D or 3-D image, got {img_.ndim} dimension(s)'
        )
    out = np.ones((*img_.shape[:2], 4), dtype=np.float64)
    out[:, :, :3] = pixels
    return out
if '__main__' == __name__:
    # Average same-position PNG files from two or more directories and write
    # the result to an ``out`` directory next to the first input directory.
    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if len(sys.argv) < 3:
        sys.exit(f'usage: {sys.argv[0]} DIR DIR [DIR ...]')
    dir_names = [Path(d) for d in sys.argv[1:]]
    # Path.glob() yields entries in no guaranteed order; sort each listing so
    # the n-th file of every directory is combined with its counterparts.
    all_file_names = zip(*[sorted(d.glob('*.png')) for d in dir_names])
    out_dir = dir_names[0].parent.joinpath('out')
    out_dir.mkdir(parents=True, exist_ok=True)
    for file_names in all_file_names:
        images = [imread(fname=fn) for fn in file_names]
        # combine: per-pixel mean, computed in float64 so integer images
        # cannot overflow while being summed
        img_out = sum(to_rgba(i).astype(np.float64) for i in images) / len(images)
        # scale so the largest value becomes 1 (skipped for all-zero images
        # to avoid dividing by zero)
        peak = np.amax(img_out)
        if peak > 0:
            img_out /= peak
        img_out[:, :, 3] = 1  # reset alpha channel to no transparency
        path_out = out_dir.joinpath(file_names[0].name)
        imsave(fname=path_out, arr=img_out)  # save to png
        # imsave(fname=(str(path_out)[:-3] + 'jpg'), arr=img_out[..., :-1])  # save to jpg
    print('images saved at out/')
#!/bin/bash
# Denoise every PNG in img/, derive a text-cleaned and a contrast-enhanced
# variant of each denoised image, merge the two variants with
# combine_images.py and finally convert the merged PNGs to JPEG.
#
# Expects the ./denoise and ./textcleaner scripts in the current directory,
# plus `convert` and `python` on PATH.

# Let a glob that matches nothing expand to nothing, so the loops below are
# skipped instead of running once on the literal pattern.
shopt -s nullglob

# -p: do not fail when the directories are left over from an earlier run.
mkdir -p denoised_img cleaned_denoised_img enhanced_denoised_img out

# convert my_file.pdf images.png

# http://www.fmwconcepts.com/imagemagick/denoise/index.php
for file in img/*.png; do
    # "$file" starts with "img/", so the prefixed name lands in denoised_img/.
    ./denoise -f 1 -s "20x20+203+152" "$file" "denoised_${file%.png}.png"
done

# http://www.fmwconcepts.com/imagemagick/textcleaner/index.php
for file in denoised_img/*.png; do
    # ./textcleaner -g -e none -f 15 -o 20 $file "cleaned_${file%.png}.png"
    # Output goes to cleaned_denoised_img/ via the same prefix trick.
    ./textcleaner -e normalize -f 15 -o 5 -S 200 "$file" "cleaned_${file%.png}.png"
done

for file in denoised_img/*.png; do
    # convert -enhance -equalize -contrast $file "enhanced_${file%.png}.png"
    # Output goes to enhanced_denoised_img/.
    convert -auto-gamma -auto-level -normalize "$file" "enhanced_${file%.png}.png"
done

python combine_images.py cleaned_denoised_img enhanced_denoised_img

for file in out/*.png; do
    convert "$file" -quality 99 "${file%.png}.jpg"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment