Skip to content

Instantly share code, notes, and snippets.

@mszegedy
Last active August 5, 2020 09:20
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mszegedy/f5100d7c67cad4362e6c4be44a85621a to your computer and use it in GitHub Desktop.
Save mszegedy/f5100d7c67cad4362e6c4be44a85621a to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# crops a series of screenshots, splits them based on vertical blackspace (i.e.
# completely black scanlines), then aligns and merges them by y-concatenation.
# if a piece can't be y-catted to the previous one, a new file is created for
# it. this has the use of ripping ebooks that have been pirated from temporary
# views via screenshots, such as those lent out by archive.org.
import os
from os.path import join
import sys
import numpy as np
from PIL import Image
# Directory whose entries are listed, sorted by name, and read as screenshots.
SOURCE_PATH = 'source/'
# Directory the extracted parts are saved into.
OUT_PATH = 'images/'
# Box handed to Image.crop() for every screenshot; Pillow reads it as
# (left, upper, right, lower) in pixels.
# NOTE(review): the numbers presumably match the author's screen and viewer
# layout -- confirm before using on other screenshots.
CROPBOX = (538, 186, 1369, 1046)
# Parts shorter than this many pixels are not written to disk.
MIN_HEIGHT = 830
# Extension of the output files (lower-cased when the file name is built).
EXTENSION = 'jpg'
def is_black(im):
    """Return True if every pixel of *im* is pure black.

    Args:
        im: a PIL image (typically a single scanline).

    Returns:
        bool: True when the only colour in the image is (0, 0, 0).
    """
    if im.mode != 'RGB':
        # getcolors() reports colours in the image's own mode (an int for
        # "L", a 4-tuple for "RGBA", a palette index for "P"), none of which
        # would ever equal the RGB triple tested below.
        im = im.convert('RGB')
    x, y = im.size
    pixel_count = x * y
    # With maxcolors equal to the pixel count the colour list is never
    # truncated; an all-black image yields exactly one entry.
    return (pixel_count, (0, 0, 0)) in im.getcolors(pixel_count)
def is_same(im_1, im_2):
    """Return True if the two images hold identical pixel data.

    Args:
        im_1, im_2: PIL images (or anything np.asarray() accepts).

    Returns:
        bool: True only when both arrays have the same shape and every
        element is equal.
    """
    # array_equal checks the shapes first, so differently sized inputs give
    # False instead of being broadcast against each other (or raising).
    return np.array_equal(np.asarray(im_1), np.asarray(im_2))
def scanline(im, i):
    """Return row *i* of *im* as a one-pixel-tall crop spanning its full width."""
    full_width = im.size[0]
    top, bottom = i, i + 1
    return im.crop((0, top, full_width, bottom))
def scanlines(im):
    """Return a generator over the rows of *im*, top to bottom.

    Each item is the one-pixel-tall image produced by scanline().
    """
    height = im.size[1]
    return (scanline(im, row) for row in range(height))
def vcat(im_1, im_2):
    """Return a new image with *im_2* stacked directly beneath *im_1*.

    Both inputs are converted to arrays and joined with np.vstack, so their
    array shapes must agree on every axis except the first (rows).
    """
    upper = np.asarray(im_1)
    lower = np.asarray(im_2)
    stacked = np.vstack((upper, lower))
    return Image.fromarray(stacked)
def align_and_merge(im_1, im_2):
    """Append to *im_1* the portion of *im_2* that lies below their overlap.

    The alignment is found by looking for the last scanline of *im_1* that
    is pixel-identical to the first scanline of *im_2*.  The rows of *im_2*
    covered by the implied overlap are dropped and the remainder is
    concatenated beneath *im_1*.

    NOTE(review): only a single scanline is compared, so a row that repeats
    (e.g. a blank margin line) can produce a false alignment -- consider
    verifying the whole overlapping region.

    Args:
        im_1: the upper image.
        im_2: the lower image; must have the same width as *im_1*.

    Returns:
        A new image containing *im_1* followed by the non-overlapping rows
        of *im_2*.

    Raises:
        ValueError: if no scanline of *im_1* matches the first scanline of
            *im_2*, or if the implied overlap is taller than *im_2*.
    """
    width = im_1.size[0]
    assert width == im_2.size[0], 'images must have the same width'
    height_1 = im_1.size[1]
    first_line = scanline(im_2, 0)
    offset = None
    for y, line in enumerate(scanlines(im_1)):
        if is_same(line, first_line):
            # Deliberately no break: the last match (smallest overlap) wins.
            offset = y
    if offset is None:
        raise ValueError('Images don\'t overlap.')
    overlap = height_1 - offset
    if overlap > im_2.size[1]:
        # The crop box below would start past the bottom of im_2.  Fail
        # explicitly rather than leaving the outcome of an inverted box to
        # Image.crop().
        raise ValueError('Overlap is taller than the second image.')
    return vcat(im_1, im_2.crop((0, overlap, *im_2.size)))
def parts_from_path(path, source_im=None):
    """Split the screenshot at *path* into parts separated by black rows.

    The screenshot is cropped to CROPBOX and every maximal run of
    consecutive non-black scanlines becomes one part.  If *source_im* is
    given it is merged onto the first part with align_and_merge(); when
    that raises ValueError (or when the screenshot contains no part at all)
    *source_im* is placed in front of the parts unchanged.

    Args:
        path: path of the screenshot file.
        source_im: optional image carried over from the previous
            screenshot.

    Returns:
        list: the parts, top to bottom.  Empty only when the cropped
        screenshot is entirely black and *source_im* is None.
    """
    with Image.open(path) as screenshot:
        # crop() and convert() both return independent images, so the file
        # can be closed right away.  RGB is forced because is_black() tests
        # against an RGB triple and the JPEG output cannot store alpha.
        im = screenshot.crop(CROPBOX).convert('RGB')
    width, height = im.size

    parts = []
    run_start = None  # first row of the current non-black run, if any
    for y in range(height):
        if is_black(scanline(im, y)):
            if run_start is not None:
                # Crop the finished run in one go instead of growing the
                # part one row at a time.
                parts.append(im.crop((0, run_start, width, y)))
                run_start = None
        elif run_start is None:
            run_start = y
    if run_start is not None:
        # The last run reaches the bottom edge of the crop.
        parts.append(im.crop((0, run_start, width, height)))

    if source_im is not None:
        if not parts:
            # Nothing new on this screenshot: keep the pending image.
            parts.append(source_im)
        else:
            try:
                parts[0] = align_and_merge(source_im, parts[0])
            except ValueError:
                parts.insert(0, source_im)
    return parts
def output():
    """Save every finished part and remember the unfinished one.

    Works on module-level state: reads `parts` and `name_i`, writes every
    part except the last to OUT_PATH (skipping parts shorter than
    MIN_HEIGHT), then stores the last part in `last_part` and advances
    `name_i` by the number of parts consumed.  Does nothing when `parts`
    is empty.
    """
    global last_part
    global name_i
    if not parts:
        # No part to carry over; leave the pending state untouched instead
        # of failing on parts[-1].
        return
    for i, im in enumerate(parts[:-1]):
        if im.size[1] >= MIN_HEIGHT:
            filename = '.'.join((str(name_i+i), EXTENSION.lower()))
            im.save(join(OUT_PATH, filename))
    # The last part may continue on the next screenshot, so it is held back.
    last_part = parts[-1]
    name_i = name_i + len(parts) - 1
if __name__ == '__main__':
    # Image.save() does not create missing directories.
    os.makedirs(OUT_PATH, exist_ok=True)
    last_part = None
    name_i = 0
    # Screenshots are processed in file-name order.
    for filename in sorted(os.listdir(SOURCE_PATH)):
        parts = parts_from_path(join(SOURCE_PATH, filename), last_part)
        if not parts:
            # Entirely black screenshot with nothing pending.
            continue
        # output() saves all parts but the last, keeps the last one in
        # last_part and advances name_i accordingly.
        output()
    # The part held back after the final screenshot has no successor to be
    # merged with, so it has to be written here or it would be lost.
    if last_part is not None and last_part.size[1] >= MIN_HEIGHT:
        last_part.save(join(OUT_PATH,
                            '.'.join((str(name_i), EXTENSION.lower()))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment