-
-
Save vonavi/1368605a4700008ba38278b0f6bc2fde to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3 | |
import os | |
import argparse | |
import random | |
import fitz | |
class Config:
    """Painting parameters: the tint color and the mark/gap width model.

    Widths are expressed as multiples of the text line height. Each width
    is taken from a (low, high) factor range which is scaled by ``alpha``,
    a value that depends on the vertical position on the page (see
    ``get_mark_range``).

    Attributes:
        debug: Whether deterministic debug mode is active.
        color: RGB triple used to tint the painted rectangles.
        mark_factors: (low, high) factor range for painted segments.
        gap_factors: (low, high) factor range for unpainted segments.
        top, bottom: Vertical extent of the current page. Callers are
            expected to overwrite these for every page; until then both
            are 0.0 and no position-dependent scaling is applied.
    """

    # https://htmlcolorcodes.com/color-chart/material-design-color-chart/
    DEBUG_COLOR = (255, 87, 51)
    PAINT_COLOR = (255, 249, 196)

    def __init__(self, debug, *, mark_factors=(0.5, 1.0),
                 gap_factors=(1.0, 1.5)):
        """Create a configuration.

        Args:
            debug: If true, use the debug color and return the midpoint of
                the width range instead of a random value from it.
            mark_factors: (low, high) width factors for painted segments.
            gap_factors: (low, high) width factors for unpainted segments.
        """
        self.debug = debug
        self.color = self.DEBUG_COLOR if debug else self.PAINT_COLOR
        self.mark_factors = tuple(mark_factors)
        self.gap_factors = tuple(gap_factors)
        # Defaults keep get_width usable before any page has been assigned.
        self.top = 0.0
        self.bottom = 0.0

    def get_mark_range(self, y, mark):
        """Return the (low, high) width factors for a segment starting at y.

        Args:
            y: Vertical coordinate of the line.
            mark: True for a painted segment, False for a gap.

        Returns:
            The factor range scaled by ``alpha``. ``alpha`` is 1 when
            ``y == self.top`` and grows linearly to 2.0 (marks) or stays
            at 1.0 (gaps) when ``y == self.bottom``.
        """
        page_height = self.bottom - self.top
        if page_height:
            rel_pos = (self.bottom - y) / page_height
        else:
            # Degenerate or unset page extent: avoid dividing by zero and
            # fall back to the unscaled ranges.
            rel_pos = 1.0
        if mark:
            bottom_scale, (low, high) = 2.0, self.mark_factors
        else:
            bottom_scale, (low, high) = 1.0, self.gap_factors
        alpha = rel_pos + bottom_scale * (1 - rel_pos)
        return (low * alpha, high * alpha)

    def get_width(self, top, bottom, mark):
        """Return the width of the next segment for a line spanning top..bottom.

        The result is the line height (``bottom - top``) multiplied by a
        factor from ``get_mark_range(top, mark)``: the midpoint of the
        range in debug mode, otherwise a uniformly random value from it.
        """
        mark_range = self.get_mark_range(top, mark)
        height = bottom - top
        if self.debug:
            return height * 0.5 * sum(mark_range)
        return height * random.uniform(*mark_range)
class Paint:
    """Stateful producer of mark rectangles that flows from line to line.

    The text is covered by alternating painted ("mark") and unpainted
    segments. A segment that does not fit on the current line continues at
    the start of the next one, so the object remembers the current segment
    kind, its width and how much of it has already been consumed.
    """

    def __init__(self):
        # True while the current segment is a painted one.
        self.__mark = True
        # Width of the current segment; None until the first line is seen.
        self.__next_w = None
        # Part of the current segment already laid out on previous lines.
        self.__offset = 0

    def __segment_width(self, rect, cfg):
        """Ask cfg for the current segment's width and require it to be > 0."""
        width = cfg.get_width(rect.y0, rect.y1, self.__mark)
        if not width > 0:
            # A non-positive width would never advance along the line and
            # the loop in generate() would not terminate.
            raise ValueError(
                'segment width must be positive, got {!r}'.format(width))
        return width

    def generate(self, rect, cfg):
        """Yield the painted rectangles for one text line.

        Args:
            rect: Bounding box of the line (needs x1, y0, y1 and width).
            cfg: Object providing ``get_width(top, bottom, mark)``.

        Yields:
            Each mark rectangle clipped to ``rect``.

        Raises:
            ValueError: If ``cfg.get_width`` returns a non-positive width.

        The carry-over state for the next line is stored only after the
        generator has been fully consumed.
        """
        if rect.y1 <= rect.y0:
            # Zero-height line: nothing to paint, leave the state untouched.
            return
        if self.__next_w is None:
            self.__next_w = self.__segment_width(rect, cfg)
        # Distance from the start of the current segment to the line end.
        extra_width = rect.width + self.__offset
        while True:
            if self.__mark:
                mark_left = rect.x1 - extra_width
                r = fitz.Rect(mark_left, rect.y0,
                              mark_left + self.__next_w, rect.y1)
                yield r & rect
            if extra_width < self.__next_w:
                # The segment runs past the line end; continue it next line.
                break
            extra_width -= self.__next_w
            self.__mark = not self.__mark
            self.__next_w = self.__segment_width(rect, cfg)
        # What is left is the part of the unfinished segment already used.
        self.__offset = extra_width
def page_paint(page, cfg):
    """Overlay tinted rectangles on the text lines of a single page.

    The page's vertical extent is stored on ``cfg`` (``top``/``bottom``),
    then every line of every block whose type is 0 is handed to a fresh
    ``Paint`` instance; each rectangle it yields is drawn as a tinted
    pixmap on top of the page content.
    """
    bounds = page.rect
    cfg.top, cfg.bottom = bounds.y0, bounds.y1
    painter = Paint()
    # NOTE(review): type 0 is presumably a text block in PyMuPDF's text
    # dictionary -- confirm against the library documentation.
    text_blocks = (blk for blk in page.getText('dict')['blocks']
                   if blk['type'] == 0)
    for blk in text_blocks:
        for text_line in blk['lines']:
            line_box = fitz.Rect(text_line['bbox'])
            for mark in painter.generate(line_box, cfg):
                # RGB pixmap bounded by the rounded (integer) rectangle
                pixmap = fitz.Pixmap(fitz.Colorspace(fitz.CS_RGB),
                                     mark.round())
                pixmap.clearWith(0xff)
                pixmap.tintWith(*cfg.color)
                page.insertImage(mark, pixmap=pixmap, overlay=True)
def pdf_paint(pdf, pages, cfg):
    """Paint the selected pages of a PDF and save the result as a copy.

    Args:
        pdf: Path of the input PDF file.
        pages: Iterable of zero-based page indices; indices that do not
            exist in the document are ignored.
        cfg: Painting configuration passed on to ``page_paint``.

    The output is written next to the input with ``_paint`` appended to
    the base name. The document is closed even if painting or saving fails.
    """
    doc = fitz.open(pdf)
    try:
        # Restrict pages to those presented in document
        pages = set(pages) & set(range(doc.pageCount))
        page_count = len(pages)
        for count, page_num in enumerate(sorted(pages)):
            print('Processing page {} / {}...'.format(count + 1, page_count),
                  end='\r')
            page_paint(doc[page_num], cfg)
        # Move past the progress line that was rewritten with '\r'.
        print()
        basename, ext = os.path.splitext(pdf)
        doc.save(basename + '_paint' + ext)
    finally:
        doc.close()
if __name__ == '__main__':
    # Command line: <pdf> -p PAGE [PAGE ...] [-d]
    cli = argparse.ArgumentParser()
    cli.add_argument('pdf', type=str, help='PDF file')
    cli.add_argument('-p', '--pages', required=True, type=int, nargs='+',
                     help='Pages to paint')
    cli.add_argument('-d', '--debug', action='store_true',
                     help='Debug the script')
    options = cli.parse_args()
    # Page numbers are given one-based; the function expects zero-based ones
    zero_based = {number - 1 for number in options.pages}
    pdf_paint(options.pdf, zero_based, Config(options.debug))
PyMuPDF>=1.14,<=1.14.10 |
Sorry for the delay. I will check item 1 soon and fix the script accordingly. As for item 2, I agree to add a parameter such as "difficulty". The problem is how to formulate the difficulty itself when we need to control four parameters. Do you have a hint?
No worries. This script depends on the library, so I guess version issues will happen again and again.
The problem is how to formulate the difficulty itself when we need to control four parameters. Do you have a hint?
I couldn't wrap my head around those parameters. I just tweaked them until the result was acceptable. The image below was generated with the parameters "mark_range = (1.0 * alpha, 0.3 * alpha)", if I remember correctly; the code was lost.
I think that by experimenting one could come up with 3-4 configurations corresponding to easy-...-hard difficulty. I can do that later (by "later" I mean "I don't know when"). No need for you to make any changes for now.
I'm not suggesting that you create an actual git repo, but it would make pull requests possible.
Tried to launch the script today.
Needed to upgrade the dependency from "PyMuPDF>=1.14,<=1.14.10" to "PyMuPDF>=1.14,<=1.14.20".
Also tried to replace line 13 with "mark_range = (0.3 * alpha, 0.2 * alpha)" - easier for a new person, less shock :)
Can "difficulty" be passed to the script at launch time?