Skip to content

Instantly share code, notes, and snippets.

@shreevatsa

shreevatsa/pages-cutout.lua Secret

Created Dec 9, 2017
Embed
What would you like to do?
print('')
local utils = require('utils.lua')
-- Shared state filled in by pagesWithCutout: image_filename, image_width,
-- image_height, num_rows and num_columns. Read by the geometry helpers below.
global = {}
-- When true, shape_paragraph skips inserting the baselineskip glue before lines.
hangoutfully = false
--[======================================================================[
Position of a paragraph
--]======================================================================]
-- Returns the crop geometries of the image regions a paragraph starting now
-- may flow into: what is left of the current page, then the two following
-- pages in full. Each value is whatever image_offsets returns for that region
-- (a geometry string, or nil).
-- `resized_image_height` is the pixel height of the resized image.
function image_areas_for_paragraph(resized_image_height)
  local page = tex.count[0]
  local rest_of_page = image_offsets(page, tex.pagetotal / tex.vsize, resized_image_height)
  local next_page = image_offsets(page + 1, 0, resized_image_height)
  local page_after = image_offsets(page + 2, 0, resized_image_height)
  local message = string.format(
    'Shaping a paragraph on page %s, fraction %.2f%% (crop area: %s, %s, %s of total height %s)',
    page, tex.pagetotal * 100 / tex.vsize,
    rest_of_page, next_page, page_after, resized_image_height)
  print(message)
  return rest_of_page, next_page, page_after
end
--[[
Maps a page number to its (row, column) position when the pages are numbered
down each column first, i.e. arranged like:
    1          num_rows+1     2*num_rows+1   ...   (num_columns-1)*num_rows+1
    2          num_rows+2     2*num_rows+2   ...   (num_columns-1)*num_rows+2
    3          num_rows+3     2*num_rows+3   ...   (num_columns-1)*num_rows+3
    ...        ...            ...
    num_rows   2*num_rows     3*num_rows     ...   num_columns*num_rows
Reads the number of rows from global.num_rows. Returns row, column (1-based).
--]]
local function row_column(page_number)
  local rows = global.num_rows
  local column = math.ceil(page_number / rows)
  local row = page_number - (column - 1) * rows
  return row, column
end
-- Crop geometry, as a "WIDTHxHEIGHT+X+Y" string in pixels of the resized
-- image, for the still-unfilled part of the given page.
--   page_number          : page whose cell in the rows x columns grid is wanted
--   f                    : fraction of that page already filled
--   resized_image_height : pixel height of the resized image
-- Returns nil when the page lies outside the grid (a message is printed) or
-- when the region would start at or below the bottom edge of the image.
function image_offsets(page_number, f, resized_image_height)
  local rows, columns = global.num_rows, global.num_columns
  local row, column = row_column(page_number)
  if row > rows or column > columns then
    print(string.format('Page number %d falls outside the %d x %d given', page_number, global.num_rows, global.num_columns))
    return nil
  end

  -- Corners of the region as fractions of the whole image.
  local left_frac = (column - 1) / columns
  local top_frac = (row - 1 + f) / rows
  local right_frac = column / columns
  local bottom_frac = row / rows

  -- The image is scaled uniformly so that its height is resized_image_height.
  local image_resize_ratio = resized_image_height / global.image_height
  local resized_image_width = image_resize_ratio * global.image_width

  -- Round to the nearest pixel.
  local function to_pixel(fraction, extent)
    return math.floor(fraction * extent + 0.5)
  end

  local top = to_pixel(top_frac, resized_image_height)
  if top >= resized_image_height then
    return nil
  end
  local left = to_pixel(left_frac, resized_image_width)     -- left edge of the "page" (text area)
  local right = to_pixel(right_frac, resized_image_width)   -- right edge of the "page" (text area)
  local bottom = to_pixel(bottom_frac, resized_image_height)

  -- NOTE(review): the +1 makes the region include the pixel row/column at
  -- `bottom`/`right` itself -- confirm this inclusive extent is intended.
  return string.format("%dx%d+%d+%d", right - left + 1, bottom - top + 1, left, top)
end
--[======================================================================[
Using an area of image, translate into "runs" of 0s and 1s
--]======================================================================]
-- Turns one text line of pixel digits into a list of signed run lengths:
-- a run of '0' pixels (white, i.e. text) is positive, a run of '1' pixels
-- (black, i.e. glue) is negative. The first run always counts '0' pixels, so
-- a line that starts with '1' yields a leading run of length 0.
local function pixel_line_to_runs(line)
  local runs = {}
  local current = '0'
  local length = 0
  for pixel in string.gmatch(line, '%d') do
    if pixel == current then
      length = length + 1
    else
      if current == '1' then length = -length end
      runs[#runs + 1] = length
      current = pixel
      length = 1
    end
  end
  if current == '1' then length = -length end
  runs[#runs + 1] = length
  return runs
end

-- Crops the image regions relevant to the current paragraph into temporary
-- bitmap files (by running ImageMagick's `convert`) and parses them into runs.
--   base_filename : optional suffix of the temporary files; the files are named
--                   '1'..base_filename, '2'..base_filename, '3'..base_filename.
--                   Defaults to 'tmp-for-paragraph.pbm'.
-- Returns a list with one entry per pixel row (regions in order); each entry
-- is a list of signed run lengths as described on pixel_line_to_runs.
function get_runs(base_filename)
  -- Previously the argument was shadowed by a local and therefore ignored.
  base_filename = base_filename or 'tmp-for-paragraph.pbm'
  -- We want to scale the image so that a height of \baselineskip is 1 pixel. In other words,
  -- the total number of rows of pixels in the image should be: (vsize/baselineskip)*(num_rows)
  local resized_image_height = math.floor(tex.vsize / tex.baselineskip.width) * global.num_rows
  local area1, area2, area3 = image_areas_for_paragraph(resized_image_height)
  local areas = {area1, area2, area3} -- may contain nils; only indexed directly below

  local filenames = {}
  for index = 1, 3 do
    local area = areas[index]
    if area ~= nil then
      local filename = index .. base_filename
      -- Using a fixed threshold avoids selecting B&W based on the cropped area
      -- (which would differ between crop areas and so be inconsistent).
      local command = string.format(
        [[convert "%s" -resize "x%s" -crop "%s" -threshold "85%%" -compress none "%s"]],
        global.image_filename, resized_image_height, area, utils.safe_filename(filename))
      print(command)
      local status = os.execute(command)
      -- os.execute reports success as `true` or as exit status 0, depending on
      -- the Lua implementation. On failure a file left over from an earlier
      -- paragraph could otherwise be parsed without any hint of the problem.
      if status ~= true and status ~= 0 then
        print(string.format('Warning: command may have failed (os.execute returned %s): %s', tostring(status), command))
      end
      filenames[#filenames + 1] = filename
    end
  end

  local ret = {}
  for _, filename in ipairs(filenames) do
    local line_number = 0
    for line in io.lines(filename) do
      line_number = line_number + 1
      -- The first two lines are skipped (presumably the bitmap header: magic
      -- number and dimensions -- confirm against convert's output).
      if line_number > 2 then
        local runs = pixel_line_to_runs(line)
        ret[#ret + 1] = runs
        -- Debug output: the raw pixel line followed by its runs.
        local pieces = {}
        for j, run in ipairs(runs) do
          pieces[j] = string.format(' %s', run)
        end
        print(line .. table.concat(pieces))
      end
    end
  end
  collectgarbage() -- https://tex.stackexchange.com/a/404623/48
  return ret
end
--[======================================================================[
Translate runs of 0s and 1s into a parshape, avoiding tiny specks of text
--]======================================================================]
-- Builds the shape specification for the paragraph about to be broken:
-- reads the pixel runs from the image, drops text runs narrower than 2% of
-- their line, and converts the result with runs_to_parshape.
function get_paragraph_spec()
  local cleaned = clean_runs(get_runs(), 0.02)
  local parshape = runs_to_parshape(cleaned)
  return parshape
end
-- Removes all text runs (positive numbers) narrower than `min_frac` of the
-- total length of their line by turning them into glue (negative numbers),
-- and merges neighbouring glue runs into one.
--   runs     : list of lines, each a list of signed run lengths
--   min_frac : minimum fraction of the line width a text run needs to survive
-- Returns a new list of lines; the input is not modified. The sum of absolute
-- run lengths of every line is preserved, whatever the order of its runs.
function clean_runs(runs, min_frac)
  local ret = {}
  for i, linespec in ipairs(runs) do
    local linesum = 0
    for _, elemspec in ipairs(linespec) do linesum = linesum + math.abs(elemspec) end

    local newlinespec = {}
    for _, elemspec in ipairs(linespec) do
      -- `elemspec > 0` implies `linesum > 0`, so the division is safe.
      if elemspec > 0 and elemspec / linesum >= min_frac then
        newlinespec[#newlinespec + 1] = elemspec
      else
        -- Either real glue (<= 0) or a speck of text that is demoted to glue.
        local glue = elemspec
        if glue > 0 then glue = -glue end
        local last = newlinespec[#newlinespec]
        if last ~= nil and last <= 0 then
          newlinespec[#newlinespec] = last + glue -- extend the preceding glue
        else
          newlinespec[#newlinespec + 1] = glue
        end
      end
    end
    ret[i] = newlinespec
  end
  return ret
end
--[[
Converts lines of signed runs into a list of {indent, width, skips} entries,
one per text run, scaled so that a full line is tex.hsize wide. `skips` is the
number of baselineskips that shape_paragraph inserts before the corresponding
typeset line: 0 for the first text run of an image line, -1 for every further
run of the same image line (so that they end up side by side).

The list starts with an extra {hsize, 0, 0} entry and ends with {0, hsize, 0}.
An image line without any text adds 1 to the `skips` of the entry inserted
most recently (which is the leading entry if no text has been seen yet).
NOTE(review): that increment lands on the entry *preceding* the empty line,
while shape_paragraph inserts glue *before* an entry's line -- confirm this
attribution is what is intended.

Example: for a paragraph shaped like
    aaaaaaaaaaaaaa       aaaaaaaaaaaaaa     14 (7) 14
    bbb     bbbbbb   bbbbbbb   bbbbbbbb     3 (5) 6 (3) 7 (3) 8
    ccccccccccccccccccccc                   21 (14)
                    ddddd                   (16) 5 (14)
and therefore input (`runs`) like
    { {14, -7, 14}, {3, -5, 6, -3, 7, -3, 8}, {21, -14}, {-16, 5, -14} }
this function returns
    {
      {hsize, 0, 0},
      {0,             hsize * 14/35,  0},
      {hsize * 21/35, hsize * 14/35, -1},
      {0,             hsize * 3/35,   0},
      {hsize * 8/35,  hsize * 6/35,  -1},
      {hsize * 17/35, hsize * 7/35,  -1},
      {hsize * 27/35, hsize * 8/35,  -1},
      {0,             hsize * 21/35,  0},
      {hsize * 16/35, hsize * 5/35,   0},
      {0, hsize, 0},
    }
--]]
function runs_to_parshape(runs)
  local hsize = tex.hsize
  local shape = {{hsize, 0, 0}}
  for _, linespec in ipairs(runs) do
    local total = 0
    for _, run in ipairs(linespec) do total = total + math.abs(run) end

    local offset = 0 -- pixels consumed so far on this image line
    local skips = 0  -- 0 until the first text run of this line, -1 afterwards
    for _, run in ipairs(linespec) do
      if run > 0 then
        shape[#shape + 1] = {hsize * offset / total, hsize * run / total, skips}
        skips = -1
      end
      offset = offset + math.abs(run)
    end

    if skips ~= -1 then
      -- No text on this image line: it is entirely glue.
      assert(offset == total)
      local latest = shape[#shape]
      latest[3] = latest[3] + 1
    end
  end
  shape[#shape + 1] = {0, hsize, 0}
  return shape
end
--[======================================================================[
Putting it all together
--]======================================================================]
--[[
Typesets the text from `text_filename` as `num_rows` x `num_columns` pages,
with a "hole" shaped like `image_filename` when these pages are assembled.
If `is_latex` is true, assumes the conventions of LaTeX rather than plain TeX.

Records the image name, its pixel dimensions (queried with ImageMagick's
`identify`) and the grid size in `global`, prints the TeX setup, registers
shape_paragraph as the `linebreak_filter` callback and finally \input's the text.
Both filenames must pass utils.safe_filename.
--]]
function pagesWithCutout(text_filename, num_rows, num_columns, image_filename, is_latex)
  image_filename = utils.safe_filename(image_filename)
  global.image_filename = image_filename
  global.image_width = tonumber(utils.get_output(string.format('identify -format "%%w" %s', image_filename)))
  global.image_height = tonumber(utils.get_output(string.format('identify -format "%%h" %s', image_filename)))
  global.num_rows = num_rows
  global.num_columns = num_columns

  local setup_parts = {
    -- Turn off page numbers
    is_latex and [[\pagestyle{empty}]] or [[\nopagenumbers]],
    -- So that inter-paragraph glue stretch does not cause problems
    [[\parskip=5pt \raggedbottom]],
    -- Avoiding overfull boxes as much as possible
    [[\hyphenpenalty=0 \lefthyphenmin=1 \righthyphenmin=1 \tolerance=9999 \emergencystretch=3em ]],
    -- For the few overfull boxes that do happen
    [[\overfullrule=0pt\relax ]],
  }
  tex.print(table.concat(setup_parts))

  luatexbase.add_to_callback('linebreak_filter', shape_paragraph, 'Typeset each paragraph according to the "shape" from image.')
  -- \input the given source file
  tex.print(string.format([[\input %s]], utils.safe_filename(text_filename)))
end
-- A linebreak_filter: for a given paragraph, determines the required shape
-- from the image and breaks the paragraph accordingly with tex.linebreak.
-- Afterwards, unless `hangoutfully` is set, it inserts before each resulting
-- hlist the number of baselineskips recorded in the third field of the
-- matching shape entry (negative counts make lines overlap as they should).
-- Returns the head of the resulting node list. `is_display` is not used.
function shape_paragraph(head, is_display)
  local myparshape = get_paragraph_spec()
  -- The first entry only carries the skips that go before the first line.
  local leading_glue = table.remove(myparshape, 1)
  local broken, info = tex.linebreak(head, {parshape=myparshape})
  tex.prevdepth = info.prevdepth -- https://tex.stackexchange.com/q/403801/48
  tex.prevgraf = info.prevgraf

  -- Inserts `count` baselineskips before `line`, keeping `broken` up to date.
  local function add_skips_before(line, count)
    if not hangoutfully then
      broken = insert_nondiscardable_glue_before(broken, line, tex.baselineskip.width, count)
    end
  end

  local line = utils.find_first_of_type_in(broken, 'hlist')
  assert(line ~= nil, 'Empty paragraph? Nowhere to insert this glue')
  add_skips_before(line, leading_glue[3])

  -- Pair the remaining shape entries with the hlists, in order, until either runs out.
  for _, linespec in ipairs(myparshape) do
    print(string.format('linespec: %s', utils.print_array(linespec)))
    line = utils.find_first_of_type_in(line, 'hlist')
    if line == nil then break end
    add_skips_before(line, linespec[3])
    line = line.next
  end
  return broken
end
-- Inserts |times| glue nodes of width `glue_width` (negated when `times` is
-- negative) before node `tmp` in the list starting at `head`. Each glue is
-- preceded by a zero-sized rule node, so the resulting order per repetition
-- is: rule, glue, ..., tmp. `times` defaults to 1 when nil.
-- Returns the (possibly new) head of the list.
function insert_nondiscardable_glue_before(head, tmp, glue_width, times)
  if times == nil then times = 1 end

  -- A rule with no extent in any direction, of subtype 1.
  local function new_empty_rule()
    local rule = node.new('rule')
    rule.height = 0
    rule.depth = 0
    rule.width = 0
    rule.subtype = 1
    return rule
  end

  local repetitions = math.abs(times)
  for _ = 1, repetitions do
    local skip = node.new('glue')
    node.setglue(skip, glue_width * utils.sign(times))
    head = node.insert_before(head, tmp, skip)
    head = node.insert_before(head, skip, new_empty_rule())
  end
  return head
end
-- Table collecting the helper functions defined below; it is returned at the end.
-- NOTE(review): this looks like the start of a separate file (the code above
-- loads its helpers with require('utils.lua')) -- confirm.
local utils = {}
-- Sign of a number: -1 for negative, 1 for positive, 0 otherwise.
-- https://stackoverflow.com/a/1318344/4958
function utils.sign(x)
  if x < 0 then
    return -1
  end
  if x > 0 then
    return 1
  end
  return 0
end
-- Formats the sequence part of `arr` as a string such as '{1,2,3,}': every
-- element is rendered with '%s' and followed by a comma. Returns the string;
-- nothing is printed.
function utils.print_array(arr)
  -- Collect the pieces locally (the accumulator used to leak into the global
  -- `s`) and join them once instead of concatenating repeatedly.
  local pieces = {}
  for i, elem in ipairs(arr) do
    pieces[i] = string.format('%s,', elem)
  end
  return '{' .. table.concat(pieces) .. '}'
end
-- Walks the node list starting at `inp` (following `.next`) and returns the
-- first node whose id equals node.id(node_type); `inp` itself is a candidate.
-- Returns nil when no such node exists.
function utils.find_first_of_type_in(inp, node_type)
  local current = inp
  while current do
    if current.id == node.id(node_type) then
      return current
    end
    current = current.next
  end
  return current
end
-- Returns the filename in a form that is safe for passing to the shell
-- (os.execute), or throws an error otherwise.
-- Right now this only accepts filenames made up of letters, digits, '.' and
-- '-'; anything else raises an error. *Proper* escaping can be added later.
-- Not just '%q': https://stackoverflow.com/questions/22824905/how-good-is-using-q-in-lua-to-escape-shell-arguments/27370100#27370100
function utils.safe_filename(filename)
  -- The parentheses keep only the resulting string and drop gsub's match count.
  local safe = (string.gsub(filename, '[^%a%d%.%-]', ''))
  if filename ~= safe then
    -- Level 0: raise the bare message, without position information.
    error(string.format('The filename %s has funny characters (a file named %s would be ok).', filename, safe), 0)
  end
  return safe
end
-- Runs `command` in a shell and returns everything it writes to its standard
-- output as one string. Caller's responsibility to ensure that the command is
-- safe to run in a shell!
-- Raises an error if the process cannot be started.
-- NOTE(review): io.saved_popen is not part of standard Lua; presumably the
-- host environment provides it as the original io.popen -- confirm.
function utils.get_output(command)
  -- A failed popen returns nil plus a message; assert surfaces that message
  -- instead of failing later with "attempt to index a nil value".
  local handle = assert(io.saved_popen(command))
  local result = handle:read("*a")
  handle:close()
  return result
end
-- Global counter, initialised to 0. Nothing in the visible code reads or
-- updates it -- NOTE(review): possibly leftover debugging state; confirm before removing.
files_opened = 0
--[==[
function utils.debug_paragraph_position(row, column)
-- Usually:
-- pagefilstretch=0 pagefillstretch=0 pagefilllstretch=0 pageshrink=0 pagedepth=0
-- pagegoal = vsize except at beginning when it is maxdimen
print(string.format('Shaping paragraph for page number %s (column %s, row %s)', tex.count[0], column, row))
print(string.format([[done: %.2f plus %s of %.2f (=%.2f of %.2f)]],
tex.pagetotal / 65536, tex.pagestretch / 65536, tex.vsize/65536,
tex.pagetotal / tex.baselineskip.width,
tex.vsize / tex.baselineskip.width))
end
function utils.print_file_contents(filename)
local f = io.open(filename, 'r')
if f == nil then
print(string.format('<Could not open file %s>', filename))
return
end
print(f:read('*a'))
f:close()
end
function utils.debug_print_parshape(myparshape)
for i, parspec in ipairs(myparshape) do
s = ''
for j, something in ipairs(parspec) do
s = s .. string.format(' %s', something)
end
print(s)
end
end
--]==]
return utils
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.