Skip to content

Instantly share code, notes, and snippets.

@Skrylar
Last active May 4, 2020 05:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Skrylar/179648d5856ae49087ad34a87d0740f9 to your computer and use it in GitHub Desktop.
Save Skrylar/179648d5856ae49087ad34a87d0740f9 to your computer and use it in GitHub Desktop.
import printingpress
import skmap/robin
import math
type
FlowDirection* = enum
LeftToRight
TopToBottom
#-
BoxKind* = enum
Container ## Box is meant to contain other boxes.
Glyph ## Box contains an actual glyph.
#-
BoxFlag* = enum
Potato
BoxFlags* = set[BoxFlag]
#-
Box* = object
kind*: BoxKind ## What secrets does this box keep?
flags*: BoxFlags
flow*: FlowDirection ## What direction do items go?
width*, height*: int16 ## Size of the box contents.
glyph*: int16 ## Which glyph this box renders from the typeface (for glyph boxes.)
postglue*: int16 ## Glue space after this box
depth*: int16 ## How far below the baseline this box sits
children*: seq[Box] ## Boxes within this box (for containers.)
bestbreak*: int16 ## Offset to best breakpoint
badness*: int16 ## Calculated score to break on this box.
#- `FlowDirection` and `BoxKind` may get removed and folded in to
#- `BoxFlags` to save space.
#- == Recalculating bounding boxes
#- When first planning a paragraph the geometry of individual letters
#- may only be slightly known. Also when updating the contents of
#- various boxes the geometry needs to be recalculated. This is the
#- function that "trues up" the bounding boxes of boxes.
#-
#- If `deep` is `true` then children of the box will be recursively
#- updated as well. If `face` is a non-nil value then it will be used
#- to calculate the size of `Glyph` boxes.
proc recalculate_bounds*(self: var Box; face: ptr Typeface = nil; deep: bool = false) =
## Recalculates the width and height of a box based on contents.
case self.kind:
of Container:
# reset our dimensions; we will add up the dimensions of our children
self.width = 0
self.height = 0
# flow direction makes a difference
case self.flow
of LeftToRight:
for i in 0..<self.children.len:
template ch: untyped = self.children[i]
if deep:
recalculate_bounds(ch, face, true)
inc self.width, ch.width
inc self.width, ch.postglue
self.height = max(self.height, ch.height)
of TopToBottom:
for i in 0..<self.children.len:
template ch: untyped = self.children[i]
if deep:
recalculate_bounds(ch, face, true)
inc self.height, ch.height
inc self.height, ch.postglue
self.width = max(self.width, ch.width)
of Glyph:
# we just load dimensions off the typeface
if face != nil:
let g = face.glyphs[self.glyph.int]
self.width = g.region.width.int16
self.height = g.region.height.int16
self.depth = g.offset_y.int16
self.postglue = (g.advance - self.width)
#- == Creating a single paragraph
#- `set_to_ansi` will break apart the supplied string as though it
#- were a single paragraph. Container boxes are added which in turn
#- hold glyph boxes for each letter. Whitespace is converted to glue
#- (stretchable space) between words.
proc set_to_ansi*(self: var Box; paragraph: string) =
## Clears the box and creates sub-boxes to represent a given string.
## Does not do anything intelligent with Unicode; don't even try.
proc whitespace(c: char): bool =
result = (c == ' ')
# need to transmute lead in to gold
self.kind = Container
self.children.set_len(0)
self.flow = LeftToRight # XXX some languages would disagree
# clear unrelated things
self.width = 0
self.height = 0
self.depth = 0
var i = 0
var wordy = whitespace(paragraph[0])
var current_word = Box()
while i < paragraph.len:
if wordy:
# collect glyphs within a word
if not whitespace(paragraph[i]):
var letter = Box()
letter.kind = Glyph
letter.glyph = paragraph[i].int16
current_word.children.add(letter)
else:
wordy = false
continue
else:
# convert whitespace to postglue
if whitespace(paragraph[i]):
current_word.postglue += 1 # <1>
else:
wordy = true
self.children.add(current_word)
current_word = Box()
continue
inc i
if current_word.children.len > 0:
self.children.add(current_word)
#- <1> We would ideally use the width of the font's actual space
#- character here but we do not know what it is. So we have to put the
#- number of spaces which *should* be here such as, a single space,
#- multiple spaces for paragraph indentions, end of sentence spaces and
#- so forth. You have to resort to `hydrate_spaces` to correct the glue
#- here.
#-
#- == Hydration of whitespace
#- If your paragraph construction proc does not know the absolute amount
#- of spacing that belongs between letters it must use relative spacing.
#- You can use this function to multiply that relative spacing by the
#- width of your typeface's space character to correct that.
proc hydrate_spaces*(self: var Box; space_width: int) =
## Multiplies the `postglue` of every contained box by the given
## `space_width`.
for i in 0..<self.children.len:
self.children[i].postglue *= space_width.int16
#- == Line Breaking
#- Accepts a box of words and fits them to a given width. New lines
#- are added at various points which are called "line breaks." Several
#- algorithms for this exist. Some of them are fast but produce ugly
#- (but servicable) output. Some are more expensive but create type that
#- is comparable to professionally published books.
#-
#- === Greedy Line Break
#- . Greedy assumes you have a maximum width and a series of words.
#- . You pop a word from the start of your paragraph and consider adding it to the current line.
#- . If adding it to the line would not cause the line to become long, then do so and return to 2.
#- . Otherwise you must commit the current line and start a new one that starts with this word.
#- . When you run out of words to pop you commit the remaining line.
#-
#- This algorithm is simple and fast but produces paragraphs that are not
#- "print quality."
proc greedy_break*(box: Box; width: int): Box =
#- .Set up the vbox
result.kind = Container
result.flow = TopToBottom
result.children.set_len(0)
#- .Resetting the current hbox
var this_line: Box
template reset_line(line: var untyped) =
line = Box()
line.kind = Container
reset_line(this_line)
#- .Fit as many words to single lines as possible
var w = 0
for b in box.children:
if (w + b.width) > width:
# remove glue from end of line
this_line.children[this_line.children.high].postglue = 0
# XXX we may not actually need to do this
# commit current line and begin next one
this_line.recalculate_bounds
result.children.add(this_line)
reset_line(this_line)
w = 0
# now add new content to the line
inc w, b.width
inc w, b.postglue
this_line.children.add(b)
#- .Add final line, if needed
if this_line.children.len > 0:
# remove glue from end of line
this_line.children[this_line.children.high].postglue = 0
this_line.recalculate_bounds
result.children.add(this_line)
result.recalculate_bounds
#-
#- === Pretty Line Breaking
#- Pretty line breaking uses a combination of TeX's "dynamic programming"
#- based algorithm <<Knuth-Plass>>, coupled with optimizations from
#- <<Kenninga>>, <<jaroslov>> and Skrylar's own tweaking.
#-
#-
#-
#- ==== Planning (internal)
#- Knuth-Plass line breaking is recursive, so we model the majority of its
#- implementation as a function which can call itself.
#-
#- Some implementations construct trees instead of performing recursion.
proc knuthplass_break_inner(box: var Box; start, horizon, width, margin: int; wall: int = int.high) =
if horizon == 0:
box.children[start].badness = 0
box.children[start].bestbreak = int16.high
return
#- .Greedy fill the line until we hit the margin.
var w = 0
var i = start
while i < box.children.len:
if ((w + box.children[i].width) > width) and (w != 0): break
inc w, box.children[i].width
inc w, box.children[i].postglue # questionable
inc i
if w > margin: break
dec w, box.children[i-1].postglue
var best_index, best_score, barrier: int
#- .Calculate a line's badness score from its current width
proc score(w: int): int =
return pow(abs(margin - w).float, 2).int
#- .Check if there are words on the line after this.
if i < box.children.len:
#- .Use the obvious linebreak as a baseline for scoring
knuthplass_break_inner(box, i, horizon-1, width, margin)
best_index = i
best_score = box.children[i].badness
barrier = best_score + score(w)
dec i
#- .Start cutting back words until quality stops improving
while i > start:
knuthplass_break_inner(box, i, horizon-1, width, margin, barrier)
# DISQUALIFIED
if box.children[i].badness > wall:
return
# improvement
if box.children[i].badness < best_score:
best_score = box.children[i].badness
best_index = i
dec w, box.children[i].width
dec i
if i >= 0:
dec w, box.children[i].postglue
# hot garbage
else:
break
#- .We filled the end of the paragraph.
else:
best_index = box.children.len
#- .Commit best scores.
box.children[start].badness = barrier.int16
box.children[start].bestbreak = best_index.int16
#- ==== Breaking (public)
#-
#- `margin` is the *desired* width of text. Greedy line wrapping will occur
#- until one of two things happens:
#-
#- . The line's `width` budget would be blown, or,
#- . The last added word went over the margin
#-
#- Then the more expensive linear programming steps are taken to see how
#- many words need to be deleted to form an optimal paragraph.
#-
#- `width` is the *maximum* width of text. No line may be longer than this
#- unless it is impossible to do otherwise.
#-
#- `horizon` is an optimization <<Kenninga>>. By default the horizon is
#- infinite which is safe for trusted inputs which do not change rapidly.
#- Setting a horizon limits how tall of a tree is evaluated to find the
#- best break points. `horizon` lines will be evaluated for optimality and
#- committed to the paragraph. Then the next batch of lines are checked
#- until the whole paragaph is set.
proc knuthplass_break*(
box: var Box; width, margin: int;
horizon: int = int.high): Box =
# start by setting every break to maximal badness
for i in 0..box.children.high:
box.children[i].badness = int16.high
box.children[i].bestbreak = int16.high
var this_line: Box
result = Box()
result.flow = TopToBottom
template reset_line(line: var untyped) =
line = Box()
line.kind = Container
line.flow = LeftToRight
reset_line(this_line)
var i = 0
while i < box.children.len:
# evaluate break points
knuthplass_break_inner(box, i, horizon, width, margin)
# now follow the best breaks and build our vbox
while i < box.children.len:
template b: untyped = box.children[i]
# breakpoint not evaluated due to horizon constraints
if b.bestbreak == int16.high: break
for j in i..<b.bestbreak:
this_line.children.add(box.children[j])
this_line.recalculate_bounds
result.children.add(this_line)
reset_line(this_line)
i = b.bestbreak
if this_line.children.len > 0:
this_line.recalculate_bounds
result.children.add(this_line)
#-
when ismainmodule:
import printingpress
import skcbor
var binfile: File
var typeface: Typeface
var reader: CborReader
discard binfile.open("main_font.bin", fmRead)
reader.open_to_file(binfile)
reader.read(typeface)
reader.close()
var b = Box()
echo "step=set_to_ansi"
#b.set_to_ansi("I am a toast, the largest toast to ever have once been bread.")
b.set_to_ansi("wot do")
echo b
echo "step=hydrate_spaces"
b.hydrate_spaces(typeface.space_width)
echo "step=recalculate_bounds"
b.recalculate_bounds(addr typeface, true)
echo b
echo "step=linebreak"
#var broken = greedy_break(b, 100)
var broken = knuthplass_break(b, 100, 75)
echo broken
#- [bibliography]
#- == References
#- - [[[Knuth-Plass]]] Breaking Paragraphs into Lines. Donald E. Knuth, Michael F. Plass. https://jimfix.github.io/math382/knuth-plass-breaking.pdf
#- - [[[Kenninga]]] Optimal Line Break Determination. US Patent 6,510,441. Eric A. Kenninga.
#- - [[[jaroslov]]] knuth plass thoughts. Jacob N. Smith. https://github.com/jaroslov/knuth-plass-thoughts/blob/master/plass.md
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment