Skip to content

Instantly share code, notes, and snippets.

@zauguin
Last active October 29, 2023 14:30
Show Gist options
  • Save zauguin/e119669fa702b112c704a9337b30d446 to your computer and use it in GitHub Desktop.
Save zauguin/e119669fa702b112c704a9337b30d446 to your computer and use it in GitHub Desktop.
-- We should never have a hyphenation point inside of a grapheme cluster.
-- Hyphenating there would lead to very wrong results and even if no hyphenation is chosen it is likely to cause issues during shaping.
--
-- It would be nice if this were prevented in the hyphenation patterns, but as a more general fix we can post-process hyphenated text to identify such situations.
require'provide_hyphenation_callbacks'
local graphemes = require'lua-uni-graphemes'
local read_codepoint = graphemes.read_codepoint
local traverse = node.traverse
local is_char = node.is_char
local node_free = node.free
local disc_t = node.id'disc'
--- Remove hyphenation points that ended up inside a grapheme cluster.
--
-- The list is walked once. Every character is fed to the grapheme segmenter
-- (`read_codepoint`). Discretionaries without a `replace` list are remembered
-- in `pending`; if a later node turns out to continue the current cluster,
-- everything from `pending` up to (but excluding) that node is unlinked and
-- freed. If instead a new cluster starts, `pending` is simply forgotten and
-- the discretionaries stay in the list.
--
-- @param head first node of the list. The code relies on the head starting a
--   new cluster, so that `pending.prev` always exists when nodes are removed.
-- @return always `true`; the head node itself is never replaced.
local function process(head)
  -- Segmenter state threaded through read_codepoint. It is reset to nil by
  -- any node that is neither a character nor a discretionary.
  local state
  -- First replace-less discretionary seen since the last cluster start.
  local pending
  for n, id in traverse(head) do
    local ch = is_char(n)
    local new_cluster, forbidden
    if ch then
      new_cluster, state = read_codepoint(ch, state)
    elseif id == disc_t then
      local replace = n.replace
      if replace then
        -- We consider the discretionary to start a new cluster if the first
        -- node of its replace list starts a new cluster. The remaining nodes
        -- are still run through the segmenter so that `state` is up to date
        -- for whatever follows the discretionary.
        local first = true
        for inner in traverse(replace) do
          local inner_ch = is_char(inner)
          local inner_new_cluster
          if inner_ch then
            inner_new_cluster, state = read_codepoint(inner_ch, state)
          else
            inner_new_cluster, state = true, nil
          end
          if first then new_cluster, first = inner_new_cluster, false end
        end
      else
        -- A pure break point: it must not survive inside a cluster.
        forbidden = true
      end
    else
      new_cluster, state = true, nil
    end
    if new_cluster then
      -- The remembered break points sit between two clusters; keep them.
      pending = nil
    elseif forbidden then
      pending = pending or n
    elseif pending then
      -- `n` continues the cluster: splice out pending..n.prev and free them.
      local before_pending = pending.prev
      before_pending.next, n.prev = n, before_pending
      repeat
        -- node.free hands back the successor of the freed node.
        pending = node_free(pending)
      until pending == n
      pending = nil
    end
  end
  return true
end
-- Register `process` for the `post_hyphenation_filter` callback.
-- NOTE(review): that callback is presumably created by the
-- `provide_hyphenation_callbacks` module required at the top of this file;
-- confirm it is loaded before this line runs.
luatexbase.add_to_callback('post_hyphenation_filter', process, 'Remove hyphenation points inside grapheme clusters')
-- Do nothing if the callbacks provided by this file already exist. The name
-- checked here must match the one passed to `create_callback` further down
-- (`post_hyphenation_filter`), otherwise the guard can never trigger.
if luatexbase.callbacktypes.post_hyphenation_filter then return end
-- Get hold of the `callback.register` that is in place once luatexbase is
-- uninstalled. `luatexbase.uninstall()` is expected to swap `callback.register`
-- and to drop the `luatexbase` global, so both are saved beforehand and put
-- back right after the lookup.
local saved_luatexbase, saved_callback_register = luatexbase, callback.register
luatexbase.uninstall()
local original_callback_register = callback.register
luatexbase, callback.register = saved_luatexbase, saved_callback_register
local lang_hyphenate = lang.hyphenate
local call_callback = luatexbase.call_callback
-- Locate the `callbacklist` table by scanning the upvalues of
-- `luatexbase.add_to_callback` by name. Raises an error when no upvalue of
-- that name exists.
local callbacklist do
  local add_to_callback = luatexbase.add_to_callback
  local index = 1
  while true do
    local upname, upvalue = debug.getupvalue(add_to_callback, index)
    -- debug.getupvalue yields nothing once the index runs past the last upvalue.
    if not upname then error'weird' end
    if upname == 'callbacklist' then
      callbacklist = upvalue
      break
    end
    index = index + 1
  end
end
-- local existing_hyphenate_callbacks = luatexbase.callback_descriptions'hyphenate'
-- for _, cb in ipairs(existing_hyphenate_callbacks) do
-- print(require'inspect'(cb))
-- luatexbase.remove_from_callback('hyphenate', cb.description)
-- end
-- Create the user-level callbacks offered by this file:
--   pre_hyphenation_filter  (list)        runs before hyphenation,
--   post_hyphenation_filter (reverselist) runs after hyphenation,
--   hyphenation             (exclusive)   does the actual hyphenation,
--   hyphenate               (simple)      user-level stand-in for the engine callback.
do
  local create_callback = luatexbase.create_callback

  -- Default for `hyphenation`: run the hyphenator saved in `lang_hyphenate`
  -- on the list and report the list as unchanged.
  -- NOTE(review): `temp` is linked in front of `head` but the hyphenator is
  -- called on `head`, not on `temp` -- confirm this is intended.
  local function default_hyphenation(head)
    local temp = node.new'temp'
    temp.next = head
    lang_hyphenate(head)
    node.free(temp)
    return true
  end

  create_callback('pre_hyphenation_filter', 'list')
  create_callback('post_hyphenation_filter', 'reverselist')
  -- Drop the existing type entry for `hyphenate` so that it can be created
  -- again below as a `simple` user callback.
  luatexbase.callbacktypes.hyphenate = nil
  create_callback('hyphenation', 'exclusive', default_hyphenation)
  create_callback('hyphenate', 'simple')
end
-- for _, cb in ipairs(existing_hyphenate_callbacks) do
-- luatexbase.add_to_callback('hyphenate', cb.func, cb.description)
-- end
--- Run the registered `hyphenate` callbacks as a chain of nested handlers.
--
-- While a callback runs, the global `lang.hyphenate` is replaced so that a
-- call to it from inside callback number `i` invokes callback `i - 1`, and a
-- call from inside callback 1 invokes the `hyphenation` callback (level 0).
-- The last entry of `callbacks` is the outermost one and is invoked first.
--
-- `lang.hyphenate` is restored to its value on entry before this function
-- returns, including when one of the callbacks raises an error; the error is
-- then re-raised unchanged.
--
-- @param callbacks sequence of entries with a `func` field
-- @param head node whose `.next` is the list to hyphenate
-- @param tail last node of the list (may be nil; each level then falls back
--   to `node.tail`)
-- @return the lowest level that was reached: 0 if the `hyphenation` callback
--   ran, otherwise the smallest index of a callback in `callbacks` that ran
local function emulate_hyphenate_callback(callbacks, head, tail)
  local saved_lang_hyphenate = lang.hyphenate
  local min_level = math.huge

  -- Level 0: hand the list to the `hyphenation` callback, with the real
  -- lang.hyphenate in place while it runs.
  local function innermost(list_head)
    min_level = 0
    lang.hyphenate = saved_lang_hyphenate
    local result = call_callback('hyphenation', list_head.next)
    if result == false then
      node.flush_list(list_head.next)
      list_head.next = nil
    elseif result ~= true then
      list_head.next = result
    end
    lang.hyphenate = innermost
    return true
  end

  -- Wrap every callback around the chain built so far.
  local chain = innermost
  for i, cb in ipairs(callbacks) do
    local cb_func = cb.func
    local inner = chain
    local function level(list_head, list_tail)
      lang.hyphenate = inner
      cb_func(list_head, list_tail or node.tail(list_head))
      lang.hyphenate = level
      if i < min_level then min_level = i end
      return true
    end
    chain = level
  end

  -- Protected call so that the global is restored even if a callback fails.
  local ok, err = pcall(chain, head, tail)
  lang.hyphenate = saved_lang_hyphenate
  if not ok then
    -- Level 0 keeps the original message free of extra position information.
    error(err, 0)
  end
  return min_level
end
-- Install the engine-level `hyphenate` callback. It runs, in order: the
-- `pre_hyphenation_filter` callback, the emulated chain of user `hyphenate`
-- callbacks (ending in `hyphenation`), and the `post_hyphenation_filter`
-- callback. The list being processed hangs off `initial_temp.next`.
do
  -- Apply the filter callback `name` to the list after `anchor`.
  -- A result of `false` discards the list and yields `false`; any other
  -- result becomes the new list after `anchor` and is returned.
  local function run_filter(name, anchor)
    local list = anchor.next
    local filtered = call_callback(name, list)
    if filtered == false then
      node.flush_list(list)
      anchor.next = nil
      return false
    end
    anchor.next = filtered
    return filtered
  end

  original_callback_register('hyphenate', function(initial_temp, tail)
    if tail.next then
      error'This should not happen, please report a bug.'
    end

    local head = run_filter('pre_hyphenation_filter', initial_temp)
    if head == false then return end

    local callbacks = callbacklist['hyphenate'] or {}
    local nesting = emulate_hyphenate_callback(callbacks, initial_temp, node.tail(head))
    -- nesting == 0 is the expected case: `hyphenation` behaves like an
    -- additional hyphenate callback with index 0 and was reached.
    if nesting == 1 then
      -- `hyphenation` was skipped. That is only a problem if someone set it.
      if luatexbase.callback_descriptions'hyphenation'[1] then
        tex.error'Missing call to lang.hyphenate, hyphenation callback was ignored.'
      end
    elseif nesting ~= 0 then
      tex.error'Missing call to lang.hyphenate, at least one hyphenate callback was ignored.'
    end

    run_filter('post_hyphenation_filter', initial_temp)
  end)
end
Display the source blob
Display the rendered blob
Raw
% Minimal LuaLaTeX test document for the prevent_grapheme_hyphens Lua module.
\documentclass{article}
% Extra hyphenation pattern: the odd digit permits a break right after
% Cyrillic "е", which also covers the position between "е" and a following
% combining mark.
\patterns{ е1 }
% Disabled alternative: presumably suppresses the break before U+0308 at the
% pattern level instead of in Lua.
% \patterns{ 2^^^^0308 }
\usepackage{fontspec}
\setmainfont{Noto Sans}[
% Renderer=Harfbuzz,
Script=Cyrillic, Language=Bulgarian]
% Load the Lua filter under test.
\directlua{require'prevent_grapheme_hyphens'}
\begin{document}
% Write the node lists of shipped-out pages to the log for inspection.
\showoutput
% Test case: "е" followed by U+0308 (combining diaeresis).
азе^^^^0308аз
% Control case: same letters without a combining mark.
азезаз
\end{document}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment