Skip to content

Instantly share code, notes, and snippets.

@zauguin
Last active October 29, 2023 14:30
Show Gist options
  • Save zauguin/e119669fa702b112c704a9337b30d446 to your computer and use it in GitHub Desktop.
Save zauguin/e119669fa702b112c704a9337b30d446 to your computer and use it in GitHub Desktop.
-- We should never have a hyphenation point inside of a grapheme cluster.
-- Hyphenating there would lead to very wrong results and even if no hyphenation is chosen it is likely to cause issues during shaping.
--
-- It would be nice if this were prevented in the hyphenation patterns, but as a more general fix we can post-process hyphenated text to identify such situations.
-- Loaded for its side effects only; the return value is discarded.
-- NOTE(review): presumably this is the module that creates the `post_hyphenation_filter`
-- callback which `process` is registered on at the end of this file -- confirm.
require'provide_hyphenation_callbacks'
-- Grapheme cluster helpers; only `read_codepoint` is used here.
local graphemes = require'lua-uni-graphemes'
-- Used below as `new_cluster, state = read_codepoint(codepoint, state)`.
local read_codepoint = graphemes.read_codepoint
-- Local aliases for the node library functions used by `process`.
local traverse = node.traverse
-- Its result is used below as the codepoint of a node when truthy.
local is_char = node.is_char
local node_free = node.free
-- Numeric node id of discretionary nodes, compared against the id yielded by `traverse`.
local disc_t = node.id'disc'
--- Remove hyphenation points that ended up inside a grapheme cluster.
--
-- Walks the node list once, feeding every character to `read_codepoint` to
-- learn whether it starts a new cluster. Discretionaries without a `replace`
-- list that sit between a character and a following character which
-- continues the same cluster are unlinked and freed.
--
-- @param head first node of the list to process
-- @return always `true`; offending nodes are removed by relinking in place
local function process(head)
  -- Segmentation state threaded through successive `read_codepoint` calls.
  local state
  -- First replace-less discretionary seen since the last node that started a
  -- cluster. If the next relevant node starts a new cluster the candidate is
  -- simply forgotten; if it continues the current cluster, everything from
  -- `pending` up to (but excluding) that node is removed.
  -- NOTE: this relies on the first node always counting as a cluster start,
  -- so `pending.prev` never reaches before the first node.
  local pending
  for n, id in traverse(head) do
    local ch = is_char(n)
    local new_cluster, forbidden
    if ch then
      new_cluster, state = read_codepoint(ch, state)
    elseif id == disc_t then
      local replace = n.replace
      if replace then
        -- We consider the discretionary to start a new cluster if the first
        -- node of its replace list starts a new cluster. The whole replace
        -- list is still fed through `read_codepoint` to keep `state` current.
        local first = true
        for nn in traverse(replace) do
          local inner_ch = is_char(nn)
          local inner_new_cluster
          if inner_ch then
            inner_new_cluster, state = read_codepoint(inner_ch, state)
          else
            -- Anything that is not a character resets the segmentation state.
            inner_new_cluster, state = true, nil
          end
          if first then new_cluster, first = inner_new_cluster, false end
        end
      else
        -- A discretionary without replacement text must not sit inside a cluster.
        forbidden = true
      end
    else
      -- Any other node type ends the current cluster.
      new_cluster, state = true, nil
    end
    if new_cluster then
      pending = nil
    elseif forbidden then
      pending = pending or n
    elseif pending then
      -- `n` continues the cluster: splice out [pending, n) and free those nodes.
      local before_pending = pending.prev
      before_pending.next, n.prev = n, before_pending
      repeat
        pending = node_free(pending) -- used here as returning the following node
      until pending == n
      pending = nil
    end
  end
  return true
end
-- Register `process` on the `post_hyphenation_filter` callback; the last argument is the
-- description under which the function is registered.
luatexbase.add_to_callback('post_hyphenation_filter', process, 'Remove hyphenation points inside grapheme clusters')
-- Do nothing if the callbacks provided by this module already exist.
-- The name checked here must match the callback created further down
-- (`post_hyphenation_filter`); the previous spelling `post_hyphenate_filter`
-- named a callback that is never created, so the guard could never trigger.
if luatexbase.callbacktypes.post_hyphenation_filter then return end
-- Obtain the `callback.register` that is in place after `luatexbase.uninstall()`
-- and then put `luatexbase` and the previous `callback.register` back.
-- NOTE(review): per the ltluatex documentation `uninstall` restores the engine's
-- own `callback.register` and clears the global `luatexbase` -- confirm for the
-- kernel version in use.
local saved_luatexbase, saved_callback_register = luatexbase, callback.register
luatexbase.uninstall()
local original_callback_register = callback.register
luatexbase, callback.register = saved_luatexbase, saved_callback_register
-- Captured before anything below temporarily replaces `lang.hyphenate`.
local lang_hyphenate = lang.hyphenate
local call_callback = luatexbase.call_callback
-- Find the upvalue named `callbacklist` of `luatexbase.add_to_callback` by scanning
-- its upvalues in order; raise an error if the function has no such upvalue.
local callbacklist do
  local index = 1
  while true do
    local upname, upvalue = debug.getupvalue(luatexbase.add_to_callback, index)
    if upname == nil then
      -- Ran past the last upvalue without finding it.
      error'weird'
    end
    if upname == 'callbacklist' then
      callbacklist = upvalue
      break
    end
    index = index + 1
  end
end
-- local existing_hyphenate_callbacks = luatexbase.callback_descriptions'hyphenate'
-- for _, cb in ipairs(existing_hyphenate_callbacks) do
-- print(require'inspect'(cb))
-- luatexbase.remove_from_callback('hyphenate', cb.description)
-- end
-- Filters called before and after the hyphenation step by the engine-level
-- `hyphenate` handler registered later in this file.
luatexbase.create_callback('pre_hyphenation_filter', 'list')
luatexbase.create_callback('post_hyphenation_filter', 'reverselist')
-- Clear the `hyphenate` entry in `callbacktypes` before a callback of that
-- name is created again at the end of this block.
luatexbase.callbacktypes.hyphenate = nil
do
  -- Default for the exclusive `hyphenation` callback: run the `lang.hyphenate`
  -- captured at load time and report the list as unchanged (`true`).
  -- NOTE(review): the temp node is linked in front of `list` but `list` itself
  -- is what gets passed to `lang_hyphenate`; confirm whether the temp node was
  -- meant to be passed instead.
  local function default_hyphenation(list)
    local scratch = node.new'temp'
    scratch.next = list
    lang_hyphenate(list)
    node.free(scratch)
    return true
  end
  luatexbase.create_callback('hyphenation', 'exclusive', default_hyphenation)
end
luatexbase.create_callback('hyphenate', 'simple')
-- for _, cb in ipairs(existing_hyphenate_callbacks) do
-- luatexbase.add_to_callback('hyphenate', cb.func, cb.description)
-- end
--- Run the `hyphenate` callbacks as a nested chain wrapped around `hyphenation`.
--
-- Level 0 is the `hyphenation` callback; `callbacks[i]` is level `i`. The last
-- entry of `callbacks` is invoked first. While a level runs, the global
-- `lang.hyphenate` is pointed at the next lower level, so a callback that
-- calls `lang.hyphenate` hands over to the level below it. While level 0 runs,
-- `lang.hyphenate` is the function that was installed when this function was
-- entered.
--
-- The global `lang.hyphenate` is restored before returning, also when one of
-- the callbacks raises an error (the error is then re-raised unchanged).
--
-- @param callbacks sequence of tables with a `func` field
-- @param head node in front of the list; the list itself starts at `head.next`
-- @param tail last node of the list, forwarded to the outermost level
-- @return the lowest level that was reached: 0 if `hyphenation` ran, `i` if
--         `callbacks[i]` was the innermost callback that ran
local function emulate_hyphenate_callback(callbacks, head, tail)
  local saved_lang_hyphenate = lang.hyphenate
  local min_level = math.huge
  local lang_hyphenate do
    -- Level 0: dispatch to the `hyphenation` callback.
    local function new_lang_hyphenate(list_head, _)
      min_level = 0
      lang.hyphenate = saved_lang_hyphenate
      local result = call_callback('hyphenation', list_head.next)
      if result == false then
        -- `false` discards the list.
        node.flush_list(list_head.next)
        list_head.next = nil
      elseif result ~= true then
        -- Anything other than `true` is taken as the new first node.
        list_head.next = result
      end
      lang.hyphenate = new_lang_hyphenate
      return true
    end
    lang_hyphenate = new_lang_hyphenate
  end
  for i, cb in ipairs(callbacks) do
    local cb_func = cb.func
    local current_lang_hyphenate = lang_hyphenate
    -- Level i: run callback i with `lang.hyphenate` bound to level i - 1.
    local function new_lang_hyphenate(list_head, list_tail)
      lang.hyphenate = current_lang_hyphenate
      cb_func(list_head, list_tail or node.tail(list_head))
      lang.hyphenate = new_lang_hyphenate
      if i < min_level then min_level = i end
      return true
    end
    lang_hyphenate = new_lang_hyphenate
  end
  -- Run the chain protected so that the monkey-patched global is always
  -- restored; without this an error inside a callback would leave
  -- `lang.hyphenate` pointing at one of the wrappers above.
  local ok, err = pcall(lang_hyphenate, head, tail)
  lang.hyphenate = saved_lang_hyphenate
  if not ok then error(err, 0) end
  return min_level
end
do
  -- Runs the filter callback `name` on the list starting at `initial_temp.next`
  -- and stores the result back. A result of `false` flushes the list and
  -- empties it. Returns `false` in that case and `true` otherwise.
  local function run_filter(name, initial_temp)
    local list = initial_temp.next
    local filtered = call_callback(name, list)
    if filtered == false then
      node.flush_list(list)
      initial_temp.next = nil
      return false
    end
    initial_temp.next = filtered
    return true
  end

  -- Handler registered for `hyphenate` through the saved `callback.register`:
  -- pre-filter, the emulated `hyphenate`/`hyphenation` chain, then post-filter.
  original_callback_register('hyphenate', function(initial_temp, tail)
    if tail.next then
      error'This should not happen, please report a bug.'
    end
    if not run_filter('pre_hyphenation_filter', initial_temp) then return end
    local registered = callbacklist['hyphenate'] or {}
    local nesting = emulate_hyphenate_callback(registered, initial_temp, node.tail(initial_temp.next))
    -- nesting == 0 is the expected case: `hyphenation` acts like an additional
    -- `hyphenate` callback with index 0.
    if nesting == 1 then
      -- `hyphenation` was skipped. That is fine unless something is registered for it.
      if luatexbase.callback_descriptions'hyphenation'[1] then
        tex.error'Missing call to lang.hyphenate, hyphenation callback was ignored.'
      end
    elseif nesting ~= 0 then
      tex.error'Missing call to lang.hyphenate, at least one hyphenate callback was ignored.'
    end
    run_filter('post_hyphenation_filter', initial_temp)
  end)
end
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
% Test document for the Lua module loaded below: hyphenation next to a combining mark.
\documentclass{article}
% Hyphenation pattern with an odd digit after the letter, i.e. a break is allowed directly after it.
\patterns{ е1 }
% \patterns{ 2^^^^0308 }
\usepackage{fontspec}
\setmainfont{Noto Sans}[
% Renderer=Harfbuzz,
Script=Cyrillic, Language=Bulgarian]
% Load the Lua code under test.
\directlua{require'prevent_grapheme_hyphens'}
\begin{document}
% Write the resulting node lists to the log so the discretionaries can be inspected.
\showoutput
% First word: the pattern letter is followed by U+0308 (written ^^^^0308), so the pattern
% above would allow a break between the base letter and the mark.
% Second word: the same letter followed by an ordinary letter, for comparison.
азе^^^^0308аз
азезаз
\end{document}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment