Last active
October 29, 2023 14:30
-
-
Save zauguin/e119669fa702b112c704a9337b30d446 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- We should never have a hyphenation point inside of a grapheme cluster.
-- Hyphenating there would lead to very wrong results, and even if no hyphenation is chosen it is likely to cause issues during shaping.
--
-- It would be nice if this were prevented in the hyphenation patterns, but as a more general fix we can post-process hyphenated text to identify such situations.
require'provide_hyphenation_callbacks' | |
local graphemes = require'lua-uni-graphemes' | |
local read_codepoint = graphemes.read_codepoint | |
local traverse = node.traverse | |
local is_char = node.is_char | |
local node_free = node.free | |
local disc_t = node.id'disc' | |
--- Remove discretionaries that would allow a break inside a grapheme cluster.
--
-- Walks the list once and feeds every character into `read_codepoint`.
-- A discretionary without replacement text is remembered as a removal
-- candidate. If the node that follows the run of candidates continues the
-- current cluster instead of starting a new one, the whole run is unlinked
-- and freed.
--
-- @param head First node of the (already hyphenated) list.
-- @return Always `true`; the list is edited in place.
local function process(head)
  -- Opaque segmentation state threaded through successive read_codepoint calls.
  local state
  -- First node of the current run of replacement-less discretionaries which
  -- has not yet been confirmed to sit on a cluster boundary.
  -- NOTE: head is expected to always start a new cluster, so pending.prev
  -- should never reach in front of the first node.
  local pending
  for n, id in traverse(head) do
    local ch = is_char(n)
    local new_cluster, forbidden
    if ch then
      new_cluster, state = read_codepoint(ch, state)
    elseif id == disc_t then
      local replace = n.replace
      if replace then
        -- We consider the discretionary to start a new cluster if its first
        -- replace node starts a new cluster. The remaining replace nodes are
        -- still read so that state is up to date for whatever follows.
        local first = true
        for nn in traverse(replace) do
          local inner_ch = is_char(nn)
          local inner_new_cluster
          if inner_ch then
            inner_new_cluster, state = read_codepoint(inner_ch, state)
          else
            inner_new_cluster, state = true, nil
          end
          if first then new_cluster, first = inner_new_cluster, false end
        end
      else
        -- A pure break point: only acceptable if the next node starts a cluster.
        forbidden = true
      end
    else
      -- Any other node ends the current cluster and resets the state.
      new_cluster, state = true, nil
    end
    if new_cluster then
      -- The candidates (if any) sit on a cluster boundary; keep them.
      pending = nil
    elseif forbidden then
      pending = pending or n
    elseif pending then
      -- n continues the cluster: splice out everything from pending up to
      -- (but not including) n, then free the removed nodes.
      local before_pending = pending.prev
      before_pending.next, n.prev = n, before_pending
      repeat
        -- Relies on node_free returning the node that followed the freed one.
        pending = node_free(pending)
      until pending == n
      pending = nil
    end
  end
  return true
end
luatexbase.add_to_callback('post_hyphenation_filter', process, 'Remove hyphenation points inside grapheme clusters')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Do nothing if the filter callbacks are already available. The name checked
-- here must match the callback created further down in this file
-- ('post_hyphenation_filter'); the previous spelling 'post_hyphenate_filter'
-- never matched, so the guard could not fire.
if luatexbase.callbacktypes.post_hyphenation_filter then return end

-- Obtain the callback.register that is in place after luatexbase.uninstall()
-- (presumably the engine primitive), then immediately put luatexbase and its
-- callback.register wrapper back so the rest of the format is unaffected.
local saved_luatexbase, saved_callback_register = luatexbase, callback.register
luatexbase.uninstall()
local original_callback_register = callback.register
luatexbase, callback.register = saved_luatexbase, saved_callback_register

-- Captured before lang.hyphenate gets swapped out temporarily further down.
local lang_hyphenate = lang.hyphenate
local call_callback = luatexbase.call_callback
-- The table of registered callback functions is not exported by luatexbase,
-- so look it up by name among the upvalues of luatexbase.add_to_callback.
local callbacklist do
  local index = 1
  local upname, upvalue = debug.getupvalue(luatexbase.add_to_callback, index)
  while upname ~= nil and upname ~= 'callbacklist' do
    index = index + 1
    upname, upvalue = debug.getupvalue(luatexbase.add_to_callback, index)
  end
  -- Ran out of upvalues without finding one called 'callbacklist'.
  if not upname then error'weird' end
  callbacklist = upvalue
end
-- local existing_hyphenate_callbacks = luatexbase.callback_descriptions'hyphenate' | |
-- for _, cb in ipairs(existing_hyphenate_callbacks) do | |
-- print(require'inspect'(cb)) | |
-- luatexbase.remove_from_callback('hyphenate', cb.description) | |
-- end | |
-- Default handler for the exclusive `hyphenation` callback: run the
-- lang.hyphenate captured at load time on the list and report the head as
-- unchanged.
-- NOTE(review): temp is linked in front of head, but head itself (not temp)
-- is handed to lang_hyphenate -- confirm whether the temp node is needed.
local function default_hyphenation(head)
  local temp = node.new'temp'
  temp.next = head
  lang_hyphenate(head)
  node.free(temp)
  return true
end

-- Filters running before and after the actual hyphenation step.
luatexbase.create_callback('pre_hyphenation_filter', 'list')
luatexbase.create_callback('post_hyphenation_filter', 'reverselist')
-- Clear the existing `hyphenate` type entry, presumably so that the name can
-- be created again as a luatexbase-level callback below.
luatexbase.callbacktypes.hyphenate = nil
luatexbase.create_callback('hyphenation', 'exclusive', default_hyphenation)
luatexbase.create_callback('hyphenate', 'simple')
-- for _, cb in ipairs(existing_hyphenate_callbacks) do | |
-- luatexbase.add_to_callback('hyphenate', cb.func, cb.description) | |
-- end | |
--- Run the registered `hyphenate` functions as a nested chain.
--
-- Every function in `callbacks` is wrapped so that, while it runs,
-- `lang.hyphenate` points at the wrapper of the previous entry; the innermost
-- level dispatches to the `hyphenation` callback. The chain is entered at the
-- last entry of `callbacks`, and `lang.hyphenate` is put back to its original
-- value before returning.
--
-- @param callbacks Array of entries whose `func` field is the function to call.
-- @param head Node in front of the list (the list itself starts at head.next).
-- @param tail Last node of the list.
-- @return The lowest level that was reached: 0 if the `hyphenation` stage
--   ran, otherwise the smallest index in `callbacks` whose function ran.
local function emulate_hyphenate_callback(callbacks, head, tail)
  local original_hyphenate = lang.hyphenate
  local lowest_level = math.huge

  -- Level 0: the real hyphenation step, provided by the `hyphenation` callback.
  local function innermost(list_head, _)
    lowest_level = 0
    -- While the callback runs, lang.hyphenate is the original function.
    lang.hyphenate = original_hyphenate
    local outcome = call_callback('hyphenation', list_head.next)
    if outcome == false then
      node.flush_list(list_head.next)
      list_head.next = nil
    elseif outcome ~= true then
      list_head.next = outcome
    end
    lang.hyphenate = innermost
    return true
  end

  -- Wrap the entries from first to last, so the last one ends up outermost.
  local chain = innermost
  for level, entry in ipairs(callbacks) do
    local user_func = entry.func
    local inner = chain
    local function wrapper(list_head, list_tail)
      -- A lang.hyphenate call made by user_func lands one level further in.
      lang.hyphenate = inner
      user_func(list_head, list_tail or node.tail(list_head))
      lang.hyphenate = wrapper
      if level < lowest_level then lowest_level = level end
      return true
    end
    chain = wrapper
  end

  chain(head, tail)
  lang.hyphenate = original_hyphenate
  return lowest_level
end
-- Handler for the `hyphenate` callback, registered through the
-- callback.register captured earlier (bypassing luatexbase's wrapper).
-- It runs three stages on the list hanging off initial_temp:
--   1. the `pre_hyphenation_filter` callbacks,
--   2. the registered `hyphenate` functions, with `hyphenation` innermost,
--   3. the `post_hyphenation_filter` callbacks.
-- A filter result of `false` discards the list, `true` means the head is
-- unchanged, and anything else is taken as the new head.
original_callback_register('hyphenate', function(initial_temp, tail)
  if tail.next then
    error'This should not happen, please report a bug.'
  end
  local head = initial_temp.next
  local result = call_callback('pre_hyphenation_filter', head)
  if result == false then
    node.flush_list(head)
    initial_temp.next = nil
    return
  end
  -- `true` signals "unchanged"; only take over an actual replacement head.
  if result ~= true then head = result end
  local callbacks = callbacklist['hyphenate'] or {}
  initial_temp.next = head
  local nesting = emulate_hyphenate_callback(callbacks, initial_temp, node.tail(head))
  if nesting == 0 then
    -- Expected since hyphenation acts like an additional hyphenate call with index 0
  elseif nesting == 1 then
    -- We skipped `hyphenation`. That's fine unless `hyphenation` was set.
    if luatexbase.callback_descriptions'hyphenation'[1] then
      tex.error'Missing call to lang.hyphenate, hyphenation callback was ignored.'
    end
  else
    tex.error'Missing call to lang.hyphenate, at least one hyphenate callback was ignored.'
  end
  -- The hyphenate stage may have replaced the first node of the list.
  head = initial_temp.next
  result = call_callback('post_hyphenation_filter', head)
  if result == false then
    node.flush_list(head)
    initial_temp.next = nil
    return
  end
  -- Same convention as for the pre filter: `true` keeps the current head.
  if result ~= true then head = result end
  initial_temp.next = head
end)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% Minimal LuaLaTeX test document for prevent_grapheme_hyphens.lua.
\documentclass{article}

% Allow a break after every Cyrillic "е". In the first test word that break
% point falls between "е" and the combining diaeresis (U+0308) following it.
\patterns{ е1 }
% \patterns{ 2^^^^0308 }

\usepackage{fontspec}
\setmainfont{Noto Sans}[
  % Renderer=Harfbuzz,
  Script=Cyrillic, Language=Bulgarian]

% Load the filter that removes break points inside grapheme clusters.
\directlua{require'prevent_grapheme_hyphens'}

\begin{document}
\showoutput
азе^^^^0308аз
азезаз
\end{document}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment