This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-----------------------------------------------------------------------
--         FILE:  charsperline.lua
--        USAGE:  called from t-charsperline.mkvi
--  DESCRIPTION:  extract line contents and lengths
-- REQUIREMENTS:  ConTeXt MkIV
--       AUTHOR:  Philipp Gesang (Phg), <phg42.2a@gmail.com>
--      VERSION:  1.0
--      CREATED:  2013-08-04 20:01:04+0200
-----------------------------------------------------------------------
--
--- based on t-wordsperline:
--- http://www.ntg.nl/pipermail/ntg-context/2012/070668.html
--- Module namespace. Existing tables are reused, so loading the file a
--- second time does not discard what is already stored there.
thirddata                = thirddata or { }
thirddata.chars_per_line = thirddata.chars_per_line or { }

--- Library functions, localized once for the whole file.
local stringformat       = string.format
local tableconcat        = table.concat
local utfchar            = utf.char
local traverse_nodetype  = node.traverse_id
local traverse_nodelist  = node.traverse

--- Code tables used to look up node ids and subtypes by name.
local nodecodes          = nodes.nodecodes
local listcodes          = nodes.listcodes
local skipcodes          = nodes.skipcodes

--- Node ids (hlist, vlist, glue, glyph) and the subtypes compared against
--- in the collector (line for hlists, userskip for glue).
local hlist_t            = nodecodes.hlist
local vlist_t            = nodecodes.vlist
local glue_t             = nodecodes.glue
local glyph_t            = nodecodes.glyph
local line_t             = listcodes.line
local userskip_t         = skipcodes.userskip

--- Task handler used to register and toggle the collector.
local tasks              = nodes.tasks
local enableaction       = tasks.enableaction
local disableaction      = tasks.disableaction

--- Accumulator: one entry per recorded line; each entry is a list of
--- single-character strings.
local linedata           = { }
--- Append the characters that make up a ligature to a list.
--
-- Visits every glyph node in the list starting at `hd`. A glyph that has
-- a `components` list of its own is expanded recursively; any other glyph
-- contributes its `char` converted to a UTF-8 string.
--
-- @param lst  table the characters are appended to
-- @param hd   first node of the component list
-- @return     the list that was passed in as `lst`
local function resolve_ligatures (lst, hd)
  for glyph in traverse_nodetype (glyph_t, hd) do
    local inner = glyph.components
    if not inner then
      lst[#lst+1] = utfchar (glyph.char)
    else
      --- nested ligature: descend into its own components
      lst = resolve_ligatures (lst, inner)
    end
  end
  return lst
end
--- Glue subtypes that newer LuaTeX versions assign to inter-word space
--- (older versions used userskip for it). Either lookup may yield nil on
--- an engine that lacks the code; the comparisons below then never match.
local spaceskip_t  = skipcodes.spaceskip
local xspaceskip_t = skipcodes.xspaceskip

--- Node list action: record the text of every line in a node list.
--
-- Walks the hlist nodes that are direct members of the list at `hd`. For
-- each one whose subtype is `line`, the glyphs inside are turned into
-- UTF-8 characters (ligatures are expanded through their components) and
-- each inter-word / user glue becomes a single blank. Lines that contain
-- no glyph at all are not recorded. The result is appended to `linedata`.
--
-- @param hd         head of the node list
-- @param groupcode  group type; when it is "vbox" nothing is recorded
-- @return           `hd`, always unmodified
local collect = function (hd, groupcode)
  if groupcode == "vbox" then
    return hd
  end
  for current in traverse_nodetype (hlist_t, hd) do
    if current.subtype == line_t then
      local chars, has_glyphs = { }, false
      for n in traverse_nodelist (current.list) do
        local ntype, nsubtype = n.id, n.subtype
        --- only glyphs and glue are of interest
        if ntype == glyph_t then
          has_glyphs = true
          if n.components then
            chars = resolve_ligatures (chars, n.components)
          else
            chars[#chars+1] = utfchar (n.char)
          end
        elseif ntype == glue_t
           and (nsubtype == userskip_t
             or nsubtype == spaceskip_t
             or nsubtype == xspaceskip_t)
        then
          chars[#chars+1] = " "
        end
      end
      if has_glyphs then
        linedata[#linedata+1] = chars
      end
    end
  end
  return hd
end
--- Name under which the collector is known to the task handler.
--- Registering, enabling and disabling must all use this same string,
--- otherwise the toggles silently address a non-existent action.
local collect_action = "thirddata.chars_per_line.collect"

thirddata.chars_per_line.collect = collect

--- Register the collector in the "finalizers" task list, then switch it
--- off right away; it only runs between start_chars_per_line() and
--- stop_chars_per_line().
tasks.appendaction ("finalizers", "before", collect_action)
disableaction      ("finalizers", collect_action)

--- Diagnostic helper: format a message and write it on a new line via
--- texio.write_nl. Currently not called anywhere in this file.
local texiowrite_nl = texio.write_nl
local write_stats   = function (...) texiowrite_nl(stringformat(...)) end

--- File that receives the data when no name is given.
local datafile = "./linedata.txt"

--- Write all recorded lines to a file.
--
-- Every recorded line becomes one output line of the form
--   "<text>",<number of characters>
-- where the text is quoted with %q and the count is the number of entries
-- collected for that line.
--
-- @param filename  target file; defaults to `datafile`
local write_linedata = function (filename)
  filename = filename or datafile
  local result = { }
  for i = 1, #linedata do
    local line = linedata[i]
    result[i] = stringformat ("%q,%d", tableconcat (line), #line)
  end
  io.savedata (filename, result, "\n")
end

local active = false --- callback state: true while the collector is enabled

--- Enable the collector; does nothing if it is already enabled.
commands.start_chars_per_line = function ()
  if not active then
    enableaction ("finalizers", collect_action)
    active = true
  end
end

--- Disable the collector; does nothing if it is not enabled.
commands.stop_chars_per_line = function ()
  if active then
    disableaction ("finalizers", collect_action)
    active = false
  end
end

commands.write_linedata = write_linedata
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
%% ConTeXt front end for charsperline.lua: provides \startdumplines and
%% \stopdumplines and writes the collected data when the document ends.
\startmodule [charsperline]

\unprotect

%% Load the Lua part; it defines the commands.* functions called below.
\ctxloadluafile{charsperline}

%% Begin recording lines.
\def\startdumplines{\ctxcommand{start_chars_per_line ()}}

%% End recording. \endgraf comes first so that the paragraph in progress
%% is finished before the Lua side is told to stop.
\def\stopdumplines{\endgraf\ctxcommand{stop_chars_per_line ()}}

%% Write the collected data to disk (default file name chosen in Lua).
\def\charsperline_dump{\ctxcommand{write_linedata ()}}

%% Run the dump automatically at \stoptext.
\prependtoks \charsperline_dump \to \everystoptext

\protect

\stopmodule

\endinput
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% Minimal test document for the charsperline module.
\usemodule[charsperline]

% A narrow text width yields many short lines.
\setuplayout[width=5cm]

\starttext

% Record the lines of one sample paragraph; the module writes the
% collected data when \stoptext is reached.
\startdumplines
\input knuth\par
\stopdumplines

\stoptext
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment