Created
May 12, 2022 11:29
-
-
Save denismaier/eb2d6301ef273c80c98c228e1d64323c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- check-ligatures.lua
--[[
Analyze ligatures based on a ConTeXt log file.
Enable ligature tracking in the ConTeXt file with
    \enabletrackers[languages.goodies]
then run this script with
    mtxrun --script check-ligatures.lua input_file whitelist_file
For input_file the extension .log is assumed, so there is no need to add it.
For whitelist_file the file extension must be specified.
Results will be saved to input_file-ligatures-new.log.
If no whitelist is specified as a command-line option,
the script falls back to input_file-ligatures-old.log.
]]
-- this is used only for testing purposes | |
-- local pprint = require('pprint') | |
-------------------------------------------------------------------- | |
-------------------------------------------------------------------- | |
-- Entry point: collect the ligature candidates found in a ConTeXt log and
-- write those that are not whitelisted to "<input_file>-ligatures-new.log".
--
-- input_file     : name of the log file WITHOUT the ".log" extension (required)
-- whitelist_file : optional name of a file whose lines are words to ignore;
--                  when nil, "<input_file>-ligatures-old.log" is used instead
--
-- Raises an error with a usage message when input_file is missing.
function main (input_file, whitelist_file)
  if input_file == nil then
    -- fail with a readable message instead of "attempt to concatenate a nil value"
    error("usage: mtxrun --script check-ligatures.lua input_file [whitelist_file]", 0)
  end

  -- Load a file as a list of lines. When io.loaddata yields nothing
  -- (presumably because the file cannot be read -- TODO confirm) the
  -- file is treated as empty.
  local function load_lines (path)
    return string.splitlines(io.loaddata(path) or "") or { }
  end

  local lines = load_lines(input_file .. ".log")

  local whitelist_path = whitelist_file
  if whitelist_path == nil then
    whitelist_path = input_file .. '-ligatures-old.log'
  end
  local whitelist = load_lines(whitelist_path)

  -- pipeline: relevant log lines -> words -> non-whitelisted ligature candidates
  local ligature_lines = getLigatureLines(lines)
  local words = cleanLines(ligature_lines)
  local filteredWordlist = filterLigaturesWordlist(words, whitelist)

  -- sort the table in-place
  table.sort(filteredWordlist)
  saveResultsToFile(filteredWordlist, input_file .. '-ligatures-new.log')
end
---------------------------------------------------------------- | |
---------------------------------------------------------------- | |
-- String testing | |
-- Tell whether `str` begins with the string `start`.
-- Returns true or false.
function starts_with(str, start)
  local prefix_length = #start
  local head = str:sub(1, prefix_length)
  return head == start
end
-- Get relevant lines: keep the lines that start with "languages > goodies"
-- and contain neither "goodies > properties:" nor "goodies > tag".
--
-- lines : sequence of log lines (strings)
-- Returns a new sequence holding the matching lines in their original order.
function getLigatureLines(lines)
  local result = {}
  -- ipairs instead of pairs: `lines` is a sequence, and pairs gives no
  -- guarantee about the order in which entries are visited
  for _, line in ipairs(lines) do
    -- the markers are literal text, so search in plain mode (4th argument)
    -- rather than interpreting them as Lua patterns
    if starts_with(line, "languages > goodies")
      and not string.find(line, "goodies > properties:", 1, true)
      and not string.find(line, "goodies > tag", 1, true)
    then
      result[#result + 1] = line
    end
  end
  return result
end
-- String cleaning | |
-- wrapper functions | |
-- Apply cleanLine to every entry of `xs` and collect the results.
--
-- xs : sequence of log lines (strings)
-- Returns a new sequence of cleaned entries, in input order. Entries for
-- which cleanLine returns nil are left out, so the result never has holes.
function cleanLines (xs)
  local result = {}
  -- ipairs instead of pairs: `xs` is a sequence and its order should be kept
  for _, line in ipairs(xs) do
    local word = cleanLine(line)
    -- skip explicitly instead of relying on table.insert(t, nil) doing nothing
    if word ~= nil then
      result[#result + 1] = word
    end
  end
  return result
end
-- Clean a single line. For now this only extracts the word with getWord;
-- the additional removeTrailingPunctuation step is deliberately left out:
--   return removeTrailingPunctuation(getWord(x))
function cleanLine (x)
  local word = getWord(x)
  return word
end
-- 1. Start reading at colon
-- Extract the word that follows the last ": " in `x` that is directly
-- followed by a word character.
--
-- x : a log line (string)
-- Returns the word, or nil when the line contains no such ": <word>" part.
function getWord(x)
  -- We read the word after the colon. %w only matches ASCII letters and
  -- digits, which cut words such as "Auflösung" at the first non-ASCII
  -- byte; bytes 128-255 are included so UTF-8 encoded letters stay part
  -- of the word.
  return string.match(x, ".*: ([%w\128-\255]+)")
end
-- 2. Remove trailing punctuation
-- Strip any punctuation characters at the very end of `x`.
-- Punctuation elsewhere in the string is left alone; previously a comma
-- anywhere in the string caused the last character to be dropped, whatever
-- that character was.
--
-- x : string
-- Returns the string without its trailing punctuation.
function removeTrailingPunctuation (x)
  -- the extra parentheses drop gsub's second result (the replacement count)
  return (string.gsub(x, "%p+$", ""))
end
-- Test if word is in list: walk `list` from index 1 up to the first nil
-- entry and compare each entry with `x`.
-- Returns true when an equal entry is found, nil otherwise.
function inList (x, list)
  local index = 1
  local entry = list[index]
  while entry ~= nil do
    if entry == x then
      return true
    end
    index = index + 1
    entry = list[index]
  end
  return nil
end
-- Filter words based on second list (whitelist).
-- Keeps each word of `xs` that hasMaybeLigature accepts, that does not occur
-- in `list`, and that has not been kept already (no duplicates).
--
-- xs   : sequence of words (strings)
-- list : sequence of whitelisted words
-- Returns a new sequence with the remaining words in order of first occurrence.
function filterLigaturesWordlist (xs, list)
  -- index the whitelist once so each membership test is a table lookup
  -- instead of a linear scan over the whole list
  local whitelisted = {}
  for _, word in ipairs(list) do
    whitelisted[word] = true
  end

  local result = {}
  local seen = {}   -- words already placed in result
  for _, word in ipairs(xs) do
    if hasMaybeLigature(word) and not whitelisted[word] and not seen[word] then
      seen[word] = true
      result[#result + 1] = word
    end
  end
  return result
end
-- Keep only words with potential ligatures: look for the letter pairs
-- fi, fl, ff, ft, fb, fh, fj and fk, in that order.
-- Returns the start index of the first pair (in list order) that occurs in
-- `x`, or nil when none of them occurs.
-- The three-letter forms (ffi, ffl, fft, ffb, ffh, ffj, ffk) are not listed
-- separately.
function hasMaybeLigature (x)
  local pairs_to_check = { "fi", "fl", "ff", "ft", "fb", "fh", "fj", "fk" }
  for i = 1, #pairs_to_check do
    local position = string.find(x, pairs_to_check[i])
    if position then
      return position
    end
  end
  return nil
end
-- | |
-- Write every entry of `results` to `output_file`, one entry per line.
-- An existing file is overwritten.
--
-- results     : sequence of strings
-- output_file : path of the file to write
--
-- Raises an error when the file cannot be opened or written.
function saveResultsToFile(results, output_file)
  -- Use a private handle: redirecting the default output with io.output and
  -- closing it afterwards would leave the default output closed for the rest
  -- of the program, and an unchecked io.open failure went unnoticed.
  local handle = assert(io.open(output_file, "w"))
  -- iterate over the results
  for _, entry in ipairs(results) do
    assert(handle:write(entry, '\n'))
  end
  handle:close()
end
-- Run: hand the first two command-line arguments to main
local input_file, whitelist_file = arg[1], arg[2]
main(input_file, whitelist_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment