Skip to content

Instantly share code, notes, and snippets.

@denismaier
Created May 12, 2022 11:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save denismaier/eb2d6301ef273c80c98c228e1d64323c to your computer and use it in GitHub Desktop.
Save denismaier/eb2d6301ef273c80c98c228e1d64323c to your computer and use it in GitHub Desktop.
-- check-ligatures.lua
--[[
analyze Ligatures based on a ConTeXt log file
enable Ligature tracking in the ConTeXt file with
\enabletrackers[languages.goodies]
then run this script with
mtxrun --script check-ligatures.lua input_file whitelist_file
for the input_file we assume .log, so no need to add this
for the whitelist_file the file ending must be specified
results will be saved to input_file-ligatures-new.log
if no whitelist is specified as a command line option
the script falls back to input_file-ligatures-old.log
]]
-- this is used only for testing purposes
-- local pprint = require('pprint')
--------------------------------------------------------------------
--------------------------------------------------------------------
--- Entry point: list the not yet whitelisted ligature candidates of a log.
-- Reads `input_file .. ".log"`, reduces the selected tracker lines to words,
-- drops the words without a potential ligature, the whitelisted words and
-- duplicates, sorts the rest and writes it to
-- `input_file .. "-ligatures-new.log"`.
-- @param input_file     log file name without the ".log" ending (required)
-- @param whitelist_file optional file with one word to ignore per line; when
--                       nil, `input_file .. "-ligatures-old.log"` is used
function main (input_file, whitelist_file)
  if input_file == nil then
    -- fail with a readable message instead of a nil-concatenation error
    error("usage: mtxrun --script check-ligatures.lua input_file [whitelist_file]", 0)
  end

  -- NOTE(review): io.loaddata and string.splitlines are not standard Lua;
  -- presumably they are provided by the mtxrun runtime -- confirm.
  -- When io.loaddata yields nothing the file is treated as empty.
  local function read_lines(path)
    return string.splitlines(io.loaddata(path) or "") or { }
  end

  local whitelist_path = whitelist_file
  if whitelist_path == nil then
    whitelist_path = input_file .. '-ligatures-old.log'
  end

  local lines = read_lines(input_file .. ".log")
  local whitelist = read_lines(whitelist_path)

  local candidates = cleanLines(getLigatureLines(lines))
  local filteredWordlist = filterLigaturesWordlist(candidates, whitelist)

  -- sort the table in-place
  table.sort(filteredWordlist)
  saveResultsToFile(filteredWordlist, input_file .. '-ligatures-new.log')
end
----------------------------------------------------------------
----------------------------------------------------------------
--- Tell whether a string begins with a given prefix.
-- @param str   string to inspect
-- @param start prefix compared against the beginning of `str`
-- @return true when the first #start bytes of `str` equal `start`, else false
function starts_with(str, start)
  local prefix_length = #start
  local head = str:sub(1, prefix_length)
  return head == start
end
--- Pick the relevant tracker lines out of a log.
-- A line is kept when it begins with "languages > goodies" and contains
-- neither "goodies > properties:" nor "goodies > tag".
-- @param lines table of log lines
-- @return a new array holding the lines that were kept
function getLigatureLines(lines)
  local kept = {}
  for _, line in pairs(lines) do
    local wanted = starts_with(line, "languages > goodies")
    if wanted and string.find(line, "goodies > properties:") then
      wanted = false
    end
    if wanted and string.find(line, "goodies > tag") then
      wanted = false
    end
    if wanted then
      kept[#kept + 1] = line
    end
  end
  return kept
end
-- String cleaning
-- wrapper functions
--- Apply cleanLine to every entry of a table.
-- @param xs table of raw log lines
-- @return a new array with the cleaned entries (table.insert with a nil
--         value appends nothing, so nil results leave no entry behind)
function cleanLines (xs)
  local cleaned = {}
  for _, raw in pairs(xs) do
    local word = cleanLine(raw)
    table.insert(cleaned, word)
  end
  return cleaned
end
--- Reduce one log line to the word it reports.
-- At the moment this only delegates to getWord; the additional step
-- removeTrailingPunctuation(getWord(x)) is left out for now.
-- @param x a single log line
-- @return whatever getWord yields for `x`
function cleanLine (x)
  local word = getWord(x)
  return word
end
--- Step 1: read the word that follows the last ": " of a log line.
-- @param x a single log line
-- @return the word after the colon, or nil when the line has no ": "
--         followed by a word character
function getWord(x)
  -- "%w" only covers ASCII letters and digits (in the default C locale), so
  -- words containing UTF-8 encoded letters were cut off at the first
  -- non-ASCII byte. Bytes 128-255 only occur inside multi-byte UTF-8
  -- sequences, so accepting them keeps such words intact while ASCII
  -- punctuation and whitespace still end the word.
  return string.match(x, ".*: ([%w\128-\255]+)")
end
--- Step 2: strip punctuation from the end of a word.
-- Only punctuation characters at the very end are removed; punctuation
-- inside the word is kept and a word without trailing punctuation is
-- returned unchanged.
-- @param x the word to clean
-- @return `x` without its trailing punctuation characters
function removeTrailingPunctuation (x)
  -- string.gsub also returns the number of substitutions; the parentheses
  -- keep only the resulting string
  return (string.gsub(x, "%p+$", ""))
end
--- Look for a value among the entries of an array.
-- @param x    value to look for
-- @param list array that is searched with ipairs
-- @return true when an entry equals `x`, nil otherwise
function inList (x, list)
  local found = nil
  for _, candidate in ipairs(list) do
    if candidate == x then
      found = true
      break
    end
  end
  return found
end
--- Filter words based on a second list (the whitelist).
-- Keeps, in their original order, the words that contain a potential
-- ligature, are not on the whitelist and have not been kept before.
-- @param xs   array of candidate words
-- @param list array of whitelisted words
-- @return a new array with the remaining words, without duplicates
function filterLigaturesWordlist (xs, list)
  -- Words to skip, stored as table keys so that every membership test is a
  -- single lookup instead of a scan over the whitelist and the result.
  local skip = {}
  for _, word in ipairs(list) do
    skip[word] = true
  end

  local result = {}
  for _, word in ipairs(xs) do
    if hasMaybeLigature(word) and not skip[word] then
      skip[word] = true -- later occurrences of the same word are duplicates
      result[#result + 1] = word
    end
  end
  return result
end
--- Test whether a word contains a letter pair that may form a ligature.
-- The pairs are tried in the listed order. Longer combinations such as
-- "ffi", "ffl" or "fft" have no entry of their own because they already
-- contain "ff".
-- @param x the word to inspect
-- @return the start position of the first pair (in list order) found in
--         `x`, or nil when none of the pairs occurs
function hasMaybeLigature (x)
  local letter_pairs = { "fi", "fl", "ff", "ft", "fb", "fh", "fj", "fk" }
  for _, letter_pair in ipairs(letter_pairs) do
    local position = string.find(x, letter_pair)
    if position then
      return position
    end
  end
  return nil
end
--
--- Write the results to a file, one entry per line.
-- The file is created or overwritten. A local file handle is used, so the
-- default output file of the io library is neither redirected nor closed.
-- @param results     array of entries to write
-- @param output_file path of the file to write
-- Raises an error when the file cannot be opened for writing.
function saveResultsToFile(results, output_file)
  -- assert turns the (nil, message) result of a failed io.open into an error
  local handle = assert(io.open(output_file, "w"))
  -- iterate over the results and write each one on its own line
  for _, entry in ipairs(results) do
    handle:write(entry, '\n')
  end
  handle:close()
end
-- Run: arg[1] is passed to main as input_file, arg[2] as whitelist_file
main(arg[1], arg[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment