Skip to content

Instantly share code, notes, and snippets.

@randrews
Last active April 18, 2023 04:32
Show Gist options
  • Save randrews/26d4a5cd2a59b95bb376 to your computer and use it in GitHub Desktop.
Save randrews/26d4a5cd2a59b95bb376 to your computer and use it in GitHub Desktop.
Split a telegram string into component words
function load_dictionary(filename)
local dictionary = {}
for line in io.lines(filename) do
if line:match("^%l%l+$") then
insert(dictionary, line .. "$")
end
end
-- We're cutting all single-letter words, but
-- two of them are real words
insert(dictionary, "a$")
insert(dictionary, "i$")
return dictionary
end
function insert(dictionary, word)
local first = word:sub(1,1)
dictionary[first] = dictionary[first] or {}
if first ~= "$" then
insert(dictionary[first], word:sub(2))
end
end
local dictionary = load_dictionary("/usr/share/dict/words")
--------------------------------------------------
function find_words(telegram, dictionary)
local words = {} -- list of tuples, {start, word}
for start = 1, #telegram do
local current_index = start
local current_letter = telegram:sub(start, start)
local current_node = dictionary
while current_node[ current_letter ] do
current_node = current_node[ current_letter ]
if current_node['$'] then
table.insert(words, {start, telegram:sub(start, current_index)})
end
current_index = current_index + 1
current_letter = telegram:sub(current_index, current_index)
end
end
return words
end
function print_words(words)
for _,w in ipairs(words) do
for n=1, w[1]-1 do io.write('-') end
print(w[2])
end
end
function cull_words(words, telegram_length)
-- Map from start / end position, to word count
local by_start = setmetatable( {}, {__index=function() return 0 end} )
local by_end = setmetatable( {}, {__index=function() return 0 end} )
-- Initialize these counts
for _, w in ipairs(words) do
local l = w[1]
local r = w[1] + #(w[2]) - 1
by_start[l] = by_start[l] + 1
by_end[r] = by_end[r] + 1
end
-- Loop until we run out of things to remove
while true do
local new_words = {}
for i, w in ipairs(words) do
local left = w[1]
local right = w[1] + #(w[2]) - 1
local left_neighbor = (left == 1) or (by_end[left-1] > 0)
local right_neighbor = (right == telegram_length) or (by_start[right+1] > 0)
if left_neighbor and right_neighbor then
table.insert(new_words, w)
else
by_start[left] = by_start[left] - 1
by_end[right] = by_end[right] - 1
end
end
if #new_words == #words then break end
words = new_words
end
return words
end
function build_word_tree(words)
local function find_by_start(start)
local arr = {}
for _,w in ipairs(words) do
if w[1] == start then table.insert(arr, w[2]) end
end
return arr
end
local function tree_for_start(start)
local tree = {}
for _, w in ipairs(find_by_start(start)) do
tree[w] = tree_for_start( start + #w )
end
return tree
end
return tree_for_start(1)
end
function print_tree(word_tree)
local strings = {} -- The paths through the tree
local function dft(path, current_node)
for word, children in pairs(current_node) do
local new_path = path and path .. " " .. word or word
if not next(children) then
table.insert(strings, new_path)
else
dft(new_path, children)
end
end
end
dft(nil, word_tree)
for _, s in ipairs(strings) do
print(s)
end
end
--------------------------------------------------
function parse_telegram(telegram)
local words = find_words(telegram, dictionary)
local culled = cull_words(words, #telegram)
local tree = build_word_tree(culled)
print_tree(tree)
end
--------------------------------------------------
parse_telegram("turkeytrotstowaterwhereistaskforcethirtyfourtheworldwonders")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment