Skip to content

Instantly share code, notes, and snippets.

@MrSimbax
Last active May 8, 2022 10:54
Show Gist options
  • Save MrSimbax/893f874540398b339f6f338449ba6271 to your computer and use it in GitHub Desktop.
Save MrSimbax/893f874540398b339f6f338449ba6271 to your computer and use it in GitHub Desktop.
LPEG example with simple error handling
--[[
# LPEG example with simple error handling
This script was inspired by an attempt to teach myself some LPEG.
## Problem statement
Given a list of character pairs (openChar, closeChar), create a parser for a language of well balanced pairs, i.e.
a language consisting of an empty string and of expressions starting with any openChar,
followed by a chunk or nothing, and ending with the corresponding closeChar.
For example, the list {("(", ")")} results in a language of well balanced parentheses {"", "()", "(())", "()()", ...}.
The list {("(", ")"), ("{", "}")} is a language of well balanced parentheses and braces, e.g. "({}){}{()()}".
Some tests are at the end.
## Constraints
* Use LPEG to create the parser
* Add error handling, i.e. print an error in case of failures with a reasonable position and stated reason
Error handling is tricky because lpeg.match can only have two results (success and captures, or failure).
Without modifying the LPEG library, a solution is to modify the grammar to include patterns for failure cases.
--]]
-- Solution
-- --------
local lpeg = require "lpeg"
-- Some simple useful patterns
local any = lpeg.P(1)
local endOfString = -any
-- Handle only the first error
-- LPEG machine will backtrack after failed match which might result in more than one error,
-- but any error other than the first is useless and confusing in this case.
-- The errorCount is global for simplicity, another way would be to give a mutable state to lpeg.match
-- as an extra argument and capture it when error is matched.
local errorCount
-- This pattern is used at the beginning of the grammar to reset the counter before matching the actual language.
local resetErr = lpeg.P(function ()
errorCount = 0
return true
end)
-- They key for error handling is to use lpeg.Cmt pattern.
-- Error is a factory which creates an error pattern.
-- It always immediately matches, captures the given handler + optional arguments, and calls the handler.
-- The result of the match is what is returned by the handler.
local function Error (handler, ...)
return lpeg.Cmt(lpeg.Cc(...), function (subject, pos, ...)
if errorCount > 0 then return end
errorCount = 1
return handler(subject, pos, ...)
end)
end
-- Error handlers
local function printError (subject, position, msg)
local prefix <const> = "error in string "
print(string.format("%s%q at position %d: %s", prefix, subject, position, msg))
print(string.rep(" ", #prefix + position) .. "^")
end
local function onUnexpectedCharacter (subject, pos, expectedChar)
local char = pos <= #subject and subject:sub(pos, pos) or "EOF"
local msg
if expectedChar and #expectedChar > 0 then
msg = string.format("expected %q, got %q", expectedChar, char)
else
msg = string.format("unexpected character: %q", char)
end
printError(subject, pos, msg)
end
local function onUnclosedChunk (subject, pos, chunkChar)
printError(subject, pos, string.format("unclosed %q", chunkChar))
end
-- Grammar factory
local function BalancedPairs (characters)
local grammar = {"expr",
expr = resetErr * lpeg.V"chunk"^0 * (endOfString + #any * Error(onUnexpectedCharacter)),
chunk = lpeg.P(false)
}
for openChar, closeChar in pairs(characters) do
grammar.chunk = grammar.chunk + lpeg.V(openChar)
grammar[openChar] = lpeg.P(openChar) * lpeg.V"chunk"^0 * (
lpeg.P(closeChar) +
#any * Error(onUnexpectedCharacter, closeChar) +
Error(onUnclosedChunk, openChar))
end
return lpeg.P(grammar)
end
-- Tests
-- -----
local grammar = BalancedPairs{
["("] = ")",
["{"] = "}",
["<"] = ">",
["["] = "]"
}
local function tobool (x)
return not not x
end
local failedTestsCount = 0
local function test (subject, expected)
local result = grammar:match(subject)
result = (tobool(result) == expected)
if not result then
failedTestsCount = failedTestsCount + 1
end
print(string.format("%q %s", subject, result and "passed" or "fail"))
end
-- basic
test("", true)
test("(", false) --> error in string "(" at position 2: unclosed "("
test(")", false) --> error in string ")" at position 1: unexpected character: ")"
test("()", true)
test("())", false) --> error in string "())" at position 3: unexpected character: ")"
test("(()", false) --> error in string "(()" at position 4: unclosed "("
test("(())", true)
test("()()", true)
-- basic with other character
test("[", false) --> error in string "[" at position 2: unclosed "["
test("]", false) --> error in string "]" at position 1: unexpected character: "]"
test("[]", true)
test("[]]", false) --> error in string "[]]" at position 3: unexpected character: "]"
test("[[]", false) --> error in string "[[]" at position 4: unclosed "["
test("[[]]", true)
test("[][]", true)
-- multiple characters
test("([])", true)
test("([)", false) --> error in string "([)" at position 3: expected "]", got ")"
test("(])", false) --> error in string "(])" at position 2: expected ")", got "]"
test("(<{}>)", true)
test("(<{}(<><>)>)", true)
test("(<{}(<<>)>)", false) --> error in string "(])" at position 2: expected ")", got "]"
-- non-sense
test("a", false) --> error in string "a" at position 1: unexpected character: "a"
test("(a)", false) --> error in string "(a)" at position 2: expected ")", got "a"
test("()a", false) --> error in string "()a" at position 3: unexpected character: "a"
test("()a[]", false) --> error in string "()a[]" at position 3: unexpected character: "a"
if failedTestsCount > 0 then
print(string.format("\n%d tests failed", failedTestsCount))
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment