Last active
May 8, 2022 10:54
-
-
Save MrSimbax/893f874540398b339f6f338449ba6271 to your computer and use it in GitHub Desktop.
LPEG example with simple error handling
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--[[ | |
# LPEG example with simple error handling | |
This script was inspired by an attempt to teach myself some LPEG. | |
## Problem statement | |
Given a list of character pairs (openChar, closeChar), create a parser for a language of well balanced pairs, i.e. | |
a language consisting of an empty string and of expressions starting with any openChar, | |
followed by a chunk or nothing, and ending with the corresponding closeChar. | |
For example, the list {("(", ")")} results in a language of well balanced parentheses {"", "()", "(())", "()()", ...}. | |
The list {("(", ")"), ("{", "}")} is a language of well balanced parentheses and braces, e.g. "({}){}{()()}". | |
Some tests are at the end. | |
## Constraints | |
* Use LPEG to create the parser | |
* Add error handling, i.e. print an error in case of failures with a reasonable position and stated reason | |
Error handling is tricky because lpeg.match can only have two results (success and captures, or failure). | |
Without modifying the LPEG library, a solution is to modify the grammar to include patterns for failure cases. | |
--]] | |
-- Solution | |
-- -------- | |
local lpeg = require "lpeg" | |
-- Some simple useful patterns | |
local any = lpeg.P(1) | |
local endOfString = -any | |
-- Handle only the first error | |
-- LPEG machine will backtrack after failed match which might result in more than one error, | |
-- but any error other than the first is useless and confusing in this case. | |
-- The errorCount is global for simplicity, another way would be to give a mutable state to lpeg.match | |
-- as an extra argument and capture it when error is matched. | |
local errorCount | |
-- This pattern is used at the beginning of the grammar to reset the counter before matching the actual language. | |
local resetErr = lpeg.P(function () | |
errorCount = 0 | |
return true | |
end) | |
-- They key for error handling is to use lpeg.Cmt pattern. | |
-- Error is a factory which creates an error pattern. | |
-- It always immediately matches, captures the given handler + optional arguments, and calls the handler. | |
-- The result of the match is what is returned by the handler. | |
local function Error (handler, ...) | |
return lpeg.Cmt(lpeg.Cc(...), function (subject, pos, ...) | |
if errorCount > 0 then return end | |
errorCount = 1 | |
return handler(subject, pos, ...) | |
end) | |
end | |
-- Error handlers | |
local function printError (subject, position, msg) | |
local prefix <const> = "error in string " | |
print(string.format("%s%q at position %d: %s", prefix, subject, position, msg)) | |
print(string.rep(" ", #prefix + position) .. "^") | |
end | |
local function onUnexpectedCharacter (subject, pos, expectedChar) | |
local char = pos <= #subject and subject:sub(pos, pos) or "EOF" | |
local msg | |
if expectedChar and #expectedChar > 0 then | |
msg = string.format("expected %q, got %q", expectedChar, char) | |
else | |
msg = string.format("unexpected character: %q", char) | |
end | |
printError(subject, pos, msg) | |
end | |
local function onUnclosedChunk (subject, pos, chunkChar) | |
printError(subject, pos, string.format("unclosed %q", chunkChar)) | |
end | |
-- Grammar factory | |
local function BalancedPairs (characters) | |
local grammar = {"expr", | |
expr = resetErr * lpeg.V"chunk"^0 * (endOfString + #any * Error(onUnexpectedCharacter)), | |
chunk = lpeg.P(false) | |
} | |
for openChar, closeChar in pairs(characters) do | |
grammar.chunk = grammar.chunk + lpeg.V(openChar) | |
grammar[openChar] = lpeg.P(openChar) * lpeg.V"chunk"^0 * ( | |
lpeg.P(closeChar) + | |
#any * Error(onUnexpectedCharacter, closeChar) + | |
Error(onUnclosedChunk, openChar)) | |
end | |
return lpeg.P(grammar) | |
end | |
-- Tests | |
-- ----- | |
local grammar = BalancedPairs{ | |
["("] = ")", | |
["{"] = "}", | |
["<"] = ">", | |
["["] = "]" | |
} | |
local function tobool (x) | |
return not not x | |
end | |
local failedTestsCount = 0 | |
local function test (subject, expected) | |
local result = grammar:match(subject) | |
result = (tobool(result) == expected) | |
if not result then | |
failedTestsCount = failedTestsCount + 1 | |
end | |
print(string.format("%q %s", subject, result and "passed" or "fail")) | |
end | |
-- basic | |
test("", true) | |
test("(", false) --> error in string "(" at position 2: unclosed "(" | |
test(")", false) --> error in string ")" at position 1: unexpected character: ")" | |
test("()", true) | |
test("())", false) --> error in string "())" at position 3: unexpected character: ")" | |
test("(()", false) --> error in string "(()" at position 4: unclosed "(" | |
test("(())", true) | |
test("()()", true) | |
-- basic with other character | |
test("[", false) --> error in string "[" at position 2: unclosed "[" | |
test("]", false) --> error in string "]" at position 1: unexpected character: "]" | |
test("[]", true) | |
test("[]]", false) --> error in string "[]]" at position 3: unexpected character: "]" | |
test("[[]", false) --> error in string "[[]" at position 4: unclosed "[" | |
test("[[]]", true) | |
test("[][]", true) | |
-- multiple characters | |
test("([])", true) | |
test("([)", false) --> error in string "([)" at position 3: expected "]", got ")" | |
test("(])", false) --> error in string "(])" at position 2: expected ")", got "]" | |
test("(<{}>)", true) | |
test("(<{}(<><>)>)", true) | |
test("(<{}(<<>)>)", false) --> error in string "(])" at position 2: expected ")", got "]" | |
-- non-sense | |
test("a", false) --> error in string "a" at position 1: unexpected character: "a" | |
test("(a)", false) --> error in string "(a)" at position 2: expected ")", got "a" | |
test("()a", false) --> error in string "()a" at position 3: unexpected character: "a" | |
test("()a[]", false) --> error in string "()a[]" at position 3: unexpected character: "a" | |
if failedTestsCount > 0 then | |
print(string.format("\n%d tests failed", failedTestsCount)) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment