Skip to content

Instantly share code, notes, and snippets.

@soasme
Created March 5, 2022 07:25
Show Gist options
  • Save soasme/bb2873ea4f6cd10ed8b9a9e4427006d5 to your computer and use it in GitHub Desktop.
Save soasme/bb2873ea4f6cd10ed8b9a9e4427006d5 to your computer and use it in GitHub Desktop.
---- MODULE VirtualMachine ----
VM_Version == "1.0.0"
LOCAL INSTANCE Sequences
LOCAL INSTANCE Integers
LOCAL INSTANCE TLC
CONSTANT PC
CONSTANT SUBJ
VARIABLES pc, \* index of PC
subj, \* index of SUBJ
err, \* err code, halt immediately
match, \* if not match, back track
refs,
callstack,
capstack,
precstack
Vars == <<pc, subj, match, err, refs, callstack, capstack, precstack>>
CallStack == INSTANCE Stack WITH arr <- callstack
NatStackEntry(n) == [type |-> "nat", pc |-> n, pos |-> 0, captures |-> <<>>, cut |-> FALSE]
BacktrackStackEntry(p, pos, captures, precs, cut) == [type |-> "backtrack", pc |-> p, pos |-> pos, captures |-> captures, precs |-> precs, cut |-> cut]
CapStack == INSTANCE Stack WITH arr <- capstack
LeftCapEntry(p, pos) == [type |-> "L", pc |-> p, pos |-> pos]
RightCapEntry(p, pos) == [type |-> "R", pc |-> p, pos |-> pos]
PrecStack == INSTANCE Stack WITH arr <- precstack
Inst == PC[pc]
Code == Inst[1]
Label == Inst[2]
LabelEx == Inst[3]
ErrInvalidInstCode == 1
ErrNoMatch == 2
ErrNothingToCommit == 3
ErrEndOfText == 4
ErrUnpairPrec == 5
\* TLA Plus does not support very complex string processing so we have limited support
\* in this spec. The real implementation can compare the ascii code of the characters.
CompareChar(a, b) == LET chrs == {"\t", "\r", "\n", " ", "!", "\"", "#", "$", "'", "(", ")", "*", "+", ",", "-", ".", "/",
"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", "<", "=", ">", "?", "@",
"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
"[", "]", "^", "_",
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
"{", "|", "}", "~"}
aIdx == CHOOSE idx \in 1..Len(chrs): chrs[idx] = a
bIdx == CHOOSE idx \in 1..Len(chrs): chrs[idx] = b
IN aIdx > bIdx
\* Find nth char in str. N is 1-based.
FindNth(str, idx) == IF idx <= Len(str) THEN SubSeq(str, idx, idx) ELSE ""
\* A matching pair of l & r in stack means l.type is L and r.type is R and all inners are matching.
RECURSIVE MatchCapture(_, _, _)
MatchCapture(stack, l, r) == /\ stack[l].type = "L"
/\ stack[r].type = "R"
/\ \/ l + 1 = r
\/ MatchCapture(stack, l + 1, r - 1)
FindLeftCapture(stack, idx) == CHOOSE idx_l \in 1..idx: MatchCapture(stack, idx_l, idx)
HandleReturn == IF Len(callstack) = 0
THEN UNCHANGED Vars /\ PrintT(Vars) \* [][VM_Next]_Vars stutters
ELSE /\ pc' = CallStack!SPeek.pc
/\ CallStack!SPop
/\ UNCHANGED <<subj, match, err, refs, capstack, precstack>>
HandleChar == IF FindNth(SUBJ, subj) = Label
THEN /\ subj' = subj + 1
/\ pc' = pc + 1
/\ UNCHANGED <<match, err, refs, callstack, capstack, precstack>>
ELSE /\ match' = FALSE
/\ PrintT(<<"Char not match", "pc", pc, PC[pc]>>)
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>>
HandleJump == /\ pc' = pc + Label
/\ UNCHANGED <<subj, match, err, refs, callstack, capstack, precstack>>
HandleCall == /\ pc' = pc + Label
/\ CallStack!SPush(NatStackEntry(pc + 1))
/\ UNCHANGED <<subj, match, err, refs, capstack, precstack>>
HandleChoice == /\ pc' = pc + 1
/\ CallStack!SPush(BacktrackStackEntry(pc+Label, subj, capstack, precstack, FALSE))
/\ UNCHANGED <<subj, match, err, refs, capstack, precstack>>
HandleCommit == IF CallStack!SEmpty
THEN /\ err' = ErrNothingToCommit
/\ PrintT(<<"Nothing to commit", "pc", pc, PC[pc]>>)
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>>
ELSE /\ pc' = pc + Label
/\ CallStack!SPop
/\ match' = TRUE
/\ UNCHANGED <<subj, err, refs, capstack, precstack>>
HandleBackCommit == /\ pc' = pc + Label
/\ subj' = CallStack!SPeek.pos
/\ capstack' = CallStack!SPeek.captures
/\ CallStack!SPop
/\ UNCHANGED <<match, err, refs, precstack>>
HandlePrecedencePush == IF Label = 0 \/ PrecStack!SEmpty
THEN /\ PrecStack!SPush(Label)
/\ pc' = pc + 1
/\ UNCHANGED <<subj, match, err, refs, callstack, capstack>>
ELSE LET prec == Label
assoc == LabelEx
IN IF \/ PrecStack!SPeek < prec
\/ assoc = "R" /\ PrecStack!SPeek = prec
THEN /\ PrecStack!SPush(prec)
/\ pc' = pc + 1
/\ UNCHANGED <<subj, match, err, refs, callstack, capstack>>
ELSE /\ match' = FALSE
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>>
HandlePrecedencePop == IF PrecStack!SEmpty
THEN /\ err' = ErrUnpairPrec
/\ PrintT(<<"Unpair Precedence Pop", Code>>)
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>>
ELSE /\ PrecStack!SPop
/\ pc' = pc + 1
/\ UNCHANGED <<subj, match, err, refs, callstack, capstack>>
HandleLeftCapture == /\ pc' = pc + 1
/\ CapStack!SPush(LeftCapEntry(pc, subj))
/\ UNCHANGED <<subj, match, err, refs, callstack, precstack>>
HandleRightCapture == /\ pc' = pc + 1
/\ CapStack!SPush(RightCapEntry(pc, subj))
/\ UNCHANGED <<subj, match, err, refs, callstack, precstack>>
\* find capstack top R and its pairing L, associate two indices with an ID globally.
HandleDefineReference == LET r == Len(capstack)
l == FindLeftCapture(capstack, r)
IN /\ pc' = pc + 1
/\ refs' = [n \in DOMAIN refs \union {Label} |-> IF n = Label THEN [l |-> l, r |-> r] ELSE refs[n]]
/\ UNCHANGED <<subj, match, err, callstack, capstack, precstack>>
\* check if a reference has been defined. consume nothing.
HandleHasReference == IF Label \in DOMAIN refs
THEN /\ pc' = pc + 1
/\ UNCHANGED <<subj, match, err, refs,callstack, capstack, precstack>>
ELSE /\ match' = FALSE
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>>
\* compare the substring indexed by ID.
HandleBackReference == LET slice == refs[Label]
len == slice.r - slice.l
patt == SubSeq(SUBJ, slice.l, slice.r - 1)
text == SubSeq(SUBJ, subj, subj + len - 1)
IN IF patt = text
THEN /\ pc' = pc + 1
/\ subj' = subj + len
/\ UNCHANGED <<match, err, refs, callstack, capstack, precstack>>
ELSE /\ match' = FALSE
/\ PrintT(<<"backref not match", slice, len, patt, text>>)
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>>
HandleDropCapture == LET r == Len(capstack)
l == FindLeftCapture(capstack, r)
IN /\ pc' = pc + 1
/\ capstack' = IF l = 1 THEN <<>> ELSE SubSeq(capstack, 1, l-1)
/\ UNCHANGED <<subj, err, match, refs, callstack, precstack>>
HandleCut == /\ pc' = pc + 1
/\ IF CallStack!SEmpty THEN UNCHANGED callstack ELSE CallStack!SUpdateTop(BacktrackStackEntry(CallStack!SPeek.pc, CallStack!SPeek.pos, CallStack!SPeek.captures, CallStack!SPeek.precs, TRUE))
/\ UNCHANGED <<subj, err, match, refs, capstack, precstack>>
HandleLoopSet == IF FindNth(SUBJ, subj) \in Label
THEN /\ subj' = subj + 1
/\ UNCHANGED <<pc, err, match, refs, callstack, capstack, precstack>>
ELSE /\ pc' = pc + 1
/\ UNCHANGED <<subj, err, match, refs, callstack, capstack, precstack>>
HandleSet == IF FindNth(SUBJ, subj) \in Label
THEN /\ pc' = pc + 1
/\ subj' = subj + 1
/\ UNCHANGED <<err, match, refs, callstack, capstack, precstack>>
ELSE /\ match' = FALSE
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>>
HandleNoOp == /\ pc' = pc + 1
/\ UNCHANGED <<subj, match, err, refs, callstack, capstack, precstack>>
HandleAny == IF subj + Label <= Len(SUBJ) + 1
THEN /\ subj' = subj + Label
/\ pc' = pc + 1
/\ UNCHANGED <<match, err, refs, callstack, capstack, precstack>>
ELSE /\ err' = ErrEndOfText
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>>
HandleLoop == /\ pc' = pc - Label
/\ CallStack!SUpdateTop(BacktrackStackEntry(CallStack!SPeek.pc, subj, capstack, precstack, FALSE))
/\ UNCHANGED <<match, subj, err, refs, capstack, precstack>>
HandleFail == /\ match' = FALSE
/\ UNCHANGED <<pc, subj, err, refs, callstack, capstack, precstack>>
HandleInvalidInstCode == /\ err' = ErrInvalidInstCode
/\ PrintT(<<"Invalid Inst Code", Code>>)
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>>
HandleEndOfText == /\ err' = ErrEndOfText
/\ PrintT(<<"End of text", Code>>)
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>>
HandleExit == IF CallStack!SEmpty
THEN /\ err' = ErrNoMatch
/\ IF Len(callstack) = 0 THEN PrintT(<<"Can't back track", "pc", pc, PC[pc]>>)
ELSE PrintT(<<"No match", "pc", CallStack!SPeek.pc-1, PC[CallStack!SPeek.pc-1]>>)
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>>
ELSE IF CallStack!SPeek.cut
THEN /\ err' = ErrNoMatch
/\ PrintT(<<"Cut", "pc", CallStack!SPeek.pc-1, PC[CallStack!SPeek.pc-1]>>)
/\ UNCHANGED <<pc, subj, match, refs, callstack, capstack, precstack>>
ELSE \/ /\ CallStack!SPeek.type = "nat"
/\ CallStack!SPop
/\ UNCHANGED <<pc, subj, err, refs, capstack, precstack>>
\/ /\ CallStack!SPeek.type = "backtrack"
/\ pc' = CallStack!SPeek.pc
/\ subj' = CallStack!SPeek.pos
/\ capstack' = CallStack!SPeek.captures
/\ precstack' = CallStack!SPeek.precs
/\ CallStack!SPop
/\ match' = TRUE
/\ UNCHANGED <<err, refs>>
VM_Invariant == err = 0
VM_Init == /\ pc = 1
/\ subj = 1
/\ err = 0
/\ match = TRUE
/\ refs = [n \in {} |-> TRUE]
/\ callstack = <<>>
/\ capstack = <<>>
/\ precstack = <<>>
VM_Next == IF match = FALSE THEN HandleExit
ELSE IF Code = "RET" THEN HandleReturn
ELSE IF Code = "CHAR" THEN HandleChar
ELSE IF Code = "JMP" THEN HandleJump
ELSE IF Code = "CALL" THEN HandleCall
ELSE IF Code = "FAIL" THEN HandleFail
ELSE IF Code = "CHOICE" THEN HandleChoice
ELSE IF Code = "CMT" THEN HandleCommit
ELSE IF Code = "NOP" THEN HandleNoOp
ELSE IF Code = "ANY" THEN HandleAny
ELSE IF Code = "LOOP" THEN HandleLoop
ELSE IF Code = "BCMT" THEN HandleBackCommit
ELSE IF Code = "PPSH" THEN HandlePrecedencePush
ELSE IF Code = "PPOP" THEN HandlePrecedencePop
ELSE IF Code = "LCAP" THEN HandleLeftCapture
ELSE IF Code = "RCAP" THEN HandleRightCapture
ELSE IF Code = "DREF" THEN HandleDefineReference
ELSE IF Code = "BREF" THEN HandleBackReference
ELSE IF Code = "HREF" THEN HandleHasReference
ELSE IF Code = "DCAP" THEN HandleDropCapture
ELSE IF Code = "CUT" THEN HandleCut
ELSE IF Code = "LSET" THEN HandleLoopSet
ELSE IF Code = "SET" THEN HandleSet
ELSE HandleInvalidInstCode
VM_Spec == VM_Init /\ [][VM_Next]_Vars
\* rule = "a"
VM_TestCase0_PC == <<
<<"CHAR", "a">>,
<<"RET">>
>>
VM_TestCase0_SUBJ == "a"
\* rule = "a"
VM_TestCase1_PC == <<
<<"JMP", 1>>,
<<"CHAR", "a">>,
<<"RET">>
>>
VM_TestCase1_SUBJ == "a"
\* rule = a
\* a = "a"
VM_TestCase2_PC == <<
<<"CALL", 2>>,
<<"RET">>,
<<"CHAR", "a">>,
<<"RET">>
>>
VM_TestCase2_SUBJ == "a"
\* rule = "a" / "A"
VM_TestCase3_PC == <<
<<"CHOICE", 3>>,
<<"CHAR", "a">>,
<<"CMT", 2>>,
<<"CHAR", "A">>,
<<"RET">>
>>
VM_TestCase3_SUBJ == "A"
\* rule = "a"? "b"
VM_TestCase4_PC == <<
<<"CHOICE", 3>>,
<<"CHAR", "a">>,
<<"CMT", 2>>,
<<"NOP">>,
<<"CHAR", "b">>,
<<"RET">>
>>
VM_TestCase4_SUBJ == "b"
VM_TestCase4_1_SUBJ == "ab"
\* rule = .
VM_TestCase5_PC == <<
<<"ANY", 1>>,
<<"RET">>
>>
VM_TestCase5_SUBJ == "a"
\* rule = "a"*
VM_TestCase6_PC == <<
<<"CHOICE", 3>>,
<<"CHAR", "a">>,
<<"LOOP", 1>>,
<<"RET">>
>>
VM_TestCase6_SUBJ == "aaaaaa"
\* rule = &"a" . "b"
VM_TestCase7_PC == <<
<<"CHOICE", 3>>,
<<"CHAR", "a">>,
<<"BCMT", 2>>,
<<"FAIL">>,
<<"ANY">>,
<<"CHAR", "b">>,
<<"RET">>
>>
VM_TestCase7_SUBJ == "ab"
VM_TestCase7_1_SUBJ == "bb"
\* rule = "+" "1" ^ 1 / "*" "1" ^ 2 / "^" "1" ^^ 3
VM_TestCase8_PC == <<
<<"CHAR", "1">>,
<<"CHOICE", 6>>,
<<"PPSH", 1, "L">>,
<<"CHAR", "+">>,
<<"CHAR", "1">>,
<<"PPOP">>,
<<"CMT", 14>>,
<<"CHOICE", 6>>,
<<"PPSH", 1, "L">>,
<<"CHAR", "*">>,
<<"CHAR", "1">>,
<<"PPOP">>,
<<"CMT", 8>>,
<<"CHOICE", 6>>,
<<"PPSH", 1, "R">>,
<<"CHAR", "^">>,
<<"CHAR", "1">>,
<<"PPOP">>,
<<"CMT", 2>>,
<<"FAIL">>,
<<"RET">>
>>
VM_TestCase8_SUBJ == "1+1"
VM_TestCase8_1_SUBJ == "1*1"
VM_TestCase8_2_SUBJ == "1^1"
\* rule = {"a"}
VM_TestCase9_PC == <<
<<"LCAP">>,
<<"CHAR", "a">>,
<<"RCAP">>,
<<"RET">>
>>
VM_TestCase9_SUBJ == "a"
\* rule = <REF_1 {"a"}> <REF_1> {<REF_1>}
VM_TestCase10_PC == <<
<<"LCAP">>,
<<"CHAR", "a">>,
<<"RCAP">>,
<<"DREF", 1>>,
<<"BREF", 1>>,
<<"LCAP">>,
<<"BREF", 1>>,
<<"RCAP">>,
<<"RET">>
>>
VM_TestCase10_SUBJ == "aaa"
VM_TestCase11_PC == <<
<<"CHAR", "a">>,
<<"LCAP">>,
<<"CHAR", "b">>,
<<"RCAP">>,
<<"DCAP">>, \* https://docs.racket-lang.org/peg/index.html | pop the last capture
<<"RET">>
>>
VM_TestCase11_SUBJ == "ab"
\* rule = "a" ~ "b" / "a" "c"
VM_TestCase12_PC == <<
<<"CHOICE", 5>>,
<<"CHAR", "a">>,
<<"CUT">>,
<<"CHAR", "b">>,
<<"CMT", 3>>,
<<"CHAR", "a">>,
<<"CHAR", "c">>,
<<"RET">>
>>
VM_TestCase12_SUBJ == "ac"
VM_TestCase13_PC == <<
<<"LSET", {"0", "1"}>>,
<<"RET">>
>>
VM_TestCase13_SUBJ == "01010101"
VM_TestCase14_PC == <<
<<"SET", {"0", "1"}>>,
<<"RET">>
>>
VM_TestCase14_SUBJ == "1"
\* rule = <REF_1 {"a"}> HREF_1
VM_TestCase15_PC == <<
<<"LCAP">>,
<<"CHAR", "a">>,
<<"RCAP">>,
<<"DREF", 1>>,
<<"HREF", 1>>,
<<"RET">>
>>
VM_TestCase15_SUBJ == "a"
================================
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment