Last active
August 29, 2015 14:00
-
-
Save fowlmouth/11284189 to your computer and use it in GitHub Desktop.
Some kind of crazy parsing thing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import fowltek/maybe_t, strutils | |
| type | |
| TMatcher* = proc(input:string;start:var int): TMaybe[string] | |
| Rule* = ref object | |
| matcher*: TMatcher | |
| proc newRule* (matcher: TMatcher): Rule = | |
| Rule(matcher: matcher) | |
| proc `&`* (a,b: Rule): Rule = | |
| # match `a` followed by `b` | |
| newRule(proc(input:string;start:var int): TMaybe[string] = | |
| let ZZ = start | |
| let x = a.matcher(input,start) | |
| if x.has: | |
| let y = b.matcher(input,start) | |
| if y.has: | |
| return just(x.val & y.val) | |
| start = ZZ | |
| ) | |
| proc `|`* (a,b: Rule): Rule = | |
| # match `a` or `b` | |
| newRule(proc(input:string; start:var int): TMaybe[string] = | |
| let ZZ = start | |
| let x = a.matcher(input,start) | |
| if x.has: | |
| return just(x.val) | |
| start = ZZ | |
| let y = b.matcher(input,start) | |
| if y.has: | |
| return just(y.val) | |
| start = ZZ | |
| ) | |
| proc str* (s:string): Rule = | |
| # match a string | |
| newRule(proc(input:string; start:var int): TMaybe[string] = | |
| if input.continuesWith(s, start): | |
| result = just(s) | |
| inc start, len(s) | |
| ) | |
| proc chr* (chrs: varargs[char]): Rule = | |
| # match one of a set of characters | |
| let chrs = @chrs | |
| newRule(proc(input:string;start:var int): TMaybe[string] = | |
| if input[start] in chrs: | |
| result = just($ input[start]) | |
| inc start | |
| ) | |
| proc chr* (chr: set[char]): Rule = | |
| # match one of a set of characters | |
| newRule(proc(input: string; start: var int): TMaybe[string] = | |
| if input[start] in chr: | |
| result = just($ input[start]) | |
| inc start | |
| ) | |
| proc repeat* (R:Rule; min: int): Rule = | |
| # Repeat a minimum `min` times | |
| newRule(proc(input:string; start:var int): TMaybe[string] = | |
| var i = 0 | |
| let ZZ = start | |
| result.val = "" | |
| while true: | |
| let ret = r.matcher(input,start) | |
| if not ret.has: break | |
| result.val.add ret.val | |
| inc i | |
| if start > input.high: break | |
| if i >= min: | |
| result.has = true | |
| else: | |
| start = ZZ | |
| ) | |
| template reset_return : stmt = | |
| result.has = false | |
| result.val = "" | |
| return | |
| proc repeat* (R:Rule; slc: TSlice[int]): Rule = | |
| # Repeat a rule min..max times | |
| newRule(proc (input:string; start:var int): TMaybe[string] = | |
| let ZZ = start | |
| var i = 0 | |
| result.val = "" | |
| while i <= slc.b: | |
| if start > input.high: break | |
| if (let ret = r.matcher(input,start); ret.has): | |
| result.val.add ret.val | |
| else: break | |
| inc i | |
| if i >= slc.a: | |
| result.has = true | |
| else: | |
| start = ZZ | |
| ) | |
| proc `+`* (rule: Rule): Rule = | |
| # match 1 or more | |
| result = rule.repeat(1) | |
| discard """ newRule(proc (input:string; start:var int): TMaybe[string] = | |
| result.val = "" | |
| while true: | |
| let (has,str) = rule.matcher(input,start) | |
| if has: | |
| result.has = true | |
| result.val.add str | |
| if not has or start > input.high: | |
| break | |
| ) """ | |
| proc many* (R:Rule): Rule {.inline.} = | |
| # match 1 or more | |
| +r | |
| proc `?`* (R:Rule): Rule = | |
| # consume 1 or 0 (always pass) | |
| newRule(proc(input:string; start:var int):TMaybe[string]= | |
| result = just(r.matcher(input,start).val) | |
| if result.val.isNil: result.val = "" | |
| ) | |
| proc maybe* (R:Rule): Rule {.inline.}= | |
| # consume 1 or 0 (always pass) | |
| ?r | |
| proc `*`* (R:Rule): Rule {.inline.} = | |
| # consume 0 or many (always pass) | |
| # this is just `?many(R)` | |
| #?many(R) | |
| repeat(R, 0) | |
| proc present* (R:Rule): Rule = | |
| # look ahead, returns true if the pattern matches but does | |
| # not consume | |
| newRule(proc(input:string; start:var int): TMaybe[string] = | |
| let x = start | |
| if r.matcher(input,start).has: | |
| result = just("") | |
| start = x | |
| ) | |
| proc absent* (R:Rule): Rule = | |
| # look ahead only, returns true if the pattern was NOT found | |
| newRule(proc(input:string; start:var int): TMaybe[string] = | |
| if not present(R).matcher(input,start).has: | |
| result = just("") | |
| ) | |
| proc `!`* (R: Rule): Rule {.inline.} = | |
| absent(R) | |
| proc tag (R:Rule; tag:string):Rule = | |
| # TODO | |
| # this will be used to store a match in the resulting tree | |
| # the tree will probably be json because its easy to work with | |
| Rule( | |
| matcher:proc(input:string;start:var int):TMaybe[string] = | |
| result = R.matcher(input,start) | |
| if result.has: | |
| #echo "tag discarded: ", tag, ": ", result.val | |
| ) | |
| when defined(HelperRules) or isMainModule: | |
| # Some helper rules | |
| const | |
| printableChars = {'\x20' .. '\x7E'} | |
| alphaChars = {'A'..'Z', 'a'..'z'} | |
| digits = {'0'..'9'} | |
| identChars = {'A'..'Z','a'..'z','0'..'9','_'} | |
| allChars = {'\x00' .. '\xFF'} | |
| proc join* (r, on: Rule; min = 0; max = -1): Rule = | |
| # Join a rule on another rule in the sequence | |
| # `on & r` must repeat `min` times | |
| # `max` may be -1 | |
| if max > -1: r & (on & r).repeat(min .. max) | |
| else: r & (on & r).repeat(min) | |
| proc comma_list* (r: Rule; min = 0; max = -1): Rule = | |
| join(r, chr(','), min,max) | |
| proc wrapped_rule (open,close:Rule): proc(inner:Rule): Rule = | |
| # Creates a rule that matches like `open & inner & close` | |
| return proc(inner:Rule): Rule = | |
| when true: | |
| let | |
| open = open | |
| close = close | |
| Rule( | |
| matcher: proc(input:string; start:var int): TMaybe[string] = | |
| let x = start | |
| if open.matcher(input,start): | |
| result = inner.matcher(input,start) | |
| if result.has: | |
| if close.matcher(input,start): | |
| return | |
| #reset the result because the closing rule didnt match | |
| result.has = false | |
| start = x | |
| ) | |
| let | |
| braces* = wrapped_rule(chr('{'), chr('}')) | |
| parens* = wrapped_rule(chr('('), chr(')')) | |
| brackets* = wrapped_rule(chr('['), chr(']')) | |
| proc add (L:var string; R:TMaybe[string]) = | |
| if r.has: L.add r.val | |
| discard """ proc comma_list* (R:Rule; postComma: Rule = nil): Rule = | |
| newRule(proc(input:string; start:var int):TMaybe[string] = | |
| result.val = "" | |
| while(let (has,s) = r.matcher(input,start); has): | |
| result.has = true | |
| result.val.add s | |
| if input[start] != ',': | |
| break | |
| result.val.add ',' | |
| inc start | |
| if not PostComma.isNil: | |
| result.val.add postComma.matcher(input,start) | |
| ) | |
| """ | |
| proc keyword* (s: string): Rule = | |
| str(s) & absent(chr(identChars)) | |
| proc stri* (s: string): Rule = | |
| # case insensitive str | |
| # probably more efficient to use a regex rule here | |
| template accum (x): stmt = | |
| if result.isNil: | |
| result = x | |
| else: | |
| result = result & x | |
| for character in s.items: | |
| if character in alphaChars: | |
| accum chr({character.toLower, character.toUpper}) | |
| else: | |
| accum chr(character) | |
| proc newRule*(): Rule = Rule() | |
| proc `:=`* (a,b:Rule) = | |
| # update rule a, set its matcher to rule b | |
| # you can use this to refer to rules before | |
| # they're initialized. | |
| a.matcher = b.matcher | |
| import macros | |
| macro grammar* (name:expr; body:stmt):stmt {.immediate.} = | |
| # accepts a list of statements like | |
| # | |
| # hello := "Hello" | |
| # digits <- many(chr( {'0' .. '9'} )) | |
| # | |
| # you can refer to a rule here before it is defined | |
| # | |
| assert body.kind == nnkStmtList | |
| result = newStmtList() | |
| let varDecl = newNimNode(nnkVarSection) | |
| result.add varDecl | |
| for i in 0 .. < len(body): | |
| let s = body[i] | |
| if s.kind == nnkInfix and $(s[0]) in [":=","<-"]: | |
| varDecl.add newIdentDefs(s[1], newEmptyNode(), newCall("newRule")) | |
| result.add s[1].infix(":=", s[2]) | |
| elif s.kind == nnkAsgn: | |
| varDecl.add newIdentDefs(s[0], newEmptyNode(), newCall("newRule")) | |
| result.add s[0].infix(":=", s[1]) | |
| else: | |
| result.add s | |
| when defined(Debug): | |
| echo repr(result) | |
| proc match* (R:Rule; S:String): TMaybe[string] = | |
| var i = 0 | |
| return r.matcher(s,i) | |
| when isMainModule: | |
| const | |
| spaceChars = {' ','\t'} | |
| grammar(my_grammar): | |
| #so meta | |
| program = | |
| +(space_newlines | rule_decl) & ?space_newlines | |
| rule_decl = | |
| name & ?space & | |
| (str("=") | str(":=") | str("<-")) & | |
| ?space & mainRule | |
| nl = chr({'\L'}) | |
| space = | |
| +chr(spaceChars) | |
| space_newlines = | |
| +( | |
| space | nl | | |
| (chr('#') & +(absent(chr({'\x00','\L'})) & chr(allChars))) | |
| ) | |
| name = chr({'A'..'Z','a'..'z','_'}) & *chr({'A'..'Z','a'..'z','_','0'..'9'}) | |
| mainRule = join(prefixRule, ?space & bin_op & ?space_newlines, 0) | |
| prefixChar = chr({'?','!','+','*'}) | |
| bin_op = chr({'&','|'}) | |
| prefixRule = ?(prefixChar & *(space & prefixChar)) & ?space & baseRule | |
| func_chr = | |
| keyword("chr") & | |
| parens( | |
| (?space & nim_char_set & ?space) | | |
| comma_list(?space & nim_char_lit & ?space) | |
| ) | |
| baseRule = | |
| ( keyword("str") & parens(?space & str_lit & ?space) ) | | |
| func_chr | | |
| ( (keyword("comma") )) | | |
| ( parens(mainRule) ) | |
| str_lit = | |
| chr('\"') & | |
| (repeat( | |
| ( chr('\\')& ( | |
| chr('"') | | |
| (chr('x') & repeat(chr({'A'..'F','a'..'f','0'..'9'}), 2..2)) | |
| )) | | |
| chr(allChars-{'"'}), 0 | |
| )) & | |
| chr('\"') | |
| nim_char_lit = | |
| chr('\'') & | |
| ( | |
| (chr('\\') & | |
| ( chr({'\'','L'}) | | |
| chr('x') & repeat(chr({'A'..'F','a'..'f','0'..'9'}), 2..2) | |
| ) | |
| ) | | |
| chr(allChars-{'\''}) | |
| ) & | |
| chr('\'') | |
| nim_char_set = | |
| braces(comma_list(?space & nim_char_lit & ?(?space & str("..") & ?space & nim_char_lit), 0)) | |
| comma = chr(',') | |
| let r = program #(?space & mainrule & ?space) | |
| echo r.match("""rule1 = ?chr('x')""") | |
| when defined(repl): | |
| import rdstdin,tables | |
| block: | |
| const helptext = """ | |
| Commands: | |
| def-rules | |
| show-rules | |
| """ | |
| echo helptext | |
| var rules = initTable[string,Rule]() | |
| var input = "" | |
| var line: string | |
| while readlineFromStdin("> ", line): | |
| if (line.len == 0 and input.len > 0) or keyword("end").match(line): | |
| let m = program.match(input) | |
| echo m | |
| input.setLen 0 | |
| else: | |
| input.add line | |
| input.add '\L' | |
| #quit 1 | |
| # minitest thing | |
| template test (name:expr; body:stmt): stmt {.immediate.} = | |
| block thisBlock: | |
| template check (cond): stmt = | |
| if not cond: | |
| echo name, " [Failed] `", astToStr(cond), '`' | |
| break thisBlock | |
| body | |
| echo name, " [Passed]" | |
| template testAll (name:expr; body:stmt): stmt{.immediate.}= | |
| # like test but checks can fail | |
| block fowl_is_cool: | |
| template check(cond): stmt = | |
| when not definedInScope(failed): | |
| var failed{.inject.} = newSeq[string](0) | |
| if not cond: | |
| failed.add astToStr(cond) | |
| body | |
| if failed.len > 0: | |
| if failed.len == 1: | |
| echo name, " [", failed.len, " Failed] `", failed[0], '`' | |
| else: | |
| echo name, " [", failed.len, " Failed]" | |
| for expression in failed: | |
| echo " `", expression, '`' | |
| failed.setLen 0 | |
| else: | |
| echo name, " [Passed]" | |
| block: | |
| const digit = {'0'..'9'} | |
| grammar(sendy): | |
| ws_soft = +chr(' ','\t') | |
| ws = +chr(' ','\t','\L') | |
| anyChar = chr({'\x00'..'\xFF'}) | |
| operator = +chr({'!','@','~','$','%','^','&','*','-','+','\\','/','<','>','?','.'}) | |
| assignment_op = +operator & chr('=') | |
| literal = str_lit | float_lit | int_lit | |
| float_lit = ?chr({'-','+'}) & +chr(digit) & chr('.') & +chr(digit) | |
| int_lit = ?chr({'-','+'}) & +chr(digit) | |
| str_lit = | |
| chr('"') & | |
| *('"'.chr.absent & anyChar) & | |
| chr('"') | |
| ident = chr({'A'..'Z','a'..'z','_'}) & *chr({'A'..'Z','a'..'z','_','-','0'..'9'}) | |
| message = | |
| ident & | |
| ?parens(comma_list(?ws & expression & ?ws, 0) | ?ws) | |
| base_expr = literal | parens(?ws & expression & ?ws) | |
| postfix_expr = | |
| (base_expr & *(ws_soft & message)) | | |
| message.join(ws_soft) | |
| prefix_expr = ?(operator & ?ws_soft) & postfix_expr | |
| binary_expr = prefix_expr & ?many(?ws_soft & operator & ?ws & prefix_expr) | |
| expression = binary_expr | |
| testall "Idents": | |
| check ident.match("x") | |
| check ident.match("print-line") | |
| testall "expressions": | |
| check expression.match "1 + 2" | |
| check expression.match "-1 abs" | |
| check expression.match("x foo(42)") | |
| check expression.match("""stdout print-line("Hello")""") | |
| check expression.match("actor message(foo, 1) linked-message(1.2)") | |
| quit 0 | |
| # example usage for a weird js-like language | |
| # there is a test suite below | |
| block: | |
| grammar(js_like): | |
| ws <- | |
| many(chr({' ', '\t', '\L'})) | |
| ws_soft <- | |
| many(chr({' ', '\t'})) | |
| ident <- | |
| chr({'A'..'Z', 'a'..'z', '_'}) & *chr(identChars) | |
| int_lit <- | |
| +chr(digits) | |
| str_lit <- | |
| chr('"') & | |
| *(absent(chr('"')) & chr({char.low..char.high})) & | |
| chr('"') | |
| operator <- +chr({'!','@','~','$','%','^','&','*','-','+','\\','/','<','>','?','='}) | |
| base_expr <- | |
| int_lit | ident | str_lit | parens(?ws & expression & ?ws) | |
| postfix_expr <- | |
| base_expr & | |
| ?many( | |
| ( | |
| chr('.') & | |
| ident | |
| ) | | |
| ( | |
| parens(?ws & ?(commaList(?ws & expression & ?ws_soft,0) & ?ws)) | |
| ) | | |
| ( | |
| brackets(?ws & commaList(?ws & expression & ?ws_soft, 0).maybe) | |
| ) | |
| ) | |
| prefix_expr <- | |
| ?(operator & ?ws) & postfix_expr | |
| binary_expr <- | |
| prefix_expr & ?many(?ws_soft & operator & ?ws & prefix_expr) | |
| expression <- | |
| binary_expr | |
| braces_statements <- | |
| braces(?ws & statement_list & ?ws) | |
| statements_or_single <- | |
| (?ws & braces_statements) | | |
| (?ws & statement) | |
| assignment <- | |
| ident & ?ws_soft & | |
| chr('=') & ?ws & | |
| expression | |
| if_statement <- | |
| ( | |
| keyword("if") & ?ws & expression & | |
| statements_or_single | |
| ) & | |
| *( | |
| ?ws & keyword("elseif") & ws & expression & | |
| statements_or_single | |
| ) & | |
| ?( | |
| ?ws & keyword("else") & | |
| statements_or_single | |
| ) | |
| while_statement <- | |
| keyword("while") & ws & expression & statements_or_single | |
| return_statement <- | |
| keyword("return") & ws & expression | |
| statement <- | |
| return_statement | | |
| if_statement | | |
| while_statement | | |
| assignment | | |
| expression | |
| statement_list <- | |
| join(statement, ?ws_soft & chr({'\L',';'}) & ?ws) | |
| func_def <- | |
| keyword("fn") & ws & | |
| ident.tag("name") & ?ws & | |
| parens(comma_list(?ws & ident & ?ws) | ?ws) & ?ws & | |
| braces(?ws & statement_list & ?ws) | |
| toplevel_stmt <- | |
| func_def | |
| program <- | |
| ?ws & join(toplevel_stmt, ?ws, 0) & ?ws | |
| proc `=~` (S:String; L:Rule): bool = | |
| var i = 0 | |
| result = l.matcher(s, i) | |
| if result and i < s.len: | |
| result = false | |
| testAll "stri()": | |
| check stri("FoO").match("foo") | |
| check stri("FoO").match("FOO") | |
| testAll "Numbers": | |
| check match(int_lit, "1") | |
| check match(int_lit, "6969") | |
| check match(expression, "911") | |
| test "Strings": | |
| check "\"\"" =~ str_lit | |
| check "\"hi\"" =~ str_lit | |
| check(not("\"hi" =~ str_lit)) | |
| test "Identifiers": | |
| check match(ident, "u") | |
| check match(ident, "x_y") | |
| check(not match(ident, "3e")) | |
| testAll "Whitespace": | |
| check match(?ws, " ") | |
| check match(?ws, "") | |
| check match(?ws_soft, " ") | |
| check match(?ws_soft, "") | |
| check(not match(ws_soft, "\L")) | |
| testAll "Assignment": | |
| check "y=2" =~ assignment | |
| check "x =\L 4" =~ assignment | |
| testAll "Expressions": | |
| check "42" =~ expression | |
| check "x + 1" =~ expression | |
| check "1 + (2-3)" =~ expression | |
| check "true" =~ expression | |
| check "hello(1)" =~ expression | |
| check "sup()" =~ expression | |
| testAll "Statements": | |
| check "if x {\L 42\L }" =~ statement | |
| check "if true {1} elseif false {0}" =~ statement | |
| check "if true {1} elseif false {0} else {3}" =~ statement | |
| check "42+1" =~ statements_or_single | |
| check "while false {print(\"Hello!\"); break}" =~ statement | |
| testAll "Function defintion": | |
| check("fn no_args() { return 1 }\L" =~ func_def & ?ws) | |
| check("fn two_args(a,b) {1+2}" =~ func_def) | |
| testAll "Program": | |
| check("""fn main() { | |
| print("Hello") | |
| } | |
| fn fib(x) { | |
| if x < 2 { return x } | |
| return fib(x-1)+fib(x-2) | |
| } | |
| """ =~ program) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import | |
| json,strutils, | |
| fowltek.maybe_t | |
| type | |
| TInput = object | |
| len, pos: int | |
| str: string | |
| TMatchKind* = enum | |
| mUnrefined, mJson | |
| TPositiveMatch = object | |
| case kind*: TMatchKind | |
| of mUnrefined: str*:string | |
| of mJSON: j*: PJsonNode | |
| TMatchResult = TMaybe[TPositiveMatch] | |
| TMatcher* = proc(input: var TInput): TMatchResult | |
| Rule* = ref object | |
| matcher: TMatcher | |
| proc newRule (m:TMatcher): Rule = | |
| Rule(matcher:m) | |
| proc `$`* (R: TPositiveMatch): string = | |
| case r.kind | |
| of mUnrefined: | |
| result = r.str | |
| of mJson: | |
| result = $r.j | |
| proc match* (R: Rule; str: string): TMatchResult = | |
| var input = TInput(len: str.len, str: str, pos: 0) | |
| result = r.matcher(input) | |
| template matchf(body:stmt):expr {.immediate.} = | |
| (proc(input: var TInput): TMatchResult = | |
| body) | |
| proc mk_unref (s:string): TMatchResult = | |
| just(TPositiveMatch(kind: mUnrefined, str: s)) | |
| proc mk_j (j:PJsonNode): TMatchResult = | |
| just(TPositiveMatch(kind: mJson, j: j)) | |
| proc isArray (r:TMatchResult): bool = | |
| r.has and r.val.kind == mJson and r.val.j.kind == jArray | |
| proc high* (i: TInput): int = i.str.high | |
| let matchFail = Nothing[TPositiveMatch]() | |
| template any* (iter, name, cond: expr): expr {.immediate.}= | |
| var res{.gensym.} = false | |
| for name in iter: | |
| if cond: | |
| res = true | |
| break | |
| res | |
| proc merge* (J1, J2: PJsonNode) = | |
| if j1.kind == jObject and j2.kind == jObject: | |
| for key,val in items(j2.fields): | |
| j1[key] = val | |
| template testFeature (name;body:stmt):stmt{.immediate.}= | |
| block: | |
| when not defined(failed_tests): | |
| var failed_tests{.inject.}: seq[string] = @[] | |
| template check (xpr): stmt = | |
| discard """ when not defined(failed_tests): | |
| var failed_tests{.inject.}: seq[string] = @[] """ | |
| if not xpr: | |
| failed_tests.add astToStr(xpr) | |
| body | |
| if failed_tests.len > 0: | |
| echo name, " [", failed_tests.len, " Failures]" | |
| for f in failed_tests: | |
| echo " ", f | |
| failed_tests.setLen 0 | |
| else: | |
| echo name, " [Passed]" | |
| proc accumJson (results:varargs[TPositiveMatch]): TMatchResult = | |
| # discard any unrefineds | |
| var res = newSeq[TPositiveMatch](results.len) | |
| res.setLen 0 | |
| for it in results: | |
| if it.kind == mUnrefined: continue | |
| res.add it | |
| assert res.len > 0 | |
| if res.len == 1: | |
| result = just(res[0]) | |
| return | |
| # try to merge json objects | |
| var i = 0 | |
| when defined(Debug): | |
| echo "res.len: ", res.len | |
| var iters = 0 | |
| while i < high(res): | |
| echo iters, " (", i,")" | |
| if iters > 5: break | |
| inc iters | |
| if res[i].j.kind == jObject and res[i+1].j.kind == jObject: | |
| type TKV = tuple[key:string,val:PJsonNode] | |
| proc keys (J: PJsonNode): seq[string] = | |
| j.fields.map(proc(kv:TKV):string = kv.key) | |
| let | |
| r1 = res[i].j | |
| r2 = res[i+1].j | |
| r2_keys = r2.keys | |
| when defined(Debug): | |
| echo "Comparing $# and $#".format(r1,r2) | |
| proc filter (item:string): bool = | |
| result = item in r2_keys | |
| if result and defined(Debug): | |
| echo "Clash: ", item | |
| if any(r1.keys, it, filter(it)): | |
| when defined(debug): | |
| echo "Cannot join them." | |
| inc i, 1 | |
| continue | |
| # merge and delete r2 | |
| r1.merge r2 | |
| res.delete i+1 | |
| inc i, 1 | |
| else: | |
| inc i | |
| if res.len == 1: | |
| return just(res[0]) | |
| else: | |
| # multiple results, return it as a jarray | |
| var arr = newJarray() | |
| for it in res: | |
| arr.add it.j | |
| result = mk_j(arr) | |
| proc currentChar* (input:TInput): char = input.str[input.pos] | |
| proc chrMatcher (chars: seq[char]|set[char]): TMatcher = | |
| return (matchf do: | |
| if input.currentChar in chars: | |
| result = mk_unref($ input.currentChar) | |
| input.pos.inc | |
| ) | |
| proc chr* (chars: varargs[char]): Rule = | |
| newRule(chrMatcher(@chars) ) | |
| proc chr* (chars: set[char]): Rule = | |
| newRule(chrMatcher(chars)) | |
| proc tag* (R:Rule; name:string): Rule = | |
| newRule( | |
| (matchf do: | |
| result = r.matcher(input) | |
| if result.has: | |
| if result.val.kind == mUnrefined: | |
| result = mk_j(%{ name: %result.val.str }) | |
| else: | |
| result = mk_j(%{ name: result.val.j }) | |
| )) | |
| testFeature "tag()": | |
| check((let r = chr('A').tag("x").match("A"); r.has and r.val.kind == mjson and $r.val.j == "{\"x\": \"A\"}")) | |
| proc accumulate (results: varargs[TPositiveMatch]): TMatchResult = | |
| if results.len == 1: return just(results[0]) | |
| if results.any(it, it.kind == mJson): | |
| result = results.accumJson | |
| else: | |
| # all unrefineds. join them. | |
| result = just(TPositiveMatch(kind: mUnrefined, str: "")) | |
| for it in results: | |
| result.val.str.add it.str | |
| proc `&` * (A,B: Rule): Rule = | |
| newRule(matchf do: | |
| let zz = input.pos | |
| let ma = a.matcher(input) | |
| if ma.has: | |
| let mb = b.matcher(input) | |
| if mb.has: | |
| result = accumulate(ma.val, mb.val) | |
| return | |
| input.pos = zz | |
| ) | |
| testFeature "& sequence": | |
| let rule = chr('A') & chr('x') | |
| check rule.match("Ax") | |
| check(not rule.match("xlkj")) | |
| let match_rule = rule.tag("match") | |
| check match_rule.match("Ax").val.j["match"].str == "Ax" | |
| let test5 = (chr('a') & chr('b') & chr('c') & chr('d')).tag("match") | |
| check test5.match("abcd").val.j["match"].str == "abcd" | |
| proc `|`* (A,B: Rule): Rule = | |
| newRule(matchf do: | |
| let zz = input.pos | |
| if (let (has,m) = a.matcher(input); has): | |
| return just(m) | |
| input.pos = zz | |
| if (let (has,m) = b.matcher(input); has): | |
| return just(m) | |
| input.pos = zz | |
| ) | |
| testFeature "OR sequence": | |
| let rule = chr('a') | chr('b') | |
| check rule.match("a") | |
| check rule.match("b") | |
| check(not rule.match("c")) | |
| proc repeat* (R:Rule; min,max:int): Rule = | |
| newRule(matchf do: | |
| var i = 0 | |
| let zz = input.pos | |
| var results: seq[TPositiveMatch] = @[] | |
| while i < max and input.pos < input.len: | |
| if (let (has,res) = r.matcher(input); has): | |
| results.add res | |
| inc i | |
| else: | |
| break | |
| if i < min: | |
| input.pos = zz | |
| result = match_fail | |
| else: | |
| if i > 0: | |
| result = accumulate(results) | |
| else: | |
| result = mk_unref("") | |
| ) | |
| proc repeat* (R:Rule; min:int): Rule = | |
| newRule(matchf do: | |
| var i = 0 | |
| let zz = input.pos | |
| var results: seq[TPositiveMatch] = @[] | |
| while input.pos < input.len: | |
| if (let (has,res) = r.matcher(input); has): | |
| results.add res | |
| inc i | |
| else: | |
| break | |
| if i < min: | |
| input.pos = zz | |
| result = match_fail | |
| else: | |
| if i > 0: | |
| result = accumulate(results) | |
| else: | |
| result = mk_unref("") | |
| ) | |
| testFeature "Repeat": | |
| #echo($chr('x').repeat(1).match("xx")) | |
| #check(not chr('x').repeat(1).match("x").has) | |
| check($chr('x').repeat(1).match("xx") == "xx") | |
| # repetitions are tagged together | |
| let rule = chr('x').repeat(1).tag("x") | |
| echo rule.match("xx") | |
| check($ chr('x').repeat(1).tag("x").match("xx").val.j == """{"x": "xx"}""") | |
| # repetitions are merged as an array | |
| block: | |
| #echo chr('x').tag("x").match("xx").val.j | |
| check($chr('x').tag("x").repeat(1).match("xx").val.j == """[{"x": "x"}, {"x": "x"}]""") | |
| proc `+`* (R:Rule): Rule = | |
| repeat(R, 1) | |
| proc `*`* (R:Rule): Rule = | |
| repeat(R, 0) | |
| proc `?`* (R:Rule): Rule = | |
| repeat(R, 0,1) | |
| block: | |
| const digits = {'0'..'9'} | |
| let | |
| int_lit = chr(digits).repeat(1).tag("x") | |
| space = +chr(' ','\t') | |
| expression = int_lit & *(space & int_lit) | |
| block: | |
| let x = expression.match("1 2") | |
| assert x.has and $x.val.j == """[{"x": "1"}, {"x": "2"}]""" | |
| #echo expression.match("1 2 3") | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment