Last active
January 6, 2017 14:19
-
-
Save ceving/b719e72acf25e5e8e1b9858ad50432fa to your computer and use it in GitHub Desktop.
Basic parsing combinators
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; The following code uses two function types: `parsers` and `continuations`. The `parser` functions | |
;; require three arguments: `input`, `continue` and `backtrack`. The `continuation` functions | |
;; require just one argument `input`. The argument `input` is the list of input tokens to parse. | |
;; The arguments `continue` and `backtrack` are continuations. The continuation `continue` is | |
;; followed, when the tried parsing solution up the to current point is correct, which means that it | |
;; is not sure but it might lead to a overall success. And `backtrack` is used, if a missmatch has | |
;; been found, which means that an alternative parsing solution must be tried. The process is | |
;; similar to finding the way out of a maze. | |
;; Throw an error, if the assertion fails. | |
(define-syntax assert | |
(syntax-rules (=>) | |
((assert x => y) | |
(if (not (equal? x y)) | |
(error "Assertion failed."))))) | |
;; This are the most basic parsers, which always succeed and always fail. | |
(define success (lambda _ #t)) | |
(define failure (lambda _ #f)) | |
;; This are debugging parsers, which display the remaining input. | |
(define success* (lambda (input . _) (write input) (newline) #t)) | |
(define failure* (lambda (input . _) (write input) (newline) #f)) | |
;; Make a character tester function. | |
(define (char= character) | |
(lambda (input) | |
(and (pair? input) | |
(char=? character (car input))))) | |
;; Make parsers, which match a predicate. | |
(define (const match?) | |
(lambda (input continue backtrack) | |
(if (match? input) | |
(continue (cdr input)) | |
(backtrack input)))) | |
;; Make some character parsers. | |
(define a? (const (char= #\a))) | |
(define b? (const (char= #\b))) | |
;; Test some characters. | |
(assert (a? '(#\a) success failure) => #t) | |
(assert (a? '(#\b) success* failure*) => #f) | |
(assert (b? '(#\a) success failure) => #f) | |
(assert (b? '(#\b) success failure) => #t) | |
;; Make a seqence of two parsers. | |
(define (sequence p q) | |
(lambda (input-p continue backtrack) | |
(p input-p | |
(lambda (input-q) | |
(q input-q | |
continue | |
(lambda _ (backtrack input-p)))) ; When backtrack of q gets called, this means, that | |
; q has failed. If q has failed the sequence of p | |
; and q also fails and backtrack must continue with | |
; the input of p throwing away any fallacious | |
; success of p. | |
backtrack))) | |
;; Make some sequence parsers. | |
(define a+a? (sequence a? a?)) | |
(define a+b? (sequence a? b?)) | |
;; Test the sequences. | |
(assert (a+a? '(#\a #\a) success* failure*) => #t) | |
(assert (a+a? '(#\a #\b) success* failure*) => #f) | |
(assert (a+b? '(#\a #\b) success* failure*) => #t) | |
;; Make an alternation of two parsers. | |
(define (alternation p q) | |
(lambda (input continue backtrack) | |
(p input | |
continue | |
(lambda _ | |
(q input | |
continue | |
backtrack))))) | |
;; Make a single alternation and test it. | |
(define a-b? (alternation a? b?)) | |
(assert (a-b? '(#\a) success* failure*) => #t) | |
(assert (a-b? '(#\b) success* failure*) => #t) | |
(assert (a-b? '(#\c) success* failure*) => #f) | |
;; Make a sequence containing an alternation and test it. | |
(define a+a-b? (sequence a? (alternation a? b?))) | |
(assert (a+a-b? '(#\a #\a) success* failure*) => #t) | |
(assert (a+a-b? '(#\a #\b) success* failure*) => #t) | |
(assert (a+a-b? '(#\b #\a) success* failure*) => #f) | |
(assert (a+a-b? '(#\a #\c) success* failure*) => #f) | |
;; Make an alternation of two sequences sharing the same beginning and test it. | |
(define a+a-a+b? (alternation a+a? a+b?)) | |
(assert (a+a-a+b? '(#\a #\b) success* failure*) => #t) | |
(assert (a+a-a+b? '(#\a #\c) success* failure*) => #f) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment