Skip to content

Instantly share code, notes, and snippets.

@Lovesan
Created November 2, 2011 10:39
Show Gist options
  • Save Lovesan/1333362 to your computer and use it in GitHub Desktop.
Save Lovesan/1333362 to your computer and use it in GitHub Desktop.
simple parser combinator library
(in-package :cl-user)
(defpackage #:peg-combinators
(:use #:cl)
(:nicknames #:pegc)
(:export
#:defparser
#:defrule
#:result
#:result-p
#:result-value
#:result-position
#:result-error
#:parsing-error
#:parsing-error-p
#:parsing-error-position
#:parsing-error-error
#:parsing-error-message
#:select-error
#:char-class
#:any-char
#:literal
#:seq
#:choice
#:optional
#:zero-or-more
#:one-or-more
#:not-p
#:and-p
#:enable-pegc-syntax
#:disable-pegc-syntax))
(in-package #:peg-combinators)
(deftype index () '(integer 0 #.(1- array-total-size-limit)))
(defmacro with-gensyms ((&rest symbols) &body body)
`(let ,(mapcar (lambda (symbol)
`(,symbol (gensym ,(symbol-name symbol))))
symbols)
,@body))
(defmacro defparser (name (&rest args) (input start end &rest other)
&body body)
`(progn
(defmacro ,name ,args
(with-gensyms (,input ,start ,end ,@other)
`(lambda (,,input &key ((:start ,,start) 0) ((:end ,,end) nil))
(declare (type string ,,input)
(type index ,,start)
(type (or null index) ,,end))
(when (null ,,end) (setf ,,end (length ,,input)))
,,@body)))
,(when (null args)
`(define-symbol-macro ,name (,name)))
',name))
(defstruct (result
(:constructor result (position value &optional error)))
(value nil :read-only t)
(position 0 :read-only t)
(error nil :read-only t))
(defstruct (parsing-error
(:include result)
(:constructor parsing-error (position message &optional error)))
(message "Parsing error" :read-only t))
(defgeneric select-error (e1 e2)
(:method ((e1 null) (e2 null))
e1)
(:method ((e1 null) (e2 result))
(result-error e2))
(:method ((e1 null) (e2 parsing-error))
e2)
(:method ((e1 result) (e2 result))
(select-error (result-error e1) (result-error e2)))
(:method ((e1 result) (e2 parsing-error))
(select-error (result-error e1) e2))
(:method ((e1 result) (e2 null))
(result-error e1))
(:method ((e1 parsing-error) (e2 parsing-error))
(if (<= (result-position e1) (result-position e2))
e2
e1))
(:method ((e1 parsing-error) (e2 result))
(select-error e1 (result-error e2)))
(:method ((e1 parsing-error) (e2 null))
e1))
(defparser any-char () (input start end)
`(if (< ,start ,end)
(result (1+ ,start) (char ,input ,start))
(parsing-error ,start "Character expected.")))
(defparser char-class (&rest cases) (input start end char)
`(if (and (> (- ,end ,start) 0)
(let ((,char (char ,input ,start)))
,(intern-char-cases char cases)))
(result (1+ ,start) (char ,input ,start))
(parsing-error ,start ,(make-char-error-message cases))))
(eval-when (:compile-toplevel :load-toplevel :execute)
(defun intern-char-cases (char-var cases)
`(or ,@(mapcar (lambda (case)
(etypecase case
(character
`(char= ,char-var ,case))
((cons character (cons character null))
`(char<= ,(first case) ,char-var ,(second case)))))
cases)))
(defun make-char-error-message (cases)
(format nil "Expected character of class [~{~a~}]."
(mapcar (lambda (case)
(if (characterp case)
(escape-char case)
(concatenate 'string
(escape-char (first case))
"-"
(escape-char (second case)))))
cases)))
(defun escape-char (char)
(case char
(#\Tab "\\t")
(#\Return "\\r")
(#\Newline "\\n")
(#\[ "\\[")
(#\] "\\]")
(T (string char)))))
(defparser literal (string-designator) (input start end end*)
(let* ((string (string string-designator))
(length (length string)))
`(let ((,end* (+ ,start ,length)))
(if (or (< ,end ,end*)
(string/= ,input ,string
:start1 ,start
:end1 ,end*))
(parsing-error ,start ,(format nil "~s expected." string))
(result ,end* (copy-seq ,string))))))
(defparser seq (&rest expressions)
(input start end block result error values position)
`(block ,block
(let (,result ,error ,values (,position ,start))
(declare (ignorable ,result ,error))
,@(loop :for expr :in expressions :collect
`(progn
(setf ,result (funcall ,expr ,input
:start ,position
:end ,end)
,error (select-error ,error ,result)
,position (result-position ,result))
(when (parsing-error-p ,result)
(return-from ,block ,error))
(push (result-value ,result) ,values)))
(result ,position (nreverse ,values) ,error))))
(defparser choice (first &rest rest)
(input start end block result error)
`(block ,block
(let* (,result ,error)
,@(loop :for expr :in (cons first rest) :collect
`(progn
(setf ,result (funcall ,expr ,input
:start ,start
:end ,end)
,error (select-error ,error ,result))
(unless (parsing-error-p ,result)
(return-from ,block (result (result-position ,result)
(result-value ,result)
,error)))))
,error)))
(defmacro optional (rule)
`(choice ,rule (seq)))
(defparser zero-or-more (rule)
(input start end result error values position)
`(let (,result ,error ,values (,position ,start))
(loop (setf ,result (funcall ,rule ,input
:start ,position
:end ,end)
,error (select-error ,error ,result))
(if (parsing-error-p ,result)
(return)
(setf ,position (result-position ,result)
,values (cons (result-value ,result) ,values))))
(result ,position (nreverse ,values) ,error)))
(defparser one-or-more (rule) (input start end result func)
`(let* ((,func ,rule)
(,result (funcall (seq ,func (zero-or-more ,func)) ,input
:start ,start
:end ,end)))
(if (parsing-error-p ,result)
,result
(result (result-position ,result)
(apply #'cons (result-value ,result))
(result-error ,result)))))
(defparser not-p (rule) (input start end result)
`(let ((,result (funcall ,rule ,input
:start ,start
:end ,end)))
(if (parsing-error-p ,result)
(result ,start nil (result-error ,result))
(parsing-error ,start
,(format nil "Unexpected success of ~s" rule)
(result-error ,result)))))
(defparser and-p (rule) (input start end result)
`(let ((,result (funcall ,rule ,input
:start ,start
:end ,end)))
(if (parsing-error-p ,result)
,result
(result ,start nil (result-error ,result)))))
(defmacro defrule (name rule &optional (value-transformer
'(function value-identity)))
(with-gensyms (input start end result)
`(progn (defun ,name (,input &key ((:start ,start) 0) ((:end ,end) nil))
(declare (type string ,input)
(type index ,start)
(type (or null index) ,end))
(when (null ,end) (setf ,end (length ,input)))
(let ((,result (funcall ,rule ,input :start ,start :end ,end)))
(if (parsing-error-p ,result)
,result
(result (result-position ,result)
(funcall ,value-transformer
(result-value ,result)
,input
,start
,end)
(result-error ,result)))))
(define-symbol-macro ,name #',name))))
(defun value-identity (value input start end)
(declare (ignore input start end))
value)
(in-package #:cl-user)
(defpackage #:peg-combinators.numbers
(:use #:cl #:pegc)
(:nicknames #:pegc.numbers)
(:export #:unsigned-int
#:signed-int
#:rationum
#:flonum))
(in-package #:pegc.numbers)
(defrule unsigned-int
(one-or-more (char-class (#\0 #\9)))
#'intern-unsigned-int)
(defun intern-unsigned-int (value input start end)
(declare (ignore input start end))
(parse-integer (coerce value 'string)))
(defrule signed-int
(seq (optional (char-class #\+ #\-))
unsigned-int)
#'intern-signed-int)
(defun intern-signed-int (value input start end)
(declare (ignore input start end))
(destructuring-bind
(sign value) value
(* value (if (eql sign #\-) -1 1))))
(defrule rationum
(seq signed-int (char-class #\/) unsigned-int)
#'intern-rationum)
(defun intern-rationum (value input start end)
(declare (ignore input start end))
(destructuring-bind
(numerator slash denominator) value
(declare (ignore slash))
(/ numerator denominator)))
(defrule expt-part
(seq (char-class #\s #\S #\e #\E #\d #\D #\l #\L)
signed-int)
#'intern-expt-part)
(defun intern-expt-part (value input start end)
(declare (ignore input start end))
(float (expt 10 (second value))
(ecase (character (first value))
((#\s #\S) 1.0s0)
((#\e #\E) 1.0e0)
((#\d #\D) 1.0d0)
((#\l #\L) 1.0l0))))
(defrule flonum
(choice (seq (optional signed-int)
(char-class #\.)
(one-or-more (char-class (#\0 #\9)))
(optional expt-part))
(seq signed-int
(char-class #\.)
(zero-or-more (char-class (#\0 #\9)))
(optional expt-part)))
#'intern-flonum)
(defun intern-flonum (value input start end)
(declare (ignore input start end))
(destructuring-bind
(int-part dot frac-part expt-part) value
(declare (ignore dot))
(let ((frac-part (coerce frac-part 'string)))
(* (+ (or int-part 0)
(/ (if (string= "" frac-part)
0
(parse-integer frac-part))
(expt 10 (length frac-part))))
(or expt-part 1.0e0)))))
(in-package #:cl-user)
(defpackage #:peg-combinators.syntax
(:use #:cl #:pegc)
(:nicknames #:pegc.syntax))
(in-package #:pegc.syntax)
#|
Expression <- Spacing Sequence (SLASH Sequence)*
Sequence <- Prefix*
Prefix <- (AND / NOT)? Suffix
Suffix <- Primary (QUESTION / STAR / PLUS)?
Primary <- LispForm
/ Identifier !LEFTARROW
/ OPEN Expression CLOSE
/ Literal / Class / DOT
# Lexical syntax
Identifier <- IdentStart IdentCont* Spacing
IdentStart <- [a-zA-Z_]
IdentCont <- IdentStart / [0-9]
LispForm <- [`] (![`] Char)* [`] Spacing
Literal <- [’] (![’] Char)* [’] Spacing
/ ["] (!["] Char)* ["] Spacing
Class <- ’[’ (!’]’ Range)* ’]’ Spacing
Range <- Char ’-’ Char / Char
Char <- ’\\’ [nrt`’"\[\]\\]
/ ’\\’ [0-2][0-7][0-7]
/ ’\\’ [0-7][0-7]?
/ !’\\’ .
LEFTARROW <- ’<-’ Spacing
SLASH <- ’/’ Spacing
AND <- ’&’ Spacing
NOT <- ’!’ Spacing
QUESTION <- ’?’ Spacing
STAR <- ’*’ Spacing
PLUS <- ’+’ Spacing
LPAREN <- ’(’ Spacing
RPAREN <- ’)’ Spacing
DOT <- ’.’ Spacing
Spacing <- (Space / Comment)*
Comment <- ’#’ (!EndOfLine .)* EndOfLine
Spaces <- ’ ’ / ’\t’ / EndOfLine
EndOfLine <- ’\r\n’ / ’\n’ / ’\r’
EndOfFile <- !.
|#
(defrule eof
(not-p any-char))
(defrule eol
(choice (literal #.(coerce '(#\Return #\Newline) 'string))
(char-class #\Newline #\Return)))
(defrule spaces
(choice (char-class #\Space #\Tab)
eol))
(defrule comment
(seq (char-class #\#)
(zero-or-more (seq (not-p eol) any-char))
eol))
(defrule spacing
(zero-or-more (choice spaces comment)))
(defrule dot
(seq (literal #\.) spacing)
(lambda (value input start end)
(declare (ignore value input start end))
'any-char))
(defrule lparen
(seq (literal #\() spacing))
(defrule rparen
(seq (literal #\)) spacing))
(defrule question
(seq (literal #\?) spacing)
(lambda (value input start end)
(declare (ignore value input start end))
#\?))
(defrule star
(seq (literal #\*) spacing)
(lambda (value input start end)
(declare (ignore value input start end))
#\*))
(defrule plus
(seq (literal #\+) spacing)
(lambda (value input start end)
(declare (ignore value input start end))
#\+))
(defrule and-literal
(seq (literal #\&) spacing)
(lambda (value input start end)
(declare (ignore value input start end))
#\&))
(defrule not-literal
(seq (literal #\!) spacing)
(lambda (value input start end)
(declare (ignore value input start end))
#\!))
(defrule slash
(seq (literal #\/) spacing))
(defrule left-arrow
(seq (literal "<-") spacing))
(defrule hexdigit
(char-class (#\0 #\9) (#\A #\F) (#\a #\f)))
(defrule peg-char
(choice (seq (char-class #\\)
(char-class #\n #\r #\t #\` #\' #\" #\[ #\] #\\))
(seq (char-class #\\)
(char-class #\u)
(one-or-more hexdigit))
(seq (not-p (char-class #\\))
any-char))
#'intern-peg-char)
(defun intern-peg-char (value input start end)
(declare (ignore input start end))
(if (eql #\\ (first value))
(case (second value)
(#\n #\Newline)
(#\r #\Return)
(#\t #\Tab)
(#\u (let ((code (parse-integer (coerce (third value) 'string)
:radix 16)))
(if (< code char-code-limit)
(code-char code)
(error "\\u~{~a~} is bigger than CHAR-CODE-LIMIT"
(third value)))))
(T (second value)))
(second value)))
(defrule peg-range
(choice (seq peg-char (char-class #\-) peg-char)
peg-char)
#'intern-peg-range)
(defun intern-peg-range (value input start end)
(declare (ignore input start end))
(if (characterp value)
value
(list (first value) (third value))))
(defrule peg-class
(seq (char-class #\[)
(zero-or-more (seq (not-p (char-class #\])) peg-range))
(char-class #\])
spacing)
#'intern-peg-class)
(defun intern-peg-class (value input start end)
(declare (ignore input start end))
`(char-class ,@(mapcar #'second
(second value))))
(defrule peg-literal
(choice (seq (char-class #\')
(zero-or-more (seq (not-p (char-class #\')) peg-char))
(char-class #\')
spacing)
(seq (char-class #\")
(zero-or-more (seq (not-p (char-class #\")) peg-char))
(char-class #\")
spacing))
#'intern-peg-literal)
(defun intern-peg-literal (value input start end)
(declare (ignore input start end))
`(literal ,(coerce (mapcar #'second (second value)) 'string)))
(defrule lisp-form
(seq (char-class #\`)
(zero-or-more (seq (not-p (char-class #\`)) peg-char))
(char-class #\`)
spacing)
#'intern-lisp-form)
(defun intern-lisp-form (value input start end)
(declare (ignore value))
(with-input-from-string (in (subseq input (1+ start) (1- end)))
(loop :with eof = (gensym)
:for form = (read in nil eof)
:until (eq form eof) :collect form :into forms
:finally (return (if (endp forms)
'(seq)
`(progn ,@forms))))))
(defrule ident-start
(char-class (#\a #\z) (#\A #\Z) #\-))
(defrule ident-cont
(choice ident-start (char-class (#\0 #\9))))
(defrule identifier
(seq ident-start (zero-or-more ident-cont) spacing)
#'intern-identifier)
(defun intern-identifier (value input start end)
(declare (ignore input start end))
(intern (format nil "~a~{~a~}" (first value) (second value))))
(defrule expr-in-parens
(seq lparen #'expression rparen)
(lambda (value input start end)
(declare (ignore input start end))
(second value)))
(defrule id-not-defn
(seq identifier (not-p left-arrow))
(lambda (value input start end)
(declare (ignore input start end))
(first value)))
(defrule primary
(choice lisp-form
id-not-defn
expr-in-parens
peg-literal
peg-class
dot))
(defrule suffix
(seq primary (optional (choice question star plus)))
#'intern-suffix)
(defun intern-suffix (value input start end)
(declare (ignore input start end))
(case (second value)
(#\? `(optional ,(first value)))
(#\* `(zero-or-more ,(first value)))
(#\+ `(one-or-more ,(first value)))
(T (first value))))
(defrule prefix
(seq (optional (choice and-literal not-literal)) suffix)
#'intern-prefix)
(defun intern-prefix (value input start end)
(declare (ignore input start end))
(case (first value)
(#\& `(and-p ,(second value)))
(#\! `(not-p ,(second value)))
(T (second value))))
(defrule expr-seq
(zero-or-more prefix)
(lambda (value input start end)
(declare (ignore input start end))
(if (= 1 (length value))
(first value)
`(seq ,@value))))
(defrule expression
(seq expr-seq (zero-or-more (seq slash expr-seq)))
(lambda (value input start end)
(declare (ignore input start end))
(if (endp (second value))
(first value)
`(choice ,(first value)
,@(mapcar #'second (second value))))))
(defrule rule
(seq spacing expression eof)
(lambda (value input start end)
(declare (ignore input start end))
(second value)))
(defun read-lbracket (stream char)
(declare (ignore char))
(let* ((string (with-output-to-string (out)
(loop :with level = 1
:for c = (read-char stream)
:do (case c
(#\[ (incf level) (write-char c out))
(#\] (unless (zerop (decf level))
(write-char c out)))
(T (write-char c out)))
:while (> level 0))))
(result (funcall rule string)))
(if (parsing-error-p result)
(error "Parsing error at ~a. Position ~a after `[': ~a"
stream
(result-position result)
(parsing-error-message result))
(let ((input (gensym "INPUT"))
(start (gensym "START"))
(end (gensym "END")))
`(lambda (,input &key ((:start ,start) 0) ((:end ,end) nil))
(funcall ,(result-value result)
,input
:start ,start
:end ,end))))))
(defun read-rbracket (stream char)
(declare (ignore stream char))
(error "Unmatched close bracket."))
(defvar *pegc-readtables* '())
(defun %enable-pegc-syntax ()
(push *readtable* *pegc-readtables*)
(setf *readtable* (copy-readtable))
(set-macro-character #\[ #'read-lbracket)
(set-macro-character #\] #'read-rbracket)
(values))
(defun %disable-pegc-syntax ()
(setf *readtable* (if (null *pegc-readtables*)
(copy-readtable nil)
(pop *pegc-readtables*)))
(values))
(defmacro pegc:enable-pegc-syntax ()
`(eval-when (:compile-toplevel :load-toplevel :execute)
(%enable-pegc-syntax)))
(defmacro pegc:disable-pegc-syntax ()
`(eval-when (:compile-toplevel :load-toplevel :execute)
(%disable-pegc-syntax)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment