Canonical S-expression parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(define (read-csexp) | |
(letrec | |
((left-paren (char->integer #\()) | |
(right-paren (char->integer #\))) | |
(zero (char->integer #\0)) | |
(nine (char->integer #\9)) | |
(colon (char->integer #\:)) | |
(in (current-input-port)) | |
(read-atom | |
(λ (byte digits) | |
(cond | |
;; Read bytes into a list until the colon is reached. | |
((and (>= byte zero) | |
(<= byte nine)) | |
(read-atom (read-byte in) (cons byte digits))) | |
;; Convert the byte list into a number and read as many bytes. | |
((= byte colon) | |
(read-bytes | |
(string->number (list->string (reverse (map integer->char digits)))) | |
in)) | |
(else | |
(error 'read-csexp "unexpected char in length string: 0x~x" byte))))) | |
(read-step | |
(λ (depth atoms) | |
(let ((byte (read-byte in))) | |
(cond | |
;; New list. If the depth is zero, we are on top level and this list | |
;; is going to be our only element, so we can't read beyond its | |
;; contents. Otherwise, we can continue reading. | |
((= byte left-paren) | |
(let ((inner-list (read-step (add1 depth) '()))) | |
(if (zero? depth) | |
inner-list | |
(read-step depth (cons inner-list atoms))))) | |
;; This list has been read. Return the contents in a list. | |
;; Ignore the lone right paren that might appear at top level. | |
((and (= byte right-paren) | |
(not (zero? depth))) | |
(reverse atoms)) | |
;; An atom. Return directly if at top level. | |
((and (>= byte zero) | |
(<= byte nine)) | |
(let ((atom (read-atom byte '()))) | |
(if (zero? depth) | |
atom | |
(read-step depth (cons atom atoms))))) | |
(else | |
(error 'read-csexp | |
"unexpected input: 0x~x at depth ~s" byte depth))))))) | |
(read-step 0 '()))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment