;; Last active October 14, 2022
;; Canonical S-expression parser
;; read-csexp : -> (or/c bytes? list?)
;;
;; Reads exactly one canonical S-expression from (current-input-port) and
;; returns it.  An atom has the wire form <decimal-length>:<raw-bytes> and is
;; returned as a byte string; a parenthesised list is returned as a (possibly
;; nested) Racket list of such values.  Nothing past the end of that single
;; top-level element is consumed, so the procedure can be called repeatedly
;; on the same port.
;;
;; Raises an error tagged 'read-csexp when:
;;   - a byte that cannot start an element is met (this includes a lone
;;     right paren at top level),
;;   - a non-digit, non-colon byte appears inside a length prefix,
;;   - the input ends before the element is complete.
(define (read-csexp)
  (letrec ((left-paren (char->integer #\())
           (right-paren (char->integer #\)))
           (zero (char->integer #\0))
           (nine (char->integer #\9))
           (colon (char->integer #\:))
           (in (current-input-port))
           ;; read-atom : byte (listof byte) -> bytes
           ;; `byte` is the most recently read byte of the length prefix and
           ;; `digits` holds the digit bytes seen before it, newest first.
           (read-atom
            (λ (byte digits)
              (cond
                ;; read-byte yields eof at end of input; catch it before the
                ;; numeric comparisons below, which only accept numbers.
                ((eof-object? byte)
                 (error 'read-csexp
                        "unexpected end of input in length string"))
                ;; Read bytes into a list until the colon is reached.
                ((and (>= byte zero)
                      (<= byte nine))
                 (read-atom (read-byte in) (cons byte digits)))
                ;; Convert the byte list into a number and read as many bytes.
                ((= byte colon)
                 (let* ((len (string->number
                              (list->string
                               (reverse (map integer->char digits)))))
                        (payload (read-bytes len in)))
                   ;; read-bytes returns eof or a short byte string when the
                   ;; port runs dry; never hand back a truncated atom.
                   (if (or (eof-object? payload)
                           (< (bytes-length payload) len))
                       (error 'read-csexp
                              "unexpected end of input in atom of length ~s"
                              len)
                       payload)))
                (else
                 (error 'read-csexp
                        "unexpected char in length string: 0x~x" byte)))))
           ;; read-step : natural list -> (or/c bytes? list?)
           ;; `depth` is the number of currently open lists and `atoms` holds
           ;; the elements of the innermost open list, newest first.
           (read-step
            (λ (depth atoms)
              (let ((byte (read-byte in)))
                (cond
                  ((eof-object? byte)
                   (error 'read-csexp
                          "unexpected end of input at depth ~s" depth))
                  ;; New list.  If the depth is zero, we are on top level and
                  ;; this list is going to be our only element, so we can't
                  ;; read beyond its contents.  Otherwise, we can continue
                  ;; reading.
                  ((= byte left-paren)
                   (let ((inner-list (read-step (add1 depth) '())))
                     (if (zero? depth)
                         inner-list
                         (read-step depth (cons inner-list atoms)))))
                  ;; This list has been read.  Return the contents in a list.
                  ;; A lone right paren at top level does not match this
                  ;; clause and is rejected by the else branch.
                  ((and (= byte right-paren)
                        (not (zero? depth)))
                   (reverse atoms))
                  ;; An atom.  Return directly if at top level.
                  ((and (>= byte zero)
                        (<= byte nine))
                   (let ((atom (read-atom byte '())))
                     (if (zero? depth)
                         atom
                         (read-step depth (cons atom atoms)))))
                  (else
                   (error 'read-csexp
                          "unexpected input: 0x~x at depth ~s" byte depth)))))))
    (read-step 0 '())))
