Skip to content

Instantly share code, notes, and snippets.

@lispm
Last active April 20, 2022 23:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lispm/466d560a4e0607b771d1096da2a30897 to your computer and use it in GitHub Desktop.
Save lispm/466d560a4e0607b771d1096da2a30897 to your computer and use it in GitHub Desktop.
;;; -*- Syntax: ANSI-Common-Lisp; Package: (LEXICAL-ANALYZER :USE CL) -*-
;; From: https://rosettacode.org/wiki/Compiler/lexical_analyzer#Common_Lisp
;; minor changes by Rainer Joswig, joswig@lisp.de, 2022
#+genera
(cl:require "GRAY-STREAMS")
(cl:defpackage #:lexical-analyzer
(:use #:cl)
#+(or lispworks genera clisp ecl sbcl)
(:import-from
#+lispworks "STREAM"
#+genera "GRAY-STREAMS"
#+clisp "GRAY"
#+ecl "GRAY"
#+sbcl "SB-GRAY"
"FUNDAMENTAL-CHARACTER-INPUT-STREAM"
"STREAM-READ-CHAR"
"STREAM-UNREAD-CHAR")
(:export #:main))
(cl:in-package #:lexical-analyzer)
(eval-when (:execute :load-toplevel :compile-toplevel)
(defparameter +lex-symbols-package+ (or (find-package :lex-symbols)
(make-package :lex-symbols :use nil))))
(defclass counting-character-input-stream (fundamental-character-input-stream)
((stream :type stream :initarg :stream :reader stream-of)
(line :type fixnum :initform 1 :accessor line-of)
(column :type fixnum :initform 0 :accessor column-of)
(prev-column :type (or null fixnum) :initform nil :accessor prev-column-of))
(:documentation "Character input stream that counts lines and columns."))
(defmethod stream-read-char ((stream counting-character-input-stream))
(let ((ch (read-char (stream-of stream) nil :eof t)))
(case ch
(#\Newline
(incf (line-of stream))
(setf (prev-column-of stream) (column-of stream)
(column-of stream) 0))
(:eof nil)
(t
(incf (column-of stream))))
ch))
(defmethod stream-unread-char ((stream counting-character-input-stream) char)
(unread-char char (stream-of stream))
(case char
(#\Newline
(decf (line-of stream))
(setf (column-of stream) (prev-column-of stream)))
(t
(decf (column-of stream)))))
(defstruct token
(name nil :type symbol)
(value nil :type t)
(line nil :type fixnum)
(column nil :type fixnum))
(defun lexer-error (format-control &rest args)
(apply #'error format-control args))
(defun handle-divide-or-comment (stream char)
(declare (ignore char))
(case (peek-char nil stream t nil t)
(#\* (loop with may-end = nil
initially (read-char stream t nil t)
for ch = (read-char stream t nil t)
until (and may-end (char= ch #\/))
do (setf may-end (char= ch #\*))
finally (return (read stream t nil t))))
(t (make-token :name :|Op-divide| :line (line-of stream) :column (column-of stream)))))
(defun make-constant-handler (token-name)
(lambda (stream char)
(declare (ignore char))
(make-token :name token-name :line (line-of stream) :column (column-of stream))))
(defun make-this-or-that-handler (expect then &optional else)
(lambda (stream char)
(declare (ignore char))
(let ((line (line-of stream))
(column (column-of stream))
(next (peek-char nil stream nil nil t)))
(cond ((and expect (char= next expect))
(read-char stream nil nil t)
(make-token :name then :line line :column column))
(else
(make-token :name else :line line :column column))
(t
(lexer-error "Unrecognized character '~A'" next))))))
(defun identifier? (symbol)
(and (symbolp symbol)
(not (keywordp symbol))
(let ((name (symbol-name symbol)))
(and (find (char name 0) "_abcdefghijklmnopqrstuvwxyz" :test #'char-equal)
(or (< (length name) 2)
(not (find-if-not (lambda (ch)
(find ch "_abcdefghijklmnopqrstuvwxyz0123456789"
:test #'char-equal))
name :start 1)))))))
(defun id->keyword (id line column)
(case id
(lex-symbols::|if| (make-token :name :|Keyword_if| :line line :column column))
(lex-symbols::|else| (make-token :name :|Keyword_else| :line line :column column))
(lex-symbols::|while| (make-token :name :|Keyword_while| :line line :column column))
(lex-symbols::|print| (make-token :name :|Keyword_print| :line line :column column))
(lex-symbols::|putc| (make-token :name :|Keyword_putc| :line line :column column))
(t nil)))
(defun handle-identifier (stream char)
(let ((*readtable* (copy-readtable)))
(set-syntax-from-char char #\z)
(let ((line (line-of stream))
(column (column-of stream)))
(unread-char char stream)
(let ((obj (read stream t nil t)))
(if (identifier? obj)
(or (id->keyword obj line column)
(make-token :name :|Identifier| :value obj :line line :column column))
(lexer-error "Invalid identifier name: ~A" obj))))))
(defun handle-integer (stream char)
(let ((*readtable* (copy-readtable nil)))
; (set-syntax-from-char char #\1)
(let ((line (line-of stream))
(column (column-of stream)))
(unread-char char stream)
(let ((obj (read stream t nil t)))
(if (integerp obj)
(make-token :name :|Integer| :value obj :line line :column column)
(lexer-error "Invalid integer: ~A" obj))))))
(defun handle-char-literal (stream char)
(declare (ignore char))
(let* ((line (line-of stream))
(column (column-of stream))
(ch (read-char stream t nil t))
(parsed (case ch
(#\' (lexer-error "Empty character constant"))
(#\Newline (lexer-error "New line in character literal"))
(#\\ (let ((next-ch (read-char stream t nil t)))
(case next-ch
(#\n #\Newline)
(#\\ #\\)
(t (lexer-error "Unknown escape sequence: \\~A" next-ch)))))
(t ch))))
(if (char= #\' (read-char stream t nil t))
(make-token :name :|Integer| :value (char-code parsed) :line line :column column)
(lexer-error "Only one character is allowed in character literal"))))
(defun handle-string (stream char)
(declare (ignore char))
(loop with result = (make-array 0 :element-type 'character :adjustable t :fill-pointer t)
with line = (line-of stream)
with column = (column-of stream)
for ch = (read-char stream t nil t)
until (char= ch #\")
do (setf ch (case ch
(#\Newline (lexer-error "New line in string"))
(#\\ (let ((next-ch (read-char stream t nil t)))
(case next-ch
(#\n #\Newline)
(#\\ #\\)
(t (lexer-error "Unknown escape sequence: \\~A" next-ch)))))
(t ch)))
(vector-push-extend ch result)
finally (return (make-token :name :|String| :value result :line line :column column))))
(defun make-lexer-readtable ()
(let ((*readtable* (copy-readtable nil)))
(setf (readtable-case *readtable*) :preserve)
(set-syntax-from-char #\\ #\z)
(set-syntax-from-char #\# #\z)
(set-syntax-from-char #\` #\z)
;; operators
(set-macro-character #\* (make-constant-handler :|Op_multiply|))
(set-macro-character #\/ #'handle-divide-or-comment)
(set-macro-character #\% (make-constant-handler :|Op_mod|))
(set-macro-character #\+ (make-constant-handler :|Op_add|))
(set-macro-character #\- (make-constant-handler :|Op_subtract|))
(set-macro-character #\< (make-this-or-that-handler #\= :|Op_lessequal| :|Op_less|))
(set-macro-character #\> (make-this-or-that-handler #\= :|Op_greaterequal| :|Op_greater|))
(set-macro-character #\= (make-this-or-that-handler #\= :|Op_equal| :|Op_assign|))
(set-macro-character #\! (make-this-or-that-handler #\= :|Op_notequal| :|Op_not|))
(set-macro-character #\& (make-this-or-that-handler #\& :|Op_and|))
(set-macro-character #\| (make-this-or-that-handler #\| :|Op_or|))
;; symbols
(set-macro-character #\( (make-constant-handler :|LeftParen|))
(set-macro-character #\) (make-constant-handler :|RightParen|))
(set-macro-character #\{ (make-constant-handler :|LeftBrace|))
(set-macro-character #\} (make-constant-handler :|RightBrace|))
(set-macro-character #\; (make-constant-handler :|Semicolon|))
(set-macro-character #\, (make-constant-handler :|Comma|))
;; identifiers & keywords
(set-macro-character #\_ #'handle-identifier t)
(loop for ch across "abcdefghijklmnopqrstuvwxyz"
do (set-macro-character ch #'handle-identifier t))
(loop for ch across "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
do (set-macro-character ch #'handle-identifier t))
;; integers
(loop for ch across "0123456789"
do (set-macro-character ch #'handle-integer t))
(set-macro-character #\' #'handle-char-literal)
;; strings
(set-macro-character #\" #'handle-string)
*readtable*))
(defun lex (stream)
(let ((*readtable* (make-lexer-readtable))
(*package* +lex-symbols-package+))
(fresh-line)
(with-open-stream (counting-stream (make-instance 'counting-character-input-stream :stream stream))
(loop with eof = (gensym)
for token = (read counting-stream nil eof)
until (eq token eof)
do (format t (case (token-name token)
(:|Identifier|
"~5D ~5D ~15A~@[ ~A~]~%")
(otherwise
"~5D ~5D ~15A~@[ ~S~]~%"))
(token-line token) (token-column token) (token-name token)
(token-value token)))
(format t "~5D ~5D ~15A~%"
(line-of counting-stream)
(1+ (column-of counting-stream))
:|End_of_input|))))
#||
(defun main ()
(lex *standard-input*))
(defun lexical-analyzer::example ()
(with-open-file (s #-genera "/Users/joswig/Lisp/lexical-analyzer-test.text"
#+genera "RJMACBOOKPRO14:/Users/joswig/Lisp/lexical-analyzer-test.text")
(lexical-analyzer::lex s)))
||#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment