Last active
January 26, 2017 08:13
-
-
Save takoeight0821/4a8f3ef3a4fe7626d96baf18dbbe8c9a to your computer and use it in GitHub Desktop.
俺言語
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ql:quickload '(:trivia.level2 :serapeum :trivia.ppcre :alexandria) :silent t) | |
(in-package :cl-user) | |
(defpackage :malgo | |
(:use :cl :trivia.level2 :trivia.ppcre) | |
(:export :parse)) | |
(in-package :malgo) | |
(alexandria:define-constant +number-re+ "^(\\+|\\-|)([0-9]+(?:\\.[0-9]+|))" :test #'string=) | |
(alexandria:define-constant +number-without-sign-re+ "^([0-9]+(?:\\.[0-9]+|))" :test #'string=) | |
(alexandria:define-constant +float-re+ "^([0-9]+\\.[0-9]+)" :test #'string=) | |
(alexandria:define-constant +int-re+ "^([0-9]+)" :test #'string=) | |
(alexandria:define-constant +ident-re+ "^([a-zA-Z_][a-zA-Z_0-9]*)" :test #'string=) | |
(alexandria:define-constant +op-re+ "^(=|,|<=|>=|<|>|\\+|\\-|\\*|{|}|/|\\(|\\))" :test #'string=) | |
(alexandria:define-constant +quote-re+ "^\"" :test #'string=) | |
(defun %tokenize (src tokens) | |
(if (= 0 (length src)) | |
(nreverse tokens) | |
(match src | |
((ppcre "^\\s+") | |
(%tokenize (string-trim '(#\Space #\Tab #\Newline) src) | |
tokens)) | |
((ppcre #.+quote-re+) | |
(multiple-value-bind (str len) (read-from-string src) | |
(%tokenize (subseq src len) | |
(cons (list :string str) tokens)))) | |
((ppcre #.+float-re+ matched) | |
(%tokenize (subseq src (length matched)) | |
(cons (list :float (serapeum:parse-float matched)) tokens))) | |
((ppcre #.+int-re+ matched) | |
(%tokenize (subseq src (length matched)) | |
(cons (list :int (serapeum:parse-number matched)) tokens))) | |
((ppcre "^let") | |
(%tokenize (subseq src 3) | |
(cons 'let tokens))) | |
((ppcre "^fn") | |
(%tokenize (subseq src 2) | |
(cons 'fn tokens))) | |
((ppcre #.+ident-re+ matched) | |
(%tokenize (subseq src (length matched)) | |
(cons matched tokens))) | |
((ppcre #.+op-re+ matched) | |
(let ((rest (subseq src (length matched)))) | |
(if (and (member matched (list "+" "-") :test #'string=) | |
(stringp (car tokens)) | |
(not (ppcre:scan +ident-re+ (car tokens))) | |
(ppcre:scan +number-without-sign-re+ rest)) | |
(%tokenize (subseq rest (length (ppcre:scan-to-strings +number-without-sign-re+ rest))) | |
(cons | |
(let ((number-str | |
(concatenate 'string | |
matched | |
(ppcre:scan-to-strings | |
+number-without-sign-re+ rest)))) | |
(if (ppcre:scan +float-re+ rest) | |
(list :float (serapeum:parse-float number-str)) | |
(list :int (serapeum:parse-number number-str)))) | |
tokens)) | |
(%tokenize rest (cons matched tokens))))) | |
((ppcre "^;") | |
(%tokenize (subseq src 1) | |
(cons ";" tokens))) | |
(_ (error "Tokenize error: ~S ~S." src tokens))))) | |
(defun tokenize (src) | |
(%tokenize src nil)) | |
(defclass <lexer> () | |
((token :accessor current-token :initform nil) | |
(rest-tokens :accessor rest-tokens :initform nil) | |
(tokens :accessor tokens :initarg :tokens))) | |
(defmethod initialize-instance :after ((lexer <lexer>) &rest initargs) | |
(declare (ignore initargs)) | |
(setf (slot-value lexer 'token) (first (slot-value lexer 'tokens)) | |
(slot-value lexer 'rest-tokens) (rest (slot-value lexer 'tokens tokens))) | |
lexer) | |
(defmethod print-object ((o <lexer>) s) | |
(format s "#<<LEXER> ~S ~S>" (slot-value o 'token) (slot-value o 'rest-tokens))) | |
(defgeneric next-token (lexer)) | |
(defmethod next-token ((lexer <lexer>)) | |
(if (finishedp lexer) | |
nil | |
(let ((result (slot-value lexer 'token))) | |
(if (slot-value lexer 'rest-tokens) | |
(setf (slot-value lexer 'token) (first (slot-value lexer 'rest-tokens)) | |
(slot-value lexer 'rest-tokens) (rest (slot-value lexer 'rest-tokens)) | |
(slot-value lexer 'tokens) (rest (slot-value lexer 'tokens))) | |
(setf (slot-value lexer 'token) nil)) | |
result))) | |
(defgeneric finishedp (object)) | |
(defmethod finishedp ((lexer <lexer>)) | |
(and (null (slot-value lexer 'token)) | |
(null (slot-value lexer 'rest-tokens)))) | |
(defgeneric reset (object objects)) | |
(defmethod reset ((lexer <lexer>) tokens) | |
(if (not (listp tokens)) | |
(setf tokens (list tokens))) | |
(setf (slot-value lexer 'tokens) tokens | |
(slot-value lexer 'token) (car tokens) | |
(slot-value lexer 'rest-tokens) (cdr tokens)) | |
lexer) | |
(defun do-if (pred func object) | |
(if (funcall pred object) | |
(funcall func object) | |
object)) | |
(defun maybe-cons (a b) | |
(if (and (listp a) (null b)) | |
a | |
(cons a b))) | |
(defun maybe-parse (parser object) | |
(if (consp object) | |
(funcall parser (make-instance '<lexer> | |
:tokens object) nil) | |
object)) | |
(defun singlep (object) | |
(and (consp object) | |
(null (cdr object)))) | |
(defun maybe-car (cons?) | |
(if (singlep cons?) | |
(car cons?) | |
cons?)) | |
(defun maybe-nreverse (list?) | |
(if (listp list?) | |
(nreverse list?) | |
list?)) | |
;;TODO: パターンマッチで書き直してどちらが速いか計測 | |
(defun %parse-parens (lexer parsed) | |
(if (finishedp lexer) | |
(do-if #'listp #'nreverse parsed) | |
(cond ((equal ")" (current-token lexer)) | |
(next-token lexer) | |
(reverse parsed)) | |
((equal "(" (current-token lexer)) | |
(next-token lexer) | |
(%parse-parens lexer (cons (%parse-parens lexer nil) parsed))) | |
(t (let ((x (current-token lexer))) | |
(next-token lexer) | |
(%parse-parens lexer (cons x parsed))))))) | |
(defun parse-parens (lexer) | |
(reset lexer (%parse-parens lexer nil))) | |
(defun %parse-multiply (lexer parsed) | |
(if (finishedp lexer) | |
#+comment(maybe-car) (do-if #'listp #'nreverse parsed) | |
(match (tokens lexer) | |
((list* l "*" r _) | |
(dotimes (i 3) (next-token lexer)) | |
(%parse-multiply lexer (cons (list '* | |
(maybe-parse #'%parse-multiply l) | |
(maybe-parse #'%parse-multiply r) | |
) parsed))) | |
((list* l "/" r _) | |
(dotimes (i 3) (next-token lexer)) | |
(%parse-multiply lexer (cons (list '/ | |
(maybe-parse #'%parse-multiply l) | |
(maybe-parse #'%parse-multiply r) | |
) parsed))) | |
((list* "*" r _) | |
(dotimes (i 2) (next-token lexer)) | |
(%parse-multiply lexer (cons (list '* | |
(car parsed) | |
(maybe-parse #'%parse-multiply r) | |
) (cdr parsed)))) | |
((list* "/" r _) | |
(dotimes (i 2) (next-token lexer)) | |
(%parse-multiply lexer (cons (list '/ | |
(car parsed) | |
(maybe-parse #'%parse-multiply r) | |
) (cdr parsed)))) | |
((cons (guard x (consp x)) _) | |
(next-token lexer) | |
(%parse-multiply lexer (cons (%parse-multiply (make-instance '<lexer> :tokens x) nil) | |
parsed))) | |
(_ (let ((x (current-token lexer))) | |
(next-token lexer) | |
(%parse-multiply lexer (cons x parsed))))))) | |
(defun parse-multiply (lexer) | |
(reset lexer (%parse-multiply lexer nil))) | |
(defun %parse-add (lexer parsed) | |
(if (finishedp lexer) | |
#+comment(maybe-car) (do-if #'listp #'nreverse parsed) | |
(match (tokens lexer) | |
((list* l "+" r _) | |
(dotimes (i 3) (next-token lexer)) | |
(%parse-add lexer (cons (list '+ | |
(maybe-parse #'%parse-add l) | |
(maybe-parse #'%parse-add r) | |
) parsed))) | |
((list* l "-" r _) | |
(dotimes (i 3) (next-token lexer)) | |
(%parse-add lexer (cons (list '- | |
(maybe-parse #'%parse-add l) | |
(maybe-parse #'%parse-add r) | |
) parsed))) | |
((list* "+" r _) | |
(dotimes (i 2) (next-token lexer)) | |
(%parse-add lexer (cons (list '+ | |
(car parsed) | |
(maybe-parse #'%parse-add r) | |
) (cdr parsed)))) | |
((list* "-" r _) | |
(dotimes (i 2) (next-token lexer)) | |
(%parse-add lexer (cons (list '- | |
(car parsed) | |
(maybe-parse #'%parse-add r) | |
) (cdr parsed)))) | |
((cons (guard x (consp x)) _) | |
(next-token lexer) | |
(%parse-add lexer (cons (%parse-add (make-instance '<lexer> :tokens x) nil) | |
parsed))) | |
(_ (let ((x (current-token lexer))) | |
(next-token lexer) | |
(%parse-add lexer (cons x parsed))))))) | |
(defun parse-add (lexer) | |
(reset lexer (%parse-add lexer nil))) | |
(defun %parse-funcall (lexer parsed) | |
(if (finishedp lexer) | |
(maybe-nreverse parsed) | |
(match (tokens lexer) | |
((list* (ppcre #.+ident-re+ func) (guard args (and (not (equal (car parsed) :fn)) (listp args))) _) | |
(dotimes (i 2) (next-token lexer)) | |
(%parse-funcall lexer (cons (list :funcall | |
func | |
(maybe-parse #'%parse-funcall args)) | |
parsed))) | |
((cons (guard x (consp x)) _) | |
(next-token lexer) | |
(%parse-funcall lexer (cons (maybe-parse #'%parse-funcall x) | |
parsed))) | |
(_ (let ((x (current-token lexer))) | |
(next-token lexer) | |
(%parse-funcall lexer (cons x parsed))))))) | |
(defun parse-funcall (lexer) | |
(reset lexer (%parse-funcall lexer nil))) | |
(defun parsedp (list) | |
(not (symbolp (car list)))) | |
(defun %parse-let (lexer parsed) | |
(if (finishedp lexer) | |
(nreverse parsed) | |
(match (tokens lexer) | |
((list* 'let var-name "=" form _) | |
(dotimes (i 4) (next-token lexer)) | |
(%parse-let lexer (cons (list :let | |
var-name | |
(maybe-parse #'%parse-let form)) | |
parsed))) | |
((cons (guard x (consp x)) _) | |
(next-token lexer) | |
(%parse-let lexer (cons (%parse-let (make-instance '<lexer> :tokens x) nil) | |
parsed))) | |
(_ (let ((x (current-token lexer))) | |
(next-token lexer) | |
(%parse-let lexer (cons x parsed))))))) | |
(defun parse-let (lexer) | |
(reset lexer (%parse-let lexer nil))) | |
(defun %parse-block (lexer parsed) | |
(if (finishedp lexer) | |
(do-if #'listp #'nreverse parsed) | |
(cond ((equal "}" (current-token lexer)) | |
(next-token lexer) | |
(cons :block (reverse parsed))) | |
((equal "{" (current-token lexer)) | |
(next-token lexer) | |
(%parse-block lexer (cons (%parse-block lexer nil) parsed))) | |
(t (let ((x (current-token lexer))) | |
(next-token lexer) | |
(%parse-block lexer (cons x parsed))))))) | |
(defun parse-block (lexer) | |
(reset lexer (%parse-block lexer nil))) | |
(defun %parse-fn (lexer parsed) | |
(if (finishedp lexer) | |
(maybe-nreverse parsed) | |
(match (tokens lexer) | |
((list* 'fn | |
fn-name | |
(guard params (listp params)) | |
(guard block (eq :block (car block))) _) | |
(dotimes (i 4) (next-token lexer)) | |
(%parse-fn lexer (cons (list :fn | |
fn-name | |
(remove "," params :test #'equal) | |
block) | |
parsed))) | |
((cons (guard x (consp x)) _) | |
(next-token lexer) | |
(%parse-fn lexer (cons (%parse-let (make-instance '<lexer> :tokens x) nil) | |
parsed))) | |
(_ (let ((x (current-token lexer))) | |
(next-token lexer) | |
(%parse-fn lexer (cons x parsed))))))) | |
(defun parse-fn (lexer) | |
(reset lexer (%parse-fn lexer nil))) | |
(defun %parse-literal (lexer parsed) | |
(if (finishedp lexer) | |
(maybe-nreverse parsed) | |
(match (tokens lexer) | |
((guard it (integerp it)) (next-token lexer) (cons (list :int it) parsed)) | |
((guard it (floatp it)) (next-token lexer) (cons (list :float it) parsed)) | |
(_ (let ((x (current-token lexer))) | |
(next-token lexer) | |
(%parse-literal lexer (cons x parsed))))))) | |
(defun parse-literal (lexer) | |
(reset lexer (%parse-literal lexer nil))) | |
(defun parse (src) | |
(let ((lexer (make-instance '<lexer> :tokens (tokenize src)))) | |
(parse-parens lexer) | |
(parse-block lexer) | |
(parse-fn lexer) | |
(parse-funcall lexer) | |
(parse-multiply lexer) | |
(parse-add lexer) | |
(parse-let lexer) | |
(remove ";" (tokens lexer) :test #'equal))) | |
(defclass <parser> () | |
((expr :accessor current-expr :initform nil) | |
(rest-exprs :accessor rest-exprs :initform nil) | |
(exprs :accessor exprs :initarg :exprs))) | |
(defmethod initialize-instance :after ((parser <parser>) &rest initargs) | |
(declare (ignore initargs)) | |
(setf (slot-value parser 'expr) (first (slot-value parser 'exprs)) | |
(slot-value parser 'rest-exprs) (rest (slot-value parser 'exprs exprs))) | |
parser) | |
(defmethod print-object ((o <parser>) s) | |
(format s "#<<PARSER> ~S ~S>" (slot-value o 'expr) (slot-value o 'rest-exprs))) | |
(defgeneric next-expr (parser)) | |
(defmethod next-expr ((parser <parser>)) | |
(if (finishedp parser) | |
nil | |
(let ((result (slot-value parser 'expr))) | |
(if (slot-value parser 'rest-exprs) | |
(setf (slot-value parser 'expr) (first (slot-value parser 'rest-exprs)) | |
(slot-value parser 'rest-exprs) (rest (slot-value parser 'rest-exprs)) | |
(slot-value parser 'exprs) (rest (slot-value parser 'exprs))) | |
(setf (slot-value parser 'expr) nil)) | |
result))) | |
(defmethod finishedp ((parser <parser>)) | |
(and (null (slot-value parser 'expr)) | |
(null (slot-value parser 'rest-exprs)))) | |
(defmethod reset ((parser <parser>) exprs) | |
(if (not (listp exprs)) | |
(setf exprs (list exprs))) | |
(setf (slot-value parser 'exprs) exprs | |
(slot-value parser 'expr) (car exprs) | |
(slot-value parser 'rest-exprs) (cdr exprs)) | |
parser) | |
(defclass <environment> () | |
((env-alist :accessor env-alist :initform nil))) | |
(defmethod print-object ((o <environment>) s) | |
(format s "#<<environment> ~S>" (slot-value o 'env-alist))) | |
(defun get-env (env name) | |
(cdr (assoc name (env-alist env) :test #'equal))) | |
(defun set-env (env name val) | |
(setf (env-alist env) | |
(cons (cons name val) (env-alist env)))) | |
(defun literalp (object) | |
(and (consp object) (member (car object) '(:float :int :string)))) | |
(defun maybe-interp (interpreter object env) | |
(if (or (stringp object) (consp object)) | |
(funcall interpreter | |
(make-instance '<parser> | |
:exprs (list object)) | |
env) | |
object)) | |
(defun %interp (parser env) | |
(if (finishedp parser) | |
(values parser env) | |
(match (current-expr parser) | |
((list '+ l r) (next-expr parser) | |
(+ (maybe-interp #'%interp l env) | |
(maybe-interp #'%interp r env))) | |
((list '- l r) (next-expr parser) | |
(- (maybe-interp #'%interp l env) | |
(maybe-interp #'%interp r env))) | |
((list '* l r) (next-expr parser) | |
(* (maybe-interp #'%interp l env) | |
(maybe-interp #'%interp r env))) | |
((list '/ l r) (next-expr parser) | |
(/ (maybe-interp #'%interp l env) | |
(maybe-interp #'%interp r env))) | |
((list :int i) (next-expr parser) i) | |
((list :float f) (next-expr parser) f) | |
((list :string s) (next-expr parser) s) | |
(:int (next-expr parser) (current-expr parser)) | |
(:float (next-expr parser) (current-expr parser)) | |
(:string (next-expr parser) (current-expr parser)) | |
((list :let var-name form) | |
(next-expr parser) | |
(set-env env var-name (maybe-interp #'%interp form env)) | |
(%interp parser env)) | |
((guard var (stringp var)) | |
(next-expr parser) | |
(get-env env var)) | |
((list :funcall "puts" (list str-form)) | |
(next-expr parser) | |
(format t "~A~%" (maybe-interp #'%interp str-form env)) | |
(%interp parser env)) | |
(_ (values 'finished parser env))))) | |
(defun test () | |
(let ((sample "let x = 1 + 2 let y = \"hoge\" puts(y) puts(x)")) | |
(parse sample) | |
(%interp (make-instance '<parser> :exprs (parse sample)) | |
(make-instance '<environment>)))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment