Skip to content

Instantly share code, notes, and snippets.

@glts
Last active August 29, 2015 14:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save glts/b382b72ef29165b7857a to your computer and use it in GitHub Desktop.
Save glts/b382b72ef29165b7857a to your computer and use it in GitHub Desktop.
Trampolining lexer for J sentences
;; Trampolining lexer for J sentences
;; Predicates
(def digits (set "0123456789_"))
(def alphabet (set "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"))
(def space? (partial contains? #{\space \tab}))
(def alpha-n? (partial contains? (disj alphabet \N)))
(def n? (partial = \N))
(def b? (partial = \B))
(def alnum? (partial contains? (into digits alphabet)))
(def alnum-b? (partial contains? (disj (into digits alphabet) \B)))
(def digit? (partial contains? digits))
(def digit'? (partial contains? (conj (into digits alphabet) \.)))
(def period? (partial = \.))
(def colon? (partial = \:))
(def dots? (partial contains? #{\. \:}))
(def quote? (partial = \'))
(defmacro emit [f cs lex token & xs]
`(~f ~cs (conj ~lex ~token) ~@xs))
(defmacro transit [f cs lex & xs]
`(~f ~cs ~lex ~@xs))
;; State functions
(declare
s-space
s-space'
s-other
s-alpha
s-n
s-nb
s-nz
s-digits
s-even-quotes
s-quote)
(defn s-space [[c & cs] lex & curr]
(cond
(space? c) (recur cs lex nil)
(alpha-n? c) (transit s-alpha cs lex (str c))
(n? c) (transit s-n cs lex (str c))
(digit? c) (transit s-digits cs lex (str c))
(quote? c) (transit s-quote cs lex (str c))
c (transit s-other cs lex (str c))
:else lex))
(defn s-space' [[c & cs] lex curr spaces]
(cond
(space? c) (recur cs lex (str curr) (str spaces c))
(alpha-n? c) (emit s-alpha cs lex [:digits curr] (str c))
(n? c) (emit s-n cs lex [:digits curr] (str c))
(digit? c) (transit s-digits cs lex (str curr spaces c))
(quote? c) (emit s-quote cs lex [:digits curr] (str c))
c (emit s-other cs lex [:digits curr] (str c))
:else (conj lex [:digits curr])))
(defn s-other [[c & cs] lex curr]
(cond
(dots? c) (recur cs lex (str curr c))
(space? c) (emit s-space cs lex [:other curr])
(alpha-n? c) (emit s-alpha cs lex [:other curr] (str c))
(n? c) (emit s-n cs lex [:other curr] (str c))
(digit? c) (emit s-digits cs lex [:other curr] (str c))
(quote? c) (emit s-quote cs lex [:other curr] (str c))
c (recur cs (conj lex [:other curr]) (str c))
:else (conj lex [:other curr])))
(defn s-alpha [[c & cs] lex curr]
(cond
(alnum? c) (recur cs lex (str curr c))
(space? c) (emit s-space cs lex [:alpha curr])
(dots? c) (transit s-other cs lex (str curr c))
(quote? c) (emit s-quote cs lex [:alpha curr] (str c))
c (emit s-other cs lex [:alpha curr] (str c))
:else (conj lex [:alpha curr])))
(defn s-n [[c & cs] lex curr]
(cond
(space? c) (emit s-space cs lex [:alpha curr])
(alnum-b? c) (transit s-alpha cs lex (str curr c))
(b? c) (transit s-nb cs lex (str curr c))
(dots? c) (transit s-other cs lex (str curr c))
(quote? c) (emit s-quote cs lex [:alpha curr] (str c))
c (emit s-other cs lex [:alpha curr] (str c))
:else (conj lex [:alpha curr])))
(defn s-nb [[c & cs] lex curr]
(cond
(space? c) (emit s-space cs lex [:alpha curr])
(alnum? c) (transit s-alpha cs lex (str curr c))
(period? c) (transit s-nz cs lex (str curr c))
(colon? c) (transit s-other cs lex (str curr c))
(quote? c) (emit s-quote cs lex [:alpha curr] (str c))
c (emit s-other cs lex [:alpha curr] (str c))
:else (conj lex [:alpha curr])))
(defn s-nz [[c & cs :as s] lex curr]
(if (dots? c)
(transit s-other cs lex (str curr c))
(conj lex [:comment (apply str curr s)])))
(defn s-digits [[c & cs] lex curr]
(cond
(digit'? c) (recur cs lex (str curr c))
(space? c) (transit s-space' cs lex (str curr) (str c))
(colon? c) (transit s-other cs lex (str curr c))
(quote? c) (emit s-quote cs lex [:digits curr] (str c))
c (emit s-other cs lex [:digits curr] (str c))
:else (conj lex [:digits curr])))
(defn s-quote [[c & cs] lex curr]
(cond
(quote? c) (transit s-even-quotes cs lex (str curr c))
c (recur cs lex (str curr c))
:else (conj lex [:error "open quote"])))
(defn s-even-quotes [[c & cs] lex curr]
(cond
(space? c) (emit s-space cs lex [:string curr])
(alpha-n? c) (emit s-alpha cs lex [:string curr] (str c))
(n? c) (emit s-n cs lex [:string curr] (str c))
(digit? c) (emit s-digits cs lex [:string curr] (str c))
(quote? c) (transit s-quote cs lex (str curr c))
c (emit s-other cs lex [:string curr] (str c))
:else (conj lex [:string curr])))
;; Lexer
(defn run-lexer [s]
{:pre [(re-matches #"[\p{Graph}\p{Blank}]*" s)]}
(trampoline s-space s []))
(run-lexer "6754e3 8b2.762 +./ 89.89 'what '' ief' blbab NB. what")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment