sogaiu/core.cljc

## core.cljc
(ns parcera.core
  (:require [instaparse.core :as instaparse]
            [instaparse.combinators-source :as combi]
            [instaparse.cfg :as cfg]
            [parcera.terminals :as terminal])
  #?(:cljs (:import goog.string.StringBuffer)))

; todo: implement advices from
; http://blog.reverberate.org/2013/09/ll-and-lr-in-context-why-parsing-tools.html
; https://www.loggly.com/blog/regexes-the-bad-better-best/
; https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/

; todo: use advices in https://medium.appbase.io/analyzing-20k-github-repositories-af76de21c3fc
; to check if the heuristics are accurate

; NOTE: Through my experiments I found out that Instaparse will gladly take the
; first match as long as the grammar is not ambiguous. Therefore I switched the
; unordered OR (|) with an ordered one (/). This of course implies an heuristic
; of knowing which grammar rules are expected to match more often. I use
; Clojure's core as a reference with the following code snippet
#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojure/master/src/clj/clojure/core.clj")]
    (time (sort-by second > (frequencies (filter keyword? (flatten (clojure core-content :optimize :memory)))))))
#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc")]
    (time (sort-by second > (frequencies (filter keyword? (flatten (clojure core-content :optimize :memory)))))))
; todo: performance of [,\s]*;.*|[,\s]+ for whitespace
(def grammar-rules
  "code: input*;

    <input>: ignore / form;

    <ignore>: whitespace / comment / discard;

    <form>: literal / collection / reader-macro;

    (* for parsing purposes we dont consider a Set a collection since it starts
       with # -> dispatch macro *)
    <collection>: list / vector / map;

    list: <'('> input* <')'>;

    vector: <'['> input* <']'>;

    map: <'{'> input* <'}'>;

    (* a literal is basically anything that is not a collection, macro or whitespace *)
    <literal>: ( keyword
               / macro-keyword
               / string
               / number
               / character
               / symbol
               );

    (* keyword: ; *)

    (* macro-keyword: ; *)

    (* string: ; *)

    (* number: ; *)

    (* character: ; *)

    (* symbol: ; *)

    <reader-macro>: ( unquote
                    / metadata
                    / backtick
                    / quote
                    / dispatch
                    / unquote-splicing
                    / deref
                    );

    metadata: ((metadata-entry | deprecated-metadata-entry) ignore*)+
              ( symbol
              / collection
              / set
              / namespaced-map
              / tag
              / fn
              / unquote
              / unquote-splicing
              / conditional
              / deref
              / quote
              / backtick
              / var-quote
              );

    metadata-entry: <'^'> ignore* ( conditional
                                  / map
                                  / symbol
                                  / string
                                  / keyword
                                  / macro-keyword );

    deprecated-metadata-entry: <'#^'> ignore* ( conditional
                                              / map
                                              / symbol
                                              / string
                                              / keyword
                                              / macro-keyword );

    backtick: <'`'> ignore* form;

    quote: <'\\''> ignore* form;

    unquote: <#'~'> ignore* form;

    unquote-splicing: <'~@'> ignore* form;

    deref: <'@'> ignore* form;

    <dispatch>: ( fn
                / regex
                / set
                / conditional
                / conditional-splicing
                / namespaced-map
                / var-quote
                / tag
                / symbolic
                / eval
                );

    fn: <'#'> list;

    (* XXX: how to 'hide' and use regexes using combi? *)
    regex: <'#'> #'\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"'

    set: <'#{'> input* <'}'>;

    namespaced-map: <'#'> ( keyword / macro-keyword / auto-resolve )
                          ignore*
                          map;

    auto-resolve: <'::'>;

    var-quote: <'#\\''> ignore* form;

    discard: <'#_'> ignore* form;

    tag: <'#'> symbol ignore* (literal / collection / tag);

    conditional: <'#?'> ignore* list;

    conditional-splicing: <'#?@'> ignore* list;

    symbolic: <'##'> ignore* symbol;

    eval: <'#='> ignore* (symbol / list);

    (* whitespace: ; *)

    (* comment: ; *)
")


(def grammar-terminals
  {:character     (combi/regexp terminal/character-pattern)
   :string        (combi/regexp terminal/string-pattern)
   :symbol        (combi/regexp terminal/symbol-pattern)
   :number        (combi/regexp terminal/number-pattern)
   :macro-keyword (combi/regexp terminal/macro-keyword-pattern)
   :keyword       (combi/regexp terminal/keyword-pattern)
   :whitespace    (combi/regexp terminal/whitespace-pattern)
   :comment       (combi/regexp terminal/comment-pattern)})


(def grammar (merge (cfg/ebnf grammar-rules) grammar-terminals))


(def clojure
  "Clojure (instaparse) parser. It can be used as:
  - (parcera/clojure input-string)
     -> returns an AST representation of input-string
  - (instaparse/parse parcera/clojure input-string)
     -> same as above but more explicit
  - (instaparse/parses parcera/clojure input-string)
   -> returns a sequence of possible AST representations in case of ambiguity
      in input-string

   For a description of all possible options, visit Instaparse's official
   documentation: https://github.com/Engelberg/instaparse#reference"
  (instaparse/parser grammar :start :code))


(defn- code*
  "internal function used to imperatively build up the code from the provided
   AST as Clojure's str would be too slow"
  [ast #?(:clj ^StringBuilder string-builder
          :cljs ^StringBuffer string-builder)]
  (case (first ast)
    :code
    (doseq [child (rest ast)]
      (code* child string-builder))

    :list
    (do (. string-builder (append "("))
        (doseq [child (rest ast)] (code* child string-builder))
        (. string-builder (append ")")))

    :vector
    (do (. string-builder (append "["))
        (doseq [child (rest ast)] (code* child string-builder))
        (. string-builder (append "]")))

    :namespaced-map
    (do (. string-builder (append "#"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :map
    (do (. string-builder (append "{"))
        (doseq [child (rest ast)] (code* child string-builder))
        (. string-builder (append "}")))

    :set
    (do (. string-builder (append "#{"))
        (doseq [child (rest ast)] (code* child string-builder))
        (. string-builder (append "}")))

    (:number :whitespace :comment :symbol :character :string
     :keyword :macro-keyword)
    (. string-builder (append (second ast)))

    :symbolic
    (do (. string-builder (append "##"))
        (. string-builder (append (second ast))))

    :regex
    (do (. string-builder (append "#"))
        (. string-builder (append (second ast))))

    :auto-resolve
    (. string-builder (append "::"))

    :metadata
    (do (doseq [child (rest (butlast ast))] (code* child string-builder))
        (code* (last ast) string-builder))

    :metadata-entry
    (do (. string-builder (append "^"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :deprecated-metadata-entry
    (do (. string-builder (append "#^"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :quote
    (do (. string-builder (append "'"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :var-quote
    (do (. string-builder (append "#'"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :discard
    (do (. string-builder (append "#_"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :tag
    (do (. string-builder (append "#"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :backtick
    (do (. string-builder (append "`"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :unquote
    (do (. string-builder (append "~"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :unquote-splicing
    (do (. string-builder (append "~@"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :conditional
    (do (. string-builder (append "#?"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :conditional-splicing
    (do (. string-builder (append "#?@"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :deref
    (do (. string-builder (append "@"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :fn
    (do (. string-builder (append "#"))
        (doseq [child (rest ast)] (code* child string-builder)))

    :eval
    (do (. string-builder (append "#="))
        (doseq [child (rest ast)] (code* child string-builder)))))


(defn code
  "Transforms your AST back into code

   ast: The nested sequence of [:keyword & content] which MUST follow the
        same structure as the result of `(parcera/clojure input-string)`

   Returns a string representation of the provided AST

   In general (= input (parcera/code (parcera/clojure input)))"
  [ast]
  (let [string-builder #?(:clj (new StringBuilder)
                          :cljs (new StringBuffer))]
    (code* ast string-builder)
    (. string-builder (toString))))

; Successful parse.
; Profile:  {:create-node 384, :push-full-listener 2, :push-stack 384,
;            :push-listener 382, :push-result 227, :push-message 227 }
; "Elapsed time: 47.25084 msecs"
#_(time (clojure (str '(ns parcera.core
                         (:require [instaparse.core :as instaparse]
                                   [clojure.data :as data]
                                   [clojure.string :as str])))
                 :trace true))

#_(instaparse/disable-tracing!)

(comment

  ;; 4400 ms!
  (let [start (System/currentTimeMillis)
        src (slurp (str (System/getenv "HOME")
                        "/src/clojure/src/clj/clojure/core.clj"))
        ast (try
              (clojure src)
              (catch Exception e
                (println e)
                nil))]
    (when ast
      (let [reconstructed (code ast)]
        (when-not (= reconstructed src)
          (spit "/tmp/failed-reconstruction.clj"
                reconstructed)
          (println "failed to reconstruct"))))
    (println (- (System/currentTimeMillis) start) "ms"))

  )

## terminals.cljc
(ns parcera.terminals
  "Clojure symbols, keywords, numbers and string/regex share quite a lot
  of matching logic. This namespace is aimed towards clearly identifying
  those pieces and share them among the different definitions to
  avoid recurring issues")

;; Clojure's reader is quite permissive so we follow the motto
;; "if it is not forbidden, it is allowed"
(def not-allowed "\\f\\n\\r\\t \\(\\)\\[\\]{}\"@~\\^;`\\\\,:#\\'\\/")
(def allowed-character (str "[^" not-allowed "]"))

(def keyword-head (str "("
                       allowed-character "|" "[#']"
                       ")"))
(def keyword-body (str "("
                       keyword-head "|" "[:\\/]"
                       ")"))
(def keyword-pattern (str ":"
                          "("
                          , "(" keyword-head keyword-body "*" ")"
                          , "|"
                          , "\\/"
                          ")"))
(def macro-keyword-pattern (str "::" keyword-head keyword-body "*"))


(def name-head (str "("
                    allowed-character "|" "[\\/]"
                    ")"))
(def name-body (str "("
                    name-head "|" "[:#\\']"
                    ")"))
(def symbol-pattern (str name-head name-body "*"))


(def double-suffix "(((\\.\\d*)?([eE][-+]?\\d+)?)M?)")
(def long-suffix "((0[xX]([\\dA-Fa-f]+)|0([0-7]+)|([1-9]\\d?)[rR]([\\d\\w]+)|0\\d+)?N?)")
(def ratio-suffix "(\\/(\\d+))")
;; XXX: order ok here?
(def number-pattern (str "[+-]?\\d+(" long-suffix "|" double-suffix "|" ratio-suffix ")(?![\\.\\/])")) ; todo: word boundary ?


; This is supposed to be the JavaScript friendly version of #'\P{M}\p{M}*+'
; mentioned here: https://www.regular-expressions.info/unicode.html
; It's cooked by this generator: http://kourge.net/projects/regexp-unicode-block
; ticking all 'Combining Diacritical Marks' boxes *))
(def unicode-char "([^\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF])")
(def named-char "(newline|return|space|tab|formfeed|backspace)")
(def unicode "(u[\\dA-Fa-f]{4})")
(def character-pattern (str "\\\\(" unicode-char "|" named-char "|" unicode ")(?!\\w+)"))


(def string-pattern "\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\"")
;;(def regex-pattern (str "#" string-pattern))


(def whitespace-pattern "[\f\n\r\t, ]+")

(def comment-pattern "(;|(#!))[^\r\n]*")
	(ns parcera.core
	(:require [instaparse.core :as instaparse]
	[instaparse.combinators-source :as combi]
	[instaparse.cfg :as cfg]
	[parcera.terminals :as terminal])
	#?(:cljs (:import goog.string.StringBuffer)))

	; todo: implement advices from
	; http://blog.reverberate.org/2013/09/ll-and-lr-in-context-why-parsing-tools.html
	; https://www.loggly.com/blog/regexes-the-bad-better-best/
	; https://www.loggly.com/blog/five-invaluable-techniques-to-improve-regex-performance/

	; todo: use advices in https://medium.appbase.io/analyzing-20k-github-repositories-af76de21c3fc
	; to check if the heuristics are accurate

	; NOTE: Through my experiments I found out that Instaparse will gladly take the
	; first match as long as the grammar is not ambiguous. Therefore I switched the
	; unordered OR (\|) with an ordered one (/). This of course implies an heuristic
	; of knowing which grammar rules are expected to match more often. I use
	; Clojure's core as a reference with the following code snippet
	#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojure/master/src/clj/clojure/core.clj")]
	(time (sort-by second > (frequencies (filter keyword? (flatten (clojure core-content :optimize :memory)))))))
	#_(let [core-content (slurp "https://raw.githubusercontent.com/clojure/clojurescript/master/src/main/clojure/cljs/core.cljc")]
	(time (sort-by second > (frequencies (filter keyword? (flatten (clojure core-content :optimize :memory)))))))
	; todo: performance of [,\s];.\|[,\s]+ for whitespace
	(def grammar-rules
	"code: input*;

	<input>: ignore / form;

	<ignore>: whitespace / comment / discard;

	<form>: literal / collection / reader-macro;

	(* for parsing purposes we dont consider a Set a collection since it starts
	with # -> dispatch macro *)
	<collection>: list / vector / map;

	list: <'('> input* <')'>;

	vector: <'['> input* <']'>;

	map: <'{'> input* <'}'>;

	(* a literal is basically anything that is not a collection, macro or whitespace *)
	<literal>: ( keyword
	/ macro-keyword
	/ string
	/ number
	/ character
	/ symbol
	);

	(* keyword: ; *)

	(* macro-keyword: ; *)

	(* string: ; *)

	(* number: ; *)

	(* character: ; *)

	(* symbol: ; *)

	<reader-macro>: ( unquote
	/ metadata
	/ backtick
	/ quote
	/ dispatch
	/ unquote-splicing
	/ deref
	);

	metadata: ((metadata-entry \| deprecated-metadata-entry) ignore*)+
	( symbol
	/ collection
	/ set
	/ namespaced-map
	/ tag
	/ fn
	/ unquote
	/ unquote-splicing
	/ conditional
	/ deref
	/ quote
	/ backtick
	/ var-quote
	);

	metadata-entry: <'^'> ignore* ( conditional
	/ map
	/ symbol
	/ string
	/ keyword
	/ macro-keyword );

	deprecated-metadata-entry: <'#^'> ignore* ( conditional
	/ map
	/ symbol
	/ string
	/ keyword
	/ macro-keyword );

	backtick: <'`'> ignore* form;

	quote: <'\\''> ignore* form;

	unquote: <#'~'> ignore* form;

	unquote-splicing: <'~@'> ignore* form;

	deref: <'@'> ignore* form;

	<dispatch>: ( fn
	/ regex
	/ set
	/ conditional
	/ conditional-splicing
	/ namespaced-map
	/ var-quote
	/ tag
	/ symbolic
	/ eval
	);

	fn: <'#'> list;

	(* XXX: how to 'hide' and use regexes using combi? *)
	regex: <'#'> #'\"[^\"\\\\](?:\\\\.[^\"\\\\])*\"'

	set: <'#{'> input* <'}'>;

	namespaced-map: <'#'> ( keyword / macro-keyword / auto-resolve )
	ignore*
	map;

	auto-resolve: <'::'>;

	var-quote: <'#\\''> ignore* form;

	discard: <'#_'> ignore* form;

	tag: <'#'> symbol ignore* (literal / collection / tag);

	conditional: <'#?'> ignore* list;

	conditional-splicing: <'#?@'> ignore* list;

	symbolic: <'##'> ignore* symbol;

	eval: <'#='> ignore* (symbol / list);

	(* whitespace: ; *)

	(* comment: ; *)
	")


	(def grammar-terminals
	{:character (combi/regexp terminal/character-pattern)
	:string (combi/regexp terminal/string-pattern)
	:symbol (combi/regexp terminal/symbol-pattern)
	:number (combi/regexp terminal/number-pattern)
	:macro-keyword (combi/regexp terminal/macro-keyword-pattern)
	:keyword (combi/regexp terminal/keyword-pattern)
	:whitespace (combi/regexp terminal/whitespace-pattern)
	:comment (combi/regexp terminal/comment-pattern)})


	(def grammar (merge (cfg/ebnf grammar-rules) grammar-terminals))


	(def clojure
	"Clojure (instaparse) parser. It can be used as:
	- (parcera/clojure input-string)
	-> returns an AST representation of input-string
	- (instaparse/parse parcera/clojure input-string)
	-> same as above but more explicit
	- (instaparse/parses parcera/clojure input-string)
	-> returns a sequence of possible AST representations in case of ambiguity
	in input-string

	For a description of all possible options, visit Instaparse's official
	documentation: https://github.com/Engelberg/instaparse#reference"
	(instaparse/parser grammar :start :code))


	(defn- code*
	"internal function used to imperatively build up the code from the provided
	AST as Clojure's str would be too slow"
	[ast #?(:clj ^StringBuilder string-builder
	:cljs ^StringBuffer string-builder)]
	(case (first ast)
	:code
	(doseq [child (rest ast)]
	(code* child string-builder))

	:list
	(do (. string-builder (append "("))
	(doseq [child (rest ast)] (code* child string-builder))
	(. string-builder (append ")")))

	:vector
	(do (. string-builder (append "["))
	(doseq [child (rest ast)] (code* child string-builder))
	(. string-builder (append "]")))

	:namespaced-map
	(do (. string-builder (append "#"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:map
	(do (. string-builder (append "{"))
	(doseq [child (rest ast)] (code* child string-builder))
	(. string-builder (append "}")))

	:set
	(do (. string-builder (append "#{"))
	(doseq [child (rest ast)] (code* child string-builder))
	(. string-builder (append "}")))

	(:number :whitespace :comment :symbol :character :string
	:keyword :macro-keyword)
	(. string-builder (append (second ast)))

	:symbolic
	(do (. string-builder (append "##"))
	(. string-builder (append (second ast))))

	:regex
	(do (. string-builder (append "#"))
	(. string-builder (append (second ast))))

	:auto-resolve
	(. string-builder (append "::"))

	:metadata
	(do (doseq [child (rest (butlast ast))] (code* child string-builder))
	(code* (last ast) string-builder))

	:metadata-entry
	(do (. string-builder (append "^"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:deprecated-metadata-entry
	(do (. string-builder (append "#^"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:quote
	(do (. string-builder (append "'"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:var-quote
	(do (. string-builder (append "#'"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:discard
	(do (. string-builder (append "#_"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:tag
	(do (. string-builder (append "#"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:backtick
	(do (. string-builder (append "`"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:unquote
	(do (. string-builder (append "~"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:unquote-splicing
	(do (. string-builder (append "~@"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:conditional
	(do (. string-builder (append "#?"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:conditional-splicing
	(do (. string-builder (append "#?@"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:deref
	(do (. string-builder (append "@"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:fn
	(do (. string-builder (append "#"))
	(doseq [child (rest ast)] (code* child string-builder)))

	:eval
	(do (. string-builder (append "#="))
	(doseq [child (rest ast)] (code* child string-builder)))))


	(defn code
	"Transforms your AST back into code

	ast: The nested sequence of [:keyword & content] which MUST follow the
	same structure as the result of `(parcera/clojure input-string)`

	Returns a string representation of the provided AST

	In general (= input (parcera/code (parcera/clojure input)))"
	[ast]
	(let [string-builder #?(:clj (new StringBuilder)
	:cljs (new StringBuffer))]
	(code* ast string-builder)
	(. string-builder (toString))))

	; Successful parse.
	; Profile: {:create-node 384, :push-full-listener 2, :push-stack 384,
	; :push-listener 382, :push-result 227, :push-message 227 }
	; "Elapsed time: 47.25084 msecs"
	#_(time (clojure (str '(ns parcera.core
	(:require [instaparse.core :as instaparse]
	[clojure.data :as data]
	[clojure.string :as str])))
	:trace true))

	#_(instaparse/disable-tracing!)

	(comment

	;; 4400 ms!
	(let [start (System/currentTimeMillis)
	src (slurp (str (System/getenv "HOME")
	"/src/clojure/src/clj/clojure/core.clj"))
	ast (try
	(clojure src)
	(catch Exception e
	(println e)
	nil))]
	(when ast
	(let [reconstructed (code ast)]
	(when-not (= reconstructed src)
	(spit "/tmp/failed-reconstruction.clj"
	reconstructed)
	(println "failed to reconstruct"))))
	(println (- (System/currentTimeMillis) start) "ms"))

	)
	(ns parcera.terminals
	"Clojure symbols, keywords, numbers and string/regex share quite a lot
	of matching logic. This namespace is aimed towards clearly identifying
	those pieces and share them among the different definitions to
	avoid recurring issues")

	;; Clojure's reader is quite permissive so we follow the motto
	;; "if it is not forbidden, it is allowed"
	(def not-allowed "\\f\\n\\r\\t \\(\\)\\[\\]{}\"@~\\^;`\\\\,:#\\'\\/")
	(def allowed-character (str "[^" not-allowed "]"))

	(def keyword-head (str "("
	allowed-character "\|" "[#']"
	")"))
	(def keyword-body (str "("
	keyword-head "\|" "[:\\/]"
	")"))
	(def keyword-pattern (str ":"
	"("
	, "(" keyword-head keyword-body "*" ")"
	, "\|"
	, "\\/"
	")"))
	(def macro-keyword-pattern (str "::" keyword-head keyword-body "*"))


	(def name-head (str "("
	allowed-character "\|" "[\\/]"
	")"))
	(def name-body (str "("
	name-head "\|" "[:#\\']"
	")"))
	(def symbol-pattern (str name-head name-body "*"))


	(def double-suffix "(((\\.\\d*)?([eE][-+]?\\d+)?)M?)")
	(def long-suffix "((0[xX]([\\dA-Fa-f]+)\|0([0-7]+)\|([1-9]\\d?)[rR]([\\d\\w]+)\|0\\d+)?N?)")
	(def ratio-suffix "(\\/(\\d+))")
	;; XXX: order ok here?
	(def number-pattern (str "[+-]?\\d+(" long-suffix "\|" double-suffix "\|" ratio-suffix ")(?![\\.\\/])")) ; todo: word boundary ?


	; This is supposed to be the JavaScript friendly version of #'\P{M}\p{M}*+'
	; mentioned here: https://www.regular-expressions.info/unicode.html
	; It's cooked by this generator: http://kourge.net/projects/regexp-unicode-block
	; ticking all 'Combining Diacritical Marks' boxes *))
	(def unicode-char "([^\\u0300-\\u036F\\u1DC0-\\u1DFF\\u20D0-\\u20FF])")
	(def named-char "(newline\|return\|space\|tab\|formfeed\|backspace)")
	(def unicode "(u[\\dA-Fa-f]{4})")
	(def character-pattern (str "\\\\(" unicode-char "\|" named-char "\|" unicode ")(?!\\w+)"))


	(def string-pattern "\"[^\"\\\\](?:\\\\.[^\"\\\\])*\"")
	;;(def regex-pattern (str "#" string-pattern))


	(def whitespace-pattern "[\f\n\r\t, ]+")

	(def comment-pattern "(;\|(#!))[^\r\n]*")