Skip to content

Instantly share code, notes, and snippets.

@Conaws
Created July 20, 2018 00:17
Show Gist options
  • Save Conaws/b5f09d7527e3f98d783eed8c666d4ec6 to your computer and use it in GitHub Desktop.
Save Conaws/b5f09d7527e3f98d783eed8c666d4ec6 to your computer and use it in GitHub Desktop.
(ns history.parser
(:require [instaparse.core :as insta]
[debux.core :as dbg]
[history.util :refer [defc p makeInt]]
[clojure.pprint :refer [pprint] :as pp]
[clojure.string :as str]
[clojure.core.async :as async :refer [>! go go-loop <! <!! chan put!]]))
(defn filter-ke-tree
([coll] (filter-ke-tree coll #{}))
([x without]
#_(do
(print "\nWithout " )
(print without)
(print "\nFilterKE: ")
(print x)
(print "\n\n-------------"))
(cond
(and (map? x) (:keep x))
(let [newexclude (into #{} (:exclude x))
exclude (clojure.set/union without newexclude)]
(-> x
:keep
(filter-ke-tree exclude)
flatten))
(coll? x)
(->> x
(remove string? )
(remove keyword?)
(remove (partial contains? without))
(map #(filter-ke-tree % without)))
:else x)))
(def this-file "./src/clj/history/parser.clj")
(def between-s #"(?<=\))(\n)+(?=\()")
(def between-s-or-comment #"(?<=[\)(;.*)])(\n)+(?=[\((;.*)])")
(def get-name-from-vectors (comp second #(str/split % #"[ \n]") first))
(defn ip [pstring parsable]
((insta/parser pstring) parsable))
(defn ipt [pstring parsable]
((insta/parser pstring) parsable :total true ))
(defn ipp [pstring parsable]
((insta/parser pstring) parsable :partial true ))
(defn comparse [& s]
(apply str (vec (interleave s (repeat "\n")))))
(defn libify [& m] (into {} (map (juxt :name identity) m)))
; not distinguising quoted symbols right now
(def clj-content
" S = content+
<content> = w? (string / s / comment / regex / vector / map / set / symbol / keyword / number) w?
<w> = <#'[\\s,]+'>
s = ('#_' | '\\'' | '#' | '~' | '`' )? op (content | '%' )* cp
<comment> = #';.*'
vector = <'['> content* <']'>
map = <'{'> kv* <'}'>
kv = <w?> key <w?> binding
set = <'#{'> content* <'}'>
<key> = content
<binding> = content
<op> = <'('>
<cp> = <')'>
string = <'\"'> not-quote* <'\"'>
<not-quote> = (( <comment> | '\\\\''\"' ) / !'\"' #'[\\s\\S]')
symbol = (#'[^\\s:\\[\\(\\{\\]\\)\\}]+') keyword?
old-symbol = ( word | crazys )+ ( ':' | '\\'' | '/' |'#' | word | number | crazys)*
<crazys> = '*' | '$' | '!' | '&' | '.' | '+' | '?' | '<' | '>' | '-' | '_' | '='
regex = <'#'> string
keyword = (<':'> (#'[^\\s\\[\\(\\{\\]\\)\\}]')* (#'[^\\s:\\[\\(\\{\\]\\)\\}]')+)
<word> = #'[a-zA-Z]+'
math = '+' | '-' | '/' | '=' | '*' | '<' | '<=' | '>' | '>='
number = #'[0-9]+'")
#_(def form-transform1
{:s list
:string str
:number makeInt
:keyword (comp keyword str)
:symbol (comp symbol str)
:map (comp (p into {}) vector)
:kv vector
:regex symbol
:vector vector
:set (comp (p into {}) vector)
:body str})
#_(->> (ip clj-content (str '(def keep-exclude {:keep [:a :b :c {:exclude [1 2]
:keep [1 2 3 4 :a :b 5]}]
:exclude [:a :b]})))
(insta/transform form-transform))
#_(def testparse1
(-> this-file
slurp
(str/split between-s)
(->>
(map (juxt
#((insta/parser clj-content) % )
identity))
(filter (comp (p insta/get-failure) first))
print)))
(def test1 (str '(def clj-content
" S = content+
<content> = (<w>? (string / s / comment / vector / map / set / symbol / keyword / number) <w>? )
w = #'[\\s,]+'
s = ('#_' | '\\'' | '#' )? op (content | '%' )* cp
<comment> = #';.*'
vector = <'['> content* <']'>
map = <'{'> kv* <'}'>
kv = <w>? key <w>? binding
set = <'#{'> content* <'}'>
<key> = content
<binding> = content
<op> = <'('>
<cp> = <')'>
string = <'\"'> not-quote* <'\"'>
<not-quote> = ( <comment> / !'\"' #'[\\s\\S]')
symbol = ( word | crazys )+ (':' | '\\'' | '/' |'#' | word | number | crazys)*
<crazys> = '*' | '$' | '!' | '&' | '.' | '+' | '?' | '<' | '>' | '-' | '_' | '='
keyword = (<':'> (#'[^\\s\\[\\(\\{\\]\\)\\}]')* (#'[^\\s:\\[\\(\\{\\]\\)\\}]')+)
<word> = #'[a-zA-Z]+'
math = '+' | '-' | '/' | '=' | '*' | '<' | '<=' | '>' | '>='
number = #'[0-9]+'"
)))
(def debugger1
"s = ('#_' | '\\'' | '#' )? '(' ( string / content / s / '%' )* ')'
string = <'\"'> not-quote* <'\"'>
<not-quote> = ( <comment> / !'\"' #'[^\"]')
content = #'[^\\(\\)\\\"]+'
<comment> = #';.*'
")
#_(pprint (ipt debugger1 test1))
(defc embed-link [link-tar s]
(if (str/includes? s link-tar)
(->> (str/split s (re-pattern link-tar))
(interleave (repeat [(keyword (str "a#" link-tar)) link-tar]))
rest
(into [:div]))
s))
#_(dbg/dbg (embed-link "hello" "(defn hello abcede)"))
#_(embed-link "hello" "(defn helo abc hello no hello ede)")
(def let-example
(str '(defn hello [[x y]]
(let [a [1 2 3 4]
b (map (partial * 2) (conj a 5))]
(go
(let [[e f] (take 5 b)
{:keys [g h]} {:crazy (a b lets go now)}]
(insta/viz partal a g h e f c))
(let [d (take 5 b)]
(do
(if (+ 5 6 a b x y)))))))))
(def let-pair "[x 2
y (+ 3 4)]
(+ 1 x)")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(def top-level-parser (insta/parser
(str
"file = (tops | <w> | comment | string )*
tops = s"
clj-content)))
#_(defn spans [t]
(if (sequential? t)
(cons
(while (= t :tops)
(insta/span t))
(map spans (next t)))
t))
(def form-transform
"dangerous stuff with regex here"
{:s list
:string str
:number makeInt
:keyword (comp keyword str)
:symbol (comp symbol str)
:map (comp (p into {}) vector)
:regex symbol
:kv vector
:vector vector
:set (comp (p into #{}) vector)
})
#_(def get-strings
(let [f (slurp this-file)
p (->> (insta/parse top-level-parser f)
(insta/transform form-transform))]
(->> (take 2 p)
pprint)))
(def testsplit1
(-> this-file
slurp
(str/split between-s-or-comment)
(->>
count)))
(def def-extractor-string
" <extract> = (def / ns / <experiment> )
experiment = body
def = op <w>? type <w> name <w> body
body = #'[\\s\\S]*'
type = 'def' symbol?
w = #'\\s+'
name = symbol
ns = op <w>* <'ns'> <w>* nsname <w>* required <w>* cp
nsname = symbol
nspair = <'['> symbol (as|refer|<w>)* <']'>
required = op <':require'> (<w>* nspair)* cp
as = <':as'> <w>* symbol
refer = <':refer'> <w>* vector")
(def def-extractor
(insta/parser
(str
def-extractor-string
clj-content)))
(def ns-extractor-string
"ns = op <w>* <'ns'> <w>* nsname <w>* required <w>* cp
nsname = symbol
nspair = <'['> symbol (as|refer|<w>)* <']'>
required = op <':require'> (<w>* nspair)* cp
as = <':as'> <w>* symbol
refer = <':refer'> <w>* vector")
(def ns-parser
(insta/parser
(str
ns-extractor-string
clj-content)))
(def ns-transform
{:ns merge
:nsname (fn [n] {:name n :type :namespace})
:nspair (fn [name & attrs] [name (into {} attrs)])
:required (comp (fn [r] {:libraries r}) (partial into {}) vector)
})
(defc nt [tm s]
(insta/transform tm s))
(defc mapify [k s]
{k s})
(def dt2
{:symbol symbol
:def merge
:body nil
:type (comp (mapify :type) str)
:name (mapify :name)})
;;;; current state of the art
(defn parse [s]
(let [s-f (str/split s between-s-or-comment)
;_ (println "S-F" s-f)
ds (->> s-f
rest
(map (juxt (comp (nt dt2) def-extractor) identity)))
;_ (println "DS" ds)
[[n] nst] (->> s-f
first
((juxt (comp (nt ns-transform) def-extractor) identity)))
;_ (println "N" n "NST" nst)
]
(cons
(assoc n :string nst)
(for [[[d] dst] ds]
(assoc d :string dst)))))
(defn parse-file [file]
(-> file slurp parse))
(defc mapify [k s]
{k s})
(def ke-transform {:keep (comp (mapify :keep) flatten filter-ke-tree)
:exclude (comp (fn [s] {:exclude s}) flatten vector)
:arg-exclude (p merge-with (comp flatten conj))
:defn-ke (comp (mapify :keep) filter-ke-tree (p merge-with conj))
})
(def arg-pair "[[a b c] d] (comp a b boom c d)")
(def arg-extractor-string
(str
"defn-ke = arg-exclude keep+
arg-exclude = <'['> ( <w> |exclude)* <']'>
exclude = content
keep = content
name = symbol"
clj-content))
(def ma "(defn abcd
([x] (a b c x))
([x y] (d e f x y)))")
(def sa "(defn abc [] (comp partial a b c))")
(defc try-transform [message transform p]
(try (insta/transform transform p)
(catch Exception e (do (print (str "Exception Caught " message "\n\n" (.getMessage e) "\nFor Transform of \n"))
(pprint p)))))
(def mconj (p merge-with conj))
(defn parse2 [s]
(->> (try
((insta/parser (str
"TOP = def / defn / ns/ experiment
defn = op <w>? type <w> name (<w> op? defn-ke cp?)+ cp
def = op <w>? <'def'> <w> name <w> def-body cp
def-body = content
type = symbol
experiment = #'[\\s\\S]*'
name = symbol "
arg-extractor-string
ns-extractor-string
)) s)
(catch Exception e (print "GOT AN EXCeption/n/n/n" (.getMessage e))))
(try-transform "on the form" form-transform)
(try-transform "on the custom" (merge
{:exclude symbol
:keep (mapify :keep)
:arg-exclude (fn [& s] {:exclude (flatten s)})
:defn-ke (comp (mapify :includes) filter-ke-tree merge)
:def (p merge {:type :def})
:defn mconj
:def-body (fn [&s] {:def-body (filter symbol? s)})
;(comp (mapify :def-body) (p into #{}) (p keep symbol?) vector)
:name (mapify :name)
:type (mapify :type)
:TOP (p merge-with (comp flatten conj))
}
ns-transform))))
(def defns1
(->> (parse this-file)
(drop 1)
(keep :string)
(keep parse2)))
(defn async-parse [c parse-string]
(go
(print "started parsing" (take 10 parse-string))
(let [r (parse2 parse-string)]
(if (insta/get-failure r)
(put! c {:failed-string parse-string} )
(put! c r))
(print "put something on the chan"))))
(defn process-string [results-chan s]
(let [; results-chan (chan)
stuff (str/split s between-s-or-comment)]
(map
#(async-parse results-chan %) stuff)))
(def results-chan (chan 100))
#_(process-string results-chan (slurp this-file))
(def results (atom []))
(defn output-loop [c db]
(go-loop []
(let [r (<! c)]
(print r)
(swap! db conj r)
(recur))))
(def defns1
(->> (parse this-file)
(drop 1)
(keep :string)
(keep parse2)))
(defn beautify-set [s]
(print (apply str (interleave (repeat " \n ") s))))
#_(beautify-set (set (flatten (apply concat (keep :includes defns1)))))
(def defns2
(let [f (-> this-file
slurp
(str/split between-s-or-comment))
ds (->> f
rest
(map (juxt #(try
(parse2 %)
(catch Exception e (println (.getMessage e)))) identity)))
[[n] nst] (->> f
first
((juxt (comp (nt ns-transform) def-extractor) identity)))]
ds))
(def arg-extractor
(insta/parser
(str
arg-extractor-string
"<content> = (<w>? (s / comment / vector / map / string / symbol / keyword / number))"
)))
(def let-pair "[x 2
y (+ 3 4)]
(+ 1 x)")
(def arg-pair "[[a b c] d] (comp a b boom c d)")
#_(arg-extractor arg-pair)
#_(->> (arg-extractor "[a b c d e] (comp {:keep [a b g d e c] :exclude [a b c]} partial a b c)")
(insta/transform form-transform)
(insta/transform ke-transform))
(def let-extractor
(str
"let = <'('> <w>? <'let'> <w>? bindings <w>? keep <')'>
bindings = <'['> (<w>? ke-pair)* <']'>
ke-pair = exclude <w> keep"
"<content> = (<w>? (let / s / comment / vector / map / string / symbol / keyword / number))"
arg-extractor-string))
(def let-transform
{:ke-pair (partial merge-with conj)
:bindings (partial merge-with (comp flatten conj))
:let (comp (mapify :keep) filter-ke-tree (partial merge-with (comp flatten conj)))})
(def let-pair "(let [x (comp partial 5 a)
y (+ 3 4)]
(+ 1 x))")
#_(dbg/dbg (->> ((insta/parser let-extractor) let-pair)
(insta/transform form-transform)
(insta/transform ke-transform)
(insta/transform let-transform)))
#_(clojure.pprint/pprint (keys (ns-publics 'clojure.core)))
(def let-extractor2
(str
" file = ns (s | comment | w)*
ns = op (w | ns | within)* cp
def = op <w>? type <w>? name body
body = ((<w>? arguments? <w>? content) |<w>? arity*)+ <cp>
type = 'def' symbol?
arity = op ke-pair cp
<arguments> = arg-exclude
doc-string = string
name = symbol
let = <'('> <w>? <'let'> <w>? bindings <w>? keep <')'>
bindings = <'['> (<w>? ke-pair)* <']'>
ke-pair = exclude <w> keep"
" defn-ke = arg-exclude keep+
arg-exclude = <'['> exclude+ <']'>
exclude = content
keep = content
w = #'\\s+'
name = symbol
s = ('#_' | '\\'' | '#' )? op content* cp
<within> = #'([^\\(\\)])*'
<vectorW> = #'([^\\[\\]\\s])*'"))
(def flatmapconj (partial merge-with (comp flatten conj)))
(def let-transform
(merge
{:type (mapify :type)
:name (mapify :name)
; :body vector
}
{:s list
:string str
:number makeInt
:keyword (comp keyword str)
:symbol (comp symbol str)
:map (comp (p into {}) vector)
:kv vector
:vector vector
:set (p into #{})}
{:keep (comp (mapify :keep) flatten filter-ke-tree)
:exclude (comp (fn [s] {:exclude s}) flatten vector)
:arg-exclude (p merge-with (comp flatten conj))
:defn-ke (comp (mapify :keep) filter-ke-tree (p merge-with conj))}
{:ke-pair (partial merge-with conj)
:bindings (partial merge-with (comp flatten conj))
:let (comp (mapify :keep) filter-ke-tree (partial merge-with (comp flatten conj)))}))
(def tops-fn
(fn [s] (let [r (ip let-extractor2 s)]
(if (insta/get-failure r)
s
(insta/transform let-transform r)))))
(def file-fn (comp (p filter #(= :def (first %))) vector))
(def comp-transform
{:file vector
:s str
:within str
:ws nil
:string str
:comment str
:tops (comp (p apply str) (p take 20))
})
#_(def f (->> this-file
slurp))
;; (print f)
#_((insta/parser let-extractor2) f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment