Last active
July 24, 2018 09:37
-
-
Save lispyclouds/a18c19fe88cf34372329e88ad3285451 to your computer and use it in GitHub Desktop.
Clojure function to tokenize shell commands.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;; Clojure translation of https://sourceforge.net/p/drjava/git_repo/ci/master/tree/drjava/src/edu/rice/cs/util/ArgumentTokenizer.java
;; Input: "sh -c \"while sleep 1; do echo \\\"${RANDOM}\\\"; done\""
;; Output: ["sh", "-c", "while sleep 1; do echo \"${RANDOM}\"; done"]
(defn shell-tokenize!
  "Splits `command` into a vector of argument strings using shell-like
  quoting rules. Despite the trailing `!`, this function is pure.

  Rules:
    * Unquoted whitespace separates arguments.
    * Outside quotes, a backslash makes the next character literal.
    * Inside single quotes every character is literal until the closing
      single quote.
    * Inside double quotes a backslash only escapes a double quote or
      another backslash; before any other character the backslash itself
      is kept.
    * Quotes are stripped, and adjacent quoted/unquoted pieces join into
      a single argument.

  A trailing unpaired backslash is kept as a literal backslash on the
  last argument. An unterminated quote is not an error: whatever was
  collected so far becomes the last argument.

  Returns a vector of strings; nil or empty input yields []."
  [^String command]
  ;; `remaining` is a seq of the unread characters (nil once exhausted).
  ;; Walking it with `next` avoids re-counting the rest on every step.
  (loop [remaining   (seq command)
         escaped?    false
         state       :no-token
         current-arg ""
         args        []]
    (if-let [[ch & more] remaining]
      (cond
        ;; Previous character was an unquoted backslash: take `ch` verbatim.
        escaped?
        (recur more false state (str current-arg ch) args)

        (= state :single-quote)
        (if (= ch \')
          (recur more false :normal current-arg args)
          (recur more false state (str current-arg ch) args))

        (= state :double-quote)
        (case ch
          ;; Closing quote: consume it (advance past it) and return to
          ;; :normal. Not advancing here would re-read the same quote
          ;; and immediately re-open the quoted section.
          \" (recur more false :normal current-arg args)
          ;; Look ahead one character; only quote and backslash are escapable.
          ;; At end of input `escapee` is nil and only the backslash is kept.
          \\ (let [escapee (first more)]
               (if (or (= escapee \") (= escapee \\))
                 (recur (next more) false state (str current-arg escapee) args)
                 (recur (next more) false state (str current-arg ch escapee) args)))
          (recur more false state (str current-arg ch) args))

        ;; :no-token (between arguments) or :normal (inside an argument).
        :else
        (case ch
          \\ (recur more true :normal current-arg args)
          \' (recur more false :single-quote current-arg args)
          \" (recur more false :double-quote current-arg args)
          (cond
            (not (Character/isWhitespace (char ch)))
            (recur more false :normal (str current-arg ch) args)

            ;; Whitespace ends the argument currently being built.
            (= state :normal)
            (recur more false :no-token "" (conj args current-arg))

            ;; Whitespace between arguments is skipped.
            :else
            (recur more false state current-arg args))))
      ;; Input exhausted: flush whatever is pending.
      (cond
        escaped?               (conj args (str current-arg \\))
        (not= state :no-token) (conj args current-arg)
        :else                  args))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment