Last active
May 24, 2024 20:53
-
-
Save Octachron/03e7a9088a5a0fe75d363fa9223b72e1 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(** [split escape_char char start pos s] cuts [s] at every occurrence of
    [char] found while scanning from index [pos], and returns the pieces in
    order. [start] is the index at which the piece currently being scanned
    begins.

    When [escape_char] is met, it and the character right after it are
    skipped by the scan, so that following character never acts as a
    separator. Both characters are left untouched inside the returned piece.
    The text after the last separator (possibly empty) is always returned as
    the final piece. *)
let[@tail_mod_cons] rec split escape_char char start pos s =
  let len = String.length s in
  if pos < len then
    match s.[pos] with
    | c when c = char ->
      (* Separator: close the current piece and start a new one after it. *)
      let next = pos + 1 in
      String.sub s start (pos - start) :: split escape_char char next next s
    | c when c = escape_char ->
      (* Jump over the escaped character as well. *)
      split escape_char char start (pos + 2) s
    | _ -> split escape_char char start (pos + 1) s
  else
    (* End of input (or past it, after a trailing escape): flush the rest. *)
    [ String.sub s start (len - start) ]
(** [split_path s] splits [s] on [':'] from its first character, treating
    ['\\'] as the escape character (see [split]). *)
let split_path path =
  let escape = '\\' and separator = ':' in
  split escape separator 0 0 path
(** Escape-aware string splitting, written as an explicit state machine that
    is folded over the characters of the input. *)
module State_machine = struct
  (** Role a character plays for the splitter. *)
  type cclass = ESC | SEP | ANY

  (** State threaded through the fold. *)
  type state = {
    escaped : bool;  (** the next character is protected by an escape *)
    words : string list;  (** completed words, most recent first *)
    word_start : int;  (** index at which the current word begins *)
    pos : int;  (** index of the next character to be read *)
  }

  (** [split_with_escape esc sep s] splits [s] at every unescaped occurrence
      of [sep]. A character preceded by an unescaped [esc] is never treated
      as a separator; escape characters are kept verbatim in the words.

      Empty words at the start or in the middle of [s] are returned, but an
      empty word at the very end is dropped (["a:"] yields [["a"]] and [""]
      yields [[]]). *)
  let split_with_escape esc sep s =
    (* Label what kind of character we have; [esc] wins if [esc = sep]. *)
    let classify c =
      if c = esc then ESC
      else if c = sep then SEP
      else ANY
    in
    let extract st = String.sub s st.word_start (st.pos - st.word_start) in
    let f st c =
      match st.escaped, classify c with
      | true, (ANY | SEP | ESC) ->
        (* The protected character is consumed, whatever it is. In particular
           an escaped escape character must NOT start a new escape, otherwise
           a separator following "\\\\" would wrongly be protected. *)
        { st with escaped = false; pos = st.pos + 1 }
      | false, ESC -> { st with escaped = true; pos = st.pos + 1 }
      | false, ANY -> { st with pos = st.pos + 1 }
      | false, SEP ->
        (* Close the current word and start the next one after [sep]. *)
        let pos = st.pos + 1 in
        { words = extract st :: st.words; word_start = pos; pos; escaped = false }
    in
    let init = { escaped = false; words = []; word_start = 0; pos = 0 } in
    let state = String.fold_left f init s in
    let words =
      (* Flush the last word only when it is non-empty. *)
      if state.pos > state.word_start then extract state :: state.words
      else state.words
    in
    List.rev words
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment