Created
April 15, 2012 16:04
-
-
Save thelema/2393587 to your computer and use it in GitHub Desktop.
nsplitp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* String.nsplitp - split by a predicate on bytes *)
(* [nsplitp str pred] cuts [str] at every byte for which [pred] holds and
   returns the pieces between those bytes in left-to-right order. The
   delimiter bytes are not part of any piece. Empty pieces are kept, so a
   leading, trailing or repeated delimiter produces an empty string in the
   result. The empty string yields the empty list. *)
let nsplitp str pred =
  match length str with
  | 0 -> []
  | len ->
    (* Walk from the last byte towards the first so that each piece can be
       consed onto the front of the result without a final reversal.
       [pos] is the byte under inspection; [last] is the index of the last
       byte of the piece currently being collected (it is [pos] when that
       piece is still empty). *)
    let rec scan pieces pos last =
      if pos < 0 then
        (* ran off the left edge: whatever remains is the first piece *)
        sub str 0 (last + 1) :: pieces
      else if pred str.[pos] then
        (* delimiter: close the piece spanning [pos + 1 .. last] *)
        scan (sub str (pos + 1) (last - pos) :: pieces) (pos - 1) (pos - 1)
      else
        scan pieces (pos - 1) last
    in
    scan [] (len - 1) (len - 1)
(*$= nsplitp & ~printer:(IO.to_string (List.print String.print))
["a"; "b"; "c"] (nsplitp "a b c" Char.is_whitespace)
*)
(* Text.nsplitp - split by a predicate on unicode characters *)
(* [nsplitp str pred] cuts [str] at every character for which [pred] holds
   and returns the non-empty pieces in left-to-right order. Delimiter
   characters are dropped, and leading, trailing or repeated delimiters do
   not produce empty pieces. An empty [str] yields the empty list.

   NOTE(review): [rindexp_from] is not defined in this snippet. This code
   assumes [rindexp_from str ofs pred] returns the greatest index [<= ofs]
   whose character satisfies [pred], and raises [Invalid_rope] when there
   is no such index -- TODO confirm against its definition. *)
let nsplitp str pred =
  if is_empty str then []
  else
    (* str is non empty *)
    (* [ofs] is the index of the last character not yet consumed; everything
       to its right is already in [acc] or was a delimiter. *)
    let rec aux acc ofs =
      if ofs < 0 then
        (* the whole rope is consumed; never search from a negative index *)
        acc
      else
        match
          try Some (rindexp_from str ofs pred)
          with Invalid_rope -> None
        with
        | Some idx -> (* found the delimiter *)
          let acc =
            if idx = ofs then
              (* trailing delimiter or several separator characters in a
                 row: there is no piece between [idx] and [ofs] *)
              acc
            else
              (* the piece occupies [idx + 1 .. ofs], i.e. [ofs - idx]
                 characters; this includes one-character pieces *)
              sub str (idx + 1) (ofs - idx) :: acc
          in
          (* always continue strictly to the left of the delimiter so that
             every recursive call makes progress *)
          aux acc (idx - 1)
        | None -> (* pred sep NOT found *)
          (* no delimiter remains: the rest of the prefix is the first piece *)
          sub str 0 (ofs + 1) :: acc
    in
    aux [] (length str - 1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment