Skip to content

Instantly share code, notes, and snippets.

@thelema
Created April 15, 2012 16:04
Show Gist options
  • Save thelema/2393587 to your computer and use it in GitHub Desktop.
Save thelema/2393587 to your computer and use it in GitHub Desktop.
nsplitp
(* String.nsplitp - cut a string at every byte accepted by a predicate.

   [nsplitp str pred] walks [str] from its last byte to its first and
   returns, in left-to-right order, the pieces found between the bytes
   for which [pred] holds. Those bytes are not part of any piece. Empty
   pieces are kept: a delimiter at either end, or two delimiters in a
   row, each contribute an empty string. The empty string gives []. *)
let nsplitp str pred =
  match str with
  | "" -> []
  | _ ->
    let last = length str - 1 in
    (* [scan tokens pos tok_end]: [pos] is the byte being examined and
       [tok_end] the index of the final byte of the piece being built;
       [tokens] holds the pieces already cut off to the right. *)
    let rec scan tokens pos tok_end =
      if pos < 0 then
        (* walked past the front: what remains is the first piece *)
        sub str 0 (tok_end + 1) :: tokens
      else if pred str.[pos] then
        (* delimiter at [pos]: the piece is the bytes pos+1 .. tok_end *)
        let piece = sub str (pos + 1) (tok_end - pos) in
        scan (piece :: tokens) (pos - 1) (pos - 1)
      else
        (* ordinary byte: keep extending the current piece leftwards *)
        scan tokens (pos - 1) tok_end
    in
    scan [] last last
(*$= nsplitp & ~printer:(IO.to_string (List.print String.print))
["a"; "b"; "c"] (nsplitp "a b c" Char.is_whitespace)
*)
(* Text.nsplitp - split a rope by a predicate on unicode characters.

   [nsplitp str pred] returns, in left-to-right order, the non-empty runs
   of characters of [str] lying between characters for which [pred] holds.
   Delimiters are not part of any token, and empty tokens (leading or
   trailing delimiter, several delimiters in a row) are dropped. The empty
   rope gives [].

   NOTE(review): this assumes [rindexp_from str ofs pred] returns the index
   of the last character at or before [ofs] that satisfies [pred], and
   raises [Invalid_rope] when there is none - confirm against its
   definition (the other rindex functions may signal this differently). *)
let nsplitp str pred =
  if is_empty str then []
  else
    (* [aux acc ofs]: [ofs] is the index of the last character not yet
       consumed; [acc] holds the tokens already found to its right. *)
    let rec aux acc ofs =
      if ofs < 0 then
        (* the whole rope has been consumed (it started with a delimiter):
           stop here instead of searching from a negative index *)
        acc
      else
        match
          try Some (rindexp_from str ofs pred) with Invalid_rope -> None
        with
        | Some idx ->
          if idx = ofs then
            (* the delimiter is at [ofs] itself, so the token to its right
               is empty: skip it. Stepping to [idx - 1] is required for
               termination; recursing on [idx] would repeat this call. *)
            aux acc (idx - 1)
          else
            (* characters idx+1 .. ofs form a non-empty token; this also
               covers the one-character token when idx = ofs - 1 *)
            let token = sub str (idx + 1) (ofs - idx) in
            aux (token :: acc) (idx - 1)
        | None ->
          (* no delimiter at or before [ofs]: the remaining prefix is the
             first token *)
          sub str 0 (ofs + 1) :: acc
    in
    aux [] (length str - 1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment