Skip to content

Instantly share code, notes, and snippets.

@idkjs
Last active June 24, 2021 11:12
Show Gist options
  • Save idkjs/4940a4b110d0ca7598228e523d29e976 to your computer and use it in GitHub Desktop.
Save idkjs/4940a4b110d0ca7598228e523d29e976 to your computer and use it in GitHub Desktop.
Stdlib.Str.re
/**************************************************************************/
/* */
/* OCaml */
/* */
/* Xavier Leroy, projet Cristal, INRIA Rocquencourt */
/* */
/* Copyright 1996 Institut National de Recherche en Informatique et */
/* en Automatique. */
/* */
/* All rights reserved. This file is distributed under the terms of */
/* the GNU Lesser General Public License version 2.1, with the */
/* special exception on linking described in the file LICENSE. */
/* */
/**************************************************************************/
/* In this module, [@ocaml.warning "-3"] is used in several places
that use deprecated functions to preserve legacy behavior.
It overrides -w @3 given on the command line. */
/** String utilities */;
/* [string_before(s, n)] is the prefix of [s] holding its first [n] characters.
   Raises [Invalid_argument] (from [String.sub]) when [n] is out of range. */
let string_before = (s, n) => {
  let prefix_len = n;
  String.sub(s, 0, prefix_len);
};
/* [string_after(s, n)] is the suffix of [s] that starts at index [n].
   Raises [Invalid_argument] (from [String.sub]) when [n] is out of range. */
let string_after = (s, n) => {
  let tail_len = String.length(s) - n;
  String.sub(s, n, tail_len);
};
/* [first_chars(s, n)] is the first [n] characters of [s]
   (same result as [string_before]).
   Raises [Invalid_argument] (from [String.sub]) when [n] is out of range. */
let first_chars = (s, n) => {
  let count = n;
  String.sub(s, 0, count);
};
/* [last_chars(s, n)] is the last [n] characters of [s].
   Raises [Invalid_argument] (from [String.sub]) when [n] is out of range. */
let last_chars = (s, n) => {
  let offset = String.length(s) - n;
  String.sub(s, offset, n);
};
/** Representation of character sets **/;
module Charset = {
  /* A set of 8-bit characters stored as a 256-bit bitmap: character [c] is a
     member iff bit [c land 7] of byte [c lsr 3] is set. */
  type t = bytes; /* of length 32 */

  /* The set containing every character. */
  let full = Bytes.make(32, '\255');

  /* Returns a fresh set with no members. */
  let make_empty = () => Bytes.make(32, '\000');

  /* Adds character [c] to [s], mutating [s] in place. */
  let add = (s, c) => {
    let code = Char.code(c);
    let byte_idx = code lsr 3;
    let mask = 1 lsl (code land 7);
    let current = Char.code(Bytes.get(s, byte_idx));
    Bytes.set(s, byte_idx, Char.chr(current lor mask));
  };

  /* Adds every character from [c1] to [c2] inclusive to [s], in place.
     Adds nothing when [c1] is greater than [c2]. */
  let add_range = (s, c1, c2) => {
    let last = Char.code(c2);
    let rec go = code =>
      if (code <= last) {
        add(s, Char.chr(code));
        go(code + 1);
      };
    go(Char.code(c1));
  };

  /* Returns a fresh set whose only member is [c]. */
  let singleton = c => {
    let set = make_empty();
    add(set, c);
    set;
  };

  /* Returns a fresh set holding exactly the characters that are not in [s]. */
  let complement = s =>
    Bytes.init(32, i => Char.chr(Char.code(Bytes.get(s, i)) lxor 0xFF));

  /* Returns a fresh set holding the members of [s1] and of [s2]. */
  let union = (s1, s2) =>
    Bytes.init(32, i =>
      Char.chr(Char.code(Bytes.get(s1, i)) lor Char.code(Bytes.get(s2, i)))
    );

  /* True iff [s1] and [s2] have no character in common. */
  let disjoint = (s1, s2) => {
    let rec no_overlap_from = i =>
      i > 31
      || (
        Char.code(Bytes.get(s1, i)) land Char.code(Bytes.get(s2, i)) == 0
        && no_overlap_from(i + 1)
      );
    no_overlap_from(0);
  };

  /* Calls [fn] on each member of [s], in increasing character-code order.
     Each bitmap byte is read once, before its members are visited. */
  let iter = (fn, s) =>
    for (byte_idx in 0 to 31) {
      let bits = Char.code(Bytes.get(s, byte_idx));
      if (bits != 0) {
        let base = byte_idx lsl 3;
        for (bit in 0 to 7) {
          if (((bits lsr bit) land 1) == 1) {
            fn(Char.chr(base + bit));
          };
        };
      };
    };

  /* Returns a 256-byte table holding '\001' at the code of every member of
     [s] and '\000' elsewhere. */
  let expand = s => {
    let table = Bytes.make(256, '\000');
    iter(member => Bytes.set(table, Char.code(member), '\001'), s);
    table;
  };

  /* Returns a fresh set holding both the lowercase and the uppercase form of
     every member of [s] (deprecated [Char.lowercase] / [Char.uppercase] are
     used on purpose, hence the warning override). */
  let fold_case = s =>
    [@ocaml.warning "-3"]
    {
      let folded = make_empty();
      let add_both_cases = c => {
        add(folded, Char.lowercase(c));
        add(folded, Char.uppercase(c));
      };
      iter(add_both_cases, s);
      folded;
    };
};
/** Abstract syntax tree for regular expressions */;
/* Syntax tree of a regular expression: produced by [parse] (or built
   directly, as in [regexp_string]) and consumed by [compile]. */
type re_syntax =
/* a single literal character */
| Char(char)
/* a literal string */
| String(string)
| CharClass(Charset.t, bool) /* true = complemented, false = normal */
/* the sub-expressions matched one after another */
| Seq(list(re_syntax))
/* either the first or the second sub-expression (written \| ) */
| Alt(re_syntax, re_syntax)
/* postfix * */
| Star(re_syntax)
/* postfix + */
| Plus(re_syntax)
/* postfix ? */
| Option(re_syntax)
/* \( ... \) with its group number */
| Group(int, re_syntax)
/* back-reference \1 .. \9, carrying the group number */
| Refgroup(int)
/* ^ */
| Bol
/* $ */
| Eol
/* \b */
| Wordboundary;
/** Representation of compiled regular expressions */;
/* Compiled form of a regular expression, as built by [compile] and handed to
   the external matching primitives. None of the fields is read by the code
   in this file; presumably they are consumed by the C stubs, which would be
   why the unused-field warning is silenced (confirm against strstubs.c). */
[@warning "-unused-field"]
type regexp = {
prog: array(int), /* bytecode instructions */
cpool: array(string), /* constant pool (string literals) */
normtable: string, /* case folding table (if any) */
numgroups: int, /* number of \(...\) groups */
numregisters: int, /* number of nullable Star or Plus */
startchars: int /* index of set of starting chars, or -1 if none */
};
/** Opcodes for bytecode instructions; see strstubs.c for description */;
/* Opcode numbers. [instr] stores the opcode in the low 8 bits of an
   instruction word and the argument in the bits above. The argument
   meanings noted below are the ones [compile] emits; the run-time
   behaviour of each opcode is defined by the C matcher, not by this file. */
/* argument: character code */
let op_CHAR = 0;
/* argument: code of the lowercased character */
let op_CHARNORM = 1;
/* argument: constant pool index of the string */
let op_STRING = 2;
/* argument: constant pool index of the lowercased string */
let op_STRINGNORM = 3;
/* argument: constant pool index of a 32-byte character bitmap */
let op_CHARCLASS = 4;
/* argument: always 0 */
let op_BOL = 5;
/* argument: always 0 */
let op_EOL = 6;
/* argument: always 0 */
let op_WORDBOUNDARY = 7;
/* argument: group number */
let op_BEGGROUP = 8;
/* argument: group number */
let op_ENDGROUP = 9;
/* argument: group number */
let op_REFGROUP = 10;
/* argument: always 0; emitted once, as the last instruction */
let op_ACCEPT = 11;
/* argument: constant pool index of a character class */
let op_SIMPLEOPT = 12;
/* argument: constant pool index of a character class */
let op_SIMPLESTAR = 13;
/* argument: constant pool index of a character class */
let op_SIMPLEPLUS = 14;
/* argument: relative displacement, see [displ] */
let op_GOTO = 15;
/* argument: relative displacement, see [displ] */
let op_PUSHBACK = 16;
/* argument: register number */
let op_SETMARK = 17;
/* argument: register number */
let op_CHECKPROGRESS = 18;
/* Encoding of bytecode instructions: the opcode [opc] goes in the low 8 bits
   and the argument [arg] in the bits above them. */
let instr = (opc, arg) => {
  let shifted_arg = arg lsl 8;
  shifted_arg lor opc;
};
/* Relative displacement for GOTO and PUSHBACK instructions: the distance from
   the instruction that follows position [from] to position [dest]. */
let displ = (dest, from) => {
  let after_from = from + 1;
  dest - after_from;
};
/** Compilation of a regular expression */;
/* [is_nullable(re)] tells whether [re] can match the empty string.
   Anchors, word boundaries and back-references count as nullable. */
let rec is_nullable = re =>
  switch (re) {
  | Char(_)
  | CharClass(_, _) => false
  | String(lit) => lit == ""
  | Star(_)
  | Option(_)
  | Refgroup(_)
  | Bol
  | Eol
  | Wordboundary => true
  | Plus(inner)
  | Group(_, inner) => is_nullable(inner)
  | Seq(parts) => List.for_all(is_nullable, parts)
  | Alt(left, right) => is_nullable(left) || is_nullable(right)
  };
/* [first(r)] returns a set of characters C such that, for every string s
   matched by [r], the first character of s belongs to C.
   When nothing more precise is known (nullable constructs, anchors,
   back-references) the result is [Charset.full]. */
let rec first = re =>
  switch (re) {
  | Char(c) => Charset.singleton(c)
  | String("") => Charset.full
  | String(lit) => Charset.singleton(lit.[0])
  | CharClass(cl, true) => Charset.complement(cl)
  | CharClass(cl, false) => cl
  | Seq(parts) => first_seq(parts)
  | Alt(left, right) => Charset.union(first(left), first(right))
  | Plus(inner)
  | Group(_, inner) => first(inner)
  | Star(_)
  | Option(_)
  | Refgroup(_)
  | Bol
  | Eol
  | Wordboundary => Charset.full
  }
/* Same as [first], for a sequence of regexps matched one after another:
   zero-width anchors are skipped, and an optional or starred head also
   contributes the first characters of what follows it. */
and first_seq = parts =>
  switch (parts) {
  | [] => Charset.full
  | [Bol | Eol | Wordboundary, ...rest] => first_seq(rest)
  | [Star(inner) | Option(inner), ...rest] =>
    Charset.union(first(inner), first_seq(rest))
  | [head, ..._] => first(head)
  };
/* Turns a [Char] or [CharClass] regexp into the string form of its character
   bitmap: case-folded first when [fold_case] is set, then complemented when
   the class is a negated one. Any other regexp is a programming error
   (assertion failure). */
let charclass_of_regexp = (fold_case, re) => {
  let (base, negated) =
    switch (re) {
    | CharClass(cl, compl) => (cl, compl)
    | Char(c) => (Charset.singleton(c), false)
    | _ => assert(false)
    };
  let folded = fold_case ? Charset.fold_case(base) : base;
  let result_set = negated ? Charset.complement(folded) : folded;
  Bytes.to_string(result_set);
};
/* The case fold table: a 256-character string whose character at index i is
   the lowercase equivalent of the character with code i (deprecated
   [Char.lowercase] is used on purpose, hence the warning override). */
let fold_case_table =
  [@ocaml.warning "-3"]
  {
    let lower_of_code = code => Char.lowercase(Char.chr(code));
    Bytes.to_string(Bytes.init(256, lower_of_code));
  };
/* Maps keyed by strings, ordered by the standard string comparison. */
module StringMap = Map.Make(String);
/* Compilation of a regular expression.
   [compile(fold_case, re)] translates the syntax tree [re] into a [regexp]
   record: a bytecode array, a constant pool of strings that the bytecode
   refers to by index, and a few counters.
   When [fold_case] is true, literals are emitted lowercased with the *NORM
   opcodes, character classes are closed under case, and [normtable] is set
   to [fold_case_table]; otherwise [normtable] is the empty string.
   [numgroups] is one more than the highest group number used by a [Group]
   or [Refgroup] node (1 when there is none).
   Raises [Failure] when more than 64 nullable r* or r+ need a register. */
let compile = (fold_case, re) => {
  /* Instruction buffering */
  let prog = ref(Array.make(32, 0))
  and progpos = ref(0)
  and cpool = ref(StringMap.empty)
  and cpoolpos = ref(0)
  and numgroups = ref(1)
  and numregs = ref(0);
  /* Add a new instruction, doubling the buffer as often as needed */
  let emit_instr = (opc, arg) => {
    if (progpos^ >= Array.length(prog^)) {
      let newlen = ref(Array.length(prog^));
      while (progpos^ >= newlen^) {
        newlen := newlen^ * 2;
      };
      let nprog = Array.make(newlen^, 0);
      Array.blit(prog^, 0, nprog, 0, Array.length(prog^));
      prog := nprog;
    };
    prog^[progpos^] = instr(opc, arg);
    incr(progpos);
  };
  /* Reserve an instruction slot and return its position; the placeholder
     is overwritten later by [patch_instr] */
  let emit_hole = () => {
    let p = progpos^;
    emit_instr(op_CHAR, 0);
    p;
  };
  /* Fill a reserved instruction slot with a GOTO or PUSHBACK instruction */
  let patch_instr = (pos, opc, dest) =>
    prog^[pos] = instr(opc, displ(dest, pos));
  /* Return the cpool index for the given string, adding it if not
     already there */
  let cpool_index = s =>
    try (StringMap.find(s, cpool^)) {
    | Not_found =>
      let p = cpoolpos^;
      cpool := StringMap.add(s, p, cpool^);
      incr(cpoolpos);
      p;
    };
  /* Allocate fresh register if regexp is nullable; -1 means "no register" */
  let allocate_register_if_nullable = r =>
    if (is_nullable(r)) {
      let n = numregs^;
      if (n >= 64) {
        failwith("too many r* or r+ where r is nullable");
      };
      incr(numregs);
      n;
    } else {
      (-1);
    };
  /* Main recursive compilation function */
  let rec emit_code =
    fun
    | Char(c) =>
      if (fold_case) {
        [@ocaml.warning "-3"]
        emit_instr(op_CHARNORM, Char.code(Char.lowercase(c)));
      } else {
        emit_instr(op_CHAR, Char.code(c));
      }
    | String(s) =>
      switch (String.length(s)) {
      | 0 => ()
      | 1 =>
        if (fold_case) {
          [@ocaml.warning "-3"]
          emit_instr(op_CHARNORM, Char.code(Char.lowercase(s.[0])));
        } else {
          emit_instr(op_CHAR, Char.code(s.[0]));
        }
      | _ =>
        /* null characters are not accepted by the STRING* instructions;
           if one is found, split string at null character */
        try (
          {
            let i = String.index(s, '\000');
            emit_code(String(string_before(s, i)));
            emit_instr(op_CHAR, 0);
            emit_code(String(string_after(s, i + 1)));
          }
        ) {
        | Not_found =>
          if (fold_case) {
            [@ocaml.warning "-3"]
            emit_instr(op_STRINGNORM, cpool_index(String.lowercase(s)));
          } else {
            emit_instr(op_STRING, cpool_index(s));
          }
        }
      }
    | CharClass(cl, compl) => {
        let cl1 =
          if (fold_case) {
            Charset.fold_case(cl);
          } else {
            cl;
          };
        let cl2 =
          if (compl) {
            Charset.complement(cl1);
          } else {
            cl1;
          };
        emit_instr(op_CHARCLASS, cpool_index(Bytes.to_string(cl2)));
      }
    | Seq(rl) => emit_seq_code(rl)
    | Alt(r1, r2) => {
        /*      PUSHBACK lbl1
                <match r1>
                GOTO lbl2
           lbl1: <match r2>
           lbl2: ... */
        let pos_pushback = emit_hole();
        emit_code(r1);
        let pos_goto_end = emit_hole();
        let lbl1 = progpos^;
        emit_code(r2);
        let lbl2 = progpos^;
        patch_instr(pos_pushback, op_PUSHBACK, lbl1);
        patch_instr(pos_goto_end, op_GOTO, lbl2);
      }
    | Star(r) => {
        /* Implement longest match semantics for compatibility with old Str */
        /* General translation:
             lbl1: PUSHBACK lbl2
                   SETMARK regno
                   <match r>
                   CHECKPROGRESS regno
                   GOTO lbl1
             lbl2:
           If r cannot match the empty string, code can be simplified:
             lbl1: PUSHBACK lbl2
                   <match r>
                   GOTO lbl1
             lbl2:
         */
        let regno = allocate_register_if_nullable(r);
        let lbl1 = emit_hole();
        if (regno >= 0) {
          emit_instr(op_SETMARK, regno);
        };
        emit_code(r);
        if (regno >= 0) {
          emit_instr(op_CHECKPROGRESS, regno);
        };
        emit_instr(op_GOTO, displ(lbl1, progpos^));
        let lbl2 = progpos^;
        patch_instr(lbl1, op_PUSHBACK, lbl2);
      }
    | Plus(r) => {
        /* Implement longest match semantics for compatibility with old Str */
        /* General translation:
             lbl1: <match r>
                   CHECKPROGRESS regno
                   PUSHBACK lbl2
                   SETMARK regno
                   GOTO lbl1
             lbl2:
           If r cannot match the empty string, code can be simplified:
             lbl1: <match r>
                   PUSHBACK lbl2
                   GOTO lbl1
             lbl2:
         */
        let regno = allocate_register_if_nullable(r);
        let lbl1 = progpos^;
        emit_code(r);
        if (regno >= 0) {
          emit_instr(op_CHECKPROGRESS, regno);
        };
        let pos_pushback = emit_hole();
        if (regno >= 0) {
          emit_instr(op_SETMARK, regno);
        };
        emit_instr(op_GOTO, displ(lbl1, progpos^));
        let lbl2 = progpos^;
        patch_instr(pos_pushback, op_PUSHBACK, lbl2);
      }
    | Option(r) => {
        /* Implement longest match semantics for compatibility with old Str */
        /*      PUSHBACK lbl
                <match r>
           lbl:
         */
        let pos_pushback = emit_hole();
        emit_code(r);
        let lbl = progpos^;
        patch_instr(pos_pushback, op_PUSHBACK, lbl);
      }
    | Group(n, r) => {
        emit_instr(op_BEGGROUP, n);
        emit_code(r);
        emit_instr(op_ENDGROUP, n);
        /* Keep the highest group number seen so far (plus one for group 0),
           not a running sum of group numbers. */
        numgroups := max(numgroups^, n + 1);
      }
    | Refgroup(n) => {
        emit_instr(op_REFGROUP, n);
        /* A back-reference also needs slot n to exist. */
        numgroups := max(numgroups^, n + 1);
      }
    | Bol => emit_instr(op_BOL, 0)
    | Eol => emit_instr(op_EOL, 0)
    | Wordboundary => emit_instr(op_WORDBOUNDARY, 0)
  /* Compile a sequence. A starred / plussed / optional single character or
     class is emitted as one SIMPLE* instruction when its characters cannot
     start whatever follows it in the sequence. */
  and emit_seq_code =
    fun
    | [] => ()
    | [Star((Char(_) | CharClass(_)) as r), ...rl]
        when disjoint_modulo_case(first(r), first_seq(rl)) => {
        emit_instr(
          op_SIMPLESTAR,
          cpool_index(charclass_of_regexp(fold_case, r)),
        );
        emit_seq_code(rl);
      }
    | [Plus((Char(_) | CharClass(_)) as r), ...rl]
        when disjoint_modulo_case(first(r), first_seq(rl)) => {
        emit_instr(
          op_SIMPLEPLUS,
          cpool_index(charclass_of_regexp(fold_case, r)),
        );
        emit_seq_code(rl);
      }
    | [Option((Char(_) | CharClass(_)) as r), ...rl]
        when disjoint_modulo_case(first(r), first_seq(rl)) => {
        emit_instr(
          op_SIMPLEOPT,
          cpool_index(charclass_of_regexp(fold_case, r)),
        );
        emit_seq_code(rl);
      }
    | [r, ...rl] => {
        emit_code(r);
        emit_seq_code(rl);
      }
  /* Disjointness test that ignores case when compiling case-insensitively */
  and disjoint_modulo_case = (c1, c2) =>
    if (fold_case) {
      Charset.disjoint(Charset.fold_case(c1), Charset.fold_case(c2));
    } else {
      Charset.disjoint(c1, c2);
    };
  emit_code(re);
  emit_instr(op_ACCEPT, 0);
  /* Record the set of possible first characters, unless it is the full set */
  let start = first(re);
  let start' =
    if (fold_case) {
      Charset.fold_case(start);
    } else {
      start;
    };
  let start_pos =
    if (start == Charset.full) {
      (-1);
    } else {
      cpool_index(Bytes.to_string(Charset.expand(start')));
    };
  /* Turn the string -> index map into an array indexed by pool position */
  let constantpool = Array.make(cpoolpos^, "");
  StringMap.iter((str, idx) => constantpool[idx] = str, cpool^);
  {
    prog: Array.sub(prog^, 0, progpos^),
    cpool: constantpool,
    normtable: if (fold_case) {fold_case_table} else {""},
    numgroups: numgroups^,
    numregisters: numregs^,
    startchars: start_pos,
  };
};
/** Parsing of a regular expression */;
/* Efficient buffering of sequences: consecutive [Char] nodes are gathered in
   a character buffer and turned into a single [Char] or [String] node when
   something else is added or when the sequence is extracted. */
module SeqBuffer = {
  type t = {
    sb_chars: Buffer.t, /* literal characters not yet turned into a node */
    mutable sb_next: list(re_syntax) /* finished nodes, most recent first */
  };

  /* Returns a new, empty buffer. */
  let create = () => {sb_chars: Buffer.create(16), sb_next: []};

  /* Moves the pending characters, if any, into the node list. */
  let flush = buf => {
    let pending = Buffer.contents(buf.sb_chars);
    Buffer.clear(buf.sb_chars);
    let push = node => {
      buf.sb_next = [node, ...buf.sb_next];
    };
    let n = String.length(pending);
    if (n == 1) {
      push(Char(pending.[0]));
    } else if (n > 1) {
      push(String(pending));
    };
  };

  /* Appends [re] to the sequence under construction. */
  let add = (buf, re) =>
    switch (re) {
    | Char(c) => Buffer.add_char(buf.sb_chars, c)
    | other =>
      flush(buf);
      buf.sb_next = [other, ...buf.sb_next];
    };

  /* Returns everything added so far, in insertion order, as a [Seq] node. */
  let extract = buf => {
    flush(buf);
    let in_order = List.rev(buf.sb_next);
    Seq(in_order);
  };
};
/* The character class corresponding to `.': every character except the
   newline. */
let dotclass = Charset.singleton('\n') |> Charset.complement;
/* Parse a regular expression.
   [parse(s)] reads the whole of [s] with a recursive-descent parser and
   returns its syntax tree. Every parsing function takes a position in [s]
   and returns its result together with the position following what it read.
   Groups are numbered from 1, in the order their opening \( is met.
   Raises [Failure] on an unclosed \( group, an unclosed [ class, or a
   spurious \) . */
let parse = s => {
  let len = String.length(s);
  let group_counter = ref(1);
  /* True when the two characters at position [i] are a backslash followed
     by [c]. */
  let escape_at = (i, c) => i + 2 <= len && s.[i] == '\\' && s.[i + 1] == c;
  /* Alternatives separated by \| */
  let rec regexp0 = i => {
    let (left, next) = regexp1(i);
    regexp0cont(left, next);
  }
  and regexp0cont = (left, i) =>
    if (escape_at(i, '|')) {
      let (right, next) = regexp1(i + 2);
      regexp0cont(Alt(left, right), next);
    } else {
      (left, i);
    }
  /* A sequence of postfixed atoms, ended by the end of input, \| or \) */
  and regexp1 = i => regexp1cont(SeqBuffer.create(), i)
  and regexp1cont = (sb, i) => {
    let at_end = i >= len || escape_at(i, '|') || escape_at(i, ')');
    if (at_end) {
      (SeqBuffer.extract(sb), i);
    } else {
      let (item, next) = regexp2(i);
      SeqBuffer.add(sb, item);
      regexp1cont(sb, next);
    };
  }
  /* An atom followed by any number of ? * + postfix operators */
  and regexp2 = i => {
    let (atom, next) = regexp3(i);
    regexp2cont(atom, next);
  }
  and regexp2cont = (r, i) =>
    if (i < len) {
      switch (s.[i]) {
      | '?' => regexp2cont(Option(r), i + 1)
      | '*' => regexp2cont(Star(r), i + 1)
      | '+' => regexp2cont(Plus(r), i + 1)
      | _ => (r, i)
      };
    } else {
      (r, i);
    }
  /* A single atom */
  and regexp3 = i =>
    switch (s.[i]) {
    | '\\' => regexpbackslash(i + 1)
    | '[' =>
      let (cls, negated, next) = regexpclass0(i + 1);
      (CharClass(cls, negated), next);
    | '^' => (Bol, i + 1)
    | '$' => (Eol, i + 1)
    | '.' => (CharClass(dotclass, false), i + 1)
    | c => (Char(c), i + 1)
    }
  /* What follows a backslash; [i] is the position just after it */
  and regexpbackslash = i =>
    if (i >= len) {
      (Char('\\'), i);
    } else {
      switch (s.[i]) {
      /* \| and \) are intercepted by regexp1cont before an atom is read */
      | '|'
      | ')' => assert(false)
      | '(' =>
        /* take the number before parsing the body, so that an outer group
           is numbered before the groups nested in it */
        let group_no = group_counter^;
        incr(group_counter);
        let (body, close) = regexp0(i + 1);
        if (escape_at(close, ')')) {
          (Group(group_no, body), close + 2);
        } else {
          failwith("\\( group not closed by \\)");
        };
      | '1'..'9' as digit => (Refgroup(Char.code(digit) - 48), i + 1)
      | 'b' => (Wordboundary, i + 1)
      | c => (Char(c), i + 1)
      };
    }
  /* A bracketed class; [i] is the position just after the opening [ */
  and regexpclass0 = i => {
    let negated = i < len && s.[i] == '^';
    let (cls, next) = regexpclass1(negated ? i + 1 : i);
    (cls, negated, next);
  }
  and regexpclass1 = i => {
    let cls = Charset.make_empty();
    let next = regexpclass2(cls, i, i);
    (cls, next);
  }
  /* Class members; a ] found at position [start] is an ordinary member */
  and regexpclass2 = (cls, start, i) => {
    if (i >= len) {
      failwith("[ class not closed by ]");
    };
    let here = s.[i];
    if (here == ']' && i > start) {
      i + 1;
    } else if (i + 2 < len && s.[i + 1] == '-' && s.[i + 2] != ']') {
      Charset.add_range(cls, here, s.[i + 2]);
      regexpclass2(cls, start, i + 3);
    } else {
      Charset.add(cls, here);
      regexpclass2(cls, start, i + 1);
    };
  };
  let (re, stop) = regexp0(0);
  if (stop == len) {
    re;
  } else {
    /* parsing stopped early: only an unmatched \) can cause that */
    failwith("spurious \\) in regular expression");
  };
};
/** Parsing and compilation */;
/* Parses and compiles [e] as a case-sensitive regular expression. */
let regexp = e => e |> parse |> compile(false);
/* Parses and compiles [e] as a case-insensitive regular expression. */
let regexp_case_fold = e => e |> parse |> compile(true);
/* [quote(s)] returns [s] with a backslash inserted before each of the
   characters [ ] * . \ ? + ^ $ ; all other characters are copied as is. */
let quote = s => {
  let out = Buffer.create(2 * String.length(s));
  String.iter(
    ch => {
      switch (ch) {
      | '['
      | ']'
      | '*'
      | '.'
      | '\\'
      | '?'
      | '+'
      | '^'
      | '$' => Buffer.add_char(out, '\\')
      | _ => ()
      };
      Buffer.add_char(out, ch);
    },
    s,
  );
  Buffer.contents(out);
};
/* Compiles a regexp that matches the string [s] literally, case-sensitively;
   [s] is not parsed. */
let regexp_string = s => {
  let literal = String(s);
  compile(false, literal);
};
/* Compiles a regexp that matches the string [s] literally, ignoring case;
   [s] is not parsed. */
let regexp_string_case_fold = s => {
  let literal = String(s);
  compile(true, literal);
};
/** Matching functions **/;
/* Matching primitives implemented outside this file. Each takes a compiled
   regexp, the subject string and a position, and returns an int array.
   As used below: an empty array means "no match"; otherwise element 0 is
   returned as the match position, and elements 2n / 2n+1 are read as the
   start / end of group n, with -1 marking a group that has no value.
   NOTE(review): the exact matching semantics live in the C stubs - confirm
   against strstubs.c. */
external re_string_match: (regexp, string, int) => array(int) =
"re_string_match";
external re_partial_match: (regexp, string, int) => array(int) =
"re_partial_match";
external re_search_forward: (regexp, string, int) => array(int) =
"re_search_forward";
external re_search_backward: (regexp, string, int) => array(int) =
"re_search_backward";
/* Result array of the most recent matching or searching call; global state
   read by the group_* / matched_* / match_* accessors and by
   [replace_matched]. */
let last_search_result = ref([||]);
/* Runs [re_string_match] on [s] at position [pos], records the result in
   [last_search_result], and tells whether there was a match. */
let string_match = (re, s, pos) => {
  last_search_result := re_string_match(re, s, pos);
  Array.length(last_search_result^) > 0;
};
/* Runs [re_partial_match] on [s] at position [pos], records the result in
   [last_search_result], and tells whether the result was non-empty. */
let string_partial_match = (re, s, pos) => {
  last_search_result := re_partial_match(re, s, pos);
  Array.length(last_search_result^) > 0;
};
/* Runs [re_search_forward] on [s] from position [pos] and records the result
   in [last_search_result]. Returns element 0 of the result (the match
   position); raises [Not_found] when the result is empty. */
let search_forward = (re, s, pos) => {
  let groups = re_search_forward(re, s, pos);
  last_search_result := groups;
  switch (Array.length(groups)) {
  | 0 => raise(Not_found)
  | _ => groups[0]
  };
};
/* Runs [re_search_backward] on [s] from position [pos] and records the
   result in [last_search_result]. Returns element 0 of the result (the match
   position); raises [Not_found] when the result is empty. */
let search_backward = (re, s, pos) => {
  let groups = re_search_backward(re, s, pos);
  last_search_result := groups;
  switch (Array.length(groups)) {
  | 0 => raise(Not_found)
  | _ => groups[0]
  };
};
/* Start position of group [n] in the last match.
   Raises [Invalid_argument] when [n] is negative or beyond the recorded
   groups, and [Not_found] when the group holds no value (-1). */
let group_beginning = n => {
  let groups = last_search_result^;
  let idx = 2 * n;
  if (n < 0 || idx >= Array.length(groups)) {
    invalid_arg("Str.group_beginning");
  } else {
    let pos = groups[idx];
    pos == (-1) ? raise(Not_found) : pos;
  };
};
/* End position of group [n] in the last match.
   Raises [Invalid_argument] when [n] is negative or beyond the recorded
   groups, and [Not_found] when the group holds no value (-1). */
let group_end = n => {
  let groups = last_search_result^;
  let idx = 2 * n;
  if (n < 0 || idx >= Array.length(groups)) {
    invalid_arg("Str.group_end");
  } else {
    let pos = groups[idx + 1];
    pos == (-1) ? raise(Not_found) : pos;
  };
};
/* Substring of [txt] covered by group [n] in the last match.
   Raises [Invalid_argument] when [n] is negative or beyond the recorded
   groups, and [Not_found] when the group start is unset (-1). */
let matched_group = (n, txt) => {
  let groups = last_search_result^;
  let idx = 2 * n;
  if (n < 0 || idx >= Array.length(groups)) {
    invalid_arg("Str.matched_group");
  } else {
    let start = groups[idx];
    let stop = groups[idx + 1];
    if (start == (-1)) {
      raise(Not_found);
    } else {
      String.sub(txt, start, stop - start);
    };
  };
};
/* Start position of the whole last match (group 0). */
let match_beginning = () => group_beginning(0);
/* End position of the whole last match (group 0). */
let match_end = () => group_end(0);
/* Substring of [txt] covered by the whole last match (group 0). */
let matched_string = txt => matched_group(0, txt);
/** Replacement **/;
/* Primitive implemented outside this file. [replace_matched] calls it with a
   replacement template, the group positions of the last match, and the
   string that was matched against; it returns the replacement text.
   NOTE(review): the template syntax is defined by the C stub - confirm
   against strstubs.c. */
external re_replacement_text: (string, array(int), string) => string =
"re_replacement_text";
/* Expands the template [repl] through [re_replacement_text], using the group
   positions of the last match; [matched] must be the string that match was
   run against. */
let replace_matched = (repl, matched) => {
  let groups = last_search_result^;
  re_replacement_text(repl, groups, matched);
};
/* [substitute_first(expr, repl_fun, text)] returns [text] with the first
   substring matching [expr] replaced by [repl_fun(text)]; [text] is returned
   unchanged when nothing matches. [repl_fun] is called once the match has
   been recorded, so it can use the group accessors. */
let substitute_first = (expr, repl_fun, text) =>
try (
{
let pos = search_forward(expr, text, 0);
/* NOTE(review): [repl_fun(text)] and [match_end()] sit in the same list
   literal, so the order in which they run is the compiler's choice; a
   [repl_fun] that performs another search could change what [match_end()]
   returns. */
String.concat(
"",
[
string_before(text, pos),
repl_fun(text),
string_after(text, match_end()),
],
);
}
) {
/* Also catches a Not_found raised by repl_fun or match_end, not only the
   one raised by search_forward. */
| Not_found => text
};
/* Same as [search_forward], but returns [None] instead of raising
   [Not_found]. */
let opt_search_forward = (re, s, pos) =>
  switch (search_forward(re, s, pos)) {
  | found => Some(found)
  | exception Not_found => None
  };
/* [global_substitute(expr, repl_fun, text)] returns [text] with every
   substring matching [expr] replaced by the result of [repl_fun(text)],
   which is called right after each match is found. After an empty match the
   next search starts one character further, so that the scan advances. */
let global_substitute = (expr, repl_fun, text) => {
  let text_len = String.length(text);
  /* [pieces] holds the output fragments, most recent first; [from] is where
     the text not yet copied begins. */
  let rec collect = (pieces, from, prev_empty) => {
    let search_from = prev_empty ? from + 1 : from;
    let found =
      search_from > text_len
        ? None : opt_search_forward(expr, text, search_from);
    switch (found) {
    | None => [string_after(text, from), ...pieces]
    | Some(hit) =>
      /* read the match end before repl_fun gets a chance to search again */
      let hit_end = match_end();
      let replacement = repl_fun(text);
      let untouched = String.sub(text, from, hit - from);
      collect([replacement, untouched, ...pieces], hit_end, hit_end == hit);
    };
  };
  collect([], 0, false) |> List.rev |> String.concat("");
};
/* Replaces every match of [expr] in [text] by the expansion of the template
   [repl] (see [replace_matched]). */
let global_replace = (expr, repl, text) => {
  let expand_template = replace_matched(repl);
  global_substitute(expr, expand_template, text);
};
/* Replaces the first match of [expr] in [text] by the expansion of the
   template [repl] (see [replace_matched]). */
let replace_first = (expr, repl, text) => {
  let expand_template = replace_matched(repl);
  substitute_first(expr, expand_template, text);
};
/** Splitting */;
/* Searches forward from [start] for a match that ends after [start].
   When the match found ends at or before [start], the search is retried once
   from [start + 1], provided [start] is still inside [text].
   Returns the match position, or [None]. */
let opt_search_forward_progress = (expr, text, start) =>
  switch (opt_search_forward(expr, text, start)) {
  | Some(_) as hit when match_end() > start => hit
  | Some(_) when start < String.length(text) =>
    opt_search_forward(expr, text, start + 1)
  | Some(_)
  | None => None
  };
/* [bounded_split(expr, text, num)] cuts [text] at the matches of [expr] and
   returns the pieces in order. A delimiter matching at position 0 is skipped
   first, and nothing is produced once the end of [text] is reached. When the
   piece counter reaches 1 the rest of [text] is returned as the last piece;
   a [num] of 0 or less never reaches 1, so the count is then unlimited. */
let bounded_split = (expr, text, num) => {
  let text_len = String.length(text);
  let first_field = string_match(expr, text, 0) ? match_end() : 0;
  /* [fields] is accumulated most recent first */
  let rec gather = (fields, from, remaining) =>
    if (from >= text_len) {
      fields;
    } else {
      let next_delim =
        remaining == 1
          ? None : opt_search_forward_progress(expr, text, from);
      switch (next_delim) {
      | None => [string_after(text, from), ...fields]
      | Some(delim_pos) =>
        let field = String.sub(text, from, delim_pos - from);
        gather([field, ...fields], match_end(), remaining - 1);
      };
    };
  List.rev(gather([], first_field, num));
};
/* [bounded_split] with no limit on the number of pieces. */
let split = (expr, text) => {
  let unlimited = 0;
  bounded_split(expr, text, unlimited);
};
/* [bounded_split_delim(expr, text, num)] cuts [text] at the matches of
   [expr] and returns the pieces in order. Unlike [bounded_split], a
   delimiter at the very start or end of [text] yields an empty piece.
   The empty string gives the empty list. When the piece counter reaches 1
   the rest of [text] is returned as the last piece; a [num] of 0 or less
   never reaches 1, so the count is then unlimited. */
let bounded_split_delim = (expr, text, num) => {
  let text_len = String.length(text);
  /* [fields] is accumulated most recent first */
  let rec gather = (fields, from, remaining) =>
    if (from > text_len) {
      fields;
    } else {
      let next_delim =
        remaining == 1
          ? None : opt_search_forward_progress(expr, text, from);
      switch (next_delim) {
      | None => [string_after(text, from), ...fields]
      | Some(delim_pos) =>
        let field = String.sub(text, from, delim_pos - from);
        gather([field, ...fields], match_end(), remaining - 1);
      };
    };
  switch (text) {
  | "" => []
  | _ => List.rev(gather([], 0, num))
  };
};
/* [bounded_split_delim] with no limit on the number of pieces. */
let split_delim = (expr, text) => {
  let unlimited = 0;
  bounded_split_delim(expr, text, unlimited);
};
/* One element of the list returned by [bounded_full_split] and
   [full_split]. */
type split_result =
/* a stretch of the input lying between delimiters */
| Text(string)
/* the text of a delimiter match */
| Delim(string);
/* [bounded_full_split(expr, text, num)] cuts [text] at the matches of [expr]
   and returns, in order, both the pieces ([Text]) and the delimiters
   ([Delim]). No empty [Text] is produced in front of a delimiter. When the
   counter reaches 1 the rest of [text] is returned as one last [Text]; a
   [num] of 0 or less never reaches 1, so the count is then unlimited. */
let bounded_full_split = (expr, text, num) => {
  let text_len = String.length(text);
  /* [pieces] is accumulated most recent first */
  let rec gather = (pieces, from, remaining) =>
    if (from >= text_len) {
      pieces;
    } else {
      let next_delim =
        remaining == 1
          ? None : opt_search_forward_progress(expr, text, from);
      switch (next_delim) {
      | None => [Text(string_after(text, from)), ...pieces]
      | Some(delim_pos) =>
        let delim = Delim(matched_string(text));
        let before_delim =
          delim_pos > from
            ? [Text(String.sub(text, from, delim_pos - from)), ...pieces]
            : pieces;
        gather([delim, ...before_delim], match_end(), remaining - 1);
      };
    };
  List.rev(gather([], 0, num));
};
/* [bounded_full_split] with no limit on the number of pieces. */
let full_split = (expr, text) => {
  let unlimited = 0;
  bounded_full_split(expr, text, unlimited);
};
@idkjs
Copy link
Author

idkjs commented Jun 24, 2021

https://sketch.sh/s/TiONgHfdUbkSit9jHeNMo0/
https://github.com/ocaml/ocaml/blob/trunk/otherlibs/str/str.ml

Int.max is not really handled in BuckleScript. It has been changed by removing it and relying on the BuckleScript version. The file doesn't work; it is just here for reference.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment