Skip to content

Instantly share code, notes, and snippets.

@mjambon
Last active January 28, 2021 22:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mjambon/4a22bcee7af16076587da2dd9b620163 to your computer and use it in GitHub Desktop.
Save mjambon/4a22bcee7af16076587da2dd9b620163 to your computer and use it in GitHub Desktop.
What a string library for OCaml might look like
(*
Extra string operations for OCaml
Conventions:
- all operations are failsafe: no exceptions are raised
- negative index i is equivalent to positive index (length + i),
  e.g. -1 designates the last character
- from/until indicate inclusive ranges like in 'for' loops
(for i = from to until do ... done)
- does not replicate or override the standard String module
Limitations:
- does not handle regexps
- does not handle multibyte characters
Other properties:
- very low maintenance
*)
(* conversions between a string and the list or array of its characters *)
val of_list : char list -> string
val to_list : string -> char list
val of_array : char array -> string
val to_array : string -> char array
(* substring extraction: 'sub' selects by position and length, 'range' by
inclusive from/until bounds (see Conventions at the top of the file).
NOTE(review): the defaults used when an optional argument is omitted are not
stated; presumably the start and the end of the string. To be confirmed. *)
val sub : ?pos:int -> ?len:int -> string -> string
val range : ?from:int -> ?until:int -> string -> string
(* left-to-right search without overlapping matches.
Returns one int per match, presumably the start position of each occurrence
of 'sub' (to be confirmed). The optional from/until bounds follow the
inclusive-range convention stated at the top of the file. *)
val search : sub:string -> ?from:int -> ?until:int -> string -> int list
(* search for occurrences of a substring and return the non-matching fragments,
i.e. the pieces of the string that are not occurrences of 'sub'.
NOTE(review): the result for an empty 'sub' is not specified here. *)
val split : sub:string -> string -> string list
(* search for occurrences of a substring and return matching and non-matching
fragments, indicated by the boolean flag. The positional argument is the
string being searched, as in 'split'.
NOTE(review): presumably the flag is true for fragments equal to 'sub' and
false for the text in between; to be confirmed. *)
val search_map : sub:string -> string -> (string * bool) list
(* test whether 'sub' occurs at a given place in the string: `Start, `End, or
an explicit position `Pos i.
NOTE(review): presumably `Start is a prefix test and `End a suffix test;
to be confirmed. *)
val matches : sub:string -> at:[ `Start | `End | `Pos of int ] -> string -> bool
(* slightly faster but cumbersome equivalents of the above. All labeled
arguments are mandatory here.
- sub_pos_len and range_from_until correspond to 'sub' and 'range'
- search_sub and search_from_until correspond to 'search' without and with
  explicit bounds
- matches_at corresponds to 'matches' with a plain integer position *)
val sub_pos_len : pos:int -> len:int -> string -> string
val range_from_until : from:int -> until:int -> string -> string
val search_sub : sub:string -> string -> int list
val search_from_until : sub:string -> from:int -> until:int -> string -> int list
val matches_at : sub:string -> at:int -> string -> bool
(* line processing. Recognizes CRLF, LF alone, and the end of the string as line
terminators. "a" is one line, "a\n" is two lines, "a\r\nb" is two lines.
The three functions below return the lines of a string as a stream, a list or
an array.
NOTE(review): ?include_terminator presumably controls whether each returned
line keeps its terminator; its default is not stated here.
NOTE(review): Stream was deprecated in OCaml 4.14 and removed from the standard
library in OCaml 5.0 (it now lives in the camlp-streams package); consider
offering a Seq.t variant as well. *)
val line_stream : ?include_terminator:bool -> string -> string Stream.t
val line_list : ?include_terminator:bool -> string -> string list
val line_array : ?include_terminator:bool -> string -> string array
(* call a function on each line of the string, in the style of List.iter and
List.iteri: line_iter passes only the line, line_iteri also passes an index
(presumably the 0-based line number; to be confirmed). Lines are delimited
by the terminators recognized by the other line_* functions. *)
val line_iter : ?include_terminator:bool -> string -> (string -> unit) -> unit
val line_iteri : ?include_terminator:bool -> string -> (int -> string -> unit) -> unit
(* next/previous line start after/before the specified position,
always a valid position within the string.
Arguments are the string, then the reference position.
NOTE(review): presumably None is returned when no such line start exists;
to be confirmed. *)
val next_line_start : string -> int -> int option
val prev_line_start : string -> int -> int option
(* next/previous line end after/before the specified position.
It is the position right after the last character of the line,
which is either part of a line terminator or the first position
outside of the string.
Arguments are the string, then the reference position.
NOTE(review): presumably None is returned when no such line end exists;
to be confirmed. *)
val next_line_end : string -> int -> int option
val prev_line_end : string -> int -> int option
(* custom iterators and mappers *)
(* apply a function to each character of the string, optionally restricted to
the inclusive range given by from/until. iteri also passes the position of
the character. The string comes last, as in String.iter and String.iteri. *)
val iter : ?from:int -> ?until:int -> (char -> unit) -> string -> unit
val iteri : ?from:int -> ?until:int -> (int -> char -> unit) -> string -> unit
(* iterate over a string, writing to an extensible buffer. The callback
receives the buffer, the position and the character; the string to iterate
over is the last argument.
NOTE(review): the result is presumably the final contents of the buffer;
to be confirmed. *)
val iterb : ?from:int -> ?until:int -> (Buffer.t -> int -> char -> unit) -> string -> string
(* functional-style mappers. Each takes the input string as its last argument,
as in String.map, and returns a new string. The variants with an 'i' suffix
also pass the position of the character to the callback.
- map, mapi: replace each character by another character
- map_str, mapi_str: replace each character by a string
- filter, filteri: select characters with a predicate
- filter_map, filter_mapi: the callback returns an optional replacement
  character
NOTE(review): how characters outside the from/until range are treated (kept
as is or dropped) is not specified here. *)
val map : ?from:int -> ?until:int -> (char -> char) -> string -> string
val mapi : ?from:int -> ?until:int -> (int -> char -> char) -> string -> string
val map_str : ?from:int -> ?until:int -> (char -> string) -> string -> string
val mapi_str : ?from:int -> ?until:int -> (int -> char -> string) -> string -> string
val filter : ?from:int -> ?until:int -> (char -> bool) -> string -> string
val filteri : ?from:int -> ?until:int -> (int -> char -> bool) -> string -> string
val filter_map : ?from:int -> ?until:int -> (char -> char option) -> string -> string
val filter_mapi : ?from:int -> ?until:int -> (int -> char -> char option) -> string -> string
(* familiar signatures found in the List module, with extra from/until options.
Argument order mirrors List.fold_left (function, accumulator, string) and
List.fold_right (function, string, accumulator). The variants with an 'i'
suffix also pass an int to the callback, presumably the position of the
character (to be confirmed). *)
val fold_left : ?from:int -> ?until:int -> ('a -> char -> 'a) -> 'a -> string -> 'a
val fold_lefti : ?from:int -> ?until:int -> ('a -> int -> char -> 'a) -> 'a -> string -> 'a
val fold_right : ?from:int -> ?until:int -> (char -> 'a -> 'a) -> string -> 'a -> 'a
val fold_righti : ?from:int -> ?until:int -> (int -> char -> 'a -> 'a) -> string -> 'a -> 'a
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment