Skip to content

Instantly share code, notes, and snippets.

@leque
Last active January 18, 2023 14:55
Show Gist options
  • Save leque/97f416db8282a9bc1ed2d40ddb3814c8 to your computer and use it in GitHub Desktop.
Save leque/97f416db8282a9bc1ed2d40ddb3814c8 to your computer and use it in GitHub Desktop.
string -> Uchar.t Seq.t
(** Signature of a purely functional reader: a cursor built from some
    underlying stream that hands out one element per step. *)
module type Reader = sig
  (** The underlying source the reader is built from. *)
  type stream

  (** The kind of element produced by each read step. *)
  type elem

  (** The reader state. *)
  type t

  (** [make stream] builds a reader over [stream]. *)
  val make : stream -> t

  (** [read t] is either [Some (elem, t')], an element together with the
      reader state to continue from, or [None].
      NOTE(review): [None] presumably means that nothing more can be read;
      the signature alone does not say why. *)
  val read : t -> (elem * t) option
end
(** Binding operators and small helpers for writing [option] computations
    in a sequential style. Intended to be opened locally. *)
module Option_syntax = struct
  (** Monadic bind: [let* x = o in body] runs [body] with the payload of
      [o] when [o] is [Some _], and is [None] otherwise. *)
  let ( let* ) opt k =
    match opt with
    | Some v -> k v
    | None -> None

  (** Functorial map: [let+ x = o in e] is [Some e] when [o] is [Some _],
      and [None] otherwise. *)
  let ( let+ ) opt f =
    match opt with
    | Some v -> Some (f v)
    | None -> None

  (** [return v] wraps [v] as [Some v]. *)
  let return v = Some v

  (** [guard cond] is [Some ()] when [cond] holds and [None] otherwise, so
      that binding it cuts the rest of the computation short on failure. *)
  let guard = function
    | true -> Some ()
    | false -> None
end
(** A {!Reader} that walks a UTF-8 encoded string one Unicode character at
    a time. The state is the string paired with the byte index of the next
    character to decode. *)
module Uchar_reader :
  Reader with type stream = string and type elem = Uchar.t = struct
  type stream = string
  type elem = Uchar.t
  type t = string * int

  (** [make src] starts reading [src] from byte index 0. *)
  let make src = (src, 0)

  (** [read (src, pos)] decodes the character starting at byte [pos].
      The result is [None] when [pos] is at or past the end of [src], and
      also when the bytes at [pos] do not form a valid UTF-8 sequence, so
      reading stops at the first malformed sequence. Otherwise it is the
      decoded character and a state advanced by the decoded byte length. *)
  let read (src, pos) =
    if pos >= String.length src then None
    else
      let decoded = String.get_utf_8_uchar src pos in
      if Uchar.utf_decode_is_valid decoded then
        let next_pos = pos + Uchar.utf_decode_length decoded in
        Some (Uchar.utf_decode_uchar decoded, (src, next_pos))
      else None
end
(** [uchar_seq_of_utf_8_string s] is the sequence of Unicode characters
    obtained by repeatedly applying [Uchar_reader.read], starting from a
    reader over [s]. The sequence ends as soon as the reader yields
    [None]. *)
let uchar_seq_of_utf_8_string s =
  s |> Uchar_reader.make |> Seq.unfold Uchar_reader.read
(*
# uchar_seq_of_utf_8_string "abcね" |> List.of_seq |> List.map Uchar.to_int;;
- : int list = [97; 98; 99; 12397]
*)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment