Skip to content

Instantly share code, notes, and snippets.

Last active August 26, 2023 13:44
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
Example of encoding Unicode code points as a UTF-8 string and counting the code points in it
(* Re: *)
(* Use this directive when running in the REPL; otherwise build with dune and declare uutf as a library dependency. *)
#require "uutf";;
(* Converts an array of ints, each a Unicode scalar value (a code point
   outside the surrogate range), into a UTF-8 encoded string.

   Note that these are code points, not graphemes: a single user-perceived
   character may be made of several code points.

   Raises Invalid_argument (from Uchar.of_int) if an element is not a valid
   Unicode scalar value, e.g. a surrogate or a value above 0x10FFFF. *)
let utf8_to_string uchars =
  (* The initial size is only a capacity hint; the buffer grows on demand. *)
  let buf = Buffer.create (2 * Array.length uchars) in
  (* The standard library encoder is used instead of the deprecated
     Uutf.Buffer.add_utf_8; both append the UTF-8 encoding of the character. *)
  Array.iter
    (fun uchar -> Buffer.add_utf_8_uchar buf (Uchar.of_int uchar))
    uchars;
  Buffer.contents buf
(* Calculates the number of Unicode scalar values (code points) in a UTF-8
   encoded string.

   This is not a grapheme count: every decoded character adds one to the
   total, so a user-perceived character built from several code points is
   counted several times.

   Raises Invalid_argument on malformed input, using the string that Uutf
   reports for the malformed sequence as the exception argument. *)
let utf8_len =
  (* Folding step: the position argument is ignored; only the running count
     and the decoding result matter. *)
  let width_folder len _ = function
    | `Uchar _ -> succ len
    | `Malformed s -> invalid_arg s
  in
  Uutf.String.fold_utf_8 width_folder 0
(* With both helpers in place, the example from the other languages can be
   reproduced: five code points, all above 0xFFFF. *)
let s =
  (* "𥄅𦅙𳔡𢉩𠅔" *)
  [| 0x25105; 0x26159; 0x33521; 0x22269; 0x20154 |] |> utf8_to_string

(* 5 *)
let len = s |> utf8_len
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment