Last active
November 4, 2015 20:59
-
-
Save pqwy/9ccbe7eb7f90a03ce52d to your computer and use it in GitHub Desktop.
dumper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(** [encodev ?encoding xs] feeds every element of [xs] to a [Uutf] encoder
    writing into an in-memory buffer, signals end of input, and returns the
    accumulated bytes as a string. [encoding] defaults to [`UTF_8].

    NOTE(review): the elements are passed straight to [`Uchar], so they must
    have whatever type the linked Uutf version expects there - confirm. *)
let encodev ?(encoding = `UTF_8) xs =
  let buf = Buffer.create 16 in
  let enc = Uutf.encoder encoding (`Buffer buf) in
  (* The encoder's return value is discarded, as in every call here. *)
  let push v = ignore (Uutf.encode enc v) in
  List.iter (fun cp -> push (`Uchar cp)) xs;
  push `End;
  Buffer.contents buf
(** Scanners for sequences of hexadecimal integers written either as
    [0x<hex>] or [U+<hex>], separated by optional whitespace. *)
module S = struct
  open Scanf

  (** [bscans sc] reads as many [0x<hex>] / [U+<hex>] items from [sc] as
      possible and returns them in input order. Scanning stops, without
      raising, at the first item that matches neither notation or at end
      of input. *)
  let bscans sc =
    (* Only the scanner's own failure modes are treated as "no more items";
       any other exception (e.g. [Out_of_memory]) is left to propagate
       instead of being silently swallowed. *)
    let scan_one () =
      try Some (bscanf sc "0x%x " (fun i -> i))
      with Scan_failure _ | Failure _ | End_of_file -> (
        try Some (bscanf sc "U+%x " (fun i -> i))
        with Scan_failure _ | Failure _ | End_of_file -> None)
    in
    (* Accumulate in reverse so the loop is tail-recursive and stack use
       does not grow with the number of items on the line. *)
    let rec go acc =
      match scan_one () with
      | Some i -> go (i :: acc)
      | None -> List.rev acc
    in
    go []

  (** [scans s] applies {!bscans} to the string [s]. *)
  let scans s = sscanf s "%r" bscans (fun is -> is)
end
(** Small printers usable with [Printf]'s [%a] on output channels. *)
module P = struct
  (** [list ?brackets ?sep p oc xs] writes the elements of [xs] to [oc]
      using the element printer [p], with [sep] between consecutive
      elements, the whole enclosed in the [brackets] pair.
      [brackets] defaults to [("", "")] and [sep] to [" "]. *)
  let list ?(brackets = ("", "")) ?(sep = " ") p oc xs =
    let opening, closing = brackets in
    output_string oc opening;
    List.iteri
      (fun i x ->
        (* The separator goes to [oc], the same channel as the elements,
           not to stdout. *)
        if i > 0 then output_string oc sep;
        p oc x)
      xs;
    output_string oc closing

  (** [u oc n] writes [n] to [oc] as [U+] followed by at least four
      uppercase hexadecimal digits. *)
  let u oc n = Printf.fprintf oc "U+%04X" n
end
(** Line-oriented input helpers. *)
module Sequence_io = struct
  (** [read_lines ic f] calls [f] on each line obtained from [ic] with
      [input_line], in order, and returns [()] once [End_of_file] is
      raised by the read. A read that raises
      [Unix.Unix_error (Unix.EINTR, _, _)] is retried. Exceptions raised
      by [f] itself are not intercepted. *)
  let read_lines ic f =
    (* Fetch the next line, retrying on EINTR; [None] marks end of input. *)
    let rec next_line () =
      match input_line ic with
      | line -> Some line
      | exception End_of_file -> None
      | exception Unix.Unix_error (Unix.EINTR, _, _) -> next_line ()
    in
    (* [f] runs outside the handlers above, so its exceptions propagate. *)
    let rec loop () =
      match next_line () with
      | None -> ()
      | Some line ->
        f line;
        loop ()
    in
    loop ()
end
(** [xf ic oc] processes [ic] line by line: each line is parsed with
    [S.scans], and a report line of the form
    [* [U+XXXX, ...]: |<encoded>|] is written to [oc], where [<encoded>]
    is the result of [encodev] on the parsed values. [oc] is flushed
    after every line (the [%!] in the format). *)
let xf ic oc =
  (* No [open Sequence] here: nothing below resolves through it, so it
     only added an unused dependency. *)
  Sequence_io.read_lines ic @@ fun line ->
  let us = S.scans line in
  Printf.fprintf oc "* %a: |%s|\n%!"
    P.(list ~brackets:("[", "]") ~sep:", " u)
    us (encodev us)
(* Program entry point: run [xf] from standard input to standard output. *)
let () =
  let input = stdin and output = stdout in
  xf input output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
U+1F600 | |
U+1F602 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
U+2028 | |
U+2029 | |
U+000A | |
U+000D | |
U+0085 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment