Skip to content

Instantly share code, notes, and snippets.

@anuragsoni
Last active March 20, 2021 03:37
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save anuragsoni/c953982328043406b0a64166a74be290 to your computer and use it in GitHub Desktop.
Save anuragsoni/c953982328043406b0a64166a74be290 to your computer and use it in GitHub Desktop.
module Trie = struct
type t = { mutable count : int; children : t option Array.t }
let[@inline] empty () = { count = 0; children = Array.make 128 None }
let add buf pos len t =
let rec loop idx t =
if idx > pos + len - 1 then t.count <- t.count + 1
else
let c = Char.lowercase_ascii (Bytes.get buf idx) in
let t' =
match t.children.(Char.code c) with
| None ->
let t'' = empty () in
t.children.(Char.code c) <- Some t'';
t''
| Some c -> c
in
loop (idx + 1) t'
in
loop pos t
let to_assoc t =
let rec loop acc curr t =
let acc =
match (curr, t.count) with
| [], _ -> acc
| _, 0 -> acc
| curr, c -> (List.rev curr, c) :: acc
in
let r = ref acc in
Array.iteri
(fun idx t' ->
match t' with
| None -> ()
| Some t' -> r := loop !r (Char.chr idx :: curr) t')
t.children;
!r
in
loop [] [] t
end
let[@inline] is_whitespace c = c = ' ' || c = '\n'
let rec skip_whitespace countwords pad ~lim i =
if i >= lim then (read_more_and_white [@tailcall]) countwords pad i
else if is_whitespace @@ Bytes.unsafe_get pad i then
(skip_whitespace [@tailcall]) countwords pad ~lim (i + 1)
else (read_next_word [@tailcall]) countwords pad ~lim i i
and read_next_word countwords pad ~lim i j =
if j >= lim then (read_more_and_next [@tailcall]) countwords pad i j
else if is_whitespace @@ Bytes.unsafe_get pad j then (
Trie.add pad i (j - i) countwords;
(skip_whitespace [@tailcall]) countwords pad ~lim (j + 1))
else (read_next_word [@tailcall]) countwords pad ~lim i (j + 1)
and read_more_and_next countwords pad i j =
let l = j - i in
Bytes.blit pad i pad 0 l;
let read = input stdin pad l (Bytes.length pad - l) in
let lim = l + read in
if lim = 0 then () else (read_next_word [@tailcall]) countwords pad ~lim 0 l
and read_more_and_white countwords pad i =
let lim = input stdin pad 0 (Bytes.length pad) in
if lim = 0 then () else (skip_whitespace [@tailcall]) countwords pad ~lim 0
let[@inline] read_start countwords pad =
(read_more_and_next [@specialize]) countwords pad 0 0
let () =
let t = Trie.empty () in
let pad = Bytes.make (64 * 1024) ' ' in
read_start t pad;
let assoc = Array.of_list (Trie.to_assoc t) in
Array.sort (fun (_, a) (_, b) -> Int.compare b a) assoc;
Array.iter
(fun (word, count) ->
List.iter print_char word;
print_char ' ';
print_int count;
print_newline ())
assoc
(* Simpler IO doesn't make the timing worse *)
module Trie = struct
type t = { mutable count : int; children : t option Array.t }
let[@inline] empty () = { count = 0; children = Array.make 128 None }
let add s pos len t =
let rec loop idx t =
if idx > pos + len - 1 then t.count <- t.count + 1
else
let c = Char.lowercase_ascii (String.get s idx) in
let t' =
match t.children.(Char.code c) with
| None ->
let t'' = empty () in
t.children.(Char.code c) <- Some t'';
t''
| Some c -> c
in
loop (idx + 1) t'
in
loop pos t
let to_assoc t =
let rec loop acc curr t =
let acc =
match (curr, t.count) with
| [], _ -> acc
| _, 0 -> acc
| curr, c -> (List.rev curr, c) :: acc
in
let r = ref acc in
Array.iteri
(fun idx t' ->
match t' with
| None -> ()
| Some t' -> r := loop !r (Char.chr idx :: curr) t')
t.children;
!r
in
loop [] [] t
end
let read_input t =
try
let rec loop () =
let line = input_line stdin in
let start = ref 0 in
let len = ref 0 in
for i = 0 to String.length line - 1 do
if line.[i] <> ' ' then incr len
else (
Trie.add line !start !len t;
start := i + 1;
len := 0)
done;
Trie.add line !start !len t;
loop ()
in
loop ()
with End_of_file -> ()
let () =
let t = Trie.empty () in
read_input t;
let assoc = Array.of_list (Trie.to_assoc t) in
Array.sort (fun (_, a) (_, b) -> Int.compare b a) assoc;
Array.iter
(fun (word, count) ->
List.iter print_char word;
print_char ' ';
print_int count;
print_newline ())
assoc
module Buf = struct
type t = { buf : Bytes.t; mutable pos : int; mutable len : int }
let create size = { buf = Bytes.create size; pos = 0; len = 0 }
let compress t =
if t.len <= 0 || t.pos = t.len then (
t.pos <- 0;
t.len <- 0)
else if t.pos > 0 then (
(* Shifting the unconsumed bytes to the start of the buffer. We will always
have enough room to shift these bytes so it should be fine to use unsafe_blit here. *)
BytesLabels.unsafe_blit ~src:t.buf ~src_pos:t.pos ~dst:t.buf ~dst_pos:0
~len:t.len;
t.pos <- 0)
let read_into t fd =
compress t;
let count =
UnixLabels.read fd ~buf:t.buf ~pos:(t.pos + t.len) ~len:(Bytes.length t.buf - t.len)
in
t.len <- t.len + count;
for i = t.pos to t.len - 1 do
Bytes.unsafe_set t.buf i (Char.lowercase_ascii (Bytes.unsafe_get t.buf i))
done;
count
let[@inline] is_delim c = c = '\n' || c = ' '
let iter_words f t =
let prev = ref t.pos in
for i = t.pos to t.len - 1 do
(* We perform bounds check so its okay to call unsafe_get here *)
if is_delim (Bytes.unsafe_get t.buf i) then (
f t.buf !prev (i - !prev);
prev := i + 1)
done;
t.len <- t.len - !prev;
t.pos <- !prev
end
module BytesHash = struct
include Bytes
let hash (k : bytes) = Hashtbl.hash k
end
module BytesHashtbl = Hashtbl.Make (BytesHash)
let process_word countwords buf pos len =
if len > 0 then
let word = Bytes.sub buf pos len in
try incr (BytesHashtbl.find countwords word)
with Not_found -> BytesHashtbl.add countwords word (ref 1)
let () =
let countwords = BytesHashtbl.create 33_000 in
let buf = Buf.create (64 * 1024) in
while Buf.read_into buf Unix.stdin > 0 do
Buf.iter_words (fun buf pos len -> process_word countwords buf pos len) buf
done;
let arr = Array.make (BytesHashtbl.length countwords) (Bytes.empty, 0) in
let idx = ref 0 in
BytesHashtbl.iter
(fun word count ->
arr.(!idx) <- (word, !count);
incr idx)
countwords;
Array.sort (fun (_, x) (_, y) -> Int.compare y x) arr;
Array.iter
(fun (w, c) ->
print_bytes w;
print_char ' ';
print_int c;
print_newline ())
arr
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment