Skip to content

Instantly share code, notes, and snippets.

@komamitsu
Last active April 11, 2020 19:58
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save komamitsu/6474266 to your computer and use it in GitHub Desktop.
Save komamitsu/6474266 to your computer and use it in GitHub Desktop.
Trie in OCaml
module Trie : sig
type t
val empty : t
val update : string -> int -> t -> t
val find : t -> string -> int list option
val create_from_text : string -> t
end =
struct
type node = {c:char; poslist:int list; children:t}
and t = node list
let empty : t = []
let string_to_char_list s : char list =
let len = String.length s in
let rec loop i cs =
if i >= len then List.rev cs
else loop (i + 1) (s.[i] :: cs)
in
loop 0 []
let update s pos tr : t =
let xs = string_to_char_list s in
let rec _update xs tr =
match xs with
| [] -> tr
| hd::tl ->
let node =
try List.find (fun x -> x.c = hd) tr
with Not_found -> {c=hd; poslist=[]; children=[]} in
{c=node.c; poslist=pos::node.poslist; children=_update tl node.children} ::
List.filter (fun x -> x.c != hd) tr
in
_update xs tr
let find tr s : int list option =
let rec loop len i (tr:t) =
try
let node = List.find (fun x -> s.[i] = x.c) tr in
if i = len - 1 then Some (List.rev node.poslist)
else loop len (i + 1) node.children
with Not_found -> None
in
loop (String.length s) 0 tr
let create_from_text s : t =
let re = Str.regexp "[0-9a-zA-Z]+" in
let rec loop pos tr =
try
let matched_start = Str.search_forward re s pos in
let matched_str = Str.matched_string s in
let tr = update matched_str matched_start tr in
loop (Str.match_end ()) tr
with Not_found -> tr
in
loop 0 empty
end
Type #utop_help for help about using utop.
─( 09:00:00 )─< command 0 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─
utop # #require "str";;
─( 18:43:23 )─< command 1 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─
utop # #use "trie.ml";;
module Trie : sig type t val empty : t val update : string -> int -> t -> t val find : t -> string -> int list option val create_from_text : string -> t end
─( 18:43:29 )─< command 2 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─
utop # let tr = Trie.create_from_text "In computer science, a trie, also called digital tree or sometimes radix tree or prefix tree (because we can search by prefixes), is an ordered tree data structure that is used to store a dynamic set or associative array where the keys are usually strings. Unlike a binary search tree, no node in the tree stores the key associated with that node; instead, its position in the tree defines the key with which it is associated. All the descendants of a node have a common prefix of the string associated with that node, and the root is associated with the empty string. Values are normally not associated with every node, only with leaves and some inner nodes that correspond to keys of interest. For the space-optimized presentation of prefix tree, see compact prefix tree.";;
val tr : Trie.t = <abstr>
─( 18:43:35 )─< command 4 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─
utop # Trie.find tr "node";;
- : int list option = Some [289; 342; 452; 513; 615; 653]
─( 18:43:46 )─< command 5 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─
utop # Trie.find tr "In";;
- : int list option = Some [0]
─( 18:44:31 )─< command 6 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─
utop # Trie.find tr "Innnnnnnnn";;
- : int list option = None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment