Last active
April 11, 2020 19:58
-
-
Save komamitsu/6474266 to your computer and use it in GitHub Desktop.
Trie in OCaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Trie : sig | |
type t | |
val empty : t | |
val update : string -> int -> t -> t | |
val find : t -> string -> int list option | |
val create_from_text : string -> t | |
end = | |
struct | |
type node = {c:char; poslist:int list; children:t} | |
and t = node list | |
let empty : t = [] | |
let string_to_char_list s : char list = | |
let len = String.length s in | |
let rec loop i cs = | |
if i >= len then List.rev cs | |
else loop (i + 1) (s.[i] :: cs) | |
in | |
loop 0 [] | |
let update s pos tr : t = | |
let xs = string_to_char_list s in | |
let rec _update xs tr = | |
match xs with | |
| [] -> tr | |
| hd::tl -> | |
let node = | |
try List.find (fun x -> x.c = hd) tr | |
with Not_found -> {c=hd; poslist=[]; children=[]} in | |
{c=node.c; poslist=pos::node.poslist; children=_update tl node.children} :: | |
List.filter (fun x -> x.c != hd) tr | |
in | |
_update xs tr | |
let find tr s : int list option = | |
let rec loop len i (tr:t) = | |
try | |
let node = List.find (fun x -> s.[i] = x.c) tr in | |
if i = len - 1 then Some (List.rev node.poslist) | |
else loop len (i + 1) node.children | |
with Not_found -> None | |
in | |
loop (String.length s) 0 tr | |
let create_from_text s : t = | |
let re = Str.regexp "[0-9a-zA-Z]+" in | |
let rec loop pos tr = | |
try | |
let matched_start = Str.search_forward re s pos in | |
let matched_str = Str.matched_string s in | |
let tr = update matched_str matched_start tr in | |
loop (Str.match_end ()) tr | |
with Not_found -> tr | |
in | |
loop 0 empty | |
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Type #utop_help for help about using utop. | |
─( 09:00:00 )─< command 0 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─ | |
utop # #require "str";; | |
─( 18:43:23 )─< command 1 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─ | |
utop # #use "trie.ml";; | |
module Trie : sig type t val empty : t val update : string -> int -> t -> t val find : t -> string -> int list option val create_from_text : string -> t end | |
─( 18:43:29 )─< command 2 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─ | |
utop # let tr = Trie.create_from_text "In computer science, a trie, also called digital tree or sometimes radix tree or prefix tree (because we can search by prefixes), is an ordered tree data structure that is used to store a dynamic set or associative array where the keys are usually strings. Unlike a binary search tree, no node in the tree stores the key associated with that node; instead, its position in the tree defines the key with which it is associated. All the descendants of a node have a common prefix of the string associated with that node, and the root is associated with the empty string. Values are normally not associated with every node, only with leaves and some inner nodes that correspond to keys of interest. For the space-optimized presentation of prefix tree, see compact prefix tree.";; | |
val tr : Trie.t = <abstr> | |
─( 18:43:35 )─< command 4 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─ | |
utop # Trie.find tr "node";; | |
- : int list option = Some [289; 342; 452; 513; 615; 653] | |
─( 18:43:46 )─< command 5 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─ | |
utop # Trie.find tr "In";; | |
- : int list option = Some [0] | |
─( 18:44:31 )─< command 6 >──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────{ counter: 0 }─ | |
utop # Trie.find tr "Innnnnnnnn";; | |
- : int list option = None |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment