Created
May 6, 2016 05:34
-
-
Save ytomino/5aa5b31ceef2a80e3e4f64e3715726a4 to your computer and use it in GitHub Desktop.
OCaml implementation of MurMurHash3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
let rotate_left (left: int32) (right: int): int32 = | |
Int32.logor | |
(Int32.shift_left left right) | |
(Int32.shift_right_logical left (32 - right));; | |
let fmix32 (h: int32): int32 = | |
let h = Int32.logxor h (Int32.shift_right_logical h 16) in | |
let h = Int32.mul h 0x85ebca6bl in | |
let h = Int32.logxor h (Int32.shift_right_logical h 13) in | |
let h = Int32.mul h 0xc2b2ae35l in | |
let h = Int32.logxor h (Int32.shift_right_logical h 16) in | |
h;; | |
let c1 = 0xcc9e2d51l;; | |
let c2 = 0x1b873593l;; | |
type t = int32;; | |
let initialize (seed: int32): t = seed;; | |
let update (k1: int32) (h1: t): t = | |
let k1 = Int32.mul k1 c1 in | |
let k1 = rotate_left k1 15 in | |
let k1 = Int32.mul k1 c2 in | |
let h1 = Int32.logxor h1 k1 in | |
let h1 = rotate_left h1 13 in | |
let h1 = Int32.add (Int32.mul h1 5l) 0xe6546b64l in | |
h1;; | |
let update_tail (k1: int32) (h1: t):t = | |
let k1 = Int32.mul k1 c1 in | |
let k1 = rotate_left k1 15 in | |
let k1 = Int32.mul k1 c2 in | |
let h1 = Int32.logxor h1 k1 in | |
h1;; | |
let update_length (length: int) (h1: t): t = | |
let h1 = Int32.logxor h1 (Int32.of_int length) in | |
h1;; | |
let finalize: t -> int32 = fmix32;; | |
let getblock8 (s: string) (i: int): int32 = | |
Int32.of_int (int_of_char s.[i]);; | |
let getblock16 (s: string) (i: int): int32 = | |
Int32.of_int ( | |
(int_of_char s.[i]) lor | |
(int_of_char s.[i + 1] lsl 8));; | |
let getblock24 (s: string) (i: int): int32 = | |
Int32.of_int ( | |
(int_of_char s.[i]) lor | |
(int_of_char s.[i + 1] lsl 8) lor | |
(int_of_char s.[i + 2] lsl 16));; | |
let getblock32 (s: string) (i: int): int32 = | |
if max_int <= 0x3fffffff then (* sizeof(int) <= 4 *) | |
Int32.logor | |
(getblock24 s i) | |
(Int32.of_int (int_of_char s.[i + 3] lsl 24)) | |
else | |
Int32.of_int ( | |
(int_of_char s.[i]) lor | |
(int_of_char s.[i + 1] lsl 8) lor | |
(int_of_char s.[i + 2] lsl 16) lor | |
(int_of_char s.[i + 3] lsl 24));; | |
let update_substring (s: string) (i: int) (length: int) (h1: t): t = | |
let rec loop (s: string) (n: int) (i: int) (h: t): t = | |
if i >= n then h else | |
if i >= n - 1 then update_tail (getblock8 s i) h else | |
if i >= n - 2 then update_tail (getblock16 s i) h else | |
if i >= n - 3 then update_tail (getblock24 s i) h else | |
update (getblock32 s i) h |> loop s length (i + 4) | |
in | |
loop s (i + length) i h1 | |
let hash_of_string (seed: int32) (s: string): int32 = | |
let l = String.length s in | |
initialize seed |> update_substring s 0 l |> update_length l |> finalize;; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* MurmurHash3 was written by Austin Appleby, and is placed in the public | |
domain. The author hereby disclaims copyright to this source code. *) | |
type t = private int32 | |
val initialize: int32 -> t | |
val update: int32 -> t -> t (* 32bit *) | |
val update_tail: int32 -> t -> t (* 8/16/24bit *) | |
val update_length: int -> t -> t | |
val finalize: t -> int32 | |
val update_substring: string -> int -> int -> t -> t | |
val hash_of_string: int32 -> string -> int32 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment