Skip to content

Instantly share code, notes, and snippets.

@ytomino
Created May 6, 2016 05:34
Show Gist options
  • Save ytomino/5aa5b31ceef2a80e3e4f64e3715726a4 to your computer and use it in GitHub Desktop.
Save ytomino/5aa5b31ceef2a80e3e4f64e3715726a4 to your computer and use it in GitHub Desktop.
OCaml implementation of MurMurHash3
let rotate_left (left: int32) (right: int): int32 =
Int32.logor
(Int32.shift_left left right)
(Int32.shift_right_logical left (32 - right));;
let fmix32 (h: int32): int32 =
let h = Int32.logxor h (Int32.shift_right_logical h 16) in
let h = Int32.mul h 0x85ebca6bl in
let h = Int32.logxor h (Int32.shift_right_logical h 13) in
let h = Int32.mul h 0xc2b2ae35l in
let h = Int32.logxor h (Int32.shift_right_logical h 16) in
h;;
let c1 = 0xcc9e2d51l;;
let c2 = 0x1b873593l;;
type t = int32;;
let initialize (seed: int32): t = seed;;
let update (k1: int32) (h1: t): t =
let k1 = Int32.mul k1 c1 in
let k1 = rotate_left k1 15 in
let k1 = Int32.mul k1 c2 in
let h1 = Int32.logxor h1 k1 in
let h1 = rotate_left h1 13 in
let h1 = Int32.add (Int32.mul h1 5l) 0xe6546b64l in
h1;;
let update_tail (k1: int32) (h1: t):t =
let k1 = Int32.mul k1 c1 in
let k1 = rotate_left k1 15 in
let k1 = Int32.mul k1 c2 in
let h1 = Int32.logxor h1 k1 in
h1;;
let update_length (length: int) (h1: t): t =
let h1 = Int32.logxor h1 (Int32.of_int length) in
h1;;
let finalize: t -> int32 = fmix32;;
let getblock8 (s: string) (i: int): int32 =
Int32.of_int (int_of_char s.[i]);;
let getblock16 (s: string) (i: int): int32 =
Int32.of_int (
(int_of_char s.[i]) lor
(int_of_char s.[i + 1] lsl 8));;
let getblock24 (s: string) (i: int): int32 =
Int32.of_int (
(int_of_char s.[i]) lor
(int_of_char s.[i + 1] lsl 8) lor
(int_of_char s.[i + 2] lsl 16));;
let getblock32 (s: string) (i: int): int32 =
if max_int <= 0x3fffffff then (* sizeof(int) <= 4 *)
Int32.logor
(getblock24 s i)
(Int32.of_int (int_of_char s.[i + 3] lsl 24))
else
Int32.of_int (
(int_of_char s.[i]) lor
(int_of_char s.[i + 1] lsl 8) lor
(int_of_char s.[i + 2] lsl 16) lor
(int_of_char s.[i + 3] lsl 24));;
let update_substring (s: string) (i: int) (length: int) (h1: t): t =
let rec loop (s: string) (n: int) (i: int) (h: t): t =
if i >= n then h else
if i >= n - 1 then update_tail (getblock8 s i) h else
if i >= n - 2 then update_tail (getblock16 s i) h else
if i >= n - 3 then update_tail (getblock24 s i) h else
update (getblock32 s i) h |> loop s length (i + 4)
in
loop s (i + length) i h1
let hash_of_string (seed: int32) (s: string): int32 =
let l = String.length s in
initialize seed |> update_substring s 0 l |> update_length l |> finalize;;
(* MurmurHash3 was written by Austin Appleby, and is placed in the public
domain. The author hereby disclaims copyright to this source code. *)
type t = private int32
val initialize: int32 -> t
val update: int32 -> t -> t (* 32bit *)
val update_tail: int32 -> t -> t (* 8/16/24bit *)
val update_length: int -> t -> t
val finalize: t -> int32
val update_substring: string -> int -> int -> t -> t
val hash_of_string: int32 -> string -> int32
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment