Created
October 29, 2014 17:57
-
-
Save toby/02faa64b286ac4cceca0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(ns backrack.bloom | |
(:import [java.lang Math]) | |
(:use [digest :only [sha-256]])) | |
(deftype Bloom [m k f] | |
Object | |
(equals [this b2] | |
(and (= (.m this) (.m b2)) (= (.k this) (.k b2)) (= (.f this) (.f b2)))) | |
(toString [this] | |
(str "m: " (.m this) " k: " (.k this)))) | |
(defprotocol BloomOps | |
(add-item [bloom item]) | |
(remove-item [bloom item]) | |
(added? [bloom item]) | |
(blank? [bloom]) | |
(full? [bloom])) | |
(defn- log-base [n base] | |
(/ (Math/log n) (Math/log base))) | |
(defn- index-bytes [m] | |
(int (Math/ceil (/ (Math/floor (log-base m 2)) 8.0)))) | |
(defn- forever-sha-256 | |
([item] | |
(forever-sha-256 item 0)) | |
([item modifier] | |
(if (= modifier 0) | |
(lazy-cat (sha-256 item) (forever-sha-256 item (inc modifier))) | |
(lazy-cat (sha-256 (str item modifier)) (forever-sha-256 item (inc modifier)))))) | |
(defn- hash-indexes [m k item] | |
(take k (map #(mod (Integer/parseInt (apply str %) 16) m) | |
(partition (bit-shift-left (index-bytes m) 1) (forever-sha-256 item))))) | |
(defn- has-indexes? [f indexes] | |
(every? pos? (map f indexes))) | |
(defn- update-bloom [m k f indexes function] | |
(if (empty? indexes) | |
(Bloom. m k f) | |
(recur m k (update-in f [(first indexes)] function) (rest indexes) function))) | |
(extend-type Bloom | |
BloomOps | |
(add-item [this item] | |
(let [indexes (hash-indexes (.m this) (.k this) item)] | |
(if-not (has-indexes? (.f this) indexes) | |
(update-bloom (.m this) (.k this) (.f this) indexes inc) | |
this))) | |
(remove-item [this item] | |
(let [indexes (hash-indexes (.m this) (.k this) item)] | |
(if (has-indexes? (.f this) indexes) | |
(update-bloom (.m this) (.k this) (.f this) indexes dec) | |
this))) | |
(added? [this item] | |
(has-indexes? (.f this) (hash-indexes (.m this) (.k this) item))) | |
(blank? [this] | |
(every? zero? (.f this))) | |
(full? [this] | |
(every? pos? (.f this)))) | |
(def blank-filter | |
(fn [m] (apply conj (vector-of :short) (repeat m (short 0))))) | |
(defn create-bloom | |
([] | |
(create-bloom 5000 0.001)) | |
([capacity error-rate] | |
(let [m (long (Math/ceil (/ (* capacity (Math/log error-rate)) | |
(Math/log (/ 1 (Math/pow 2 (Math/log 2))))))) | |
k (long (Math/floor (* (Math/log 2) (/ m capacity))))] | |
(Bloom. m k (blank-filter m))))) | |
(defn encode [bloom] | |
(let [parts (partition-by identity (.f bloom))] | |
(hash-map :m (.m bloom) :k (.k bloom) :filter (map (juxt first count) parts)))) | |
(defn decode [encoded] | |
(let [f (apply vector-of :short (mapcat (fn [[value length]] (repeat length (short value))) (:filter encoded)))] | |
(Bloom. (:m encoded) (:k encoded) f))) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment