Skip to content

Instantly share code, notes, and snippets.

@cangyin
Created April 12, 2024 11:45
Show Gist options
  • Save cangyin/74e3d51ef08dafe4f437e7eaa99a1a4d to your computer and use it in GitHub Desktop.
Save cangyin/74e3d51ef08dafe4f437e7eaa99a1a4d to your computer and use it in GitHub Desktop.
Bloom Filter Calculation Functions In ClickHouse

Formulas from https://hur.st/bloomfilter.

CREATE OR REPLACE FUNCTION bf_p_from_kr AS (k, r) -> pow(1 - exp((-k) / r), k);
CREATE OR REPLACE FUNCTION bf_k_from_r AS r -> round(log(2) * r);
CREATE OR REPLACE FUNCTION bf_r_from_pk AS (p, k) -> ((-k) / log(1 - exp(log(p) / k)));
CREATE OR REPLACE FUNCTION bf_r_from_mn AS (m, n) -> (m / n);

CREATE OR REPLACE FUNCTION bf_km_from_np AS (n, p) ->
    (
        bf_k_from_r(bf_r_from_mn(m, n) AS r) AS k,
        ceil((n * log(p)) / log(1 / pow(2, log(2)))) AS m,
        n,
        bf_p_from_kr(k, r) AS p,
        ['k', 'm', 'p']
    );
CREATE OR REPLACE FUNCTION bf_kp_from_mn AS (m, n) ->
    (
        bf_k_from_r(bf_r_from_mn(m, n) AS r) AS k,
        m,
        n,
        bf_p_from_kr(k, r) AS p,
        ['k', 'p']
    );
CREATE OR REPLACE FUNCTION bf_kn_from_mp AS (m, p) ->
    (
        bf_k_from_r(bf_r_from_mn(m,n) AS r) AS k,
        m,
        ceil((m * log(pow(1 / 2, log(2))) / log(p))) AS n,
        bf_p_from_kr(k,r) AS p,
        ['k', 'n', 'p']
    );
CREATE OR REPLACE FUNCTION bf_p_from_kmn AS (k, m, n) ->
    (
        k,
        m,
        n,
        bf_p_from_kr(k,bf_r_from_mn(m,n)) AS p,
        ['p']
    );
CREATE OR REPLACE FUNCTION bf_n_from_kmp AS (k, m, p) ->
    (
        k,
        m,
        ceil(m / bf_r_from_pk(p,k) AS r) AS n,
        p,
        ['n']
    );
CREATE OR REPLACE FUNCTION bf_m_from_knp AS (k, n, p) ->
    (
        k,
        ceil(n * (bf_r_from_pk(p, k) AS r)) AS m,
        n,
        p,
        ['m']
    );
CREATE OR REPLACE FUNCTION bf_k_from_mnp AS (m, n, p_) ->
    (
        indexOf(arr,arrayMin(arr)) AS k,
        m,
        n,
        bf_p_from_kr(k, bf_r_from_mn(m, n) AS r) AS p,
        ['k', 'p'],
        arrayMap(kc -> abs(bf_p_from_kr(kc,r) - p_), arrayResize([1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36], toUInt8(-round(log2(p_))) AS opt_k)) AS arr
    );
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment