Skip to content

Instantly share code, notes, and snippets.

@blackball
Last active August 29, 2015 14:05
Show Gist options
  • Save blackball/962e36d303db147638f5 to your computer and use it in GitHub Desktop.
Save blackball/962e36d303db147638f5 to your computer and use it in GitHub Desktop.
simple binary feature with robust probabilistic algorithm could be very effective.
#include "aligned-mem.h"
#include <stdlib.h>
void *
aligned_malloc(size_t sz, size_t align) {
if (align & (align-1))
return 0;
void *m = malloc(sz + align + sizeof(void *));
void **p = (void **)((size_t)(m + align + sizeof(void*)) & ~(align-1));
p[-1] = m;
return (void *)p;
}
void
aligned_free(void *p) {
if (p) free(((void **)p)[-1]);
}
#include "aligned-mem.h" /* consider using SIMD */
/* assume 32 */
typedef unsigned int uint32_t;
struct bvec_t {
size_t size;
uint32_t v[1];
};
struct bvec_t *
bvec_new(size_t size) {
size_t n = ((size + 31) & -32) >> 5;
struct bvec_t *bv = aligned_malloc(sizeof(*bv) + n, 32);
memset(bv, 0, sizeof(*bv) + n);
if (bv) bv->size = size;
return bv;
}
#define bvec_set(bv, i) \
((bv)->v[(size_t)(i)>>5] |= (1U << ((i) & 31)))
#define bvec_clear(bv, i) \
((bv)->v[(size_t)(i)>>5] &= ~(1U << ((i) & 31)))
#define bvec_at(bv, i) \
(((bv)->v[(size_t)(i) >> 5] & (1u << ((i) & 31))) >> ((i) & 31))
void
bvec_free(struct bvec_t **p) {
if (p) return ;
aligned_free(*p);
*p = NULL;
}
static uint32_t
popcnt32(uint32_t i) {
i = i - ((i >> 1) & 0x55555555);
i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
return (((i + (i >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
}
/* hamming weight: popcnt(b ^ bv0) */
size_t
bvec_hamming(const struct bvec_t *a, const struct bvec_t *b) {
size_t sz = 0, i = 0;
const size_t n = a->size >> 5;
for (; i < n; ++i)
sz += popcnt32(a->v[i] ^ b->v[i]);
i = a->size - (n << 5);
if (i) sz += popcnt32((a->v[n] ^ b->v[n]) << (32 - i));
return sz;
}
size_t
bvec_count(const struct bvec_t *bv) {
size_t sz = 0, i = 0;
const size_t n = bv->size >> 5;
for (; i < n; ++i)
sz += popcnt32(bv->v[i]);
i = bv->size - (n << 5);
if (i) sz += popcnt32(bv->v[n] << (32 - i));
return sz;
}
/* c = a & b */
void
bvec_and(const struct bvec_t *a, const struct bvec_t *b, struct bvec_t *c) {
const size_t n = a->size >> 5;
for (size_t i = 0; i < n; ++i)
c->v[i] = a->v[i] & b->v[i];
if (a->size > (n << 5))
c->v[n] = a->v[n] & b->v[n];
}
/* c = a | b */
void
bvec_or(const struct bvec_t *a, const struct bvec_t *b, struct bvec_t *c) {
const size_t n = a->size >> 5;
for (size_t i = 0; i < n; ++i)
c->v[i] = a->v[i] | b->v[i];
if (a->size > (n << 5))
c->v[n] = a->v[n] | b->v[n];
}
/* popcnt(a & b) */
size_t
bvec_and_count(const struct bvec_t *a, const struct bvec_t *b) {
size_t sz = 0;
const size_t n = a->size >> 5;
size_t i = 0;
for (; i < n; ++i)
sz += popcnt32(a->v[i] & b->v[i]);
i = a->size - (n << 5);
if (i) sz += popcnt32((a->v[n] & b->v[n]) << (32 - i));
return sz;
}
void
bvec_not(struct bvec_t *a) {
const size_t n = a->size >> 5;
for (size_t i = 0; i < n; ++i) {
a->v[i] = ~(a->v[i]);
}
if (a->size > (n << 5))
a->v[n] = ~(a->v[n]);
}
/* T = popcnt(a&b) / popcnt(a|b) = popcnt(a&b) / (popcnt(a) + popcnt(b) - popcnt(a&b)) */
double
bvec_tanimono(const struct bvec_t *a, const struct bvec_t *b) {
const size_t na = bvec_count(a);
const size_t nb = bvec_count(b);
const size_t nab = bvec_and_count(a, b);
printf("%u, %u, %u\n", na, nb, nab);
if (nab == na + nb)
return 1.0;
return nab / (double)(na + nb - nab);
}
static void
bvec_print(const struct bvec_t *a) {
for (size_t i = 0; i < a->size; ++i) {
putchar('0' + bvec_at(a, i));
}
putchar('\n');
}
int
main(int ac, char **av) {
struct bvec_t *bv = bvec_new(33);
struct bvec_t *bb = bvec_new(33);
bvec_set(bv, 0);
printf("%u\n", bvec_count(bv));
//bvec_clear(bv, 0);
printf("%u\n", bvec_hamming(bv, bb));
printf("%lf\n", bvec_tanimono(bv, bb));
bvec_print(bv);
bvec_not(bv);
bvec_print(bv);
bvec_not(bb);
bvec_print(bb);
printf("%u\n", bvec_count(bv));
printf("%u\n", bvec_count(bb));
printf("%lf\n", bvec_tanimono(bv, bb));
bvec_free(&bb);
bvec_free(&bv);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment