Skip to content

Instantly share code, notes, and snippets.

@softwaredoug
Created February 3, 2015 04:01
  • Star 14 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save softwaredoug/aea69681f538d9780c82 to your computer and use it in GitHub Desktop.
Educational HyperLogLog demo in C
#include "stdio.h"
#include "stdlib.h"
unsigned int max(unsigned int a, unsigned int b) {
return a > b ? a : b;
}
unsigned int bucket(unsigned int val) {
return val & 0xF0000000 >> 28;
}
unsigned int trailingZeros(unsigned int val) {
unsigned int mask = 0x00000001;
unsigned int cnt = 0;
while (val) {
val = val & mask;
mask <<= 1;
cnt++;
}
return cnt;
}
void initBuckets(unsigned int* buckets) {
int i;
for (i = 0; i < 16; i++) {
buckets[i] = 0;
}
}
unsigned int cardinality(unsigned int* buckets) {
int i;
unsigned int cnt = 0;
for (i = 0; i < 16; i++) {
if (buckets[i]) {
cnt += (1 << (buckets[i] - 1)); // 2^buckets[i]
}
}
return cnt;
}
void diagnosticReport(unsigned int* buckets) {
int i;
for (i = 0; i < 16; i++) {
printf("bucket %i: %08x\n", i, buckets[i]);
}
printf("Cardinality %i\n", cardinality(buckets));
}
int main() {
/*zero 16 buckets*/
unsigned int buckets[16];
initBuckets(buckets);
unsigned int inputNumber = 0;
unsigned int tryNo = 0;
while (1) {
printf("%ith Value for HLL\n", tryNo);
/*user enters a number,
*
* in real HLL we would hash the number, we're sorta expecting the user
* to bash on keys*/
scanf("%u", &inputNumber);
/*
* hash this number to a bucket based on the <numbuckets> significant
* digits*/
unsigned int buck = bucket(inputNumber);
printf("Hashed To Bucket %i\n", buck);
/* Write the largest number of trailing 0s we've seen thus far
*
* This is the magic of the hyperloglog, it gets very very hard
* to get more trailing zeros after some point
*
* Its much like flipping a coin
* */
buckets[buck] = max(buckets[buck], trailingZeros(inputNumber));
/* Print diagnistic, including cardinality. which is just a sum of
* 2^bucketVal over all buckets if bucketVal != 0
* */
diagnosticReport(buckets);
tryNo++;
}
}
@durana
Copy link

durana commented Feb 4, 2015

The trailingZeros function doesn't seem to do what the function's name suggests it does. Here's an alternate version that might work for you.

unsigned int trailingZeros(unsigned int val) {
  unsigned int cnt = 0;

  while (val) {
    if ((val & 1) == 0) {
      cnt++;
    }
    val >>= 1;
  }

  return cnt;
}

@bombela
Copy link

bombela commented Feb 5, 2015

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment