Skip to content

Instantly share code, notes, and snippets.

@softwaredoug
Created February 3, 2015 04:01
Show Gist options
  • Star 14 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save softwaredoug/aea69681f538d9780c82 to your computer and use it in GitHub Desktop.
Save softwaredoug/aea69681f538d9780c82 to your computer and use it in GitHub Desktop.
Educational HyperLogLog demo in C
#include "stdio.h"
#include "stdlib.h"
unsigned int max(unsigned int a, unsigned int b) {
return a > b ? a : b;
}
unsigned int bucket(unsigned int val) {
return val & 0xF0000000 >> 28;
}
unsigned int trailingZeros(unsigned int val) {
unsigned int mask = 0x00000001;
unsigned int cnt = 0;
while (val) {
val = val & mask;
mask <<= 1;
cnt++;
}
return cnt;
}
void initBuckets(unsigned int* buckets) {
int i;
for (i = 0; i < 16; i++) {
buckets[i] = 0;
}
}
unsigned int cardinality(unsigned int* buckets) {
int i;
unsigned int cnt = 0;
for (i = 0; i < 16; i++) {
if (buckets[i]) {
cnt += (1 << (buckets[i] - 1)); // 2^buckets[i]
}
}
return cnt;
}
void diagnosticReport(unsigned int* buckets) {
int i;
for (i = 0; i < 16; i++) {
printf("bucket %i: %08x\n", i, buckets[i]);
}
printf("Cardinality %i\n", cardinality(buckets));
}
int main() {
/*zero 16 buckets*/
unsigned int buckets[16];
initBuckets(buckets);
unsigned int inputNumber = 0;
unsigned int tryNo = 0;
while (1) {
printf("%ith Value for HLL\n", tryNo);
/*user enters a number,
*
* in real HLL we would hash the number, we're sorta expecting the user
* to bash on keys*/
scanf("%u", &inputNumber);
/*
* hash this number to a bucket based on the <numbuckets> significant
* digits*/
unsigned int buck = bucket(inputNumber);
printf("Hashed To Bucket %i\n", buck);
/* Write the largest number of trailing 0s we've seen thus far
*
* This is the magic of the hyperloglog, it gets very very hard
* to get more trailing zeros after some point
*
* Its much like flipping a coin
* */
buckets[buck] = max(buckets[buck], trailingZeros(inputNumber));
/* Print diagnistic, including cardinality. which is just a sum of
* 2^bucketVal over all buckets if bucketVal != 0
* */
diagnosticReport(buckets);
tryNo++;
}
}
@bombela
Copy link

bombela commented Feb 5, 2015

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment