Skip to content

Instantly share code, notes, and snippets.

@beberlei

beberlei/hdr.php Secret

Created March 25, 2019 13:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save beberlei/f41e067437ce747ac20e979cbb0d573b to your computer and use it in GitHub Desktop.
Save beberlei/f41e067437ce747ac20e979cbb0d573b to your computer and use it in GitHub Desktop.
<?php
class Hdr
{
public $ltv;
public $htv;
public $sf;
public $sub_bucket_half_count_magnitude;
public $unit_magnitude;
public $sub_bucket_count;
public $sub_bucket_half_count;
public $sub_bucket_mask;
public $counts_len;
public $counts;
public $min_value = 0;
public $max_value = 0;
public $normalizing_index_offset = 0;
public $conversion_ratio = 1.0;
public $total_count = 0;
}
function hdr_init($ltv, $htv, $sf)
{
$hdr = new Hdr;
hdr_calculate_bucket_config($ltv, $htv, $sf, $hdr);
$hdr->counts = new SplFixedArray($hdr->counts_len);
return $hdr;
}
function reverse(int $x) : int {
$x = ((($x & 0xaaaaaaaa) >> 1) | (($x & 0x55555555) << 1));
$x = ((($x & 0xcccccccc) >> 2) | (($x & 0x33333333) << 2));
$x = ((($x & 0xf0f0f0f0) >> 4) | (($x & 0x0f0f0f0f) << 4));
$x = ((($x & 0xff00ff00) >> 8) | (($x & 0x00ff00ff) << 8));
return(($x >> 16) | ($x << 16));
}
function get_bucket_index(Hdr $hdr, int $value) : int {
return max(
floor(log($value, 2)) -
$hdr->sub_bucket_half_count_magnitude -
$hdr->unit_magnitude,
0
);
/* $value = $value | $hdr->sub_bucket_mask;
for ($i = 60; $i >= 0; $i--) {
if (pow(2, $i) <= $value) {
return ($i+1) - $hdr->unit_magnitude - ($hdr->sub_bucket_half_count_magnitude + 1);
}
}
return 0;*/
}
function get_sub_bucket_index(int $value, int $bucket_index, int $unit_magnitude) : int
{
return ($value >> ($bucket_index + $unit_magnitude));
}
function counts_index(Hdr $hdr, int $bucket_index, $sub_bucket_index) : int
{
/* Calculate the index for the first entry in the bucket: */
/* (The following is the equivalent of ((bucket_index + 1) * subBucketHalfCount) ): */
$bucket_base_index = ($bucket_index + 1) << $hdr->sub_bucket_half_count_magnitude;
/* Calculate the offset in the bucket: */
$offset_in_bucket = $sub_bucket_index - $hdr->sub_bucket_half_count;
/* The following is the equivalent of ((sub_bucket_index - subBucketHalfCount) + bucketBaseIndex; */
return $bucket_base_index + $offset_in_bucket;
}
function normalize_index(Hdr $hdr, int $index) : int
{
$adjustment = 0;
if ($hdr->normalizing_index_offset == 0) {
return $index;
}
$normalized_index = $index - $hdr->normalizing_index_offset;
if ($normalized_index < 0) {
$adjustment = $hdr->counts_len;
} else if ($normalized_index >= $hdr->counts_len) {
$adjustment = -$hdr->counts_len;
}
return $normalized_index + $adjustment;
}
function counts_index_for(Hdr $hdr, int $value) : int
{
$bucket_index = get_bucket_index($hdr, $value);
$sub_bucket_index = get_sub_bucket_index($value, $bucket_index, $hdr->unit_magnitude);
return counts_index($hdr, $bucket_index, $sub_bucket_index);
}
function counts_inc_normalised(Hdr $hdr, int $index, int $value)
{
$normalised_index = normalize_index($hdr, $index);
$hdr->counts[$normalised_index] = ($hdr->counts[$normalised_index]+$value);
$hdr->total_count += $value;
}
function update_min_max(Hdr $hdr, int $value)
{
$hdr->min_value = ($value < $hdr->min_value && $value != 0) ? $value : $hdr->min_value;
$hdr->max_value = ($value > $hdr->max_value) ? $value : $hdr->max_value;
}
function hdr_record_value($hdr, $value)
{
hdr_record_values($hdr, $value, 1);
}
function hdr_record_values($hdr, $value, $count)
{
if ($value < 0) {
return false;
}
$counts_index = counts_index_for($hdr, $value);
if ($counts_index < 0 || $hdr->counts_len <= $counts_index) {
return false;
}
counts_inc_normalised($hdr, $counts_index, $count);
update_min_max($hdr, $value);
return true;
}
function hdr_value_at_percentile(Hdr $hdr, int $percentile)
{
$total = 0;
$requested_percentile = $percentile < 100.0 ? $percentile : 100.0;
$count_at_percentile = ((($requested_percentile / 100) * $hdr->total_count) + 0.5);
$count_at_percentile = $count_at_percentile > 1 ? $count_at_percentile : 1;
foreach ($hdr->counts as $idx => $value) {
$total += $value;
if ($total >= $count_at_percentile) {
return $idx; // TODO
}
}
return 0;
}
function hdr_calculate_bucket_config(int $ltv, int $htv, int $sf, Hdr $hdr)
{
if ($ltv < 1 || $sf < 1 || 5 < $sf) {
throw new \RuntimeException();
} else if ($ltv * 2 > $htv) {
throw new \RuntimeException();
}
$hdr->ltv = $ltv;
$hdr->sf = $sf;
$hdr->htv = $htv;
$largest_value_with_single_unit_resolution = 2 * pow(10, $sf);
$sub_bucket_count_magnitude = ceil(log((double)$largest_value_with_single_unit_resolution) / log(2));
$hdr->sub_bucket_half_count_magnitude = (($sub_bucket_count_magnitude > 1) ? $sub_bucket_count_magnitude : 1) - 1;
$hdr->unit_magnitude = floor(log((double)$ltv) / log(2));
$hdr->sub_bucket_count = (int)pow(2, ($hdr->sub_bucket_half_count_magnitude + 1));
$hdr->sub_bucket_half_count = $hdr->sub_bucket_count / 2;
$hdr->sub_bucket_mask = ($hdr->sub_bucket_count - 1) << $hdr->unit_magnitude;
if ($hdr->unit_magnitude + $hdr->sub_bucket_half_count_magnitude > 61) {
throw new \RuntimeException();
}
$hdr->bucket_count = buckets_needed_to_cover_value($hdr->htv, $hdr->sub_bucket_count, $hdr->unit_magnitude);
$hdr->counts_len = ($hdr->bucket_count + 1) * ($hdr->sub_bucket_count / 2);
}
function buckets_needed_to_cover_value($value, $sub_bucket_count, $unit_magnitude) : int {
$smallest_untrackable_value = ($sub_bucket_count) << $unit_magnitude;
$buckets_needed = 1;
while ($smallest_untrackable_value <= $value) {
if ($smallest_untrackable_value > PHP_INT_MAX / 2) {
return $buckets_needed + 1;
}
$smallest_untrackable_value <<= 1;
$buckets_needed++;
}
return $buckets_needed;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment