This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
class Hdr | |
{ | |
public $ltv; | |
public $htv; | |
public $sf; | |
public $sub_bucket_half_count_magnitude; | |
public $unit_magnitude; | |
public $sub_bucket_count; | |
public $sub_bucket_half_count; | |
public $sub_bucket_mask; | |
public $counts_len; | |
public $counts; | |
public $min_value = 0; | |
public $max_value = 0; | |
public $normalizing_index_offset = 0; | |
public $conversion_ratio = 1.0; | |
public $total_count = 0; | |
} | |
function hdr_init($ltv, $htv, $sf) | |
{ | |
$hdr = new Hdr; | |
hdr_calculate_bucket_config($ltv, $htv, $sf, $hdr); | |
$hdr->counts = new SplFixedArray($hdr->counts_len); | |
return $hdr; | |
} | |
function reverse(int $x) : int { | |
$x = ((($x & 0xaaaaaaaa) >> 1) | (($x & 0x55555555) << 1)); | |
$x = ((($x & 0xcccccccc) >> 2) | (($x & 0x33333333) << 2)); | |
$x = ((($x & 0xf0f0f0f0) >> 4) | (($x & 0x0f0f0f0f) << 4)); | |
$x = ((($x & 0xff00ff00) >> 8) | (($x & 0x00ff00ff) << 8)); | |
return(($x >> 16) | ($x << 16)); | |
} | |
function get_bucket_index(Hdr $hdr, int $value) : int { | |
return max( | |
floor(log($value, 2)) - | |
$hdr->sub_bucket_half_count_magnitude - | |
$hdr->unit_magnitude, | |
0 | |
); | |
/* $value = $value | $hdr->sub_bucket_mask; | |
for ($i = 60; $i >= 0; $i--) { | |
if (pow(2, $i) <= $value) { | |
return ($i+1) - $hdr->unit_magnitude - ($hdr->sub_bucket_half_count_magnitude + 1); | |
} | |
} | |
return 0;*/ | |
} | |
function get_sub_bucket_index(int $value, int $bucket_index, int $unit_magnitude) : int | |
{ | |
return ($value >> ($bucket_index + $unit_magnitude)); | |
} | |
function counts_index(Hdr $hdr, int $bucket_index, $sub_bucket_index) : int | |
{ | |
/* Calculate the index for the first entry in the bucket: */ | |
/* (The following is the equivalent of ((bucket_index + 1) * subBucketHalfCount) ): */ | |
$bucket_base_index = ($bucket_index + 1) << $hdr->sub_bucket_half_count_magnitude; | |
/* Calculate the offset in the bucket: */ | |
$offset_in_bucket = $sub_bucket_index - $hdr->sub_bucket_half_count; | |
/* The following is the equivalent of ((sub_bucket_index - subBucketHalfCount) + bucketBaseIndex; */ | |
return $bucket_base_index + $offset_in_bucket; | |
} | |
function normalize_index(Hdr $hdr, int $index) : int | |
{ | |
$adjustment = 0; | |
if ($hdr->normalizing_index_offset == 0) { | |
return $index; | |
} | |
$normalized_index = $index - $hdr->normalizing_index_offset; | |
if ($normalized_index < 0) { | |
$adjustment = $hdr->counts_len; | |
} else if ($normalized_index >= $hdr->counts_len) { | |
$adjustment = -$hdr->counts_len; | |
} | |
return $normalized_index + $adjustment; | |
} | |
function counts_index_for(Hdr $hdr, int $value) : int | |
{ | |
$bucket_index = get_bucket_index($hdr, $value); | |
$sub_bucket_index = get_sub_bucket_index($value, $bucket_index, $hdr->unit_magnitude); | |
return counts_index($hdr, $bucket_index, $sub_bucket_index); | |
} | |
function counts_inc_normalised(Hdr $hdr, int $index, int $value) | |
{ | |
$normalised_index = normalize_index($hdr, $index); | |
$hdr->counts[$normalised_index] = ($hdr->counts[$normalised_index]+$value); | |
$hdr->total_count += $value; | |
} | |
function update_min_max(Hdr $hdr, int $value) | |
{ | |
$hdr->min_value = ($value < $hdr->min_value && $value != 0) ? $value : $hdr->min_value; | |
$hdr->max_value = ($value > $hdr->max_value) ? $value : $hdr->max_value; | |
} | |
function hdr_record_value($hdr, $value) | |
{ | |
hdr_record_values($hdr, $value, 1); | |
} | |
function hdr_record_values($hdr, $value, $count) | |
{ | |
if ($value < 0) { | |
return false; | |
} | |
$counts_index = counts_index_for($hdr, $value); | |
if ($counts_index < 0 || $hdr->counts_len <= $counts_index) { | |
return false; | |
} | |
counts_inc_normalised($hdr, $counts_index, $count); | |
update_min_max($hdr, $value); | |
return true; | |
} | |
function hdr_value_at_percentile(Hdr $hdr, int $percentile) | |
{ | |
$total = 0; | |
$requested_percentile = $percentile < 100.0 ? $percentile : 100.0; | |
$count_at_percentile = ((($requested_percentile / 100) * $hdr->total_count) + 0.5); | |
$count_at_percentile = $count_at_percentile > 1 ? $count_at_percentile : 1; | |
foreach ($hdr->counts as $idx => $value) { | |
$total += $value; | |
if ($total >= $count_at_percentile) { | |
return $idx; // TODO | |
} | |
} | |
return 0; | |
} | |
function hdr_calculate_bucket_config(int $ltv, int $htv, int $sf, Hdr $hdr) | |
{ | |
if ($ltv < 1 || $sf < 1 || 5 < $sf) { | |
throw new \RuntimeException(); | |
} else if ($ltv * 2 > $htv) { | |
throw new \RuntimeException(); | |
} | |
$hdr->ltv = $ltv; | |
$hdr->sf = $sf; | |
$hdr->htv = $htv; | |
$largest_value_with_single_unit_resolution = 2 * pow(10, $sf); | |
$sub_bucket_count_magnitude = ceil(log((double)$largest_value_with_single_unit_resolution) / log(2)); | |
$hdr->sub_bucket_half_count_magnitude = (($sub_bucket_count_magnitude > 1) ? $sub_bucket_count_magnitude : 1) - 1; | |
$hdr->unit_magnitude = floor(log((double)$ltv) / log(2)); | |
$hdr->sub_bucket_count = (int)pow(2, ($hdr->sub_bucket_half_count_magnitude + 1)); | |
$hdr->sub_bucket_half_count = $hdr->sub_bucket_count / 2; | |
$hdr->sub_bucket_mask = ($hdr->sub_bucket_count - 1) << $hdr->unit_magnitude; | |
if ($hdr->unit_magnitude + $hdr->sub_bucket_half_count_magnitude > 61) { | |
throw new \RuntimeException(); | |
} | |
$hdr->bucket_count = buckets_needed_to_cover_value($hdr->htv, $hdr->sub_bucket_count, $hdr->unit_magnitude); | |
$hdr->counts_len = ($hdr->bucket_count + 1) * ($hdr->sub_bucket_count / 2); | |
} | |
function buckets_needed_to_cover_value($value, $sub_bucket_count, $unit_magnitude) : int { | |
$smallest_untrackable_value = ($sub_bucket_count) << $unit_magnitude; | |
$buckets_needed = 1; | |
while ($smallest_untrackable_value <= $value) { | |
if ($smallest_untrackable_value > PHP_INT_MAX / 2) { | |
return $buckets_needed + 1; | |
} | |
$smallest_untrackable_value <<= 1; | |
$buckets_needed++; | |
} | |
return $buckets_needed; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment