Skip to content

Instantly share code, notes, and snippets.

@lukpueh
Last active December 18, 2023 12:28
Show Gist options
  • Save lukpueh/f6313234aa54aea8f56a9fd1181d1550 to your computer and use it in GitHub Desktop.
Save lukpueh/f6313234aa54aea8f56a9fd1181d1550 to your computer and use it in GitHub Desktop.
TUF hash bin delegation (optimal bin numbers)
"""TUF hash bin delegation (optimal bin numbers)
Calculate the optimal number of bins for given number of target files.
Problem description
===================
Given 'targets_count', 'bin_meta_size' and 'target_meta_size', I want to know
the optimal 'bin_count', for which 'snapshot_size' plus 'bin_size' are minimal.
Constraints
-----------
bin_count must be a power of 2
snapshot_size = bin_count * bin_meta_size
bin_size = ceil(targets_count / bin_count) * target_meta_size
Result (i.e. target count thresholds, where optimal bin count changes)
======
targets #: 0, bin #: 2
targets #: 3, bin #: 4
targets #: 5, bin #: 8
targets #: 9, bin #: 16
targets #: 49, bin #: 32
targets #: 225, bin #: 64
targets #: 961, bin #: 128
targets #: 3,969, bin #: 256
targets #: 15,617, bin #: 512
targets #: 62,977, bin #: 1,024
targets #: 250,881, bin #: 2,048
targets #: 1,005,569, bin #: 4,096
targets #: 4,026,369, bin #: 8,192
"""
import math
MAX_TARGETS_COUNT = 10_000_000
TARGET_META_SIZE = 250
BIN_META_SIZE = 30
MAX_BIN_EXPONENT = 15
def get_optimal_bin_count(targets_count):
totals = {}
for bin_exponent in range(1, MAX_BIN_EXPONENT + 1):
bin_count = 2**bin_exponent
targets_count_per_bin = math.ceil(targets_count / bin_count)
snapshot_size = bin_count * BIN_META_SIZE
bin_size = targets_count_per_bin * TARGET_META_SIZE
total = snapshot_size + bin_size
totals[total] = bin_exponent
optimal_exponent = totals[min(totals.keys())]
if optimal_exponent == MAX_BIN_EXPONENT:
raise Exception("optimum may lie beyond tested bin counts,"
"consider increasing MAX_BIN_EXPONENT")
return 2**optimal_exponent
def main():
"""Print optimal number of bins for a range of number of targets. """
last_optimal = 0
for targets_count in range(0, MAX_TARGETS_COUNT):
optimal = get_optimal_bin_count(targets_count)
if optimal > last_optimal:
print(f"targets #: {targets_count:,}, bin #: {optimal:,}")
last_optimal = optimal
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment