Skip to content

Instantly share code, notes, and snippets.

@clayg
Created July 1, 2014 22:57
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save clayg/2fb0b467d71a8afd3670 to your computer and use it in GitHub Desktop.
Save clayg/2fb0b467d71a8afd3670 to your computer and use it in GitHub Desktop.
"""
This script is about telling you what you need *today* - not what you want to hear
%(prog)s <number-of-devices-you-have-in-the-real-hardware-you-are-going-to-deploy>
"""
import os
import sys
from math import log, modf, floor, ceil
# Replica count. It is a float, so an int divided by it is a true division
# on Python 2 as well as Python 3.
REPLICAS = 3.0
# Per-device target used to derive the smaller of the two candidate powers.
MAXIMUM_NUMBER_OF_DIRECTORIES_YOU_WANT_IN_TOP_LEVEL_DIR = 7000
# Per-device target used to derive the larger of the two candidate powers.
MAXIMUM_NUMBER_OF_DIRECTORIES_IN_TOP_LEVEL_DIR = 15000
# Midpoint of the two targets above. Floor division keeps this an int
# (11000) on both Python 2 and Python 3; plain `/` yields 11000.0 on 3.
MIDDLE_OF_THE_ROAD_PARTS_PER_DISK = (
    MAXIMUM_NUMBER_OF_DIRECTORIES_YOU_WANT_IN_TOP_LEVEL_DIR +
    MAXIMUM_NUMBER_OF_DIRECTORIES_IN_TOP_LEVEL_DIR
) // 2
# Lower bound on partitions per device, as the name states.
MINIMUM_NUMBER_OF_PARTS_PER_DISK = 100
def _nearest_part_power(parts_per_device, device_count):
    """Return the power of two that best matches a per-device target.

    The result is log2(parts_per_device * device_count / REPLICAS) rounded
    to the nearest integer, i.e. the closest exponent on a log scale.
    """
    return int(round(log(parts_per_device * device_count / REPLICAS, 2)))


def _parts_per_device(part_power, device_count):
    """Return REPLICAS * 2**part_power / device_count, rounded to an int."""
    return int(round(REPLICAS * (2 ** part_power) / device_count))


def main():
    """Print a suggested power of two for the given number of devices.

    Reads the device count from the first command-line argument, then prints:

    * the partitions per device at the power derived from the "want" target,
    * the partitions per device at the power derived from the hard maximum,
    * the chosen power (whichever lands closer to the midpoint target),
    * a rough estimate of the deployment size at which that power would
      bottom out at MINIMUM_NUMBER_OF_PARTS_PER_DISK partitions per device.

    Exits with the usage message (the module docstring) when the argument
    is missing, is not an integer, or is not a positive number.
    """
    # Index rather than pop() so sys.argv is left intact and main() can be
    # called more than once in the same process.
    prog = sys.argv[0]
    try:
        device_count = int(sys.argv[1])
    except (IndexError, ValueError):
        sys.exit(__doc__ % {'prog': prog})
    if device_count <= 0:
        # Zero would hit log(0) and a division by zero further down; a
        # negative count hits log() of a negative number. Treat both as a
        # usage error instead of crashing with a traceback.
        sys.exit(__doc__ % {'prog': prog})

    as_big_as_you_really_want_to_go = _nearest_part_power(
        MAXIMUM_NUMBER_OF_DIRECTORIES_YOU_WANT_IN_TOP_LEVEL_DIR, device_count)
    as_big_as_you_can_really_support = _nearest_part_power(
        MAXIMUM_NUMBER_OF_DIRECTORIES_IN_TOP_LEVEL_DIR, device_count)

    # Each report line is built as one string so print() emits the same
    # text on Python 2 and Python 3.
    number_of_parts = _parts_per_device(
        as_big_as_you_really_want_to_go, device_count)
    print('@%s number of partitions per device %s' % (
        as_big_as_you_really_want_to_go, number_of_parts))

    number_of_parts_max = _parts_per_device(
        as_big_as_you_can_really_support, device_count)
    print('@%s number of partitions per device %s' % (
        as_big_as_you_can_really_support, number_of_parts_max))

    # Pick whichever candidate sits closer to the midpoint target: the
    # smaller power is measured from below, the larger one from above.
    part_delta = MIDDLE_OF_THE_ROAD_PARTS_PER_DISK - number_of_parts
    part_delta_max = number_of_parts_max - MIDDLE_OF_THE_ROAD_PARTS_PER_DISK
    if part_delta < part_delta_max:
        right_answer = as_big_as_you_really_want_to_go
    else:
        right_answer = as_big_as_you_can_really_support
    print(right_answer)

    # Device count at which the chosen power leaves only the minimum
    # number of partitions on each device.
    max_devices = (
        REPLICAS * (2 ** right_answer) / MINIMUM_NUMBER_OF_PARTS_PER_DISK)
    # Server range: 24 drives per server (dense) up to 12 per server (thin).
    thin_nodes = max_devices / 12
    dense_nodes = max_devices / 24
    # TB range reported below: 3.0 and 5 per drive.
    # NOTE(review): REPLICAS doubles as the small-drive TB figure here; this
    # looks like it relies on both being 3 - confirm before changing REPLICAS.
    small_drives = max_devices * REPLICAS
    big_drives = max_devices * 5
    print('%s drives in %s-%s servers storing %s-%s TBs' % tuple(
        int(x) for x in (
            max_devices,
            dense_nodes,
            thin_nodes,
            small_drives,
            big_drives,
        )))
if __name__ == "__main__":
    # Script entry point: run main() and pass its return value to sys.exit().
    exit_status = main()
    sys.exit(exit_status)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment