Last active
August 29, 2015 14:01
-
-
Save tartakynov/7fd547c9a11a3493d705 to your computer and use it in GitHub Desktop.
Calculates the required number of Hadoop data nodes for the first year
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from math import ceil | |
from tabulate import tabulate | |
# Size constants. The base unit throughout this script is 1 MiB, so every
# size below is a number of mebibytes.
GiB = 1024  # MiB per GiB
TiB = 1024 * GiB  # MiB per TiB (1048576)
def main():
    """
    Print a first-year cluster growth plan for several ingest growth rates.

    All sizes are in MiB (GiB and TiB are MiB multiples). For each monthly
    growth rate the function sums twelve months of replicated ingest, scales
    the sum up to leave room for the temp-space reserve, and prints the total
    space together with the number of data nodes needed to hold it.
    """
    dailyIngest = 30 * GiB  # average daily ingest rate
    replication = 3  # replication factor (by default 3)
    reserve = 0.25  # MapReduce temp space reserve (usually 1/3 or 1/4)
    nodeStorage = 6 * TiB  # amount of disk space per node
    monthlyGrowthRates = [0, 0.05, 0.10, 0.15]  # growth of ingest rate per month

    # Replicated volume written during the first month; 365 / 12.0 is the
    # average number of days per month.
    firstMonth = dailyIngest * replication * 365 / 12.0

    results = []
    for r in monthlyGrowthRates:
        # Sum of the geometric series 1 + q + ... + q**11 with q = 1 + r.
        # The closed form divides by q - 1, which is r itself; using r
        # directly avoids the rounding error of computing (1 + r) - 1.
        # r == 0 is handled separately to avoid dividing by zero.
        monthsEquivalent = 12 if r == 0 else (pow(1 + r, 12) - 1) / r
        stored = firstMonth * monthsEquivalent
        # The stored data may only occupy (1 - reserve) of the raw capacity.
        total = stored / (1 - reserve)
        # int() keeps the node count a whole number on Python 2 as well,
        # where math.ceil returns a float.
        nodes = int(ceil(total / nodeStorage))
        results.append([r, humanReadableSize(total), nodes])

    # Parenthesised single-argument print works on both Python 2 and 3.
    print(tabulate(results, headers=["Monthly growth rate", "Total amount of space", "Number of data nodes"]))
def humanReadableSize(num):
    """
    Format a size given in MiB as a string with a binary unit suffix.

    The value is divided by 1024 until its magnitude drops below 1024 and is
    then rendered with one decimal place, e.g. 1536 -> "1.5GiB".

    :param num: size in MiB (int or float)
    :return: formatted string such as "56.7TiB"
    """
    for x in ['MiB', 'GiB', 'TiB', 'PiB', 'EiB']:
        # Compare the magnitude so large negative values are scaled too.
        if abs(num) < 1024.0:
            return "%3.1f%s" % (num, x)
        num /= 1024.0
    # Values of 1024 EiB or more used to fall off the loop and return None;
    # report them in the next unit up instead.
    return "%3.1f%s" % (num, 'ZiB')
if __name__ == "__main__":
    # Run the calculation only when executed as a script, not when imported.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Let's assume that you're planning to build a cluster with 6 TiB data nodes.
If you expect to store 30 GiB daily on average and the amount of incoming data is expected to grow by 5% per month, then you would need at least 10 nodes in the first year (with all other options left at their defaults).