tartakynov/hgp_monthly.py

## hgp_monthly.py
from math import ceil
from tabulate import tabulate

# Size multipliers. The base unit of every size in this script is 1 MiB
# (humanReadableSize starts counting at 'MiB'), so one GiB is 1024 units.
GiB = 1024
TiB = 1024 * GiB

def main():
    """
    Print a 12-month cluster growth plan based on the numbers listed below.

    For every month the table shows the cumulative amount of disk space
    needed (replicated data plus the temp-space reserve) and the number of
    data nodes required to hold it. All sizes are expressed in MiB, the
    base unit of the module-level GiB and TiB multipliers.
    """
    dailyIngest = 30 * GiB # average daily ingest rate
    replication = 3        # replication factor (by default 3)
    reserve     = 0.25     # MapReduce temp space reserve (usually 1/3 or 1/4)
    nodeStorage = 6 * TiB  # amount of disk space per node
    growthRate = 0.05      # growth of ingest rate per month

    # Replicated data written during an average month (365 / 12 days).
    monthlyIngest = (dailyIngest * replication * 365 / 12.0)

    results = []
    for month in range(1, 13):
        if growthRate == 0:
            # No growth: every month adds the same amount.
            periods = month
        else:
            # Sum of the geometric series 1 + q + ... + q**(month - 1),
            # where q = 1 + growthRate.
            periods = (pow(1 + growthRate, month) - 1) / ((1 + growthRate) - 1)
        stored = monthlyIngest * periods
        # Size the cluster so that the `reserve` fraction of it stays free.
        total = stored / (1 - reserve)
        # math.ceil returns a float on Python 2; cast so the node count is
        # an integer on every interpreter version.
        nodes = int(ceil(total / nodeStorage))
        results.append([month, humanReadableSize(total), nodes])
    # The parenthesised single-argument form works on Python 2 and Python 3.
    print(tabulate(results, headers=["Month", "Total amount of space", "Number of data nodes"]))

def humanReadableSize(num):
    """
    Format a size given in MiB as a short string such as "3.6TiB".

    The value is divided by 1024 until it drops below 1024 or the largest
    known unit (EiB) is reached, then rendered with one decimal place
    followed directly by the unit name.

    Values of 1024 EiB and above are reported in EiB instead of falling
    off the end of the unit list and returning None.
    """
    units = ['MiB', 'GiB', 'TiB', 'PiB', 'EiB']
    for unit in units[:-1]:
        if num < 1024.0:
            return "%3.1f%s" % (num, unit)
        num /= 1024.0
    # Largest unit: always format here so the function never returns None.
    return "%3.1f%s" % (num, units[-1])

# Print the growth plan only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
	from math import ceil
	from tabulate import tabulate

	# Size multipliers. The base unit of every size in this script is 1 MiB
	# (humanReadableSize starts counting at 'MiB'), so one GiB is 1024 units.
	GiB = 1024
	TiB = 1024 * GiB

	def main():
	    """
	    Print a 12-month cluster growth plan based on the numbers listed below.

	    For every month the table shows the cumulative amount of disk space
	    needed (replicated data plus the temp-space reserve) and the number of
	    data nodes required to hold it. All sizes are expressed in MiB, the
	    base unit of the GiB and TiB multipliers.
	    """
	    dailyIngest = 30 * GiB # average daily ingest rate
	    replication = 3 # replication factor (by default 3)
	    reserve = 0.25 # MapReduce temp space reserve (usually 1/3 or 1/4)
	    nodeStorage = 6 * TiB # amount of disk space per node
	    growthRate = 0.05 # growth of ingest rate per month

	    # Replicated data written during an average month (365 / 12 days).
	    monthlyIngest = (dailyIngest * replication * 365 / 12.0)

	    results = []
	    for month in range(1, 13):
	        if growthRate == 0:
	            # No growth: every month adds the same amount.
	            periods = month
	        else:
	            # Sum of the geometric series 1 + q + ... + q**(month - 1),
	            # where q = 1 + growthRate.
	            periods = (pow(1 + growthRate, month) - 1) / ((1 + growthRate) - 1)
	        stored = monthlyIngest * periods
	        # Size the cluster so that the `reserve` fraction of it stays free.
	        total = stored / (1 - reserve)
	        # math.ceil returns a float on Python 2; cast so the node count is
	        # an integer on every interpreter version.
	        nodes = int(ceil(total / nodeStorage))
	        results.append([month, humanReadableSize(total), nodes])
	    # The parenthesised single-argument form works on Python 2 and Python 3.
	    print(tabulate(results, headers=["Month", "Total amount of space", "Number of data nodes"]))

	def humanReadableSize(num):
	    """
	    Format a size given in MiB as a short string such as "3.6TiB".

	    The value is divided by 1024 until it drops below 1024 or the largest
	    known unit (EiB) is reached, then rendered with one decimal place
	    followed directly by the unit name.

	    Values of 1024 EiB and above are reported in EiB instead of falling
	    off the end of the unit list and returning None.
	    """
	    units = ['MiB', 'GiB', 'TiB', 'PiB', 'EiB']
	    for unit in units[:-1]:
	        if num < 1024.0:
	            return "%3.1f%s" % (num, unit)
	        num /= 1024.0
	    # Largest unit: always format here so the function never returns None.
	    return "%3.1f%s" % (num, units[-1])

	# Print the growth plan only when the file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()
Month	Total amount of space	Min number of data nodes
1	3.6 TiB	1
2	7.3 TiB	2
3	11.2 TiB	2
4	15.4 TiB	3
5	19.7 TiB	4
6	24.2 TiB	5
7	29.0 TiB	5
8	34.0 TiB	6
9	39.3 TiB	7
10	44.8 TiB	8
11	50.6 TiB	9
12	56.7 TiB	10