Skip to content

Instantly share code, notes, and snippets.

@kgorskowski
Created July 19, 2023 13:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kgorskowski/013c9fc52b128bd9b1ca29c30cec1ea8 to your computer and use it in GitHub Desktop.
Save kgorskowski/013c9fc52b128bd9b1ca29c30cec1ea8 to your computer and use it in GitHub Desktop.
Kafka broker sizing calculator: derives topic retention settings from the available broker storage
def calculate_retention_settings(storage_per_broker, brokers, partitions, replication, data_rate):
    """Print and return retention settings that fit a topic into the cluster storage.

    Only 80% of the raw cluster storage is treated as usable, and the data
    written during one five-minute cleanup cycle is subtracted on top of that
    as additional overhead. The remaining bytes are split evenly across all
    partition replicas to get the per-partition retention size.

    Args:
        storage_per_broker: Available storage per broker in GB
            (converted with 1024**3 bytes per GB).
        brokers: Number of brokers in the cluster.
        partitions: Number of partitions of the topic.
        replication: Replication level of the topic.
        data_rate: Expected data rate in MB per second
            (converted with 1024**2 bytes per MB).

    Returns:
        A dict with the computed values:
        ``total_storage_gb``, ``retention_bytes``, ``retention_gb``,
        ``overhead_gb``, ``retention_seconds`` and ``retention_minutes``.
        The same values are printed to stdout.

    Raises:
        ValueError: If any input is not greater than zero, or if the storage
            is too small to leave any usable space at the given data rate.
    """
    inputs = {
        "storage_per_broker": storage_per_broker,
        "brokers": brokers,
        "partitions": partitions,
        "replication": replication,
        "data_rate": data_rate,
    }
    for name, value in inputs.items():
        # Zero would end in a division by zero, negative values in nonsense results.
        if value <= 0:
            raise ValueError(f"{name} must be greater than zero, got {value!r}")

    bytes_per_mb = 1024 * 1024
    bytes_per_gb = 1024 * 1024 * 1024
    usable_fraction = 0.8  # keep 20% of the raw storage free

    # Expected data rate in bytes per second
    rate_bytes_per_second = data_rate * bytes_per_mb

    # Data written during one lifecycle/cleanup cycle (5 minutes) is counted as
    # overhead, because it can pile up before old data is removed.
    # NOTE(review): presumably this mirrors Kafka's retention check interval — confirm.
    lc_overhead = rate_bytes_per_second * 60 * 5

    # Total storage of the whole cluster, in GB and in bytes
    total_storage = storage_per_broker * brokers
    total_storage_bytes = total_storage * bytes_per_gb

    # Usable bytes: raw storage minus 20% and minus the lifecycle overhead
    usable_bytes = (total_storage_bytes * usable_fraction) - lc_overhead
    if usable_bytes <= 0:
        # A negative size must never be recommended as a retention setting.
        raise ValueError(
            "storage is too small for the given data rate: "
            f"{total_storage} GB leave no usable space after overhead"
        )

    # Retention size per partition replica
    retention_bytes = usable_bytes / (partitions * replication)
    retention_gb = int(retention_bytes / bytes_per_gb)

    # Storage left over when every replica holds the (GB-truncated) retention size
    overhead_gb = total_storage - int(retention_bytes / bytes_per_gb * partitions * replication)

    # Maximum retention time in seconds for the given storage.
    # NOTE(review): this divides by data_rate only. If data_rate is the producer
    # ingest rate, each byte is stored `replication` times and the real limit
    # would be shorter — confirm what data_rate is meant to include.
    retention_seconds = int(usable_bytes / rate_bytes_per_second)
    retention_minutes = int(retention_seconds / 60)

    # Print the calculated settings
    print(f"Available storage for the broker: {total_storage} GB")
    print(f"recommended retention bytes settings for topic: {retention_bytes} bytes / {retention_gb} GB")
    print(f"This leaves {overhead_gb} GB overhead storage")
    print(f"Max. possible Retention time based on storage limit: {retention_seconds} seconds / {retention_minutes} minutes")

    return {
        "total_storage_gb": total_storage,
        "retention_bytes": retention_bytes,
        "retention_gb": retention_gb,
        "overhead_gb": overhead_gb,
        "retention_seconds": retention_seconds,
        "retention_minutes": retention_minutes,
    }
# Example cluster and topic parameters
storage_per_broker = 30  # GB of storage available on each broker
brokers = 3  # broker count of the cluster
partitions = 6  # partition count of the topic
replication = 2  # replication level of the topic
data_rate = 15  # expected data rate in MB per second

# Run the calculation for the example values; the settings are printed
calculate_retention_settings(
    storage_per_broker=storage_per_broker,
    brokers=brokers,
    partitions=partitions,
    replication=replication,
    data_rate=data_rate,
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment