Skip to content

Instantly share code, notes, and snippets.

@stephanGarland
Last active April 19, 2024 11:53
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save stephanGarland/cd6a5c667e8406cf9cfb661a12399ddd to your computer and use it in GitHub Desktop.
Save stephanGarland/cd6a5c667e8406cf9cfb661a12399ddd to your computer and use it in GitHub Desktop.
Designed for use with Ansible, but works on its own as well – calculate the tricky parts of InnoDB's buffer pool
#!/usr/bin/env python3
"""
Calculates various parameters for the InnoDB buffer pool based on a simple input.
The InnoDB engine is extremely strict about how its buffer pool can be sized.
Ref: https://dev.mysql.com/doc/refman/8.0/en/innodb-parameters.html#sysvar_innodb_buffer_pool_chunk_size
Other than size restrictions (which are unlikely to be violated in normal practice), there are
complicated relationships between the three main variables that must be met, plus some
best practices to be observed. Getting these three correct can be challenging, hence this calculator.
innodb_buffer_pool_chunk_size:
Description: This is the least likely to ever be used during normal operation, but it still must be observed.
It defines the size of chunks to add or remove to the buffer pool during online resizing operations.
Default: 134217728 bytes (128 MiB)
Minimum: 1048576 bytes (1 MiB)
Maximum: innodb_buffer_pool_size / innodb_buffer_pool_instances
Increment: 1048576 bytes (1 MiB)
Dynamic: False
Recommendation: num_chunks (innodb_buffer_pool_size / innodb_buffer_pool_chunk_size) <= 1000
innodb_buffer_pool_instances:
Description: This is the number of instances the buffer pool is split into. Each instance gets its own
flush list, LRU, mutex, etc. This should generally be raised as high as reasonable for better throughput.
Default: innodb_buffer_pool_size < 1 GiB ? 1 : 8
Minimum: 1
Maximum: 64
Increment: 1
Dynamic: False
Recommendation: As high as possible while keeping each instance size
(innodb_buffer_pool_size / innodb_buffer_pool_instances) >= 1 GiB
innodb_buffer_pool_size:
Description: This is the size of the buffer pool itself. Unlike Postgres, this is the sole pool of memory
available to MySQL (Postgres uses a combination of internal and OS-level memory caches). As such,
the recommended conservative starting value for this on a dedicated instance is 75-80% of system memory,
but can go higher with caution. The total size will be approximately 110% of the specified
buffer pool's size due to overhead control structures, and enough must be left for other MySQL operations,
as well as the OS's own needs (and those of any other programs operating on the server).
The buffer pool must be equal to or a multiple of innodb_buffer_pool_chunk_size * innodb_buffer_pool_instances,
and all other requirements for other variables must also still be met.
Default: 134217728 bytes (128 MiB)
Minimum: 5242880 bytes (5 MiB)
Maximum: (2^64) - 1 bytes (16 EiB - 1 byte)
Increment: innodb_buffer_pool_chunk_size
Dynamic: true
Recommendation: As high as possible without being OOMkilled
"""
import argparse
from collections import namedtuple
def make_args() -> argparse.Namespace:
    """Define and parse the command-line interface for the calculator."""
    parser = argparse.ArgumentParser()
    # (short flag, long flag, add_argument kwargs) for every supported option
    option_table = [
        ("-c", "--chunk-size-bytes",
         {"help": "size of innodb buffer pool chunks in bytes", "type": int}),
        ("-i", "--instance-count",
         {"help": "number of innodb buffer pool instances", "type": int}),
        ("-f", "--force",
         {"help": "force exact values to be used as given", "action": "store_true"}),
        ("-m", "--sys-memory-mb",
         {"help": "amount of system memory in mb as reported by ansible", "type": int}),
        ("-p", "--pool-size-percent-of-mem",
         {"help": "size of buffer pool as percent of system memory", "type": float}),
        ("-s", "--pool-size-bytes",
         {"help": "size of innodb buffer pool in bytes", "type": int}),
    ]
    for short_flag, long_flag, kwargs in option_table:
        parser.add_argument(short_flag, long_flag, **kwargs)
    return parser.parse_args()
def pool_is_multiple(pool_size: int, chunk_size: int, instances: int) -> bool:
    """Validates that the buffer pool is a multiple of its constituent parts."""
    resize_unit = chunk_size * instances
    return not pool_size % resize_unit
def instances_gte_one_gib(pool_size: int, instances: int) -> bool:
    """Validates that each buffer pool instance holds at least 1 GiB."""
    one_gib = 2**30
    per_instance = pool_size / instances
    return per_instance >= one_gib
def instances_count_allowed(instances: int) -> bool:
    """Validates that the instance count is in MySQL's allowed 1..64 range."""
    return 1 <= instances <= 64
def pool_size_allowed(pool_size: int) -> bool:
    """Validates that the pool size is within the allowable range.

    Per the MySQL docs (and this module's own docstring), the minimum
    innodb_buffer_pool_size is 5242880 bytes (5 MiB) *inclusive* and the
    maximum is 2**64 - 1 bytes; the original strict `<` comparisons
    wrongly rejected both documented endpoints.
    """
    return 5 * 2**20 <= pool_size <= 2**64 - 1
def chunk_size_allowed(chunk_size: int, pool_size: int, instances: int) -> bool:
    """Validates that the chunk size is valid.

    Per the MySQL docs, innodb_buffer_pool_chunk_size ranges from 1 MiB
    (inclusive) up to innodb_buffer_pool_size / innodb_buffer_pool_instances
    (inclusive).  The original strict `<` comparisons rejected both documented
    endpoints, and the float division could lose precision for huge pools;
    the upper bound is rewritten as an exact integer comparison.
    """
    return 2**20 <= chunk_size and chunk_size * instances <= pool_size
def chunk_count_optimal(chunk_size: int, pool_size: int) -> bool:
    """Validates that the chunk count stays at or under the recommended 1000."""
    num_chunks = pool_size / chunk_size
    return num_chunks <= 1000
def all_checks_true(
    pool_size_bytes: int, chunk_size_bytes: int, instance_count: int
) -> bool:
    """Returns True only when every individual validation check passes."""
    return (
        pool_is_multiple(pool_size_bytes, chunk_size_bytes, instance_count)
        and instances_gte_one_gib(pool_size_bytes, instance_count)
        and instances_count_allowed(instance_count)
        and pool_size_allowed(pool_size_bytes)
        and chunk_size_allowed(chunk_size_bytes, pool_size_bytes, instance_count)
        and chunk_count_optimal(chunk_size_bytes, pool_size_bytes)
    )
def get_buffer_pool_bytes(pct_of_mem: float, sys_mem_mb: int) -> int:
    """Returns a buffer pool size in bytes, floored to a whole GiB.

    Args:
        pct_of_mem: percent of system memory to use; values in (1, 100]
            are treated as percentages, values in (0, 1] as fractions.
        sys_mem_mb: system memory in MiB (as reported by Ansible).

    Returns:
        The pool size in bytes, rounded down to a GiB boundary.

    Raises:
        SystemExit: if pct_of_mem is non-positive or exceeds 100.  (The
            original silently accepted non-positive values and returned a
            nonsense size.)
    """
    if pct_of_mem <= 0:
        raise SystemExit("ERROR: percent of mem must be positive")
    if pct_of_mem > 100:
        raise SystemExit("ERROR: percent of mem cannot exceed 100")
    if pct_of_mem > 1:
        pct_of_mem /= 100
    buf_pool_mib = sys_mem_mb * pct_of_mem
    # floor to a GiB boundary (1024 MiB), then convert MiB -> bytes
    buf_pool_mod_gib = buf_pool_mib % 1024
    return int((buf_pool_mib - buf_pool_mod_gib) * 2**20)
if __name__ == "__main__":
    args = make_args()
    # 128 MiB, the MySQL default chunk size
    _chunk_size_bytes = 134217728
    # NOTE: argparse's store_true already yields a bool; the original code
    # called args.force.lower() here, which raised AttributeError whenever
    # -f/--force was actually passed.
    if args.pool_size_percent_of_mem:
        if args.sys_memory_mb is None:
            raise SystemExit(
                "ERROR: --sys-memory-mb is required with --pool-size-percent-of-mem"
            )
        _pool_size_bytes = get_buffer_pool_bytes(
            args.pool_size_percent_of_mem, args.sys_memory_mb
        )
    elif not all([args.pool_size_bytes, args.chunk_size_bytes]):
        raise SystemExit(
            "ERROR: must define pool size and chunk size, or a percentage of system memory"
        )
    elif not args.force:
        # align the requested pool to a GiB boundary; the helper expects MiB,
        # so convert from bytes first (the original passed raw bytes, which
        # inflated the result by a factor of 2**20).  Without --force the
        # user-supplied chunk size is intentionally ignored and the search
        # starts from the 128 MiB default.
        _pool_size_bytes = get_buffer_pool_bytes(1.0, args.pool_size_bytes // 2**20)
    else:  # assume the user knows what they're doing
        _pool_size_bytes = args.pool_size_bytes
        _chunk_size_bytes = args.chunk_size_bytes
    # set unconditionally so the percent-of-memory + --force path does not
    # hit a NameError later in the search loop
    _instance_count = args.instance_count
    if args.force and _instance_count is None:
        raise SystemExit("ERROR: --force requires --instance-count")
    if not args.force:
        # default instance counts scaled to the pool size
        if _pool_size_bytes >= 8 * 2**30:
            _instance_count = 8
        elif _pool_size_bytes >= 2 * 2**30:
            _instance_count = 2
        else:
            _instance_count = 1
    # multiples of chunk size, up to half the buffer pool, in 64 MiB increments
    chunk_size_mults = range(_chunk_size_bytes, _pool_size_bytes // 2, 64 * 2**20)
    Candidate = namedtuple(
        "Candidate",
        ["chunk_size_mib", "chunk_count", "instance_count", "instance_size_mib"],
    )
    candidates = set()
    chunk_size_bytes = _chunk_size_bytes
    pool_size_bytes = _pool_size_bytes
    # bound the search: shrink the pool by at most 5%
    while _pool_size_bytes > pool_size_bytes * 0.95:
        for _ in range(64):
            if _instance_count > 64:
                break
            for _ in range(len(chunk_size_mults)):
                if all_checks_true(
                    _pool_size_bytes, _chunk_size_bytes, _instance_count
                ):
                    candidates.add(
                        Candidate(
                            _chunk_size_bytes // 2**20,
                            _pool_size_bytes // _chunk_size_bytes,
                            _instance_count,
                            (_pool_size_bytes // _instance_count) // 2**20,
                        )
                    )
                # first, try adding 1 MiB to the chunk size
                _chunk_size_bytes += 2**20
            # reset once we're out of the inner loop
            _chunk_size_bytes = chunk_size_bytes
            # try adding another instance
            _instance_count += 1
        # NOTE(review): _instance_count is never reset per outer iteration, so
        # passes after the first do no work once it exceeds 64 — preserved as-is
        # subtract 1 MiB from the pool size
        _pool_size_bytes -= 2**20
    # the original indexed cand[0] unconditionally, raising IndexError when
    # no valid combination exists for the given inputs
    if not candidates:
        raise SystemExit("ERROR: no valid configuration found for the given inputs")
    # prefer the highest number of buffer pool instances
    cand = sorted(candidates, key=lambda x: x.instance_count, reverse=True)
    print(cand[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment