Skip to content

Instantly share code, notes, and snippets.

@stephanGarland
Last active April 19, 2024 11:53
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save stephanGarland/cd6a5c667e8406cf9cfb661a12399ddd to your computer and use it in GitHub Desktop.
Save stephanGarland/cd6a5c667e8406cf9cfb661a12399ddd to your computer and use it in GitHub Desktop.
Designed for use with Ansible, but works on its own as well – calculate the tricky parts of InnoDB's buffer pool
#!/usr/bin/env python3
"""
Calculates various parameters for the InnoDB buffer pool based on a simple input.
The InnoDB engine is extremely strict about how its buffer pool can be sized.
Ref: https://dev.mysql.com/doc/refman/8.0/en/innodb-parameters.html#sysvar_innodb_buffer_pool_chunk_size
Other than size restrictions (which are unlikely to be violated in normal practice), there are
complicated relationships between the three main variables that must be met, plus some
best practices to be observed. Getting these three correct can be challenging, hence this calculator.
innodb_buffer_pool_chunk_size:
Description: This is the least likely to ever be used during normal operation, but it still must be observed.
It defines the size of chunks to add or remove to the buffer pool during online resizing operations.
Default: 134217728 bytes (128 MiB)
Minimum: 1048576 bytes (1 MiB)
Maximum: innodb_buffer_pool_size / innodb_buffer_pool_instances
Increment: 1048576 bytes (1 MiB)
Dynamic: False
Recommendation: num_chunks (innodb_buffer_pool_size / innodb_buffer_pool_chunk_size) <= 1000
innodb_buffer_pool_instances:
Description: This is the number of instances the buffer pool is split into. Each instance gets its own
flush list, LRU, mutex, etc. This should generally be raised as high as reasonable for better throughput.
Default: innodb_buffer_pool_size < 1 GiB ? 1 : 8
Minimum: 1
Maximum: 64
Increment: 1
Dynamic: False
Recommendation: As high as possible while keeping each instance size
(innodb_buffer_pool_size / innodb_buffer_pool_instances) >= 1 GiB
innodb_buffer_pool_size:
Description: This is the size of the buffer pool itself. Unlike Postgres, this is the sole pool of memory
available to MySQL (Postgres uses a combination of internal and OS-level memory caches). As such,
the recommended conservative starting value for this on a dedicated instance is 75-80% of system memory,
but can go higher with caution. The total size will be approximately 110% of the specified
buffer pool's size due to overhead control structures, and enough must be left for other MySQL operations,
as well as the OS's own needs (and those of any other programs operating on the server).
The buffer pool must be equal to or a multiple of innodb_buffer_pool_chunk_size * innodb_buffer_pool_instances,
and all other requirements for other variables must also still be met.
Default: 134217728 bytes (128 MiB)
Minimum: 5242880 bytes (5 MiB)
Maximum: (2^64) - 1 bytes (16 EiB - 1 byte)
Increment: innodb_buffer_pool_chunk_size
Dynamic: true
Recommendation: As high as possible without being OOMkilled
"""
import argparse
from collections import namedtuple
def make_args() -> argparse.Namespace:
    """Define and parse the command-line interface for the calculator."""
    parser = argparse.ArgumentParser()
    # (short flag, long flag, add_argument kwargs) for every supported option
    option_table = [
        ("-c", "--chunk-size-bytes",
         {"help": "size of innodb buffer pool chunks in bytes", "type": int}),
        ("-i", "--instance-count",
         {"help": "number of innodb buffer pool instances", "type": int}),
        ("-f", "--force",
         {"help": "force exact values to be used as given", "action": "store_true"}),
        ("-m", "--sys-memory-mb",
         {"help": "amount of system memory in mb as reported by ansible", "type": int}),
        ("-p", "--pool-size-percent-of-mem",
         {"help": "size of buffer pool as percent of system memory", "type": float}),
        ("-s", "--pool-size-bytes",
         {"help": "size of innodb buffer pool in bytes", "type": int}),
    ]
    for short_flag, long_flag, kwargs in option_table:
        parser.add_argument(short_flag, long_flag, **kwargs)
    return parser.parse_args()
def pool_is_multiple(pool_size: int, chunk_size: int, instances: int) -> bool:
    """Validates that the buffer pool is a multiple of its constituent parts."""
    resize_unit = chunk_size * instances
    return not pool_size % resize_unit
def instances_gte_one_gib(pool_size: int, instances: int) -> bool:
    """Validates that each buffer pool instance holds at least 1 GiB."""
    one_gib = 2**30
    per_instance = pool_size / instances
    return per_instance >= one_gib
def instances_count_allowed(instances: int) -> bool:
    """Validates that the instance count is in MySQL's allowed 1..64 range."""
    return 1 <= instances <= 64
def pool_size_allowed(pool_size: int) -> bool:
    """Validates that the pool size is within the allowable range.

    Per the MySQL docs (and this module's own docstring), the minimum
    innodb_buffer_pool_size is 5242880 bytes (5 MiB) *inclusive* and the
    maximum is 2**64 - 1 bytes; the original strict `<` comparisons
    wrongly rejected both documented endpoints.
    """
    return 5 * 2**20 <= pool_size <= 2**64 - 1
def chunk_size_allowed(chunk_size: int, pool_size: int, instances: int) -> bool:
    """Validates that the chunk size is valid.

    Per the MySQL docs, innodb_buffer_pool_chunk_size ranges from 1 MiB
    (inclusive) up to innodb_buffer_pool_size / innodb_buffer_pool_instances
    (inclusive).  The original strict `<` comparisons rejected both documented
    endpoints, and the float division could lose precision for huge pools;
    the upper bound is rewritten as an exact integer comparison.
    """
    return 2**20 <= chunk_size and chunk_size * instances <= pool_size
def chunk_count_optimal(chunk_size: int, pool_size: int) -> bool:
    """Validates that the chunk count stays at or under the recommended 1000."""
    num_chunks = pool_size / chunk_size
    return num_chunks <= 1000
def all_checks_true(
    pool_size_bytes: int, chunk_size_bytes: int, instance_count: int
) -> bool:
    """Returns True only when every individual validation check passes."""
    return (
        pool_is_multiple(pool_size_bytes, chunk_size_bytes, instance_count)
        and instances_gte_one_gib(pool_size_bytes, instance_count)
        and instances_count_allowed(instance_count)
        and pool_size_allowed(pool_size_bytes)
        and chunk_size_allowed(chunk_size_bytes, pool_size_bytes, instance_count)
        and chunk_count_optimal(chunk_size_bytes, pool_size_bytes)
    )
def get_buffer_pool_bytes(pct_of_mem: float, sys_mem_mb: int) -> int:
    """Returns a buffer pool size in bytes, floored to a whole GiB.

    Args:
        pct_of_mem: percent of system memory to use; values in (1, 100]
            are treated as percentages, values in (0, 1] as fractions.
        sys_mem_mb: system memory in MiB (as reported by Ansible).

    Returns:
        The pool size in bytes, rounded down to a GiB boundary.

    Raises:
        SystemExit: if pct_of_mem is non-positive or exceeds 100.  (The
            original silently accepted non-positive values and returned a
            nonsense size.)
    """
    if pct_of_mem <= 0:
        raise SystemExit("ERROR: percent of mem must be positive")
    if pct_of_mem > 100:
        raise SystemExit("ERROR: percent of mem cannot exceed 100")
    if pct_of_mem > 1:
        pct_of_mem /= 100
    buf_pool_mib = sys_mem_mb * pct_of_mem
    # floor to a GiB boundary (1024 MiB), then convert MiB -> bytes
    buf_pool_mod_gib = buf_pool_mib % 1024
    return int((buf_pool_mib - buf_pool_mod_gib) * 2**20)
if __name__ == "__main__":
    args = make_args()
    # 128 MiB, the MySQL default chunk size
    _chunk_size_bytes = 134217728
    # NOTE: argparse's store_true already yields a bool; the original code
    # called args.force.lower() here, which raised AttributeError whenever
    # -f/--force was actually passed.
    if args.pool_size_percent_of_mem:
        if args.sys_memory_mb is None:
            raise SystemExit(
                "ERROR: --sys-memory-mb is required with --pool-size-percent-of-mem"
            )
        _pool_size_bytes = get_buffer_pool_bytes(
            args.pool_size_percent_of_mem, args.sys_memory_mb
        )
    elif not all([args.pool_size_bytes, args.chunk_size_bytes]):
        raise SystemExit(
            "ERROR: must define pool size and chunk size, or a percentage of system memory"
        )
    elif not args.force:
        # align the requested pool to a GiB boundary; the helper expects MiB,
        # so convert from bytes first (the original passed raw bytes, which
        # inflated the result by a factor of 2**20).  Without --force the
        # user-supplied chunk size is intentionally ignored and the search
        # starts from the 128 MiB default.
        _pool_size_bytes = get_buffer_pool_bytes(1.0, args.pool_size_bytes // 2**20)
    else:  # assume the user knows what they're doing
        _pool_size_bytes = args.pool_size_bytes
        _chunk_size_bytes = args.chunk_size_bytes
    # set unconditionally so the percent-of-memory + --force path does not
    # hit a NameError later in the search loop
    _instance_count = args.instance_count
    if args.force and _instance_count is None:
        raise SystemExit("ERROR: --force requires --instance-count")
    if not args.force:
        # default instance counts scaled to the pool size
        if _pool_size_bytes >= 8 * 2**30:
            _instance_count = 8
        elif _pool_size_bytes >= 2 * 2**30:
            _instance_count = 2
        else:
            _instance_count = 1
    # multiples of chunk size, up to half the buffer pool, in 64 MiB increments
    chunk_size_mults = range(_chunk_size_bytes, _pool_size_bytes // 2, 64 * 2**20)
    Candidate = namedtuple(
        "Candidate",
        ["chunk_size_mib", "chunk_count", "instance_count", "instance_size_mib"],
    )
    candidates = set()
    chunk_size_bytes = _chunk_size_bytes
    pool_size_bytes = _pool_size_bytes
    # bound the search: shrink the pool by at most 5%
    while _pool_size_bytes > pool_size_bytes * 0.95:
        for _ in range(64):
            if _instance_count > 64:
                break
            for _ in range(len(chunk_size_mults)):
                if all_checks_true(
                    _pool_size_bytes, _chunk_size_bytes, _instance_count
                ):
                    candidates.add(
                        Candidate(
                            _chunk_size_bytes // 2**20,
                            _pool_size_bytes // _chunk_size_bytes,
                            _instance_count,
                            (_pool_size_bytes // _instance_count) // 2**20,
                        )
                    )
                # first, try adding 1 MiB to the chunk size
                _chunk_size_bytes += 2**20
            # reset once we're out of the inner loop
            _chunk_size_bytes = chunk_size_bytes
            # try adding another instance
            _instance_count += 1
        # NOTE(review): _instance_count is never reset per outer iteration, so
        # passes after the first do no work once it exceeds 64 — preserved as-is
        # subtract 1 MiB from the pool size
        _pool_size_bytes -= 2**20
    # the original indexed cand[0] unconditionally, raising IndexError when
    # no valid combination exists for the given inputs
    if not candidates:
        raise SystemExit("ERROR: no valid configuration found for the given inputs")
    # prefer the highest number of buffer pool instances
    cand = sorted(candidates, key=lambda x: x.instance_count, reverse=True)
    print(cand[0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment