/btrfs-smr-balance.py
Last active May 5, 2020
btrfs-smr-balance.py
| #!/usr/bin/env python3 | |
# The goal of this is to gradually balance a btrfs filesystem which contains DM-SMR drives.
# Such drives are described in detail at https://www.usenix.org/node/188434
# A normal drive should be able to balance a single 1GB chunk in under 30s.
# Such a stripe would normally be written directly to the shingled blocks, but in the case
# it was cached, it would take roughly 100s to clean.
# So our heuristic here is:
# * balance two chunks from the drive with the least amount of unallocated space.
# * if it took longer than 60s, increase the per-chunk sleep interval
# * otherwise, decrease the per-chunk sleep interval
# * keep going until the stdev of the drive unallocated space is below 10GB
| from functools import lru_cache | |
| import logging | |
| import statistics | |
| import subprocess | |
| import sys | |
| import time | |
# Root logger setup: INFO and above, each record prefixed with a
# "YYYY-MM-DD HH:MM:SS" timestamp and a left-aligned, 8-wide level name.
logging.basicConfig(
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s %(levelname)-8s %(message)s',
)
# Mount point of the btrfs filesystem to balance.
FILESYSTEM = '/media/btrfs'

# A balance round slower than this many seconds makes the backoff grow.
CHUNK_TIMEOUT = 60

# Upper bound, in seconds, used when deciding whether the backoff may grow.
MAX_SLEEP = 2 * 60 * 60

# Stop once the stdev of per-device unallocated space drops below this (bytes).
STDEV_LIMIT = 10 * 1024 ** 3
@lru_cache(maxsize=128)
def fib(n):
    """Return the n-th Fibonacci number, where fib(0) == fib(1) == 1.

    Any integer ``n`` below 2 (including negative values) yields 1.

    The value is computed iteratively, so a large ``n`` cannot exhaust the
    interpreter's recursion limit. The explicit ``maxsize`` keeps the
    decorator usable on Python versions that do not accept a bare
    ``@lru_cache``; 128 is the default size the bare form uses.
    """
    previous, current = 1, 1
    # Each step advances the pair (fib(k-1), fib(k)) to (fib(k), fib(k+1)).
    for _ in range(n - 1):
        previous, current = current, previous + current
    return current
def sizeof_fmt(num, suffix='B'):
    """Format a number as a human-readable size with binary (1024) prefixes.

    The value is divided by 1024 until its magnitude drops below 1024, and
    is then rendered with one decimal place followed by the matching prefix
    ('', 'Ki', ... 'Zi') and ``suffix``. Anything still too large after
    'Zi' is reported in 'Yi'.
    """
    scaled = num
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    for prefix in prefixes:
        fits = abs(scaled) < 1024.0
        if fits:
            return "%3.1f%s%s" % (scaled, prefix, suffix)
        scaled = scaled / 1024.0
    return "%.1f%s%s" % (scaled, 'Yi', suffix)
def _parse_devices(show_output):
    """Extract (devid, path, unallocated_bytes) tuples from `btrfs fi show --raw` text."""
    devices = []
    for line in show_output.splitlines():
        fields = line.split()
        # Device rows are read positionally as:
        #   devid <id> size <bytes> used <bytes> path <path>
        if len(fields) < 8 or fields[0] != 'devid':
            continue
        try:
            unallocated = int(fields[3]) - int(fields[5])
        except ValueError:
            logging.warning('Skipping unparsable device line: %s', line.strip())
            continue
        devices.append((fields[1], fields[7], unallocated))
    return devices


def bal_chunk():
    """Run one balance round on the device with the least unallocated space.

    Reads the per-device size/used figures of FILESYSTEM, computes the
    standard deviation of the unallocated space across devices and:

    * exits the process with status 0 when the stdev is below STDEV_LIMIT;
    * exits the process with status 1 when fewer than two devices could be
      read, because the stdev is undefined then and statistics.stdev would
      raise StatisticsError;
    * otherwise starts `btrfs balance` limited to two data chunks of the
      device with the least unallocated space. If that command fails, its
      output is logged as a warning and the function sleeps 30 seconds.

    The btrfs commands are passed as argument lists rather than shell
    strings, so a FILESYSTEM path containing spaces or shell
    metacharacters is handed over verbatim.
    """
    show = subprocess.run(
        ['btrfs', 'fi', 'show', '--raw', FILESYSTEM],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    devices = _parse_devices(show.stdout)
    if len(devices) < 2:
        logging.error(
            'Found %d device(s) in %s, need at least two to balance: %s',
            len(devices), FILESYSTEM, show.stdout.strip())
        sys.exit(1)

    stdev = statistics.stdev(unallocated for _, _, unallocated in devices)
    if stdev < STDEV_LIMIT:
        logging.info(
            'Unallocated space stdev %s is below %s, exiting...',
            sizeof_fmt(stdev), sizeof_fmt(STDEV_LIMIT))
        sys.exit()
    logging.info(
        'Unallocated space stdev %s is above %s, continuing...',
        sizeof_fmt(stdev), sizeof_fmt(STDEV_LIMIT))

    # min() returns the first minimum, so ties go to the device listed first.
    dev_id, dev_path, dev_unallocated = min(devices, key=lambda dev: dev[2])
    logging.info(
        'Balancing the least empty device: %s with %s unallocated',
        dev_path, sizeof_fmt(dev_unallocated))

    balance = subprocess.run(
        ['btrfs', 'balance', 'start',
         '-ddevid=%s,limit=2' % dev_id, FILESYSTEM],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )
    output = balance.stdout.rstrip('\n')
    if balance.returncode != 0:
        logging.warning(output)
        # Pause before returning so a failing balance is not retried at once.
        time.sleep(30)
    else:
        logging.info(output)
def fib_sleep(index):
    """Sleep for fib(index) seconds, logging the duration and the local wake-up time."""
    delay = fib(index)
    wake_at = time.localtime(time.time() + delay)
    wake_label = time.strftime("%H:%M:%S", wake_at)
    logging.info("Sleeping %ds until %s" % (delay, wake_label))
    time.sleep(delay)
# Main loop: balance, time the round, adapt the backoff index, then sleep
# for fib(backoff) seconds. bal_chunk() ends the process via sys.exit().
backoff = 0
while True:
    # Use the monotonic clock so wall-clock adjustments cannot skew the
    # measured duration.
    started = time.monotonic()
    bal_chunk()
    duration = time.monotonic() - started

    too_slow = duration > CHUNK_TIMEOUT
    verdict = "Ouch!" if too_slow else "Nice!"
    logging.info("%s Last chunk took %ds" % (verdict, duration))

    if too_slow:
        # Only grow while the next sleep would stay below MAX_SLEEP.
        if fib(backoff + 1) < MAX_SLEEP:
            backoff += 1
    else:
        # Optimism: recover two steps at a time, never going below zero.
        backoff = max(backoff - 2, 0)
    fib_sleep(backoff)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment