Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@dvanders
Last active May 5, 2020 15:08
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dvanders/c15d490ae380bcf4220a437b18a32f04 to your computer and use it in GitHub Desktop.
Save dvanders/c15d490ae380bcf4220a437b18a32f04 to your computer and use it in GitHub Desktop.
btrfs-smr-balance.py
#!/usr/bin/env python3
# The goal of this is to gradually balance a btrfs filesystem which contains DM-SMR drives.
# Such drives are described in detail at https://www.usenix.org/node/188434
# A normal drive should be able to balance a single 1GB chunk in under 30s.
# Such a stripe would normally be written directly to the shingled blocks, but in the case
# it was cached, it would take roughly 100s to clean.
# So our heuristic here is:
# * balance two chunks from the drive with the least amount of unallocated space.
# * if it took longer than 60s, increase the per-chunk sleep interval
# * otherwise, decrease the per-chunk sleep interval
# * keep going until the stdev of the drive unallocated space is below 10GB
from functools import lru_cache
import logging
import statistics
import subprocess
import sys
import time
# Emit timestamped, level-tagged log lines at INFO and above.
logging.basicConfig(
format='%(asctime)s %(levelname)-8s %(message)s',
level=logging.INFO,
datefmt='%Y-%m-%d %H:%M:%S')
# Path of the btrfs filesystem passed to the btrfs commands in bal_chunk().
FILESYSTEM = '/media/btrfs'
# A balance pass that takes longer than this makes the main loop back off.
CHUNK_TIMEOUT = 60 # seconds
# The backoff index only grows while the next Fibonacci sleep stays below this.
MAX_SLEEP = 7200 # seconds
# Stop once the stdev of per-device unallocated space drops below this (10 GiB).
STDEV_LIMIT = 10*1024*1024*1024 # bytes
@lru_cache
def fib(n):
    """Return the n-th term of the sequence 1, 1, 2, 3, 5, 8, ...

    Any ``n`` below 2 (including negatives) yields 1. Results are memoised
    by ``lru_cache`` so repeated lookups are cheap.
    """
    return 1 if n < 2 else fib(n - 2) + fib(n - 1)
def sizeof_fmt(num, suffix='B'):
    """Format ``num`` as a human-readable size using 1024-based prefixes.

    The value is divided by 1024 until its magnitude drops below 1024, then
    rendered with one decimal place followed by the matching prefix and
    ``suffix``. Anything still too large after 'Zi' is reported in 'Yi'.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    for prefix in prefixes:
        if abs(value) < 1024.0:
            return "%3.1f%s%s" % (value, prefix, suffix)
        value = value / 1024.0
    # Ran out of prefixes: whatever remains is expressed in yobi-units.
    return "%.1f%s%s" % (value, 'Yi', suffix)
def _parse_devices(listing):
    """Extract ``(devid, path, unallocated_bytes)`` tuples from device rows.

    ``listing`` is the text printed by ``btrfs fi show --raw``. Rows that
    mention ``devid`` are expected to carry the id, size, used bytes and path
    at whitespace-separated positions 1, 3, 5 and 7; rows that do not fit
    that shape are logged and skipped instead of raising.
    """
    devices = []
    for line in listing.splitlines():
        if 'devid' not in line:
            continue
        fields = line.split()
        try:
            unallocated = int(fields[3]) - int(fields[5])
            devices.append((fields[1], fields[7], unallocated))
        except (IndexError, ValueError):
            logging.warning('Skipping unparseable device line: %r', line)
    return devices


def bal_chunk():
    """Run one balance pass on the device with the least unallocated space.

    Reads the per-device layout of ``FILESYSTEM``, exits the process with
    status 0 once the standard deviation of unallocated space falls below
    ``STDEV_LIMIT``, and otherwise asks btrfs to balance up to two data
    chunks from the fullest device.

    Exits with status 1 when the layout cannot be read or fewer than two
    devices are found, because a spread cannot be computed in that case.
    """
    # Argument lists avoid the shell, so paths with spaces or metacharacters
    # in FILESYSTEM are passed through unchanged.
    try:
        show = subprocess.run(
            ['btrfs', 'fi', 'show', '--raw', FILESYSTEM],
            stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    except OSError as err:
        logging.error('Could not run btrfs: %s', err)
        sys.exit(1)

    devices = _parse_devices(show.stdout)
    if len(devices) < 2:
        # statistics.stdev needs at least two data points.
        logging.error(
            'Found %d usable device(s) on %s, need at least 2. btrfs said: %s',
            len(devices), FILESYSTEM, show.stdout.strip())
        sys.exit(1)

    stdev = statistics.stdev(unallocated for _, _, unallocated in devices)
    if stdev < STDEV_LIMIT:
        logging.info('Unallocated space stdev %s is below %s, exiting...',
                     sizeof_fmt(stdev), sizeof_fmt(STDEV_LIMIT))
        sys.exit()
    logging.info('Unallocated space stdev %s is above %s, continuing...',
                 sizeof_fmt(stdev), sizeof_fmt(STDEV_LIMIT))

    # min() keeps the first device on ties, like a strict "<" scan would.
    devid, path, unallocated = min(devices, key=lambda dev: dev[2])
    logging.info('Balancing the least empty device: %s with %s unallocated',
                 path, sizeof_fmt(unallocated))

    balance = subprocess.run(
        ['btrfs', 'balance', 'start', '-ddevid=%s,limit=2' % devid, FILESYSTEM],
        stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    output = balance.stdout.strip()
    if balance.returncode != 0:
        logging.warning(output)
        # Pause after a failed balance; presumably so the caller does not
        # retry immediately.
        time.sleep(30)
    else:
        logging.info(output)
def fib_sleep(index):
    """Sleep for ``fib(index)`` seconds, logging the duration and wake-up time first."""
    delay = fib(index)
    wake_at = time.localtime(time.time() + delay)
    wake_label = time.strftime("%H:%M:%S", wake_at)
    logging.info("Sleeping %ds until %s" % (delay, wake_label))
    time.sleep(delay)
# Index into the Fibonacci sequence that sets the pause between balance passes.
backoff = 0
while True:
    start = time.time()
    bal_chunk()
    duration = time.time() - start
    too_slow = duration > CHUNK_TIMEOUT
    yeet = "Ouch!" if too_slow else "Nice!"
    logging.info("%s Last chunk took %ds" % (yeet, duration))
    if too_slow:
        # Back off one step, but only while the next sleep stays under MAX_SLEEP.
        if fib(backoff + 1) < MAX_SLEEP:
            backoff += 1
    else:
        # Optimistic recovery: drop two steps at a time, never below zero.
        backoff = max(backoff - 2, 0)
    fib_sleep(backoff)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment