Skip to content

Instantly share code, notes, and snippets.

@digitalsignalperson
Last active February 14, 2024 07:41
Show Gist options
  • Save digitalsignalperson/709e2adf847473c5301d67644d4f4e19 to your computer and use it in GitHub Desktop.
Save digitalsignalperson/709e2adf847473c5301d67644d4f4e19 to your computer and use it in GitHub Desktop.
testing feasibility of zfs native encryption sync back and forth

ref openzfs/zfs#12580

Here's a script showing the predictability of this post-mount extra data that is created. When the previous snapshot contained 1024 files, the extra data written after mounting the received snapshot is proportional to this. Taking a snapshot of this extra data shows ~1024 "OBJECT" and ~1024 "FREE" lines in the zstream dump, with the raw data for the objects all starting with "ZP/"

Side note: I also observe that immediately after creating any dataset, its "written" value is non-zero — possibly the same effect as this post-receive data?

# diff_similarity FILE1 FILE2
# Print how similar two text files are, as an integer percentage, based on
# the number of changed lines in a zero-context unified diff.
# Outputs: "<pct>% = (<changes> changes) / (<n1> + <n2> total lines)"
diff_similarity() {
    local lines1 lines2 total_lines num_changes percent_similarity
    lines1=$(wc -l < "$1")
    lines2=$(wc -l < "$2")
    total_lines=$((lines1 + lines2))
    # tail -n +3 drops the "---"/"+++" file-header lines and grep drops the
    # "@@" hunk markers, so only real +/- change lines are counted.
    # (Previously the two header lines were counted as changes on every run.)
    num_changes=$(diff -U0 -- "$1" "$2" | tail -n +3 | grep -cv '^@')
    # Two empty files are trivially identical; avoid dividing by zero.
    if (( total_lines == 0 )); then
        percent_similarity=100
    else
        percent_similarity=$((100 * (total_lines - num_changes) / total_lines))
    fi
    echo "${percent_similarity}% = (${num_changes} changes) / (${lines1} + ${lines2} total lines)"
}

# --- Setup: file-backed scratch pool plus an encrypted child dataset ---
dd if=/dev/zero of=/root/zpool bs=1M count=4096
zpool create testpool /root/zpool -m /mnt/testpool -O compression=off -O canmount=off -O readonly=on -O atime=off
zfs list -o name,written -H
# testpool        24K
zfs snapshot testpool@0
zfs send testpool@0 | zstreamdump -d > testpool.0.dump 
# keylocation=prompt reads the passphrase from stdin (the piped echo)
echo "12345678" | zfs create -o canmount=off -o encryption=on -o keylocation=prompt -o keyformat=passphrase -o readonly=off testpool/enc
zfs list -o name,written -H
# testpool        0
# testpool/enc    98K
zfs snapshot testpool/enc@0 
zfs send testpool/enc@0 | zstreamdump -d > testpool.enc.0.dump

# --- Baseline: dump streams of freshly created datasets and compare them ---
zfs create testpool/enc/data
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       98K
zfs snapshot -r testpool/enc@1
zfs send testpool/enc@1 | zstreamdump -d > testpool.enc.1.dump
zfs send testpool/enc/data@1 | zstreamdump -d > testpool.enc.data.1.dump

zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# Remount everything to check whether mounting alone dirties the datasets
zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0

diff_similarity testpool.0.dump testpool.enc.0.dump
# 97% = (120 changes) / (2363 + 2363 total lines)
diff_similarity testpool.enc.0.dump testpool.enc.data.1.dump
# 98% = (90 changes) / (2363 + 2363 total lines)

# After creating a dataset and taking a snapshot
# for each of
#     testpool@0, testpool/enc@0, testpool/enc@1
# their `zstreamdump -d` data are 97-98% the same
# their post-snapshot "noise" is almost the same
# e.g.
# - first WRITE object contains "normalization, utf8only, casesensitivity, VERSION, SA_ATTRS, DELETE_QUEUE, ROOT"
# - second contains "REGISTRY LAYOUTS"
# etc.

# --- Raw (-w) replicated (-R) send to a copy, then inspect post-mount writes ---
zfs send -wR testpool/enc@1 | zfs recv testpool/enc_copy
echo "12345678" | zfs load-key testpool/enc_copy
zfs mount -a
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  85K
# ^ 85K of new data appeared on the received dataset just from mounting it

# Capture that extra data in a snapshot and dump it both full and incremental
zfs snapshot testpool/enc_copy/data@extra-data
zfs send testpool/enc_copy/data@extra-data | zstreamdump -d > testpool.enc_copy.data.extra-data.dump
zfs send -i @1 testpool/enc_copy/data@extra-data | zstreamdump -d > testpool.enc_copy.data.extra-data.i.dump

diff_similarity testpool.enc.data.1.dump testpool.enc_copy.data.extra-data.i.dump
# 3% = (2348 changes) / (2363 + 77 total lines)
# very different. only 77 lines

diff_similarity testpool.enc.data.1.dump testpool.enc_copy.data.extra-data.dump
# 98% = (64 changes) / (2363 + 2363 total lines)

# The extra data created after mounting an incremental receive
# is different from that after creating a dataset
# And it notably has one object with data starting with ZP/
# (hexdump excerpt from zstreamdump -d of that object:)
#    ZP/. .... .A.. ....
#    .... .... .... ....
#    .... .... .... ....
#    "... .... D... ....
#    RE.e .... u... ....
#    RE.e .... u... ....
#    RE.e .... u... ....
#    RE.e .... u... ....
#    .... .... .... ....
#    .... .... .... ....
#    ..@  .... ...@ ....


# Reset both sides back to @1 before the next experiment
zfs rollback -r testpool/enc_copy/data@1
zfs rollback -r testpool/enc/data@1

# Make a bunch of data to see if the post-snapshot noise is predictable

# 1024 files of 1M each = 1G total
for i in {1..1024}; do
    dd if=/dev/random bs=1M count=1 of=/mnt/testpool/enc/data/$i &> /dev/null
done
# give the "written" property time to settle before reading it
sleep 8
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       1.00G
# testpool/enc_copy       0
# testpool/enc_copy/data  0


zfs snapshot testpool/enc/data@2
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  0

zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  85K

# Incrementally receive @1->@2 raw into the copy, then remount to trigger
# the post-mount writes again
zfs rollback testpool/enc_copy/data@1 -r
zfs send -wi @1 testpool/enc/data@2 | zfs recv testpool/enc_copy/data
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  0

zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  1.31M

# Snapshot the post-mount noise and count record types in its dump
zfs snapshot testpool/enc_copy/data@extra-data2
zfs send -i @2 testpool/enc_copy/data@extra-data2 | zstreamdump -d > testpool.enc_copy.data.extra-data2.i.dump

echo "
from collections import Counter
with open('testpool.enc_copy.data.extra-data2.i.dump') as f:
    lines = f.read().splitlines()
print(Counter([line.split()[0] for line in lines if line and len(line.split()[0]) > 2]))
" | python
# checksum': 2083, 'FREE': 1035, 'OBJECT': 1030, 'FREEOBJECTS': 17,

cat testpool.enc_copy.data.extra-data2.i.dump  | grep ZP | wc -l
# 1025

ls /mnt/testpool/enc/data | wc -l
# 1024

# Here we have almost exactly the same number of "ZP/" objects as changes in the snapshot


#---------------------------------------------

# this time increase the number of files by 128x
# but decrease their size accordingly
# same 1G snapshot size

zfs rollback -r testpool/enc/data@1
# for i in {1..131072}; do
#     dd if=/dev/random bs=8K count=1 of=/mnt/testpool/enc/data/$i &> /dev/null
# done
# (parallelized version of the commented loop above)
seq 1 131072 | parallel dd if=/dev/random bs=8K count=1 of=/mnt/testpool/enc/data/{} &> /dev/null
# not much faster

sleep 8
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       1.14G
# testpool/enc_copy       0
# testpool/enc_copy/data  0



zfs snapshot testpool/enc/data@2
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  0

zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  85K

# Same incremental-receive-then-remount procedure as the 1024-file run
zfs rollback testpool/enc_copy/data@1 -r
zfs send -wi @1 testpool/enc/data@2 | zfs recv testpool/enc_copy/data
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  0

zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  129M
# ^ post-mount noise scaled with the file count (1.31M -> 129M, ~128x)


zfs snapshot testpool/enc_copy/data@extra-data2
zfs send -i @2 testpool/enc_copy/data@extra-data2 | zstreamdump -d > testpool.enc_copy.data.extra-data2.i.dump

echo "
from collections import Counter
with open('testpool.enc_copy.data.extra-data2.i.dump') as f:
    lines = f.read().splitlines()
print(Counter([line.split()[0] for line in lines if line and len(line.split()[0]) > 2]))
" | python
# Counter({'checksum': 262178, 'FREE': 131083, 'OBJECT': 131078, 'FREEOBJECTS': 16

cat testpool.enc_copy.data.extra-data2.i.dump  | grep ZP | wc -l
# 131073

ls /mnt/testpool/enc/data | wc -l
# 131072

# Again we have almost exactly the same number of "ZP/" objects as changes in the snapshot

# Runaway growth?
# From here on I just sent the data back and forth a couple of times to see
# whether the wasted space keeps accumulating.

# --- Round trip 1: re-receive @2 and watch for the 129M reappearing ---
zfs rollback testpool/enc_copy/data@1 -r
zfs send -wi @1 testpool/enc/data@2 | zfs recv testpool/enc_copy/data
zfs list -o name,written -H
sleep 5
zfs list -o name,written -H
zfs umount -a
zfs mount -a
zfs list -o name,written -H
sleep 5
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  129M


# Same again, but this time do NOT remount after the receive
zfs rollback testpool/enc_copy/data@1 -r
zfs send -wi @1 testpool/enc/data@2 | zfs recv testpool/enc_copy/data
sleep 5
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  0


# A single touch on the already-mounted copy triggers the full 129M of writes
touch /mnt/testpool/enc_copy/data/new
sleep 5
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  129M

zfs list 
# NAME                     USED  AVAIL  REFER  MOUNTPOINT
# testpool                2.40G  1.22G    24K  /mnt/testpool
# testpool/enc            1.14G  1.22G    98K  /mnt/testpool/enc
# testpool/enc/data       1.14G  1.22G  1.14G  /mnt/testpool/enc/data
# testpool/enc_copy       1.26G  1.22G    95K  /mnt/testpool/enc_copy
# testpool/enc_copy/data  1.26G  1.22G  1.14G  /mnt/testpool/enc_copy/data

# ouch, wasting 10% of our space


# Send the copy's changes back to the original dataset
zfs snapshot testpool/enc_copy/data@new
zfs send -wi @2 testpool/enc_copy/data@new | zfs recv testpool/enc/data
sleep 5
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  0

zfs list
# NAME                     USED  AVAIL  REFER  MOUNTPOINT
# testpool                2.53G  1.10G    24K  /mnt/testpool
# testpool/enc            1.26G  1.10G    98K  /mnt/testpool/enc
# testpool/enc/data       1.26G  1.10G  1.14G  /mnt/testpool/enc/data
# testpool/enc_copy       1.26G  1.10G    95K  /mnt/testpool/enc_copy
# testpool/enc_copy/data  1.26G  1.10G  1.14G  /mnt/testpool/enc_copy/data


# --- Remount: now the ORIGINAL dataset accrues the 129M of post-mount writes ---
zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       129M
# testpool/enc_copy       0
# testpool/enc_copy/data  0

zfs list
# NAME                     USED  AVAIL  REFER  MOUNTPOINT
# testpool                2.65G   994M    24K  /mnt/testpool
# testpool/enc            1.39G   994M    98K  /mnt/testpool/enc
# testpool/enc/data       1.39G   994M  1.14G  /mnt/testpool/enc/data
# testpool/enc_copy       1.26G   994M    95K  /mnt/testpool/enc_copy
# testpool/enc_copy/data  1.26G   994M  1.14G  /mnt/testpool/enc_copy/data


# Discard the post-mount writes and make a tiny real change instead
zfs rollback testpool/enc/data@new
touch /mnt/testpool/enc/data/new2
sleep 8
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       354K
# testpool/enc_copy       0
# testpool/enc_copy/data  0
zfs list -t snapshot
# NAME                         USED  AVAIL  REFER  MOUNTPOINT
# testpool@0                     0B      -    24K  -
# testpool/enc@0                 0B      -    98K  -
# testpool/enc@1                 0B      -    98K  -
# testpool/enc/data@1           86K      -    98K  -
# testpool/enc/data@2          129M      -  1.14G  -
# testpool/enc/data@new        339K      -  1.14G  -
# testpool/enc_copy@0            8K      -    95K  -
# testpool/enc_copy@1            0B      -    95K  -
# testpool/enc_copy/data@1      83K      -    95K  -
# testpool/enc_copy/data@2     129M      -  1.14G  -
# testpool/enc_copy/data@new     0B      -  1.14G  -


# --- Round trip 2: send the tiny @new2 change; both sides now carry 129M ---
zfs snapshot testpool/enc/data@new2
zfs send -wi @new testpool/enc/data@new2 | zfs recv testpool/enc_copy/data
sleep 8
zfs list -o name,written -H
zfs umount -a
zfs mount -a
sleep 8
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       129M
# testpool/enc_copy       0
# testpool/enc_copy/data  129M

zfs list -t snapshot
# NAME                          USED  AVAIL  REFER  MOUNTPOINT
# testpool@0                      0B      -    24K  -
# testpool/enc@0                  0B      -    98K  -
# testpool/enc@1                  0B      -    98K  -
# testpool/enc/data@1            86K      -    98K  -
# testpool/enc/data@2           129M      -  1.14G  -
# testpool/enc/data@new         339K      -  1.14G  -
# testpool/enc/data@new2        206K      -  1.14G  -
# testpool/enc_copy@0             8K      -    95K  -
# testpool/enc_copy@1             0B      -    95K  -
# testpool/enc_copy/data@1       83K      -    95K  -
# testpool/enc_copy/data@2      129M      -  1.14G  -
# testpool/enc_copy/data@new    338K      -  1.14G  -
# testpool/enc_copy/data@new2   204K      -  1.14G  -

# But how does that add up?


# (output below appears to be from another `zfs list` — no command was recorded)
# NAME                     USED  AVAIL  REFER  MOUNTPOINT
# testpool                2.78G   865M    24K  /mnt/testpool
# testpool/enc            1.39G   865M    98K  /mnt/testpool/enc
# testpool/enc/data       1.39G   865M  1.14G  /mnt/testpool/enc/data
# testpool/enc_copy       1.39G   865M    95K  /mnt/testpool/enc_copy
# testpool/enc_copy/data  1.39G   865M  1.14G  /mnt/testpool/enc_copy/data

# for 1GB of files we're already at 1.4GB used. This is not sustainable.


# --- Round trip 3: one more tiny change; copy side grows to 1.52G used ---
touch /mnt/testpool/enc/data/new3
zfs snapshot testpool/enc/data@new3
# -F forces a rollback of the copy's post-mount writes before receiving
zfs send -wi @new2 testpool/enc/data@new3 | zfs recv testpool/enc_copy/data -F
sleep 8
zfs list -o name,written -H
zfs umount -a
zfs mount -a
sleep 8
zfs list -o name,written -H
# testpool        0
# testpool/enc    0
# testpool/enc/data       0
# testpool/enc_copy       0
# testpool/enc_copy/data  129M

zfs list
# NAME                     USED  AVAIL  REFER  MOUNTPOINT
# testpool                2.91G   735M    24K  /mnt/testpool
# testpool/enc            1.39G   735M    98K  /mnt/testpool/enc
# testpool/enc/data       1.39G   735M  1.14G  /mnt/testpool/enc/data
# testpool/enc_copy       1.52G   735M    95K  /mnt/testpool/enc_copy
# testpool/enc_copy/data  1.52G   735M  1.14G  /mnt/testpool/enc_copy/data

# ouch 1.5G. almost 50% wasted.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment