Here's a script demonstrating that this post-mount extra data is predictable: when the previous snapshot contained 1024 files, the extra data written after mounting the received snapshot is proportional to that file count. Taking a snapshot of this extra data shows ~1024 "OBJECT" and ~1024 "FREE" lines in the zstream dump, with the raw data for each of those objects starting with "ZP/".
Side note: immediately after creating any dataset, its "written" value is already non-zero — possibly a similar effect to this post-receive data?
# Print an approximate percentage similarity between two text files,
# based on the number of added/removed lines in a zero-context diff.
# Arguments: $1, $2 - paths of the files to compare
# Outputs:   one summary line on stdout,
#            e.g. "97% = (120 changes) / (2363 + 2363 total lines)"
diff_similarity() {
local lines1 lines2 total_lines num_changes percent_similarity
lines1=$(wc -l < "$1")
lines2=$(wc -l < "$2")
total_lines=$((lines1 + lines2))
# Skip the two "---"/"+++" file-header lines (tail -n +3) and the "@@"
# hunk markers so only real added/removed lines are counted; the
# original "grep -v ^@" kept the headers, inflating the count by 2.
num_changes=$(diff -U0 "$1" "$2" | tail -n +3 | grep -vc '^@@')
if [ "$total_lines" -eq 0 ]; then
# Two empty files are trivially identical; avoid division by zero.
percent_similarity=100
else
percent_similarity=$((100 * (total_lines - num_changes) / total_lines))
fi
echo "${percent_similarity}% = (${num_changes} changes) / (${lines1} + ${lines2} total lines)"
}
# --- Setup: create a 4 GiB file-backed test pool ---
dd if=/dev/zero of=/root/zpool bs=1M count=4096
zpool create testpool /root/zpool -m /mnt/testpool -O compression=off -O canmount=off -O readonly=on -O atime=off
zfs list -o name,written -H
# testpool 24K
# Baseline: snapshot and dump the empty pool root.
zfs snapshot testpool@0
zfs send testpool@0 | zstreamdump -d > testpool.0.dump
# Create an encrypted child dataset (passphrase fed on stdin).
echo "12345678" | zfs create -o canmount=off -o encryption=on -o keylocation=prompt -o keyformat=passphrase -o readonly=off testpool/enc
zfs list -o name,written -H
# testpool 0
# testpool/enc 98K
zfs snapshot testpool/enc@0
zfs send testpool/enc@0 | zstreamdump -d > testpool.enc.0.dump
zfs create testpool/enc/data
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 98K
zfs snapshot -r testpool/enc@1
zfs send testpool/enc@1 | zstreamdump -d > testpool.enc.1.dump
zfs send testpool/enc/data@1 | zstreamdump -d > testpool.enc.data.1.dump
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# Remount everything: "written" stays 0 for the locally created datasets.
zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# Compare the baseline dumps against each other.
diff_similarity testpool.0.dump testpool.enc.0.dump
# 97% = (120 changes) / (2363 + 2363 total lines)
diff_similarity testpool.enc.0.dump testpool.enc.data.1.dump
# 98% = (90 changes) / (2363 + 2363 total lines)
# After creating a dataset and taking a snapshot
# for each of
# testpool@0, testpool/enc@0, testpool/enc@1
# their `zstreamdump -d` data are 97-97% the same
# their post-snapshot "noise" is almost the same
# e.g
# - first WRITE object contains "normalization, utf8only, casesensitivity, VERSION, SA_ATTRS, DELETE_QUEUE, ROOT
# - second contains "REGISTRY LAYOUTS"
# etc.
# --- Raw (-w) recursive replication of the encrypted tree, then unlock & mount ---
zfs send -wR testpool/enc@1 | zfs recv testpool/enc_copy
echo "12345678" | zfs load-key testpool/enc_copy
zfs mount -a
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 85K
# Merely mounting the received copy generated 85K of new "written" data.
zfs snapshot testpool/enc_copy/data@extra-data
zfs send testpool/enc_copy/data@extra-data | zstreamdump -d > testpool.enc_copy.data.extra-data.dump
zfs send -i @1 testpool/enc_copy/data@extra-data | zstreamdump -d > testpool.enc_copy.data.extra-data.i.dump
diff_similarity testpool.enc.data.1.dump testpool.enc_copy.data.extra-data.i.dump
# 3% = (2348 changes) / (2363 + 77 total lines)
# very different. only 77 lines
diff_similarity testpool.enc.data.1.dump testpool.enc_copy.data.extra-data.dump
# 98% = (64 changes) / (2363 + 2363 total lines)
# The extra data created after mounting an incremental receive
# is different from that after creating a dataset
# And it notably has one object with data starting with ZP/
# ZP/. .... .A.. ....
# .... .... .... ....
# .... .... .... ....
# "... .... D... ....
# RE.e .... u... ....
# RE.e .... u... ....
# RE.e .... u... ....
# RE.e .... u... ....
# .... .... .... ....
# .... .... .... ....
# ..@ .... ...@ ....
# Reset both sides back to @1 before the next experiment.
zfs rollback -r testpool/enc_copy/data@1
zfs rollback -r testpool/enc/data@1
# Make a bunch of data to see if the post-snapshot noise is predictable
# --- Experiment 1: write 1024 x 1 MiB random files (~1 GiB) ---
for i in {1..1024}; do
dd if=/dev/random bs=1M count=1 of=/mnt/testpool/enc/data/$i &> /dev/null
done
sleep 8
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 1.00G
# testpool/enc_copy 0
# testpool/enc_copy/data 0
zfs snapshot testpool/enc/data@2
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 0
# Remount: the locally written side still shows no extra "written" data.
zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 85K
# Roll the copy back to @1, then raw-receive the 1024-file increment.
# "-r" is placed before the dataset name: a trailing "-r" relies on
# GNU getopt argument permutation and fails on FreeBSD/illumos (and is
# inconsistent with the "zfs rollback -r ..." invocations above).
zfs rollback -r testpool/enc_copy/data@1
zfs send -wi @1 testpool/enc/data@2 | zfs recv testpool/enc_copy/data
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 0
# Remount after the incremental receive: the copy now shows 1.31M written.
zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 1.31M
zfs snapshot testpool/enc_copy/data@extra-data2
zfs send -i @2 testpool/enc_copy/data@extra-data2 | zstreamdump -d > testpool.enc_copy.data.extra-data2.i.dump
# Tally the zstreamdump record types.  The program is fed on stdin via a
# quoted heredoc instead of echo-ing it through the shell: no accidental
# expansion, and the body of the "with" block is properly indented --
# the original un-indented form is a Python IndentationError.
python - <<'EOF'
from collections import Counter
with open('testpool.enc_copy.data.extra-data2.i.dump') as f:
    lines = f.read().splitlines()
print(Counter([line.split()[0] for line in lines if line and len(line.split()[0]) > 2]))
EOF
# checksum': 2083, 'FREE': 1035, 'OBJECT': 1030, 'FREEOBJECTS': 17,
# Count "ZP" objects directly with grep -c (no cat | grep | wc pipeline).
grep -c ZP testpool.enc_copy.data.extra-data2.i.dump
# 1025
ls /mnt/testpool/enc/data | wc -l
# 1024
# Here we have almost exactly the same number of "ZP/" objects as changes in the snapshot
#---------------------------------------------
# this time increase the number of files by 128x
# but decrease their size accordingly
# same 1G snapshot size
# --- Experiment 2: 131072 x 8 KiB random files ---
zfs rollback -r testpool/enc/data@1
# for i in {1..131072}; do
# dd if=/dev/random bs=8K count=1 of=/mnt/testpool/enc/data/$i &> /dev/null
# done
seq 1 131072 | parallel dd if=/dev/random bs=8K count=1 of=/mnt/testpool/enc/data/{} &> /dev/null
# not much faster
sleep 8
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 1.14G
# testpool/enc_copy 0
# testpool/enc_copy/data 0
zfs snapshot testpool/enc/data@2
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 0
# Remount: again no extra "written" on the locally written side.
zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 85K
# Roll the copy back and raw-receive the 131072-file increment.
# "-r" goes before the dataset name: trailing options rely on GNU getopt
# argument permutation and fail on platforms without it.
zfs rollback -r testpool/enc_copy/data@1
zfs send -wi @1 testpool/enc/data@2 | zfs recv testpool/enc_copy/data
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 0
# Remount: with 128x the files, the post-mount extra data grows to 129M.
zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 129M
zfs snapshot testpool/enc_copy/data@extra-data2
zfs send -i @2 testpool/enc_copy/data@extra-data2 | zstreamdump -d > testpool.enc_copy.data.extra-data2.i.dump
# Tally the zstreamdump record types (same fix as above: quoted heredoc
# instead of echo, and the "with" body indented -- the original
# un-indented version is a Python IndentationError).
python - <<'EOF'
from collections import Counter
with open('testpool.enc_copy.data.extra-data2.i.dump') as f:
    lines = f.read().splitlines()
print(Counter([line.split()[0] for line in lines if line and len(line.split()[0]) > 2]))
EOF
# Counter({'checksum': 262178, 'FREE': 131083, 'OBJECT': 131078, 'FREEOBJECTS': 16
# Count "ZP" objects directly with grep -c (no cat | grep | wc pipeline).
grep -c ZP testpool.enc_copy.data.extra-data2.i.dump
# 131073
ls /mnt/testpool/enc/data | wc -l
# 131072
# Again we have almost exactly the same number of "ZP/" objects as changes in the snapshot
# From here on, I just sent the data back and forth a couple of times.
# Repeat the rollback/receive/remount cycle.  Both rollbacks place "-r"
# before the dataset name; a trailing "-r" only parses where getopt
# permutes arguments (GNU), not on FreeBSD/illumos.
zfs rollback -r testpool/enc_copy/data@1
zfs send -wi @1 testpool/enc/data@2 | zfs recv testpool/enc_copy/data
zfs list -o name,written -H
sleep 5
zfs list -o name,written -H
zfs umount -a
zfs mount -a
zfs list -o name,written -H
sleep 5
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 129M
zfs rollback -r testpool/enc_copy/data@1
zfs send -wi @1 testpool/enc/data@2 | zfs recv testpool/enc_copy/data
sleep 5
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 0
# A single touch on the received copy triggers the full 129M of writes.
touch /mnt/testpool/enc_copy/data/new
sleep 5
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 129M
zfs list
# NAME USED AVAIL REFER MOUNTPOINT
# testpool 2.40G 1.22G 24K /mnt/testpool
# testpool/enc 1.14G 1.22G 98K /mnt/testpool/enc
# testpool/enc/data 1.14G 1.22G 1.14G /mnt/testpool/enc/data
# testpool/enc_copy 1.26G 1.22G 95K /mnt/testpool/enc_copy
# testpool/enc_copy/data 1.26G 1.22G 1.14G /mnt/testpool/enc_copy/data
# ouch, wasting 10% of our space
# Send the change back to the original side.
zfs snapshot testpool/enc_copy/data@new
zfs send -wi @2 testpool/enc_copy/data@new | zfs recv testpool/enc/data
sleep 5
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 0
zfs list
# NAME USED AVAIL REFER MOUNTPOINT
# testpool 2.53G 1.10G 24K /mnt/testpool
# testpool/enc 1.26G 1.10G 98K /mnt/testpool/enc
# testpool/enc/data 1.26G 1.10G 1.14G /mnt/testpool/enc/data
# testpool/enc_copy 1.26G 1.10G 95K /mnt/testpool/enc_copy
# testpool/enc_copy/data 1.26G 1.10G 1.14G /mnt/testpool/enc_copy/data
# Remount: now the original side accrues 129M of extra data.
zfs umount -a
zfs mount -a
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 129M
# testpool/enc_copy 0
# testpool/enc_copy/data 0
zfs list
# NAME USED AVAIL REFER MOUNTPOINT
# testpool 2.65G 994M 24K /mnt/testpool
# testpool/enc 1.39G 994M 98K /mnt/testpool/enc
# testpool/enc/data 1.39G 994M 1.14G /mnt/testpool/enc/data
# testpool/enc_copy 1.26G 994M 95K /mnt/testpool/enc_copy
# testpool/enc_copy/data 1.26G 994M 1.14G /mnt/testpool/enc_copy/data
zfs rollback testpool/enc/data@new
touch /mnt/testpool/enc/data/new2
sleep 8
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 354K
# testpool/enc_copy 0
# testpool/enc_copy/data 0
zfs list -t snapshot
# NAME USED AVAIL REFER MOUNTPOINT
# testpool@0 0B - 24K -
# testpool/enc@0 0B - 98K -
# testpool/enc@1 0B - 98K -
# testpool/enc/data@1 86K - 98K -
# testpool/enc/data@2 129M - 1.14G -
# testpool/enc/data@new 339K - 1.14G -
# testpool/enc_copy@0 8K - 95K -
# testpool/enc_copy@1 0B - 95K -
# testpool/enc_copy/data@1 83K - 95K -
# testpool/enc_copy/data@2 129M - 1.14G -
# testpool/enc_copy/data@new 0B - 1.14G -
zfs snapshot testpool/enc/data@new2
zfs send -wi @new testpool/enc/data@new2 | zfs recv testpool/enc_copy/data
sleep 8
zfs list -o name,written -H
zfs umount -a
zfs mount -a
sleep 8
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 129M
# testpool/enc_copy 0
# testpool/enc_copy/data 129M
zfs list -t snapshot
# NAME USED AVAIL REFER MOUNTPOINT
# testpool@0 0B - 24K -
# testpool/enc@0 0B - 98K -
# testpool/enc@1 0B - 98K -
# testpool/enc/data@1 86K - 98K -
# testpool/enc/data@2 129M - 1.14G -
# testpool/enc/data@new 339K - 1.14G -
# testpool/enc/data@new2 206K - 1.14G -
# testpool/enc_copy@0 8K - 95K -
# testpool/enc_copy@1 0B - 95K -
# testpool/enc_copy/data@1 83K - 95K -
# testpool/enc_copy/data@2 129M - 1.14G -
# testpool/enc_copy/data@new 338K - 1.14G -
# testpool/enc_copy/data@new2 204K - 1.14G -
# But how does that add up?
# NAME USED AVAIL REFER MOUNTPOINT
# testpool 2.78G 865M 24K /mnt/testpool
# testpool/enc 1.39G 865M 98K /mnt/testpool/enc
# testpool/enc/data 1.39G 865M 1.14G /mnt/testpool/enc/data
# testpool/enc_copy 1.39G 865M 95K /mnt/testpool/enc_copy
# testpool/enc_copy/data 1.39G 865M 1.14G /mnt/testpool/enc_copy/data
# for 1GB of files we're already at 1.4GB used. This is not sustainable.
# One more change sent across, forcing the receive with -F.
touch /mnt/testpool/enc/data/new3
zfs snapshot testpool/enc/data@new3
# "-F" moved before the dataset name; a trailing option only parses
# where getopt permutes arguments (GNU), not on FreeBSD/illumos.
zfs send -wi @new2 testpool/enc/data@new3 | zfs recv -F testpool/enc_copy/data
# Final accounting after the last receive/remount cycle.
sleep 8
zfs list -o name,written -H
zfs umount -a
zfs mount -a
sleep 8
zfs list -o name,written -H
# testpool 0
# testpool/enc 0
# testpool/enc/data 0
# testpool/enc_copy 0
# testpool/enc_copy/data 129M
zfs list
# NAME USED AVAIL REFER MOUNTPOINT
# testpool 2.91G 735M 24K /mnt/testpool
# testpool/enc 1.39G 735M 98K /mnt/testpool/enc
# testpool/enc/data 1.39G 735M 1.14G /mnt/testpool/enc/data
# testpool/enc_copy 1.52G 735M 95K /mnt/testpool/enc_copy
# testpool/enc_copy/data 1.52G 735M 1.14G /mnt/testpool/enc_copy/data
# ouch 1.5G. almost 50% wasted.