Skip to content

Instantly share code, notes, and snippets.

@jclulow
Created September 29, 2018 00:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jclulow/8ffe2217dba2e46c9c42999395c63a0f to your computer and use it in GitHub Desktop.
commit 90d44d2517948e2a60ca3289f38d2b71d49ec9fa
Author: Joshua M. Clulow <jmc@joyent.com>
Date: Thu Nov 3 20:20:42 2016 +0000
XXX OS-4718 ZFS actively hostile to 512e drive replacements
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c
index bc15615a14..4352fc271d 100644
--- a/usr/src/uts/common/fs/zfs/spa.c
+++ b/usr/src/uts/common/fs/zfs/spa.c
@@ -4668,11 +4668,15 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW));
/*
- * The new device cannot have a higher alignment requirement
- * than the top-level vdev.
+ * The new device cannot have a higher alignment requirement than the
+ * top-level vdev. If this is an Advanced Format (e.g. 512e) disk, we
+ * also need to check the fallback logical ashift value.
*/
- if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift)
+ if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift &&
+ (newvd->vdev_ashift_af == 0 ||
+ newvd->vdev_ashift_af > oldvd->vdev_top->vdev_ashift)) {
return (spa_vdev_exit(spa, newrootvd, txg, EDOM));
+ }
/*
* If this is an in-place replacement, update oldvd's path and devid
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
index 8df5b3b785..a4a6dfe028 100644
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h
@@ -139,6 +139,7 @@ struct vdev {
uint64_t vdev_min_asize; /* min acceptable asize */
uint64_t vdev_max_asize; /* max acceptable asize */
uint64_t vdev_ashift; /* block alignment shift */
+ uint64_t vdev_ashift_af; /* adv. format fallback shift */
uint64_t vdev_state; /* see VDEV_STATE_* #defines */
uint64_t vdev_prevstate; /* used when reopening a vdev */
vdev_ops_t *vdev_ops; /* vdev operations */
diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c
index f9191dd0f9..548fa58e3d 100644
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c
@@ -524,11 +524,31 @@ skip_open:
VDEV_DEBUG("vdev_disk_open(\"%s\"): "
"both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n",
vd->vdev_path, error);
- pbsize = DEV_BSIZE;
+ blksz = pbsize = DEV_BSIZE;
}
*ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1;
+ /*
+ * Advanced Format (512e) disks have a 4KB physical sector size, but
+ * also report a 512 byte logical sector size (through emulation in the
+ * firmware) to better support legacy operating systems. While we
+ * generally wish to create new pools with a 4KB block size, we also
+ * need to allow people to use AF disks in their existing 512 byte
+ * pools, even if not completely optimal.
+ */
+ if (blksz != 0 && blksz < pbsize) {
+ /*
+ * The logical block size is smaller than the reported physical
+ * block size. Record the logical ashift so that
+ * spa_vdev_attach() can use it as a fallback.
+ */
+ vd->vdev_ashift_af = highbit64(MAX(blksz,
+ SPA_MINBLOCKSIZE)) - 1;
+ } else {
+ vd->vdev_ashift_af = 0;
+ }
+
if (vd->vdev_wholedisk == 1) {
int wce = 1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment