Created
September 29, 2018 00:09
-
-
Save jclulow/8ffe2217dba2e46c9c42999395c63a0f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit 90d44d2517948e2a60ca3289f38d2b71d49ec9fa | |
Author: Joshua M. Clulow <jmc@joyent.com> | |
Date: Thu Nov 3 20:20:42 2016 +0000 | |
XXX OS-4718 ZFS actively hostile to 512e drive replacements | |
diff --git a/usr/src/uts/common/fs/zfs/spa.c b/usr/src/uts/common/fs/zfs/spa.c | |
index bc15615a14..4352fc271d 100644 | |
--- a/usr/src/uts/common/fs/zfs/spa.c | |
+++ b/usr/src/uts/common/fs/zfs/spa.c | |
@@ -4668,11 +4668,15 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing) | |
return (spa_vdev_exit(spa, newrootvd, txg, EOVERFLOW)); | |
/* | |
- * The new device cannot have a higher alignment requirement | |
- * than the top-level vdev. | |
+ * The new device cannot have a higher alignment requirement than the | |
+ * top-level vdev, unless it is an Advanced Format (e.g. 512e) disk | |
+ * whose fallback logical ashift fits within the top-level ashift. | |
*/ | |
- if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift) | |
+ if (newvd->vdev_ashift > oldvd->vdev_top->vdev_ashift && | |
+ (newvd->vdev_ashift_af == 0 || | |
+ newvd->vdev_ashift_af > oldvd->vdev_top->vdev_ashift)) { | |
return (spa_vdev_exit(spa, newrootvd, txg, EDOM)); | |
+ } | |
/* | |
* If this is an in-place replacement, update oldvd's path and devid | |
diff --git a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h | |
index 8df5b3b785..a4a6dfe028 100644 | |
--- a/usr/src/uts/common/fs/zfs/sys/vdev_impl.h | |
+++ b/usr/src/uts/common/fs/zfs/sys/vdev_impl.h | |
@@ -139,6 +139,7 @@ struct vdev { | |
uint64_t vdev_min_asize; /* min acceptable asize */ | |
uint64_t vdev_max_asize; /* max acceptable asize */ | |
uint64_t vdev_ashift; /* block alignment shift */ | |
+ uint64_t vdev_ashift_af; /* adv. format fallback shift */ | |
uint64_t vdev_state; /* see VDEV_STATE_* #defines */ | |
uint64_t vdev_prevstate; /* used when reopening a vdev */ | |
vdev_ops_t *vdev_ops; /* vdev operations */ | |
diff --git a/usr/src/uts/common/fs/zfs/vdev_disk.c b/usr/src/uts/common/fs/zfs/vdev_disk.c | |
index f9191dd0f9..548fa58e3d 100644 | |
--- a/usr/src/uts/common/fs/zfs/vdev_disk.c | |
+++ b/usr/src/uts/common/fs/zfs/vdev_disk.c | |
@@ -524,11 +524,31 @@ skip_open: | |
VDEV_DEBUG("vdev_disk_open(\"%s\"): " | |
"both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n", | |
vd->vdev_path, error); | |
- pbsize = DEV_BSIZE; | |
+ blksz = pbsize = DEV_BSIZE; | |
} | |
*ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1; | |
+ /* | |
+ * Advanced Format (512e) disks have a 4KB physical sector size, but | |
+ * also report a 512-byte logical sector size (through emulation in the | |
+ * firmware) to better support legacy operating systems. While we | |
+ * generally wish to create new pools with a 4KB block size, we also | |
+ * need to allow people to use AF disks in their existing 512-byte | |
+ * pools, even though that is not completely optimal. | |
+ */ | |
+ if (blksz != 0 && blksz < pbsize) { | |
+ /* | |
+ * The logical block size is smaller than the reported physical | |
+ * block size. Record the logical ashift so that | |
+ * spa_vdev_attach() can use it as a fallback. | |
+ */ | |
+ vd->vdev_ashift_af = highbit64(MAX(blksz, | |
+ SPA_MINBLOCKSIZE)) - 1; | |
+ } else { | |
+ vd->vdev_ashift_af = 0; | |
+ } | |
+ | |
if (vd->vdev_wholedisk == 1) { | |
int wce = 1; | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment