Created
August 30, 2016 15:02
-
-
Save pcd1193182/8521edf6a4f4d612afb050cc5ce925b0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/usr/src/uts/common/fs/zfs/dnode.c b/usr/src/uts/common/fs/zfs/dnode.c | |
index 7fdb64d..540b23e 100644 | |
--- a/usr/src/uts/common/fs/zfs/dnode.c | |
+++ b/usr/src/uts/common/fs/zfs/dnode.c | |
@@ -1552,6 +1552,72 @@ dnode_dirty_l1(dnode_t *dn, uint64_t l1blkid, dmu_tx_t *tx) | |
} | |
} | |
+/* | |
+ * Dirty all the in-core level-1 dbufs in the range specified by start_blkid | |
+ * and end_blkid. The search key starts at start_blkid + 1 and the walk stops | |
+ * at the first dbuf found whose level is not 1 or whose blkid is >= end_blkid. | |
+ * | |
+ * dn_dbufs_mtx is taken on entry and released before returning; it is also | |
+ * dropped around each call to dnode_dirty_l1(). | |
+ */ | |
+static void | |
+dnode_dirty_l1range(dnode_t *dn, uint64_t start_blkid, uint64_t end_blkid, | |
+ dmu_tx_t *tx) | |
+{ | |
+ dmu_buf_impl_t db_search; | |
+ dmu_buf_impl_t *db; | |
+ avl_index_t where; | |
+ uint64_t blkid; | |
+ | |
+ mutex_enter(&dn->dn_dbufs_mtx); | |
+ | |
+ db_search.db_level = 1; | |
+ db_search.db_blkid = start_blkid + 1; | |
+ db_search.db_state = DB_SEARCH; | |
+ for (;;) { | |
+ | |
+ db = avl_find(&dn->dn_dbufs, &db_search, &where); | |
+ if (db == NULL) | |
+ db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER); | |
+ | |
+ if (db == NULL || db->db_level != 1 || | |
+ db->db_blkid >= end_blkid) { | |
+ break; | |
+ } | |
+ | |
+ /* | |
+ * Setup the next blkid we want to search for. Capture the | |
+ * current blkid while dn_dbufs_mtx is still held so that db | |
+ * is not dereferenced after the lock is dropped below; this | |
+ * function takes no hold on db. | |
+ */ | |
+ blkid = db->db_blkid; | |
+ db_search.db_blkid = blkid + 1; | |
+ ASSERT3U(blkid, >=, start_blkid); | |
+ | |
+ /* | |
+ * If the dbuf transitions to DB_EVICTING while we're trying | |
+ * to dirty it, then we will be unable to discover it in | |
+ * the dbuf hash table. This will result in a call to | |
+ * dbuf_create() which needs to acquire the dn_dbufs_mtx | |
+ * lock. To avoid a deadlock, we drop the lock before | |
+ * dirtying the level-1 dbuf. | |
+ */ | |
+ mutex_exit(&dn->dn_dbufs_mtx); | |
+ dnode_dirty_l1(dn, blkid, tx); | |
+ mutex_enter(&dn->dn_dbufs_mtx); | |
+ } | |
+ | |
+#ifdef ZFS_DEBUG | |
+ /* | |
+ * Walk all the in-core level-1 dbufs and verify they have been dirtied. | |
+ * The walk uses the same bounds as the loop above and runs with | |
+ * dn_dbufs_mtx held. | |
+ */ | |
+ db_search.db_level = 1; | |
+ db_search.db_blkid = start_blkid + 1; | |
+ db_search.db_state = DB_SEARCH; | |
+ db = avl_find(&dn->dn_dbufs, &db_search, &where); | |
+ if (db == NULL) | |
+ db = avl_nearest(&dn->dn_dbufs, where, AVL_AFTER); | |
+ for (; db != NULL; db = AVL_NEXT(&dn->dn_dbufs, db)) { | |
+ if (db->db_level != 1 || db->db_blkid >= end_blkid) | |
+ break; | |
+ ASSERT(db->db_dirtycnt > 0); | |
+ } | |
+#endif | |
+ mutex_exit(&dn->dn_dbufs_mtx); | |
+} | |
+ | |
void | |
dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) | |
{ | |
@@ -1704,6 +1770,8 @@ dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx) | |
if (last != first) | |
dnode_dirty_l1(dn, last, tx); | |
+ dnode_dirty_l1range(dn, first, last, tx); | |
+ | |
int shift = dn->dn_datablkshift + dn->dn_indblkshift - | |
SPA_BLKPTRSHIFT; | |
diff --git a/usr/src/uts/common/fs/zfs/dnode_sync.c b/usr/src/uts/common/fs/zfs/dnode_sync.c | |
index d4e6502..d2b7b9f 100644 | |
--- a/usr/src/uts/common/fs/zfs/dnode_sync.c | |
+++ b/usr/src/uts/common/fs/zfs/dnode_sync.c | |
@@ -237,7 +237,7 @@ free_verify(dmu_buf_impl_t *db, uint64_t start, uint64_t end, dmu_tx_t *tx) | |
static void | |
free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, | |
- dmu_tx_t *tx) | |
+ boolean_t force_free, dmu_tx_t *tx) | |
{ | |
dnode_t *dn; | |
blkptr_t *bp; | |
@@ -289,32 +289,21 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, | |
rw_exit(&dn->dn_struct_rwlock); | |
ASSERT3P(bp, ==, subdb->db_blkptr); | |
- free_children(subdb, blkid, nblks, tx); | |
+ free_children(subdb, blkid, nblks, force_free, tx); | |
dbuf_rele(subdb, FTAG); | |
} | |
} | |
- /* If this whole block is free, free ourself too. */ | |
- for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) { | |
- if (!BP_IS_HOLE(bp)) | |
- break; | |
- } | |
- if (i == 1 << epbs) { | |
+ if (force_free) { | |
+ for (i = 0, bp = db->db.db_data; i < 1 << epbs; i++, bp++) | |
+ ASSERT(BP_IS_HOLE(bp)); | |
/* | |
- * We only found holes. Grab the rwlock to prevent | |
- * anybody from reading the blocks we're about to | |
- * zero out. | |
+ * TODO do we need rwlock | |
*/ | |
rw_enter(&dn->dn_struct_rwlock, RW_WRITER); | |
bzero(db->db.db_data, db->db.db_size); | |
rw_exit(&dn->dn_struct_rwlock); | |
free_blocks(dn, db->db_blkptr, 1, tx); | |
- } else { | |
- /* | |
- * Partial block free; must be marked dirty so that it | |
- * will be written out. | |
- */ | |
- ASSERT(db->db_dirtycnt > 0); | |
} | |
DB_DNODE_EXIT(db); | |
@@ -327,7 +316,7 @@ free_children(dmu_buf_impl_t *db, uint64_t blkid, uint64_t nblks, | |
*/ | |
static void | |
dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks, | |
- dmu_tx_t *tx) | |
+ boolean_t force_free, dmu_tx_t *tx) | |
{ | |
blkptr_t *bp = dn->dn_phys->dn_blkptr; | |
int dnlevel = dn->dn_phys->dn_nlevels; | |
@@ -367,7 +356,7 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks, | |
TRUE, FALSE, FTAG, &db)); | |
rw_exit(&dn->dn_struct_rwlock); | |
- free_children(db, blkid, nblks, tx); | |
+ free_children(db, blkid, nblks, force_free, tx); | |
dbuf_rele(db, FTAG); | |
} | |
} | |
@@ -386,6 +375,7 @@ dnode_sync_free_range_impl(dnode_t *dn, uint64_t blkid, uint64_t nblks, | |
typedef struct dnode_sync_free_range_arg { | |
dnode_t *dsfra_dnode; | |
dmu_tx_t *dsfra_tx; | |
+ boolean_t dsfra_force_free; | |
} dnode_sync_free_range_arg_t; | |
static void | |
@@ -395,7 +385,8 @@ dnode_sync_free_range(void *arg, uint64_t blkid, uint64_t nblks) | |
dnode_t *dn = dsfra->dsfra_dnode; | |
mutex_exit(&dn->dn_mtx); | |
- dnode_sync_free_range_impl(dn, blkid, nblks, dsfra->dsfra_tx); | |
+ dnode_sync_free_range_impl(dn, blkid, nblks, dsfra->dsfra_force_free, | |
+ dsfra->dsfra_tx); | |
mutex_enter(&dn->dn_mtx); | |
} | |
@@ -672,6 +663,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx) | |
dnode_sync_free_range_arg_t dsfra; | |
dsfra.dsfra_dnode = dn; | |
dsfra.dsfra_tx = tx; | |
+ dsfra.dsfra_force_free = freeing_dnode; | |
mutex_enter(&dn->dn_mtx); | |
range_tree_vacate(dn->dn_free_ranges[txgoff], | |
dnode_sync_free_range, &dsfra); | |
diff --git a/usr/src/uts/common/fs/zfs/zfs_znode.c b/usr/src/uts/common/fs/zfs/zfs_znode.c | |
index 6304ec8..dd2cc1d 100644 | |
--- a/usr/src/uts/common/fs/zfs/zfs_znode.c | |
+++ b/usr/src/uts/common/fs/zfs/zfs_znode.c | |
@@ -20,7 +20,7 @@ | |
*/ | |
/* | |
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. | |
- * Copyright (c) 2012, 2014 by Delphix. All rights reserved. | |
+ * Copyright (c) 2012, 2016 by Delphix. All rights reserved. | |
*/ | |
/* Portions Copyright 2007 Jeremy Teo */ | |
@@ -1590,7 +1590,8 @@ zfs_trunc(znode_t *zp, uint64_t end) | |
return (0); | |
} | |
- error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1); | |
+ error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, | |
+ DMU_OBJECT_END); | |
if (error) { | |
zfs_range_unlock(rl); | |
return (error); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment