Created
April 11, 2019 18:08
Star
You must be signed in to star a gist
Patch applied to 2.12.0 on Fir MDS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| commit 565011c390b653fddfd26364e4df0097f862569e | |
| Author: Lai Siyao <lai.siyao@whamcloud.com> | |
| Date: Mon Mar 4 23:56:16 2019 +0800 | |
| LU-12037 mdt: add option for cross-MDT rename | |
| Add option mdt.*.enable_remote_rename. When it is not set (it is set | |
| by default), a cross-MDT rename returns -EXDEV so that it is done as | |
| cp; this is useful for debugging, or when the user wants rename to | |
| move the inode to the target MDT. | |
| Add sanity test_24z. | |
| Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com> | |
| Change-Id: Ia0d122f1716f17078b375f770a193347a6e50708 | |
| Conflicts: | |
| lustre/mdt/mdt_internal.h | |
| lustre/mdt/mdt_lproc.c | |
| diff --git a/lustre/fld/fld_handler.c b/lustre/fld/fld_handler.c | |
| index 9b84b62..42f00da 100644 | |
| --- a/lustre/fld/fld_handler.c | |
| +++ b/lustre/fld/fld_handler.c | |
| @@ -398,6 +398,7 @@ int fid_is_local(const struct lu_env *env, | |
| } | |
| return result; | |
| } | |
| +EXPORT_SYMBOL(fid_is_local); | |
| static void fld_server_debugfs_fini(struct lu_server_fld *fld) | |
| { | |
| diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c | |
| index 4caa22a..b537dd4 100644 | |
| --- a/lustre/mdt/mdt_handler.c | |
| +++ b/lustre/mdt/mdt_handler.c | |
| @@ -5077,6 +5077,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, | |
| m->mdt_enable_striped_dir = 1; | |
| m->mdt_enable_dir_migration = 1; | |
| m->mdt_enable_remote_dir_gid = 0; | |
| + m->mdt_enable_remote_rename = 1; | |
| atomic_set(&m->mdt_mds_mds_conns, 0); | |
| atomic_set(&m->mdt_async_commit_count, 0); | |
| diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h | |
| index 5bf811f..788cb2a 100644 | |
| --- a/lustre/mdt/mdt_internal.h | |
| +++ b/lustre/mdt/mdt_internal.h | |
| @@ -251,6 +251,7 @@ struct mdt_device { | |
| mdt_enable_remote_dir:1, | |
| mdt_enable_striped_dir:1, | |
| mdt_enable_dir_migration:1, | |
| + mdt_enable_remote_rename:1, | |
| mdt_skip_lfsck:1; | |
| /* user with gid can create remote/striped | |
| diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c | |
| index da3c529..dc02fb6 100644 | |
| --- a/lustre/mdt/mdt_lproc.c | |
| +++ b/lustre/mdt/mdt_lproc.c | |
| @@ -979,6 +979,34 @@ mdt_migrate_hsm_allowed_seq_write(struct file *file, const char __user *buffer, | |
| } | |
| LPROC_SEQ_FOPS(mdt_migrate_hsm_allowed); | |
| +static int mdt_enable_remote_rename_seq_show(struct seq_file *m, void *data) | |
| +{ | |
| + struct obd_device *obd = m->private; | |
| + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); | |
| + | |
| + seq_printf(m, "%u\n", mdt->mdt_enable_remote_rename); | |
| + return 0; | |
| +} | |
| + | |
| +static ssize_t | |
| +mdt_enable_remote_rename_seq_write(struct file *file, const char __user *buffer, | |
| + size_t count, loff_t *off) | |
| +{ | |
| + struct seq_file *m = file->private_data; | |
| + struct obd_device *obd = m->private; | |
| + struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev); | |
| + bool val; | |
| + int rc; | |
| + | |
| + rc = kstrtobool_from_user(buffer, count, &val); | |
| + if (rc) | |
| + return rc; | |
| + | |
| + mdt->mdt_enable_remote_rename = val; | |
| + return count; | |
| +} | |
| +LPROC_SEQ_FOPS(mdt_enable_remote_rename); | |
| + | |
| LPROC_SEQ_FOPS_RO_TYPE(mdt, recovery_status); | |
| LPROC_SEQ_FOPS_RO_TYPE(mdt, num_exports); | |
| LPROC_SEQ_FOPS_RO_TYPE(mdt, target_instance); | |
| @@ -1048,6 +1076,8 @@ static struct lprocfs_vars lprocfs_mdt_obd_vars[] = { | |
| .fops = &mdt_enable_striped_dir_fops }, | |
| { .name = "enable_dir_migration", | |
| .fops = &mdt_enable_dir_migration_fops }, | |
| + { .name = "enable_remote_rename", | |
| + .fops = &mdt_enable_remote_rename_fops }, | |
| { .name = "hsm_control", | |
| .fops = &mdt_hsm_cdt_control_fops }, | |
| { .name = "recovery_time_hard", | |
| diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c | |
| index fada9fc..e4c0fda 100644 | |
| --- a/lustre/mdt/mdt_reint.c | |
| +++ b/lustre/mdt/mdt_reint.c | |
| @@ -2257,20 +2257,10 @@ static int mdt_rename_determine_lock_order(struct mdt_thread_info *info, | |
| * 2 - srcdir child; 3 - tgtdir child. | |
| * Update on disk version of srcdir child. | |
| */ | |
| -/** | |
| - * For DNE phase I, only these renames are allowed | |
| - * mv src_p/src_c tgt_p/tgt_c | |
| - * 1. src_p/src_c/tgt_p/tgt_c are in the same MDT. | |
| - * 2. src_p and tgt_p are same directory, and tgt_c does not | |
| - * exists. In this case, all of modification will happen | |
| - * in the MDT where ithesource parent is, only one remote | |
| - * update is needed, i.e. set c_time/m_time on the child. | |
| - * And tgt_c will be still in the same MDT as the original | |
| - * src_c. | |
| - */ | |
| static int mdt_reint_rename(struct mdt_thread_info *info, | |
| struct mdt_lock_handle *unused) | |
| { | |
| + struct mdt_device *mdt = info->mti_mdt; | |
| struct mdt_reint_record *rr = &info->mti_rr; | |
| struct md_attr *ma = &info->mti_attr; | |
| struct ptlrpc_request *req = mdt_info_req(info); | |
| @@ -2302,25 +2292,10 @@ static int mdt_reint_rename(struct mdt_thread_info *info, | |
| !fid_is_md_operative(rr->rr_fid2)) | |
| RETURN(-EPERM); | |
| - /* | |
| - * Note: do not enqueue rename lock for replay request, because | |
| - * if other MDT holds rename lock, but being blocked to wait for | |
| - * this MDT to finish its recovery, and the failover MDT can not | |
| - * get rename lock, which will cause deadlock. | |
| - */ | |
| - if (!req_is_replay(req)) { | |
| - rc = mdt_rename_lock(info, &rename_lh); | |
| - if (rc != 0) { | |
| - CERROR("%s: can't lock FS for rename: rc = %d\n", | |
| - mdt_obd_name(info->mti_mdt), rc); | |
| - RETURN(rc); | |
| - } | |
| - } | |
| - | |
| /* find both parents. */ | |
| msrcdir = mdt_parent_find_check(info, rr->rr_fid1, 0); | |
| if (IS_ERR(msrcdir)) | |
| - GOTO(out_unlock_rename, rc = PTR_ERR(msrcdir)); | |
| + RETURN(PTR_ERR(msrcdir)); | |
| OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME3, 5); | |
| @@ -2333,9 +2308,36 @@ static int mdt_reint_rename(struct mdt_thread_info *info, | |
| GOTO(out_put_srcdir, rc = PTR_ERR(mtgtdir)); | |
| } | |
| + /* | |
| + * Note: do not enqueue rename lock for replay request, because | |
| + * if other MDT holds rename lock, but being blocked to wait for | |
| + * this MDT to finish its recovery, and the failover MDT can not | |
| + * get rename lock, which will cause deadlock. | |
| + */ | |
| + if (!req_is_replay(req)) { | |
| + /* | |
| + * Normally the rename RPC is handled on the MDT holding the | |
| + * target directory (or the target itself, if it exists). If | |
| + * the source directory is remote, that is a hint that the | |
| + * source is remote too (this may not be true, but it won't | |
| + * cause any issue), so return -EXDEV early to avoid taking | |
| + * rename_lock. | |
| + */ | |
| + if (!mdt->mdt_enable_remote_rename && | |
| + mdt_object_remote(msrcdir)) | |
| + GOTO(out_put_tgtdir, rc = -EXDEV); | |
| + | |
| + rc = mdt_rename_lock(info, &rename_lh); | |
| + if (rc != 0) { | |
| + CERROR("%s: can't lock FS for rename: rc = %d\n", | |
| + mdt_obd_name(mdt), rc); | |
| + GOTO(out_put_tgtdir, rc); | |
| + } | |
| + } | |
| + | |
| rc = mdt_rename_determine_lock_order(info, msrcdir, mtgtdir); | |
| if (rc < 0) | |
| - GOTO(out_put_tgtdir, rc); | |
| + GOTO(out_unlock_rename, rc); | |
| reverse = rc; | |
| @@ -2360,7 +2362,7 @@ relock: | |
| rc = mdt_object_lock_save(info, mtgtdir, lh_tgtdirp, 1, | |
| cos_incompat); | |
| if (rc) | |
| - GOTO(out_put_tgtdir, rc); | |
| + GOTO(out_unlock_rename, rc); | |
| OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME, 5); | |
| @@ -2368,13 +2370,13 @@ relock: | |
| cos_incompat); | |
| if (rc != 0) { | |
| mdt_object_unlock(info, mtgtdir, lh_tgtdirp, rc); | |
| - GOTO(out_put_tgtdir, rc); | |
| + GOTO(out_unlock_rename, rc); | |
| } | |
| } else { | |
| rc = mdt_object_lock_save(info, msrcdir, lh_srcdirp, 0, | |
| cos_incompat); | |
| if (rc) | |
| - GOTO(out_put_tgtdir, rc); | |
| + GOTO(out_unlock_rename, rc); | |
| OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME, 5); | |
| @@ -2390,7 +2392,7 @@ relock: | |
| } | |
| if (rc != 0) { | |
| mdt_object_unlock(info, msrcdir, lh_srcdirp, rc); | |
| - GOTO(out_put_tgtdir, rc); | |
| + GOTO(out_unlock_rename, rc); | |
| } | |
| } | |
| @@ -2420,6 +2422,9 @@ relock: | |
| GOTO(out_put_old, rc = -ENOENT); | |
| } | |
| + if (mdt_object_remote(mold) && !mdt->mdt_enable_remote_rename) | |
| + GOTO(out_put_old, rc = -EXDEV); | |
| + | |
| /* Check if @mtgtdir is subdir of @mold, before locking child | |
| * to avoid reverse locking. */ | |
| if (mtgtdir != msrcdir) { | |
| @@ -2612,13 +2617,13 @@ out_put_old: | |
| out_unlock_parents: | |
| mdt_object_unlock(info, mtgtdir, lh_tgtdirp, rc); | |
| mdt_object_unlock(info, msrcdir, lh_srcdirp, rc); | |
| +out_unlock_rename: | |
| + if (lustre_handle_is_used(&rename_lh)) | |
| + mdt_rename_unlock(&rename_lh); | |
| out_put_tgtdir: | |
| mdt_object_put(info->mti_env, mtgtdir); | |
| out_put_srcdir: | |
| mdt_object_put(info->mti_env, msrcdir); | |
| -out_unlock_rename: | |
| - if (lustre_handle_is_used(&rename_lh)) | |
| - mdt_rename_unlock(&rename_lh); | |
| /* If 'discard' is set then new_fid must exits. | |
| * DOM data discard need neither object nor lock, | |
| diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh | |
| index 8b1bc51..a2f7735 100755 | |
| --- a/lustre/tests/sanity.sh | |
| +++ b/lustre/tests/sanity.sh | |
| @@ -1287,6 +1287,36 @@ test_24y() { | |
| } | |
| run_test 24y "rename/link on the same dir should succeed" | |
| +test_24z() { | |
| + [[ $MDSCOUNT -lt 2 ]] && skip_env "needs >= 2 MDTs" | |
| + [[ $MDS1_VERSION -lt $(version_code 2.12.51) ]] && | |
| + skip "Need MDS version at least 2.12.51" | |
| + | |
| + local index | |
| + | |
| + for index in 0 1; do | |
| + $LFS mkdir -i $index $DIR/$tdir.$index || error "mkdir failed" | |
| + touch $DIR/$tdir.0/$tfile.$index || error "touch failed" | |
| + done | |
| + | |
| + mv $DIR/$tdir.0/$tfile.0 $DIR/$tdir.1 || error "mv $tfile.0 failed" | |
| + | |
| + index=$($LFS getstripe -m $DIR/$tdir.1/$tfile.0) | |
| + [ $index -eq 0 ] || error "$tfile.0 is on MDT$index" | |
| + | |
| + local mdts=$(comma_list $(mdts_nodes)) | |
| + | |
| + do_nodes $mdts $LCTL set_param mdt.*.enable_remote_rename=0 | |
| + stack_trap "do_nodes $mdts $LCTL \ | |
| + set_param mdt.*.enable_remote_rename=1" EXIT | |
| + | |
| + mv $DIR/$tdir.0/$tfile.1 $DIR/$tdir.1 || error "mv $tfile.1 failed" | |
| + | |
| + index=$($LFS getstripe -m $DIR/$tdir.1/$tfile.1) | |
| + [ $index -eq 1 ] || error "$tfile.1 is on MDT$index" | |
| +} | |
| +run_test 24z "cross-MDT rename is done as cp" | |
| + | |
| test_24A() { # LU-3182 | |
| local NFILES=5000 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment