From b8498f7b08c00735659a5b8435c45c0b6ddd4bd8 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Fri, 14 Feb 2025 02:26:04 +0800 Subject: [PATCH] DAOS-16876 vos: discard invalid DTX when commit or abort - b26 When committing or aborting a DTX, we check whether it is a valid entry. If it is invalid, we discard it with a warning message and increment the related metrics counter. It may not be a perfect solution, but it is an efficient way to help the user clean up the system. Signed-off-by: Jeff Olivier Signed-off-by: Fan Yong --- src/include/daos_srv/evtree.h | 13 +++++++ src/vos/evt_priv.h | 2 +- src/vos/evtree.c | 10 ++++++ src/vos/ilog.c | 40 ++++++++++++--------- src/vos/tests/vts_ilog.c | 18 ++++++++++ src/vos/vos_common.c | 6 ++++ src/vos/vos_dtx.c | 66 ++++++++++++++++++++++++++--------- src/vos/vos_ilog.c | 3 ++ src/vos/vos_internal.h | 15 ++++++++ src/vos/vos_obj_index.c | 2 ++ src/vos/vos_tls.h | 2 ++ src/vos/vos_tree.c | 11 ++++++ 12 files changed, 154 insertions(+), 34 deletions(-) diff --git a/src/include/daos_srv/evtree.h b/src/include/daos_srv/evtree.h index 63224259ccc..8ae4f12638e 100644 --- a/src/include/daos_srv/evtree.h +++ b/src/include/daos_srv/evtree.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2023 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -810,4 +811,16 @@ evt_feats_get(struct evt_root *root) */ int evt_feats_set(struct evt_root *root, struct umem_instance *umm, uint64_t feats); +/** Validate the provided evt. + * + * Note: It is designed for catastrophic recovery. Not to perform at run-time. + * + * \param evt[in] + * \param dtx_lid[in] local id of the DTX entry the evt is supposed to belong to + * + * \return true if evt is valid. 
+ **/ +bool +evt_desc_is_valid(const struct evt_desc *evt, uint32_t dtx_lid); + #endif /* __DAOS_EV_TREE_H__ */ diff --git a/src/vos/evt_priv.h b/src/vos/evt_priv.h index e855a9c74b2..0150259e987 100644 --- a/src/vos/evt_priv.h +++ b/src/vos/evt_priv.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -115,7 +116,6 @@ struct evt_context { umem_off2ptr(evt_umm(tcx), offset) #define EVT_NODE_MAGIC 0xf00d -#define EVT_DESC_MAGIC 0xbeefdead /** Convert an offset to a evtree node descriptor * \param[IN] tcx Tree context diff --git a/src/vos/evtree.c b/src/vos/evtree.c index d635453f8b2..883e3820bc6 100644 --- a/src/vos/evtree.c +++ b/src/vos/evtree.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -4086,3 +4087,12 @@ evt_feats_set(struct evt_root *root, struct umem_instance *umm, uint64_t feats) return rc; } +bool +evt_desc_is_valid(const struct evt_desc *evt, uint32_t dtx_lid) +{ + if (evt == NULL || evt->dc_magic != EVT_DESC_MAGIC) { + return false; + } + + return (evt->dc_dtx == dtx_lid); +} diff --git a/src/vos/ilog.c b/src/vos/ilog.c index 1d1d6508087..b8eae2a70b9 100644 --- a/src/vos/ilog.c +++ b/src/vos/ilog.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -390,16 +391,18 @@ ilog_create(struct umem_instance *umm, struct ilog_df *root) return rc; } -#define ILOG_ASSERT_VALID(root_df) \ - do { \ - struct ilog_root *_root; \ - \ - _root = (struct ilog_root *)(root_df); \ - D_ASSERTF((_root != NULL) && \ - ILOG_MAGIC_VALID(_root->lr_magic), \ - "Invalid ilog root detected %p magic=%#x\n", \ - _root, _root == NULL ? 
0 : _root->lr_magic); \ - } while (0) +#define ILOG_CHECK_VALID(root_df) \ + ({ \ + struct ilog_root *_root = NULL; \ + D_ASSERT((root_df) != NULL); \ + _root = (struct ilog_root *)(root_df); \ + if (!ILOG_MAGIC_VALID(_root->lr_magic)) { \ + D_WARN("Invalid ilog root detected %p magic=%#x\n", _root, \ + _root == NULL ? 0 : _root->lr_magic); \ + _root = NULL; \ + } \ + _root != NULL; \ + }) int ilog_open(struct umem_instance *umm, struct ilog_df *root, @@ -408,7 +411,8 @@ ilog_open(struct umem_instance *umm, struct ilog_df *root, struct ilog_context *lctx; int rc; - ILOG_ASSERT_VALID(root); + if (!ILOG_CHECK_VALID(root)) + return -DER_NONEXIST; rc = ilog_ctx_create(umm, (struct ilog_root *)root, cbs, &lctx); if (rc != 0) @@ -474,7 +478,7 @@ ilog_destroy(struct umem_instance *umm, int rc = 0; struct ilog_array_cache cache = {0}; - ILOG_ASSERT_VALID(root); + D_ASSERT(ILOG_CHECK_VALID(root)); rc = ilog_tx_begin(&lctx); if (rc != 0) { @@ -984,8 +988,12 @@ ilog_modify(daos_handle_t loh, const struct ilog_id *id_in, "%s in incarnation log " DF_X64 " status: rc=" DF_RC " tree_version: %d\n", opc_str[opc], id_in->id_epoch, DP_RC(rc), ilog_mag2ver(lctx->ic_root->lr_magic)); - if (rc == 0 && version != ilog_mag2ver(lctx->ic_root->lr_magic) && - (opc == ILOG_OP_PERSIST || opc == ILOG_OP_ABORT)) { + if (rc == 0 && opc != ILOG_OP_UPDATE) { + if (version == ilog_mag2ver(lctx->ic_root->lr_magic)) { + D_WARN("ilog entry on %s doesn't exist\n", opc_str[opc]); + return -DER_NONEXIST; + } + /** If we persisted or aborted an entry successfully, * invoke the callback, if applicable but without * deregistration @@ -1213,7 +1221,7 @@ ilog_fetch(struct umem_instance *umm, struct ilog_df *root_df, int rc = 0; bool retry; - ILOG_ASSERT_VALID(root_df); + D_ASSERT(ILOG_CHECK_VALID(root_df)); root = (struct ilog_root *)root_df; @@ -1539,7 +1547,7 @@ ilog_aggregate(struct umem_instance *umm, struct ilog_df *ilog, root = lctx->ic_root; - ILOG_ASSERT_VALID(root); + 
D_ASSERT(ILOG_CHECK_VALID(root)); D_ASSERT(!ilog_empty(root)); /* ilog_fetch should have failed */ diff --git a/src/vos/tests/vts_ilog.c b/src/vos/tests/vts_ilog.c index c696ff0b487..4ee2ea41c68 100644 --- a/src/vos/tests/vts_ilog.c +++ b/src/vos/tests/vts_ilog.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -530,6 +531,12 @@ ilog_test_update(void **state) rc = entries_check(umm, ilog, &ilog_callbacks, NULL, 0, entries); assert_rc_equal(rc, 0); + /* Test non-existent tx */ + id.id_epoch = epoch; + id.id_tx_id = current_tx_id.id_tx_id + 4000; + rc = ilog_persist(loh, &id); + assert_rc_equal(rc, -DER_NONEXIST); + /* Commit the punch ilog. */ id.id_epoch = epoch; id.id_tx_id = current_tx_id.id_tx_id; @@ -668,6 +675,12 @@ ilog_test_abort(void **state) rc = entries_check(umm, ilog, &ilog_callbacks, NULL, 0, entries); assert_rc_equal(rc, 0); + /* Test non-existent tx */ + id = current_tx_id; + id.id_tx_id += 400; + rc = ilog_abort(loh, &id); + assert_rc_equal(rc, -DER_NONEXIST); + id = current_tx_id; rc = ilog_abort(loh, &id); LOG_FAIL(rc, 0, "Failed to abort log entry\n"); @@ -735,6 +748,11 @@ ilog_test_abort(void **state) rc = ilog_destroy(umm, &ilog_callbacks, ilog); assert_rc_equal(rc, 0); + /** Test open of "reallocated" ilog */ + memset(ilog, 0xa1, sizeof(*ilog)); + rc = ilog_open(umm, ilog, &ilog_callbacks, false, &loh); + assert_rc_equal(rc, -DER_NONEXIST); + assert_true(d_list_empty(&fake_tx_list)); ilog_free_root(umm, ilog); } diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index 011f8a8ccd5..cedda5dc756 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -581,6 +581,12 @@ vos_tls_init(int tags, int xs_id, int tgt_id) D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", DP_RC(rc)); + rc = d_tm_add_metric(&tls->vtl_invalid_dtx, D_TM_STATS_GAUGE, + "Number of invalid active DTX", "entries", + "io/dtx/invalid/tgt_%u", tgt_id); + if 
(rc) + D_WARN("Failed to create invalid DTX cnt sensor: " DF_RC "\n", DP_RC(rc)); + rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE, "Number of cached vos object", "entry", "mem/vos/vos_obj_%u/tgt_%u", diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 2cf6ad2ec45..bc9baf92daf 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -1,6 +1,7 @@ /** * (C) Copyright 2019-2024 Intel Corporation. * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -573,7 +574,7 @@ dtx_ilog_rec_release(struct umem_instance *umm, struct vos_container *cont, ilog_close(loh); - if (rc != 0) + if (rc != 0 && rc != -DER_NONEXIST) D_ERROR("Failed to release ilog rec for "DF_DTI", abort %s: "DF_RC"\n", DP_DTI(&DAE_XID(dae)), abort ? "yes" : "no", DP_RC(rc)); @@ -598,6 +599,11 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont, struct vos_irec_df *svt; svt = umem_off2ptr(umm, umem_off2offset(rec)); + + if (!vos_irec_is_valid(svt, DAE_LID(dae))) { + rc = -DER_NONEXIST; + break; + } if (abort) { if (DAE_INDEX(dae) != DTX_INDEX_INVAL) { rc = umem_tx_add_ptr(umm, &svt->ir_dtx, @@ -621,6 +627,12 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont, struct evt_desc *evt; evt = umem_off2ptr(umm, umem_off2offset(rec)); + + if (!evt_desc_is_valid(evt, DAE_LID(dae))) { + rc = -DER_NONEXIST; + break; + } + if (abort) { if (DAE_INDEX(dae) != DTX_INDEX_INVAL) { rc = umem_tx_add_ptr(umm, &evt->dc_dtx, @@ -648,6 +660,11 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont, break; } + if (unlikely(rc == -DER_NONEXIST)) + D_WARN("DTX record no longer exists, may indicate some corruption: " + DF_DTI " type %u, discard\n", + DP_DTI(&DAE_XID(dae)), dtx_umoff_flag2type(rec)); + return rc; } @@ -657,6 +674,8 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab struct umem_instance *umm = vos_cont2umm(cont); 
struct vos_dtx_act_ent_df *dae_df; struct vos_dtx_blob_df *dbd; + struct vos_tls *tls = vos_tls_get(false); + bool invalid = false; int count; int i; int rc = 0; @@ -685,42 +704,55 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab abort ? "abort" : "commit", DP_DTI(&DAE_XID(dae)), dbd, DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id)); - if (dae->dae_records != NULL) { + /* Handle DTX records as FIFO order to find out potential invalid DTX earlier. */ + + if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT) + count = DTX_INLINE_REC_CNT; + else + count = DAE_REC_CNT(dae); + + for (i = 0; i < count; i++) { + rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort); + if (unlikely(rc == -DER_NONEXIST)) { + d_tm_inc_gauge(tls->vtl_invalid_dtx, 1); + invalid = true; + break; + } + if (rc != 0) + return rc; + } + + if (!invalid && dae->dae_records != NULL) { D_ASSERT(DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT); D_ASSERT(!UMOFF_IS_NULL(dae_df->dae_rec_off)); - for (i = DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT - 1; i >= 0; i--) { + for (i = 0; i < DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT; i++) { rc = do_dtx_rec_release(umm, cont, dae, dae->dae_records[i], abort); + if (unlikely(rc == -DER_NONEXIST)) { + d_tm_inc_gauge(tls->vtl_invalid_dtx, 1); + invalid = true; + break; + } if (rc != 0) return rc; } + } + if (!UMOFF_IS_NULL(dae_df->dae_rec_off)) { rc = umem_free(umm, dae_df->dae_rec_off); if (rc != 0) return rc; - if (keep_act) { + if (!invalid && keep_act) { rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_off, sizeof(dae_df->dae_rec_off)); if (rc != 0) return rc; - dae_df->dae_rec_off = UMOFF_NULL; } - count = DTX_INLINE_REC_CNT; - } else { - D_ASSERT(DAE_REC_CNT(dae) <= DTX_INLINE_REC_CNT); - - count = DAE_REC_CNT(dae); - } - - for (i = count - 1; i >= 0; i--) { - rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort); - if (rc != 0) - return rc; } - if (keep_act) { + if (!invalid && keep_act) { /* When re-commit partial committed 
DTX, the count can be zero. */ if (dae_df->dae_rec_cnt > 0) { rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_cnt, diff --git a/src/vos/vos_ilog.c b/src/vos/vos_ilog.c index 54abf2f407f..758edaec5c9 100644 --- a/src/vos/vos_ilog.c +++ b/src/vos/vos_ilog.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -427,6 +428,7 @@ int vos_ilog_update_(struct vos_container *cont, struct ilog_df *ilog, vos_ilog_desc_cbs_init(&cbs, vos_cont2hdl(cont)); rc = ilog_open(vos_cont2umm(cont), ilog, &cbs, dth == NULL, &loh); + D_ASSERTF(rc != -DER_NONEXIST, "Uncorrectable incarnation log corruption detected"); if (rc != 0) { D_ERROR("Could not open incarnation log: "DF_RC"\n", DP_RC(rc)); return rc; @@ -522,6 +524,7 @@ vos_ilog_punch_(struct vos_container *cont, struct ilog_df *ilog, punch_log: vos_ilog_desc_cbs_init(&cbs, vos_cont2hdl(cont)); rc = ilog_open(vos_cont2umm(cont), ilog, &cbs, dth == NULL, &loh); + D_ASSERTF(rc != -DER_NONEXIST, "Uncorrectable incarnation log corruption detected"); if (rc != 0) { D_ERROR("Could not open incarnation log: "DF_RC"\n", DP_RC(rc)); return rc; diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index 1e0a219df0b..05a2ff7afd4 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -1,6 +1,7 @@ /** * (C) Copyright 2016-2024 Intel Corporation. 
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -454,6 +455,8 @@ struct vos_dtx_cmt_ent { #define DCE_EPOCH(dce) ((dce)->dce_base.dce_epoch) #define DCE_CMT_TIME(dce) ((dce)->dce_base.dce_cmt_time) +#define EVT_DESC_MAGIC 0xbeefdead + extern uint64_t vos_evt_feats; /** Flags for internal use - Bit 63 can be used for another purpose so as to @@ -1858,4 +1861,16 @@ vos_io_scm(struct vos_pool *pool, daos_iod_type_t type, daos_size_t size, enum v int vos_insert_oid(struct dtx_handle *dth, struct vos_container *cont, daos_unit_oid_t *oid); +/** Validate the provided svt. + * + * Note: It is designed for catastrophic recovery. Not to perform at run-time. + * + * \param svt[in] + * \param dtx_lid[in] local id of the DTX entry the svt is supposed to belong to + * + * \return true if svt is valid. + **/ +bool +vos_irec_is_valid(const struct vos_irec_df *svt, uint32_t dtx_lid); + #endif /* __VOS_INTERNAL_H__ */ diff --git a/src/vos/vos_obj_index.c b/src/vos/vos_obj_index.c index b8123672aa4..d47157f45fa 100644 --- a/src/vos/vos_obj_index.c +++ b/src/vos/vos_obj_index.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -289,6 +290,7 @@ vos_oi_find_alloc(struct vos_container *cont, daos_unit_oid_t oid, goto skip_log; vos_ilog_desc_cbs_init(&cbs, vos_cont2hdl(cont)); rc = ilog_open(vos_cont2umm(cont), &obj->vo_ilog, &cbs, dth == NULL, &loh); + D_ASSERTF(rc != -DER_NONEXIST, "Uncorrectable incarnation log corruption detected"); if (rc != 0) return rc; diff --git a/src/vos/vos_tls.h b/src/vos/vos_tls.h index 2fc328457d0..11f45beef17 100644 --- a/src/vos/vos_tls.h +++ b/src/vos/vos_tls.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2023 Intel Corporation. 
+ * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -63,6 +64,7 @@ struct vos_tls { bool vtl_hash_set; }; struct d_tm_node_t *vtl_committed; + struct d_tm_node_t *vtl_invalid_dtx; struct d_tm_node_t *vtl_obj_cnt; struct d_tm_node_t *vtl_lru_alloc_size; }; diff --git a/src/vos/vos_tree.c b/src/vos/vos_tree.c index c36fcaa88c5..4beeb7e766f 100644 --- a/src/vos/vos_tree.c +++ b/src/vos/vos_tree.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1292,3 +1293,13 @@ obj_tree_find_attr(unsigned tree_class, int flags) return NULL; } } + +bool +vos_irec_is_valid(const struct vos_irec_df *svt, uint32_t dtx_lid) +{ + if (svt == NULL) { + return false; + } + + return svt->ir_dtx == dtx_lid; +}