diff --git a/src/tests/ftest/util/telemetry_utils.py b/src/tests/ftest/util/telemetry_utils.py index cc0610af872..0979058b6e9 100644 --- a/src/tests/ftest/util/telemetry_utils.py +++ b/src/tests/ftest/util/telemetry_utils.py @@ -1,5 +1,7 @@ """ (C) Copyright 2021-2024 Intel Corporation. +(C) Copyright 2025 Hewlett Packard Enterprise Development LP +(C) Copyright 2025 Google LLC SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -207,6 +209,8 @@ class TelemetryUtils(): _gen_stats_metrics("engine_io_dtx_committable") ENGINE_IO_DTX_COMMITTED_METRICS = \ _gen_stats_metrics("engine_io_dtx_committed") + ENGINE_IO_DTX_INVALID_METRICS = \ + _gen_stats_metrics("engine_io_dtx_invalid") ENGINE_IO_LATENCY_FETCH_METRICS = \ _gen_stats_metrics("engine_io_latency_fetch") ENGINE_IO_LATENCY_BULK_FETCH_METRICS = \ @@ -310,6 +314,7 @@ class TelemetryUtils(): ENGINE_IO_METRICS = ENGINE_IO_DTX_ASYNC_CMT_LAT_METRICS +\ ENGINE_IO_DTX_COMMITTABLE_METRICS +\ ENGINE_IO_DTX_COMMITTED_METRICS +\ + ENGINE_IO_DTX_INVALID_METRICS +\ ENGINE_IO_LATENCY_FETCH_METRICS +\ ENGINE_IO_LATENCY_BULK_FETCH_METRICS +\ ENGINE_IO_LATENCY_VOS_FETCH_METRICS +\ diff --git a/src/vos/evt_priv.h b/src/vos/evt_priv.h index e855a9c74b2..0150259e987 100644 --- a/src/vos/evt_priv.h +++ b/src/vos/evt_priv.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2017-2022 Intel Corporation. + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -115,7 +116,6 @@ struct evt_context { umem_off2ptr(evt_umm(tcx), offset) #define EVT_NODE_MAGIC 0xf00d -#define EVT_DESC_MAGIC 0xbeefdead /** Convert an offset to a evtree node descriptor * \param[IN] tcx Tree context diff --git a/src/vos/ilog.c b/src/vos/ilog.c index 58e48dffd83..e1698058bac 100644 --- a/src/vos/ilog.c +++ b/src/vos/ilog.c @@ -1,6 +1,7 @@ /** * (C) Copyright 2019-2024 Intel Corporation. * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -345,16 +346,18 @@ ilog_create(struct umem_instance *umm, struct ilog_df *root) return rc; } -#define ILOG_ASSERT_VALID(root_df) \ - do { \ - struct ilog_root *_root; \ - \ - _root = (struct ilog_root *)(root_df); \ - D_ASSERTF((_root != NULL) && \ - ILOG_MAGIC_VALID(_root->lr_magic), \ - "Invalid ilog root detected %p magic=%#x\n", \ - _root, _root == NULL ? 0 : _root->lr_magic); \ - } while (0) +#define ILOG_CHECK_VALID(root_df) \ + ({ \ + struct ilog_root *_root = NULL; \ + D_ASSERT((root_df) != NULL); \ + _root = (struct ilog_root *)(root_df); \ + if (!ILOG_MAGIC_VALID(_root->lr_magic)) { \ + D_WARN("Invalid ilog root detected %p magic=%#x\n", _root, \ + _root == NULL ? 0 : _root->lr_magic); \ + _root = NULL; \ + } \ + _root != NULL; \ + }) int ilog_open(struct umem_instance *umm, struct ilog_df *root, @@ -363,7 +366,8 @@ ilog_open(struct umem_instance *umm, struct ilog_df *root, struct ilog_context *lctx; int rc; - ILOG_ASSERT_VALID(root); + if (!ILOG_CHECK_VALID(root)) + return -DER_NONEXIST; rc = ilog_ctx_create(umm, (struct ilog_root *)root, cbs, &lctx); if (rc != 0) @@ -429,7 +433,7 @@ ilog_destroy(struct umem_instance *umm, int rc = 0; struct ilog_array_cache cache = {0}; - ILOG_ASSERT_VALID(root); + D_ASSERT(ILOG_CHECK_VALID(root)); rc = ilog_tx_begin(&lctx); if (rc != 0) { @@ -939,8 +943,12 @@ ilog_modify(daos_handle_t loh, const struct ilog_id *id_in, "%s in incarnation log " DF_X64 " status: rc=" DF_RC " tree_version: %d\n", opc_str[opc], id_in->id_epoch, DP_RC(rc), ilog_mag2ver(lctx->ic_root->lr_magic)); - if (rc == 0 && version != ilog_mag2ver(lctx->ic_root->lr_magic) && - (opc == ILOG_OP_PERSIST || opc == ILOG_OP_ABORT)) { + if (rc == 0 && opc != ILOG_OP_UPDATE) { + if (version == ilog_mag2ver(lctx->ic_root->lr_magic)) { + D_WARN("ilog entry on %s doesn't exist\n", opc_str[opc]); + return -DER_NONEXIST; + } + /** If we persisted or aborted an entry successfully, * invoke the callback, if applicable but without * deregistration @@ -1168,7 +1176,7 @@ ilog_fetch(struct umem_instance *umm, struct ilog_df *root_df, int rc = 0; bool retry; - ILOG_ASSERT_VALID(root_df); + D_ASSERT(ILOG_CHECK_VALID(root_df)); root = (struct ilog_root *)root_df; @@ -1494,7 +1502,7 @@ ilog_aggregate(struct umem_instance *umm, struct ilog_df *ilog, root = lctx->ic_root; - ILOG_ASSERT_VALID(root); + D_ASSERT(ILOG_CHECK_VALID(root)); D_ASSERT(!ilog_empty(root)); /* ilog_fetch should have failed */ diff --git a/src/vos/tests/vts_ilog.c b/src/vos/tests/vts_ilog.c index 2d0072efc2c..546f02a6d1a 100644 --- a/src/vos/tests/vts_ilog.c +++ b/src/vos/tests/vts_ilog.c @@ -1,6 +1,7 @@ /** * (C) Copyright 2019-2024 Intel Corporation. * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -532,6 +533,12 @@ ilog_test_update(void **state) rc = entries_check(umm, ilog, &ilog_callbacks, NULL, 0, entries); assert_rc_equal(rc, 0); + /* Test non-existent tx */ + id.id_epoch = epoch; + id.id_tx_id = current_tx_id.id_tx_id + 4000; + rc = ilog_persist(loh, &id); + assert_rc_equal(rc, -DER_NONEXIST); + /* Commit the punch ilog. */ id.id_epoch = epoch; id.id_tx_id = current_tx_id.id_tx_id; @@ -670,6 +677,12 @@ ilog_test_abort(void **state) rc = entries_check(umm, ilog, &ilog_callbacks, NULL, 0, entries); assert_rc_equal(rc, 0); + /* Test non-existent tx */ + id = current_tx_id; + id.id_tx_id += 400; + rc = ilog_abort(loh, &id); + assert_rc_equal(rc, -DER_NONEXIST); + id = current_tx_id; rc = ilog_abort(loh, &id); LOG_FAIL(rc, 0, "Failed to abort log entry\n"); @@ -737,6 +750,11 @@ ilog_test_abort(void **state) rc = ilog_destroy(umm, &ilog_callbacks, ilog); assert_rc_equal(rc, 0); + /** Test open of "reallocated" ilog */ + memset(ilog, 0xa1, sizeof(*ilog)); + rc = ilog_open(umm, ilog, &ilog_callbacks, false, &loh); + assert_rc_equal(rc, -DER_NONEXIST); + assert_true(d_list_empty(&fake_tx_list)); ilog_free_root(umm, ilog); } diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index 948b3a2f6d0..b3f58f6cf31 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -1,6 +1,7 @@ /** * (C) Copyright 2016-2024 Intel Corporation. * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -580,6 +581,12 @@ vos_tls_init(int tags, int xs_id, int tgt_id) D_WARN("Failed to create committed cnt sensor: "DF_RC"\n", DP_RC(rc)); + rc = d_tm_add_metric(&tls->vtl_invalid_dtx, D_TM_STATS_GAUGE, + "Number of invalid active DTX", "entries", + "io/dtx/invalid/tgt_%u", tgt_id); + if (rc) + D_WARN("Failed to create invalid DTX cnt sensor: " DF_RC "\n", DP_RC(rc)); + rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE, "Number of cached vos object", "entry", "mem/vos/vos_obj_%u/tgt_%u", diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 2127db776c9..1eeea28ade1 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -1,6 +1,7 @@ /** * (C) Copyright 2019-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -546,7 +547,7 @@ dtx_ilog_rec_release(struct umem_instance *umm, struct vos_container *cont, ilog_close(loh); - if (rc != 0) + if (rc != 0 && rc != -DER_NONEXIST) D_ERROR("Failed to release ilog rec for "DF_DTI", abort %s: "DF_RC"\n", DP_DTI(&DAE_XID(dae)), abort ? "yes" : "no", DP_RC(rc)); @@ -571,6 +572,11 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont, struct vos_irec_df *svt; svt = umem_off2ptr(umm, umem_off2offset(rec)); + + if (!vos_irec_is_valid(svt, DAE_LID(dae))) { + rc = -DER_NONEXIST; + break; + } if (abort) { if (DAE_INDEX(dae) != DTX_INDEX_INVAL) { rc = umem_tx_add_ptr(umm, &svt->ir_dtx, @@ -594,6 +600,12 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont, struct evt_desc *evt; evt = umem_off2ptr(umm, umem_off2offset(rec)); + + if (!evt_desc_is_valid(evt, DAE_LID(dae))) { + rc = -DER_NONEXIST; + break; + } + if (abort) { if (DAE_INDEX(dae) != DTX_INDEX_INVAL) { rc = umem_tx_add_ptr(umm, &evt->dc_dtx, @@ -621,6 +633,15 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont, break; } + if (rc == -DER_NONEXIST) { + struct vos_tls *tls = vos_tls_get(false); + + D_WARN("DTX record no longer exists, may indicate some corruption: " + DF_DTI " type %u, discard\n", + DP_DTI(&DAE_XID(dae)), dtx_umoff_flag2type(rec)); + d_tm_inc_gauge(tls->vtl_invalid_dtx, 1); + } + return rc; } @@ -630,6 +651,7 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab struct umem_instance *umm = vos_cont2umm(cont); struct vos_dtx_act_ent_df *dae_df; struct vos_dtx_blob_df *dbd; + bool invalid = false; int count; int i; int rc = 0; @@ -658,42 +680,52 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab abort ? "abort" : "commit", DP_DTI(&DAE_XID(dae)), dbd, DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id)); - if (dae->dae_records != NULL) { + /* Handle DTX records as FIFO order to find out potential invalid DTX earlier. */ + + if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT) + count = DTX_INLINE_REC_CNT; + else + count = DAE_REC_CNT(dae); + + for (i = 0; i < count; i++) { + rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort); + if (unlikely(rc == -DER_NONEXIST)) { + invalid = true; + break; + } + if (rc != 0) + return rc; + } + + if (!invalid && dae->dae_records != NULL) { D_ASSERT(DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT); D_ASSERT(!UMOFF_IS_NULL(dae_df->dae_rec_off)); - for (i = DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT - 1; i >= 0; i--) { + for (i = 0; i < DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT; i++) { rc = do_dtx_rec_release(umm, cont, dae, dae->dae_records[i], abort); + if (unlikely(rc == -DER_NONEXIST)) { + invalid = true; + break; + } if (rc != 0) return rc; } + } + if (!UMOFF_IS_NULL(dae_df->dae_rec_off)) { rc = umem_free(umm, dae_df->dae_rec_off); if (rc != 0) return rc; - if (keep_act) { + if (!invalid && keep_act) { rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_off, sizeof(dae_df->dae_rec_off)); if (rc != 0) return rc; - dae_df->dae_rec_off = UMOFF_NULL; } - - count = DTX_INLINE_REC_CNT; - } else { - D_ASSERT(DAE_REC_CNT(dae) <= DTX_INLINE_REC_CNT); - - count = DAE_REC_CNT(dae); } - for (i = count - 1; i >= 0; i--) { - rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort); - if (rc != 0) - return rc; - } - - if (keep_act) { + if (!invalid && keep_act) { /* When re-commit partial committed DTX, the count can be zero. */ if (dae_df->dae_rec_cnt > 0) { rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_cnt, @@ -720,6 +752,9 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab return 0; } + if (invalid) + rc = 0; + if (!UMOFF_IS_NULL(dae_df->dae_mbs_off)) { /* dae_mbs_off will be invalid via flag DTE_INVALID. */ rc = umem_free(umm, dae_df->dae_mbs_off); diff --git a/src/vos/vos_ilog.c b/src/vos/vos_ilog.c index 54abf2f407f..758edaec5c9 100644 --- a/src/vos/vos_ilog.c +++ b/src/vos/vos_ilog.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -427,6 +428,7 @@ int vos_ilog_update_(struct vos_container *cont, struct ilog_df *ilog, vos_ilog_desc_cbs_init(&cbs, vos_cont2hdl(cont)); rc = ilog_open(vos_cont2umm(cont), ilog, &cbs, dth == NULL, &loh); + D_ASSERTF(rc != -DER_NONEXIST, "Uncorrectable incarnation log corruption detected"); if (rc != 0) { D_ERROR("Could not open incarnation log: "DF_RC"\n", DP_RC(rc)); return rc; @@ -522,6 +524,7 @@ vos_ilog_punch_(struct vos_container *cont, struct ilog_df *ilog, punch_log: vos_ilog_desc_cbs_init(&cbs, vos_cont2hdl(cont)); rc = ilog_open(vos_cont2umm(cont), ilog, &cbs, dth == NULL, &loh); + D_ASSERTF(rc != -DER_NONEXIST, "Uncorrectable incarnation log corruption detected"); if (rc != 0) { D_ERROR("Could not open incarnation log: "DF_RC"\n", DP_RC(rc)); return rc; diff --git a/src/vos/vos_internal.h b/src/vos/vos_internal.h index b5ada253806..4a398c1de66 100644 --- a/src/vos/vos_internal.h +++ b/src/vos/vos_internal.h @@ -1,6 +1,7 @@ /** * (C) Copyright 2016-2024 Intel Corporation. - * (C) Copyright 2025 Hewlett Packard Enterprise Development LP. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -514,6 +515,8 @@ struct vos_dtx_cmt_ent { #define DCE_EPOCH(dce) ((dce)->dce_base.dce_epoch) #define DCE_CMT_TIME(dce) ((dce)->dce_base.dce_cmt_time) +#define EVT_DESC_MAGIC 0xbeefdead + extern uint64_t vos_evt_feats; /** Flags for internal use - Bit 63 can be used for another purpose so as to diff --git a/src/vos/vos_obj_index.c b/src/vos/vos_obj_index.c index 7a7735d78d0..fab7d718e24 100644 --- a/src/vos/vos_obj_index.c +++ b/src/vos/vos_obj_index.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2025 Google LLC * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent @@ -338,6 +339,7 @@ vos_oi_find_alloc(struct vos_container *cont, daos_unit_oid_t oid, if (log) { vos_ilog_desc_cbs_init(&cbs, vos_cont2hdl(cont)); rc = ilog_open(vos_cont2umm(cont), &obj->vo_ilog, &cbs, dth == NULL, &loh); + D_ASSERTF(rc != -DER_NONEXIST, "Uncorrectable incarnation log corruption detected"); if (rc != 0) return rc; diff --git a/src/vos/vos_tls.h b/src/vos/vos_tls.h index 2fc328457d0..e34231877af 100644 --- a/src/vos/vos_tls.h +++ b/src/vos/vos_tls.h @@ -1,5 +1,7 @@ /** * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP + * (C) Copyright 2025 Google LLC * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -63,6 +65,7 @@ struct vos_tls { bool vtl_hash_set; }; struct d_tm_node_t *vtl_committed; + struct d_tm_node_t *vtl_invalid_dtx; struct d_tm_node_t *vtl_obj_cnt; struct d_tm_node_t *vtl_lru_alloc_size; };