Skip to content

Commit

Permalink
DAOS-16876 vos: Improve error reporting on corruption crash (#15747)
Browse files Browse the repository at this point in the history
If the records referenced by a DTX have been removed from the
tree, the data at those locations will no longer match the DTX.
We can't detect this in all cases, but we can avoid overwriting
when we do detect it.

Signed-off-by: Jeff Olivier <[email protected]>
Co-authored-by: Fan Yong <[email protected]>
  • Loading branch information
jolivier23 and Nasf-Fan authored Feb 19, 2025
1 parent 70a8967 commit e98c5ac
Show file tree
Hide file tree
Showing 10 changed files with 121 additions and 37 deletions.
5 changes: 5 additions & 0 deletions src/tests/ftest/util/telemetry_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""
(C) Copyright 2021-2024 Intel Corporation.
(C) Copyright 2025 Hewlett Packard Enterprise Development LP
(C) Copyright 2025 Google LLC
SPDX-License-Identifier: BSD-2-Clause-Patent
"""
Expand Down Expand Up @@ -207,6 +209,8 @@ class TelemetryUtils():
_gen_stats_metrics("engine_io_dtx_committable")
ENGINE_IO_DTX_COMMITTED_METRICS = \
_gen_stats_metrics("engine_io_dtx_committed")
ENGINE_IO_DTX_INVALID_METRICS = \
_gen_stats_metrics("engine_io_dtx_invalid")
ENGINE_IO_LATENCY_FETCH_METRICS = \
_gen_stats_metrics("engine_io_latency_fetch")
ENGINE_IO_LATENCY_BULK_FETCH_METRICS = \
Expand Down Expand Up @@ -310,6 +314,7 @@ class TelemetryUtils():
ENGINE_IO_METRICS = ENGINE_IO_DTX_ASYNC_CMT_LAT_METRICS +\
ENGINE_IO_DTX_COMMITTABLE_METRICS +\
ENGINE_IO_DTX_COMMITTED_METRICS +\
ENGINE_IO_DTX_INVALID_METRICS +\
ENGINE_IO_LATENCY_FETCH_METRICS +\
ENGINE_IO_LATENCY_BULK_FETCH_METRICS +\
ENGINE_IO_LATENCY_VOS_FETCH_METRICS +\
Expand Down
2 changes: 1 addition & 1 deletion src/vos/evt_priv.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2017-2022 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -115,7 +116,6 @@ struct evt_context {
umem_off2ptr(evt_umm(tcx), offset)

#define EVT_NODE_MAGIC 0xf00d
#define EVT_DESC_MAGIC 0xbeefdead

/** Convert an offset to a evtree node descriptor
* \param[IN] tcx Tree context
Expand Down
40 changes: 24 additions & 16 deletions src/vos/ilog.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -345,16 +346,18 @@ ilog_create(struct umem_instance *umm, struct ilog_df *root)
return rc;
}

/**
 * Assert that \a root_df points at a valid incarnation log root.
 *
 * The argument is cast to struct ilog_root * and the assertion fails if the
 * pointer is NULL or if its lr_magic does not satisfy ILOG_MAGIC_VALID. On
 * failure, D_ASSERTF reports the pointer and the magic value (0 is printed
 * for the magic when the pointer itself is NULL).
 *
 * \param[IN] root_df	Pointer to the ilog root to validate
 */
#define ILOG_ASSERT_VALID(root_df) \
do { \
struct ilog_root *_root; \
\
_root = (struct ilog_root *)(root_df); \
D_ASSERTF((_root != NULL) && \
ILOG_MAGIC_VALID(_root->lr_magic), \
"Invalid ilog root detected %p magic=%#x\n", \
_root, _root == NULL ? 0 : _root->lr_magic); \
} while (0)
/**
 * Check whether \a root_df points at a valid incarnation log root.
 *
 * The argument is evaluated exactly once. A NULL pointer is treated as a
 * programming error and trips D_ASSERT. A non-NULL root whose lr_magic does
 * not satisfy ILOG_MAGIC_VALID is reported with D_WARN, and the expression
 * evaluates to false so the caller can choose how to react (e.g. return an
 * error code or wrap the check in D_ASSERT).
 *
 * \param[IN] root_df	Pointer to the ilog root to validate
 *
 * \return		true if the root's magic is valid, false otherwise
 */
#define ILOG_CHECK_VALID(root_df) \
	({ \
		struct ilog_root *_root = (struct ilog_root *)(root_df); \
		bool _valid; \
		\
		D_ASSERT(_root != NULL); \
		_valid = ILOG_MAGIC_VALID(_root->lr_magic); \
		if (!_valid) \
			D_WARN("Invalid ilog root detected %p magic=%#x\n", _root, \
			       _root->lr_magic); \
		_valid; \
	})

int
ilog_open(struct umem_instance *umm, struct ilog_df *root,
Expand All @@ -363,7 +366,8 @@ ilog_open(struct umem_instance *umm, struct ilog_df *root,
struct ilog_context *lctx;
int rc;

ILOG_ASSERT_VALID(root);
if (!ILOG_CHECK_VALID(root))
return -DER_NONEXIST;

rc = ilog_ctx_create(umm, (struct ilog_root *)root, cbs, &lctx);
if (rc != 0)
Expand Down Expand Up @@ -429,7 +433,7 @@ ilog_destroy(struct umem_instance *umm,
int rc = 0;
struct ilog_array_cache cache = {0};

ILOG_ASSERT_VALID(root);
D_ASSERT(ILOG_CHECK_VALID(root));

rc = ilog_tx_begin(&lctx);
if (rc != 0) {
Expand Down Expand Up @@ -939,8 +943,12 @@ ilog_modify(daos_handle_t loh, const struct ilog_id *id_in,
"%s in incarnation log " DF_X64 " status: rc=" DF_RC " tree_version: %d\n",
opc_str[opc], id_in->id_epoch, DP_RC(rc), ilog_mag2ver(lctx->ic_root->lr_magic));

if (rc == 0 && version != ilog_mag2ver(lctx->ic_root->lr_magic) &&
(opc == ILOG_OP_PERSIST || opc == ILOG_OP_ABORT)) {
if (rc == 0 && opc != ILOG_OP_UPDATE) {
if (version == ilog_mag2ver(lctx->ic_root->lr_magic)) {
D_WARN("ilog entry on %s doesn't exist\n", opc_str[opc]);
return -DER_NONEXIST;
}

/** If we persisted or aborted an entry successfully,
* invoke the callback, if applicable but without
* deregistration
Expand Down Expand Up @@ -1168,7 +1176,7 @@ ilog_fetch(struct umem_instance *umm, struct ilog_df *root_df,
int rc = 0;
bool retry;

ILOG_ASSERT_VALID(root_df);
D_ASSERT(ILOG_CHECK_VALID(root_df));

root = (struct ilog_root *)root_df;

Expand Down Expand Up @@ -1494,7 +1502,7 @@ ilog_aggregate(struct umem_instance *umm, struct ilog_df *ilog,

root = lctx->ic_root;

ILOG_ASSERT_VALID(root);
D_ASSERT(ILOG_CHECK_VALID(root));

D_ASSERT(!ilog_empty(root)); /* ilog_fetch should have failed */

Expand Down
18 changes: 18 additions & 0 deletions src/vos/tests/vts_ilog.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -532,6 +533,12 @@ ilog_test_update(void **state)
rc = entries_check(umm, ilog, &ilog_callbacks, NULL, 0, entries);
assert_rc_equal(rc, 0);

/* Test non-existent tx */
id.id_epoch = epoch;
id.id_tx_id = current_tx_id.id_tx_id + 4000;
rc = ilog_persist(loh, &id);
assert_rc_equal(rc, -DER_NONEXIST);

/* Commit the punch ilog. */
id.id_epoch = epoch;
id.id_tx_id = current_tx_id.id_tx_id;
Expand Down Expand Up @@ -670,6 +677,12 @@ ilog_test_abort(void **state)
rc = entries_check(umm, ilog, &ilog_callbacks, NULL, 0, entries);
assert_rc_equal(rc, 0);

/* Test non-existent tx */
id = current_tx_id;
id.id_tx_id += 400;
rc = ilog_abort(loh, &id);
assert_rc_equal(rc, -DER_NONEXIST);

id = current_tx_id;
rc = ilog_abort(loh, &id);
LOG_FAIL(rc, 0, "Failed to abort log entry\n");
Expand Down Expand Up @@ -737,6 +750,11 @@ ilog_test_abort(void **state)
rc = ilog_destroy(umm, &ilog_callbacks, ilog);
assert_rc_equal(rc, 0);

/** Test open of "reallocated" ilog */
memset(ilog, 0xa1, sizeof(*ilog));
rc = ilog_open(umm, ilog, &ilog_callbacks, false, &loh);
assert_rc_equal(rc, -DER_NONEXIST);

assert_true(d_list_empty(&fake_tx_list));
ilog_free_root(umm, ilog);
}
Expand Down
7 changes: 7 additions & 0 deletions src/vos/vos_common.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -580,6 +581,12 @@ vos_tls_init(int tags, int xs_id, int tgt_id)
D_WARN("Failed to create committed cnt sensor: "DF_RC"\n",
DP_RC(rc));

rc = d_tm_add_metric(&tls->vtl_invalid_dtx, D_TM_STATS_GAUGE,
"Number of invalid active DTX", "entries",
"io/dtx/invalid/tgt_%u", tgt_id);
if (rc)
D_WARN("Failed to create invalid DTX cnt sensor: " DF_RC "\n", DP_RC(rc));

rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE,
"Number of cached vos object", "entry",
"mem/vos/vos_obj_%u/tgt_%u",
Expand Down
73 changes: 54 additions & 19 deletions src/vos/vos_dtx.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -546,7 +547,7 @@ dtx_ilog_rec_release(struct umem_instance *umm, struct vos_container *cont,

ilog_close(loh);

if (rc != 0)
if (rc != 0 && rc != -DER_NONEXIST)
D_ERROR("Failed to release ilog rec for "DF_DTI", abort %s: "DF_RC"\n",
DP_DTI(&DAE_XID(dae)), abort ? "yes" : "no", DP_RC(rc));

Expand All @@ -571,6 +572,11 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont,
struct vos_irec_df *svt;

svt = umem_off2ptr(umm, umem_off2offset(rec));

if (!vos_irec_is_valid(svt, DAE_LID(dae))) {
rc = -DER_NONEXIST;
break;
}
if (abort) {
if (DAE_INDEX(dae) != DTX_INDEX_INVAL) {
rc = umem_tx_add_ptr(umm, &svt->ir_dtx,
Expand All @@ -594,6 +600,12 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont,
struct evt_desc *evt;

evt = umem_off2ptr(umm, umem_off2offset(rec));

if (!evt_desc_is_valid(evt, DAE_LID(dae))) {
rc = -DER_NONEXIST;
break;
}

if (abort) {
if (DAE_INDEX(dae) != DTX_INDEX_INVAL) {
rc = umem_tx_add_ptr(umm, &evt->dc_dtx,
Expand Down Expand Up @@ -621,6 +633,15 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont,
break;
}

if (rc == -DER_NONEXIST) {
struct vos_tls *tls = vos_tls_get(false);

D_WARN("DTX record no longer exists, may indicate some corruption: "
DF_DTI " type %u, discard\n",
DP_DTI(&DAE_XID(dae)), dtx_umoff_flag2type(rec));
d_tm_inc_gauge(tls->vtl_invalid_dtx, 1);
}

return rc;
}

Expand All @@ -630,6 +651,7 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab
struct umem_instance *umm = vos_cont2umm(cont);
struct vos_dtx_act_ent_df *dae_df;
struct vos_dtx_blob_df *dbd;
bool invalid = false;
int count;
int i;
int rc = 0;
Expand Down Expand Up @@ -658,42 +680,52 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab
abort ? "abort" : "commit", DP_DTI(&DAE_XID(dae)), dbd,
DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id));

if (dae->dae_records != NULL) {
/* Handle DTX records in FIFO order to detect a potentially invalid DTX earlier. */

if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT)
count = DTX_INLINE_REC_CNT;
else
count = DAE_REC_CNT(dae);

for (i = 0; i < count; i++) {
rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort);
if (unlikely(rc == -DER_NONEXIST)) {
invalid = true;
break;
}
if (rc != 0)
return rc;
}

if (!invalid && dae->dae_records != NULL) {
D_ASSERT(DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT);
D_ASSERT(!UMOFF_IS_NULL(dae_df->dae_rec_off));

for (i = DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT - 1; i >= 0; i--) {
for (i = 0; i < DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT; i++) {
rc = do_dtx_rec_release(umm, cont, dae, dae->dae_records[i], abort);
if (unlikely(rc == -DER_NONEXIST)) {
invalid = true;
break;
}
if (rc != 0)
return rc;
}
}

if (!UMOFF_IS_NULL(dae_df->dae_rec_off)) {
rc = umem_free(umm, dae_df->dae_rec_off);
if (rc != 0)
return rc;

if (keep_act) {
if (!invalid && keep_act) {
rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_off, sizeof(dae_df->dae_rec_off));
if (rc != 0)
return rc;

dae_df->dae_rec_off = UMOFF_NULL;
}

count = DTX_INLINE_REC_CNT;
} else {
D_ASSERT(DAE_REC_CNT(dae) <= DTX_INLINE_REC_CNT);

count = DAE_REC_CNT(dae);
}

for (i = count - 1; i >= 0; i--) {
rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort);
if (rc != 0)
return rc;
}

if (keep_act) {
if (!invalid && keep_act) {
/* When re-committing a partially committed DTX, the count can be zero. */
if (dae_df->dae_rec_cnt > 0) {
rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_cnt,
Expand All @@ -720,6 +752,9 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab
return 0;
}

if (invalid)
rc = 0;

if (!UMOFF_IS_NULL(dae_df->dae_mbs_off)) {
/* dae_mbs_off will be invalid via flag DTE_INVALID. */
rc = umem_free(umm, dae_df->dae_mbs_off);
Expand Down
3 changes: 3 additions & 0 deletions src/vos/vos_ilog.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -427,6 +428,7 @@ int vos_ilog_update_(struct vos_container *cont, struct ilog_df *ilog,

vos_ilog_desc_cbs_init(&cbs, vos_cont2hdl(cont));
rc = ilog_open(vos_cont2umm(cont), ilog, &cbs, dth == NULL, &loh);
D_ASSERTF(rc != -DER_NONEXIST, "Uncorrectable incarnation log corruption detected");
if (rc != 0) {
D_ERROR("Could not open incarnation log: "DF_RC"\n", DP_RC(rc));
return rc;
Expand Down Expand Up @@ -522,6 +524,7 @@ vos_ilog_punch_(struct vos_container *cont, struct ilog_df *ilog,
punch_log:
vos_ilog_desc_cbs_init(&cbs, vos_cont2hdl(cont));
rc = ilog_open(vos_cont2umm(cont), ilog, &cbs, dth == NULL, &loh);
D_ASSERTF(rc != -DER_NONEXIST, "Uncorrectable incarnation log corruption detected");
if (rc != 0) {
D_ERROR("Could not open incarnation log: "DF_RC"\n", DP_RC(rc));
return rc;
Expand Down
5 changes: 4 additions & 1 deletion src/vos/vos_internal.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -514,6 +515,8 @@ struct vos_dtx_cmt_ent {
#define DCE_EPOCH(dce) ((dce)->dce_base.dce_epoch)
#define DCE_CMT_TIME(dce) ((dce)->dce_base.dce_cmt_time)

#define EVT_DESC_MAGIC 0xbeefdead

extern uint64_t vos_evt_feats;

/** Flags for internal use - Bit 63 can be used for another purpose so as to
Expand Down
2 changes: 2 additions & 0 deletions src/vos/vos_obj_index.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
Expand Down Expand Up @@ -338,6 +339,7 @@ vos_oi_find_alloc(struct vos_container *cont, daos_unit_oid_t oid,
if (log) {
vos_ilog_desc_cbs_init(&cbs, vos_cont2hdl(cont));
rc = ilog_open(vos_cont2umm(cont), &obj->vo_ilog, &cbs, dth == NULL, &loh);
D_ASSERTF(rc != -DER_NONEXIST, "Uncorrectable incarnation log corruption detected");
if (rc != 0)
return rc;

Expand Down
Loading

0 comments on commit e98c5ac

Please sign in to comment.