Skip to content

Commit

Permalink
DAOS-16876 vos: discard invalid DTX when commit or abort - b26
Browse files Browse the repository at this point in the history
When committing or aborting a DTX, we check whether it is a valid
entry. If it is invalid, we discard it with a warning
message and increment the related metrics counter.

It may not be a perfect solution, but it helps the
user clean up the system efficiently.

Signed-off-by: Jeff Olivier <[email protected]>
Signed-off-by: Fan Yong <[email protected]>
  • Loading branch information
Nasf-Fan committed Feb 13, 2025
1 parent 1d2d343 commit b8498f7
Show file tree
Hide file tree
Showing 12 changed files with 154 additions and 34 deletions.
13 changes: 13 additions & 0 deletions src/include/daos_srv/evtree.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2017-2023 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -810,4 +811,16 @@ evt_feats_get(struct evt_root *root)
*/
int evt_feats_set(struct evt_root *root, struct umem_instance *umm, uint64_t feats);

/** Validate the provided evt descriptor.
*
* Note: It is designed for catastrophic recovery. Not to perform at run-time.
*
* \param evt[in]     evt descriptor to check; NULL is reported as invalid
* \param dtx_lid[in] local id of the DTX entry the evt is supposed to belong to
*
* \return true if evt is non-NULL, carries the expected descriptor magic and
*         its recorded DTX local id equals \a dtx_lid; false otherwise.
**/
bool
evt_desc_is_valid(const struct evt_desc *evt, uint32_t dtx_lid);

#endif /* __DAOS_EV_TREE_H__ */
2 changes: 1 addition & 1 deletion src/vos/evt_priv.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2017-2022 Intel Corporation.

Check failure on line 2 in src/vos/evt_priv.h

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -115,7 +116,6 @@ struct evt_context {
umem_off2ptr(evt_umm(tcx), offset)

#define EVT_NODE_MAGIC 0xf00d
#define EVT_DESC_MAGIC 0xbeefdead

/** Convert an offset to a evtree node descriptor
* \param[IN] tcx Tree context
Expand Down
10 changes: 10 additions & 0 deletions src/vos/evtree.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2017-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -4086,3 +4087,12 @@ evt_feats_set(struct evt_root *root, struct umem_instance *umm, uint64_t feats)
return rc;
}

/**
 * Report whether \a evt is a usable evtree descriptor that belongs to the DTX
 * identified by \a dtx_lid.
 *
 * \param evt[in]     descriptor to inspect; NULL yields false
 * \param dtx_lid[in] DTX local id the descriptor is expected to carry
 *
 * \return true only when \a evt is non-NULL, its dc_magic equals
 *         EVT_DESC_MAGIC and its dc_dtx equals \a dtx_lid.
 */
bool
evt_desc_is_valid(const struct evt_desc *evt, uint32_t dtx_lid)
{
	/* Evaluation order matters: the NULL test guards both dereferences. */
	return evt != NULL && evt->dc_magic == EVT_DESC_MAGIC && evt->dc_dtx == dtx_lid;
}
40 changes: 24 additions & 16 deletions src/vos/ilog.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.

Check failure on line 2 in src/vos/ilog.c

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -390,16 +391,18 @@ ilog_create(struct umem_instance *umm, struct ilog_df *root)
return rc;
}

/**
 * Assert that \a root_df is a non-NULL ilog root whose lr_magic passes
 * ILOG_MAGIC_VALID. On failure the assertion message reports the root pointer
 * and its magic (0 is printed for the magic when the pointer is NULL).
 */
#define ILOG_ASSERT_VALID(root_df) \
do { \
struct ilog_root *_root; \
\
_root = (struct ilog_root *)(root_df); \
D_ASSERTF((_root != NULL) && \
ILOG_MAGIC_VALID(_root->lr_magic), \
"Invalid ilog root detected %p magic=%#x\n", \
_root, _root == NULL ? 0 : _root->lr_magic); \
} while (0)
/**
 * Check whether \a root_df points at an ilog root with a recognized magic.
 *
 * A NULL \a root_df trips D_ASSERT. A non-NULL root whose lr_magic fails
 * ILOG_MAGIC_VALID is reported through D_WARN instead of asserting, so the
 * caller decides how to react (return an error, assert, ...).
 *
 * \a root_df is evaluated exactly once, so passing an expression with side
 * effects is safe.
 *
 * \param root_df	pointer to the ilog root to check
 *
 * \return (value of the statement expression) true if the magic is valid,
 *         false otherwise.
 */
#define ILOG_CHECK_VALID(root_df) \
	({ \
		struct ilog_root *_ilog_root = (struct ilog_root *)(root_df); \
		bool _ilog_valid; \
		\
		D_ASSERT(_ilog_root != NULL); \
		/* The pointer is known non-NULL from here on. */ \
		_ilog_valid = ILOG_MAGIC_VALID(_ilog_root->lr_magic); \
		if (!_ilog_valid) \
			D_WARN("Invalid ilog root detected %p magic=%#x\n", _ilog_root, \
			       _ilog_root->lr_magic); \
		_ilog_valid; \
	})

int
ilog_open(struct umem_instance *umm, struct ilog_df *root,
Expand All @@ -408,7 +411,8 @@ ilog_open(struct umem_instance *umm, struct ilog_df *root,
struct ilog_context *lctx;
int rc;

ILOG_ASSERT_VALID(root);
if (!ILOG_CHECK_VALID(root))
return -DER_NONEXIST;

rc = ilog_ctx_create(umm, (struct ilog_root *)root, cbs, &lctx);
if (rc != 0)
Expand Down Expand Up @@ -474,7 +478,7 @@ ilog_destroy(struct umem_instance *umm,
int rc = 0;
struct ilog_array_cache cache = {0};

ILOG_ASSERT_VALID(root);
D_ASSERT(ILOG_CHECK_VALID(root));

rc = ilog_tx_begin(&lctx);
if (rc != 0) {
Expand Down Expand Up @@ -984,8 +988,12 @@ ilog_modify(daos_handle_t loh, const struct ilog_id *id_in,
"%s in incarnation log " DF_X64 " status: rc=" DF_RC " tree_version: %d\n",
opc_str[opc], id_in->id_epoch, DP_RC(rc), ilog_mag2ver(lctx->ic_root->lr_magic));

if (rc == 0 && version != ilog_mag2ver(lctx->ic_root->lr_magic) &&
(opc == ILOG_OP_PERSIST || opc == ILOG_OP_ABORT)) {
if (rc == 0 && opc != ILOG_OP_UPDATE) {
if (version == ilog_mag2ver(lctx->ic_root->lr_magic)) {
D_WARN("ilog entry on %s doesn't exist\n", opc_str[opc]);
return -DER_NONEXIST;
}

/** If we persisted or aborted an entry successfully,
* invoke the callback, if applicable but without
* deregistration
Expand Down Expand Up @@ -1213,7 +1221,7 @@ ilog_fetch(struct umem_instance *umm, struct ilog_df *root_df,
int rc = 0;
bool retry;

ILOG_ASSERT_VALID(root_df);
D_ASSERT(ILOG_CHECK_VALID(root_df));

root = (struct ilog_root *)root_df;

Expand Down Expand Up @@ -1539,7 +1547,7 @@ ilog_aggregate(struct umem_instance *umm, struct ilog_df *ilog,

root = lctx->ic_root;

ILOG_ASSERT_VALID(root);
D_ASSERT(ILOG_CHECK_VALID(root));

D_ASSERT(!ilog_empty(root)); /* ilog_fetch should have failed */

Expand Down
18 changes: 18 additions & 0 deletions src/vos/tests/vts_ilog.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.

Check failure on line 2 in src/vos/tests/vts_ilog.c

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -530,6 +531,12 @@ ilog_test_update(void **state)
rc = entries_check(umm, ilog, &ilog_callbacks, NULL, 0, entries);
assert_rc_equal(rc, 0);

/* Test non-existent tx */
id.id_epoch = epoch;
id.id_tx_id = current_tx_id.id_tx_id + 4000;
rc = ilog_persist(loh, &id);
assert_rc_equal(rc, -DER_NONEXIST);

/* Commit the punch ilog. */
id.id_epoch = epoch;
id.id_tx_id = current_tx_id.id_tx_id;
Expand Down Expand Up @@ -668,6 +675,12 @@ ilog_test_abort(void **state)
rc = entries_check(umm, ilog, &ilog_callbacks, NULL, 0, entries);
assert_rc_equal(rc, 0);

/* Test non-existent tx */
id = current_tx_id;
id.id_tx_id += 400;
rc = ilog_abort(loh, &id);
assert_rc_equal(rc, -DER_NONEXIST);

id = current_tx_id;
rc = ilog_abort(loh, &id);
LOG_FAIL(rc, 0, "Failed to abort log entry\n");
Expand Down Expand Up @@ -735,6 +748,11 @@ ilog_test_abort(void **state)
rc = ilog_destroy(umm, &ilog_callbacks, ilog);
assert_rc_equal(rc, 0);

/** Test open of "reallocated" ilog */
memset(ilog, 0xa1, sizeof(*ilog));
rc = ilog_open(umm, ilog, &ilog_callbacks, false, &loh);
assert_rc_equal(rc, -DER_NONEXIST);

assert_true(d_list_empty(&fake_tx_list));
ilog_free_root(umm, ilog);
}
Expand Down
6 changes: 6 additions & 0 deletions src/vos/vos_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,12 @@ vos_tls_init(int tags, int xs_id, int tgt_id)
D_WARN("Failed to create committed cnt sensor: "DF_RC"\n",
DP_RC(rc));

rc = d_tm_add_metric(&tls->vtl_invalid_dtx, D_TM_STATS_GAUGE,
"Number of invalid active DTX", "entries",
"io/dtx/invalid/tgt_%u", tgt_id);
if (rc)
D_WARN("Failed to create invalid DTX cnt sensor: " DF_RC "\n", DP_RC(rc));

rc = d_tm_add_metric(&tls->vtl_obj_cnt, D_TM_GAUGE,
"Number of cached vos object", "entry",
"mem/vos/vos_obj_%u/tgt_%u",
Expand Down
66 changes: 49 additions & 17 deletions src/vos/vos_dtx.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -573,7 +574,7 @@ dtx_ilog_rec_release(struct umem_instance *umm, struct vos_container *cont,

ilog_close(loh);

if (rc != 0)
if (rc != 0 && rc != -DER_NONEXIST)
D_ERROR("Failed to release ilog rec for "DF_DTI", abort %s: "DF_RC"\n",
DP_DTI(&DAE_XID(dae)), abort ? "yes" : "no", DP_RC(rc));

Expand All @@ -598,6 +599,11 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont,
struct vos_irec_df *svt;

svt = umem_off2ptr(umm, umem_off2offset(rec));

if (!vos_irec_is_valid(svt, DAE_LID(dae))) {
rc = -DER_NONEXIST;
break;
}
if (abort) {
if (DAE_INDEX(dae) != DTX_INDEX_INVAL) {
rc = umem_tx_add_ptr(umm, &svt->ir_dtx,
Expand All @@ -621,6 +627,12 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont,
struct evt_desc *evt;

evt = umem_off2ptr(umm, umem_off2offset(rec));

if (!evt_desc_is_valid(evt, DAE_LID(dae))) {
rc = -DER_NONEXIST;
break;
}

if (abort) {
if (DAE_INDEX(dae) != DTX_INDEX_INVAL) {
rc = umem_tx_add_ptr(umm, &evt->dc_dtx,
Expand Down Expand Up @@ -648,6 +660,11 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont,
break;
}

if (unlikely(rc == -DER_NONEXIST))
D_WARN("DTX record no longer exists, may indicate some corruption: "
DF_DTI " type %u, discard\n",
DP_DTI(&DAE_XID(dae)), dtx_umoff_flag2type(rec));

return rc;
}

Expand All @@ -657,6 +674,8 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab
struct umem_instance *umm = vos_cont2umm(cont);
struct vos_dtx_act_ent_df *dae_df;
struct vos_dtx_blob_df *dbd;
struct vos_tls *tls = vos_tls_get(false);
bool invalid = false;
int count;
int i;
int rc = 0;
Expand Down Expand Up @@ -685,42 +704,55 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab
abort ? "abort" : "commit", DP_DTI(&DAE_XID(dae)), dbd,
DP_UUID(cont->vc_pool->vp_id), DP_UUID(cont->vc_id));

if (dae->dae_records != NULL) {
/* Handle DTX records as FIFO order to find out potential invalid DTX earlier. */

if (DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT)
count = DTX_INLINE_REC_CNT;
else
count = DAE_REC_CNT(dae);

for (i = 0; i < count; i++) {
rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort);
if (unlikely(rc == -DER_NONEXIST)) {
d_tm_inc_gauge(tls->vtl_invalid_dtx, 1);
invalid = true;
break;
}
if (rc != 0)
return rc;
}

if (!invalid && dae->dae_records != NULL) {
D_ASSERT(DAE_REC_CNT(dae) > DTX_INLINE_REC_CNT);
D_ASSERT(!UMOFF_IS_NULL(dae_df->dae_rec_off));

for (i = DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT - 1; i >= 0; i--) {
for (i = 0; i < DAE_REC_CNT(dae) - DTX_INLINE_REC_CNT; i++) {
rc = do_dtx_rec_release(umm, cont, dae, dae->dae_records[i], abort);
if (unlikely(rc == -DER_NONEXIST)) {
d_tm_inc_gauge(tls->vtl_invalid_dtx, 1);
invalid = true;
break;
}
if (rc != 0)
return rc;
}
}

if (!UMOFF_IS_NULL(dae_df->dae_rec_off)) {
rc = umem_free(umm, dae_df->dae_rec_off);
if (rc != 0)
return rc;

if (keep_act) {
if (!invalid && keep_act) {
rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_off, sizeof(dae_df->dae_rec_off));
if (rc != 0)
return rc;

dae_df->dae_rec_off = UMOFF_NULL;
}

count = DTX_INLINE_REC_CNT;
} else {
D_ASSERT(DAE_REC_CNT(dae) <= DTX_INLINE_REC_CNT);

count = DAE_REC_CNT(dae);
}

for (i = count - 1; i >= 0; i--) {
rc = do_dtx_rec_release(umm, cont, dae, DAE_REC_INLINE(dae)[i], abort);
if (rc != 0)
return rc;
}

if (keep_act) {
if (!invalid && keep_act) {
/* When re-commit partial committed DTX, the count can be zero. */
if (dae_df->dae_rec_cnt > 0) {
rc = umem_tx_add_ptr(umm, &dae_df->dae_rec_cnt,
Expand Down
3 changes: 3 additions & 0 deletions src/vos/vos_ilog.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2019-2024 Intel Corporation.

Check failure on line 2 in src/vos/vos_ilog.c

View workflow job for this annotation

GitHub Actions / Copyright check

Copyright out of date
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -427,6 +428,7 @@ int vos_ilog_update_(struct vos_container *cont, struct ilog_df *ilog,

vos_ilog_desc_cbs_init(&cbs, vos_cont2hdl(cont));
rc = ilog_open(vos_cont2umm(cont), ilog, &cbs, dth == NULL, &loh);
D_ASSERTF(rc != -DER_NONEXIST, "Uncorrectable incarnation log corruption detected");
if (rc != 0) {
D_ERROR("Could not open incarnation log: "DF_RC"\n", DP_RC(rc));
return rc;
Expand Down Expand Up @@ -522,6 +524,7 @@ vos_ilog_punch_(struct vos_container *cont, struct ilog_df *ilog,
punch_log:
vos_ilog_desc_cbs_init(&cbs, vos_cont2hdl(cont));
rc = ilog_open(vos_cont2umm(cont), ilog, &cbs, dth == NULL, &loh);
D_ASSERTF(rc != -DER_NONEXIST, "Uncorrectable incarnation log corruption detected");
if (rc != 0) {
D_ERROR("Could not open incarnation log: "DF_RC"\n", DP_RC(rc));
return rc;
Expand Down
15 changes: 15 additions & 0 deletions src/vos/vos_internal.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/**
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -454,6 +455,8 @@ struct vos_dtx_cmt_ent {
#define DCE_EPOCH(dce) ((dce)->dce_base.dce_epoch)
#define DCE_CMT_TIME(dce) ((dce)->dce_base.dce_cmt_time)

/** Magic value expected in evt_desc::dc_magic; evt_desc_is_valid() rejects descriptors without it. */
#define EVT_DESC_MAGIC 0xbeefdead

extern uint64_t vos_evt_feats;

/** Flags for internal use - Bit 63 can be used for another purpose so as to
Expand Down Expand Up @@ -1858,4 +1861,16 @@ vos_io_scm(struct vos_pool *pool, daos_iod_type_t type, daos_size_t size, enum v
int
vos_insert_oid(struct dtx_handle *dth, struct vos_container *cont, daos_unit_oid_t *oid);

/** Validate the provided svt.
*
* Note: It is designed for catastrophic recovery. Not to perform at run-time.
*
* \param svt[in]     record (struct vos_irec_df) to check
* \param dtx_lid[in] local id of the DTX entry the svt is supposed to belong to
*
* \return true if svt is valid.
**/
bool
vos_irec_is_valid(const struct vos_irec_df *svt, uint32_t dtx_lid);

#endif /* __VOS_INTERNAL_H__ */
Loading

0 comments on commit b8498f7

Please sign in to comment.