From 3f6c6b6aff9468d0f68a731c496a6ef38fd5ca21 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Fri, 14 Feb 2025 23:12:09 +0800 Subject: [PATCH] DAOS-16876 tests: test for invalid DTX entry DTX logic should be able to detect and handle invalid DTX entry, such as the one has dangling referenced record. Add new test case to simulate the case of dirty DTX entry was left on disk after commit. Signed-off-by: Fan Yong --- src/dtx/dtx_common.c | 4 +- src/dtx/dtx_rpc.c | 3 +- src/dtx/dtx_srv.c | 6 +- src/include/daos/common.h | 4 +- src/include/daos_srv/vos_types.h | 2 + src/tests/suite/daos_base_tx.c | 95 +++++++++++++++++++++++--------- src/vos/vos_dtx.c | 30 +++++++--- 7 files changed, 107 insertions(+), 37 deletions(-) diff --git a/src/dtx/dtx_common.c b/src/dtx/dtx_common.c index 8544579a537..e4b7abe7f7c 100644 --- a/src/dtx/dtx_common.c +++ b/src/dtx/dtx_common.c @@ -1440,7 +1440,9 @@ dtx_leader_end(struct dtx_leader_handle *dlh, struct ds_cont_hdl *coh, int resul D_WARN(DF_UUID": Fail to sync %s commit DTX "DF_DTI": "DF_RC"\n", DP_UUID(cont->sc_uuid), dlh->dlh_coll ? "collective" : "regular", DP_DTI(&dth->dth_xid), DP_RC(rc)); - if (likely(dtx_batched_ult_max != 0)) { + if (likely(dtx_batched_ult_max != 0 && + !DAOS_FAIL_CHECK(DAOS_DTX_PARTIAL_COMMIT_P1) && + !DAOS_FAIL_CHECK(DAOS_DTX_PARTIAL_COMMIT_P2))) { dth->dth_sync = 0; goto cache; } diff --git a/src/dtx/dtx_rpc.c b/src/dtx/dtx_rpc.c index 49b972d294d..38f8f0952f5 100644 --- a/src/dtx/dtx_rpc.c +++ b/src/dtx/dtx_rpc.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -411,7 +412,7 @@ dtx_req_list_send(struct dtx_common_args *dca, bool is_reentrance) dca->dca_drr->drr_result = 0; if (unlikely(dra->dra_opc == DTX_COMMIT && dca->dca_i == 0 && - DAOS_FAIL_CHECK(DAOS_DTX_FAIL_COMMIT))) + DAOS_FAIL_CHECK(DAOS_DTX_PARTIAL_COMMIT_P1))) dtx_req_send(dca->dca_drr, 1); else dtx_req_send(dca->dca_drr, dca->dca_epoch); diff --git a/src/dtx/dtx_srv.c b/src/dtx/dtx_srv.c index b1541ba94c8..0d0e1fd106e 100644 --- a/src/dtx/dtx_srv.c +++ b/src/dtx/dtx_srv.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -189,7 +190,7 @@ dtx_handler(crt_rpc_t *rpc) break; if (unlikely(din->di_epoch == 1)) - D_GOTO(out, rc = -DER_IO); + daos_fail_loc_set(DAOS_DTX_PARTIAL_COMMIT_P2); while (i < din->di_dtx_array.ca_count) { if (i + count > din->di_dtx_array.ca_count) @@ -205,6 +206,9 @@ dtx_handler(crt_rpc_t *rpc) i += count; } + if (unlikely(din->di_epoch == 1)) + D_GOTO(out, rc = -DER_IO); + if (din->di_flags.ca_count > 0) flags = din->di_flags.ca_arrays; diff --git a/src/include/daos/common.h b/src/include/daos/common.h index 2225a417c47..62759eaa8ce 100644 --- a/src/include/daos/common.h +++ b/src/include/daos/common.h @@ -848,11 +848,13 @@ enum { #define DAOS_DTX_RESEND_DELAY1 (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x48) #define DAOS_DTX_UNCERTAIN (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x49) #define DAOS_DTX_RESYNC_DELAY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4a) -#define DAOS_DTX_FAIL_COMMIT (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4b) #define DAOS_OBJ_SYNC_RETRY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4c) #define DAOS_OBJ_COLL_SPARSE (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4d) +#define DAOS_DTX_PARTIAL_COMMIT_P1 (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4e) +#define DAOS_DTX_PARTIAL_COMMIT_P2 (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x4f) + #define DAOS_NVME_FAULTY (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x50) #define DAOS_NVME_WRITE_ERR (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x51) #define DAOS_NVME_READ_ERR (DAOS_FAIL_UNIT_TEST_GROUP_LOC | 0x52) diff --git a/src/include/daos_srv/vos_types.h b/src/include/daos_srv/vos_types.h index 4b9273f7038..28436b53e4e 100644 --- a/src/include/daos_srv/vos_types.h +++ b/src/include/daos_srv/vos_types.h @@ -61,6 +61,8 @@ enum dtx_entry_flags { DTE_PARTIAL_COMMITTED = (1 << 5), /* The DTX epoch is sorted locally. */ DTE_EPOCH_SORTED = (1 << 6), + /* The active DTX entry is redundant, should be discarded. */ + DTE_REDUN = (1 << 7), }; struct dtx_entry { diff --git a/src/tests/suite/daos_base_tx.c b/src/tests/suite/daos_base_tx.c index 765c0677ebf..99df056d155 100644 --- a/src/tests/suite/daos_base_tx.c +++ b/src/tests/suite/daos_base_tx.c @@ -1,5 +1,6 @@ /** * (C) Copyright 2019-2024 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -847,41 +848,85 @@ dtx_20(void **state) static void dtx_21(void **state) { - test_arg_t *arg = *state; - char *update_buf; - const char *dkey = dts_dtx_dkey; - const char *akey = dts_dtx_akey; - daos_obj_id_t oid; - struct ioreq req; + test_arg_t *arg = *state; + char *update_buf; + const char *dkey = dts_dtx_dkey; + const char *akey = dts_dtx_akey; + daos_obj_id_t oid; + struct ioreq req; + daos_pool_info_t info_prep = { 0 }; + daos_pool_info_t info_post = { 0 }; + int rc; FAULT_INJECTION_REQUIRED(); - print_message("do not abort partially committed DTX\n"); + print_message("handle partially committed DTX\n"); if (!test_runable(arg, dts_dtx_replica_cnt)) return; - D_ALLOC(update_buf, dts_dtx_iosize); - assert_non_null(update_buf); - dts_buf_render(update_buf, dts_dtx_iosize); - - oid = daos_test_oid_gen(arg->coh, dts_dtx_class, 0, 0, arg->myrank); - ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); - - dtx_set_fail_loc(arg, DAOS_DTX_FAIL_COMMIT); /* - * The DTX that create the object will trigger synchronous commit. One of - * the replicas will fail commit locally because of DAOS_DTX_FAIL_COMMIT. - * But the other replicas will commit successfully, then related data can - * be accessed. + * The DTX that create the object will trigger synchronous commit. One non-leader + * replica will commit locally but left the active DTX entry on disk, then report + * IO failure, that will cause the DTX on leader to be marked as partially commit. + * + * Then restart the system, such DTX will be re-loaded from disk. The subsequent + * DTX resync will re-commit such partial committed DTX. The logic on non-leader + * should be able to detect and handle such re-committed DTX; otherwise, it will + * trigger assertion. + * + * To simplify the test logic, only perform the test on rank_0. */ - insert_single(dkey, akey, 0, update_buf, dts_dtx_iosize, DAOS_TX_NONE, &req); - dtx_set_fail_loc(arg, 0); - dtx_check_replicas(dkey, akey, "update_succ", update_buf, dts_dtx_iosize, &req); + par_barrier(PAR_COMM_WORLD); + if (arg->myrank == 0) { + rc = daos_pool_query(arg->pool.poh, NULL, &info_prep, NULL, NULL); + assert_true(rc == 0); - D_FREE(update_buf); - ioreq_fini(&req); + D_ALLOC(update_buf, dts_dtx_iosize); + assert_non_null(update_buf); + dts_buf_render(update_buf, dts_dtx_iosize); + + oid = daos_test_oid_gen(arg->coh, OC_RP_3GX, 0, 0, arg->myrank); + ioreq_init(&req, arg->coh, oid, DAOS_IOD_ARRAY, arg); + + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, + DAOS_DTX_PARTIAL_COMMIT_P1, 0, NULL); + + print_message("Generating partially committed DTX ...\n"); + insert_single(dkey, akey, 0, update_buf, dts_dtx_iosize, DAOS_TX_NONE, &req); + + D_FREE(update_buf); + ioreq_fini(&req); + daos_debug_set_params(arg->group, -1, DMG_KEY_FAIL_LOC, 0, 0, NULL); + } + par_barrier(PAR_COMM_WORLD); + + test_teardown_cont_hdl(arg); + daos_pool_disconnect(arg->pool.poh, NULL); + arg->pool.poh = DAOS_HDL_INVAL; + + par_barrier(PAR_COMM_WORLD); + if (arg->myrank == 0) { + print_message("Stopping system ...\n"); + dmg_system_stop_rank(dmg_config_file, CRT_NO_RANK, false); + + print_message("Starting system ...\n"); + dmg_system_start_rank(dmg_config_file, CRT_NO_RANK); + + /* Sleep a while for DTX resync. */ + sleep(30); + + rc = daos_pool_connect(arg->pool.pool_str, arg->group, DAOS_PC_RW, + &arg->pool.poh, NULL, NULL); + assert_true(rc == 0); + + rc = daos_pool_query(arg->pool.poh, NULL, &info_post, NULL, NULL); + assert_true(rc == 0); + + assert_int_equal(info_prep.pi_ndisabled, info_post.pi_ndisabled); + } + par_barrier(PAR_COMM_WORLD); } static void @@ -1047,7 +1092,7 @@ static const struct CMUnitTest dtx_tests[] = { dtx_19, NULL, test_case_teardown}, {"DTX20: race between DTX refresh and DTX resync", dtx_20, dtx_base_rf1_setup, rebuild_sub_teardown}, - {"DTX21: do not abort partially committed DTX", + {"DTX21: handle partially committed DTX", dtx_21, dtx_base_rf0_setup, rebuild_sub_teardown}, {"DTX22: iteration does not return aborted DTX", dtx_22, NULL, test_case_teardown}, diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 1eeea28ade1..b27cd460b0e 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -634,12 +634,10 @@ do_dtx_rec_release(struct umem_instance *umm, struct vos_container *cont, } if (rc == -DER_NONEXIST) { - struct vos_tls *tls = vos_tls_get(false); - D_WARN("DTX record no longer exists, may indicate some corruption: " DF_DTI " type %u, discard\n", DP_DTI(&DAE_XID(dae)), dtx_umoff_flag2type(rec)); - d_tm_inc_gauge(tls->vtl_invalid_dtx, 1); + d_tm_inc_gauge(vos_tls_get(cont->vc_pool->vp_sysdb)->vtl_invalid_dtx, 1); } return rc; @@ -712,6 +710,24 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab } } + /* + * Inject fault to simulate the case of DTX records are left on disk after commit. + * Then when container re-open (such as for engine restart), it will be re-loaded + * from disk and then trigger DTX re-commit via DTX resync. + */ + if (unlikely(DAOS_FAIL_CHECK(DAOS_DTX_PARTIAL_COMMIT_P2))) { + rc = umem_tx_add_ptr(umm, &dae_df->dae_flags, sizeof(dae_df->dae_flags)); + if (rc != 0) + return rc; + + dae_df->dae_flags |= DTE_REDUN; + return 0; + } + + /* It is expected to detect some invalid DTX records. Otherwise assert for test. */ + if (DAE_REC_CNT(dae) != 0 && unlikely(DAE_FLAGS(dae) & DTE_REDUN)) + D_ASSERT(invalid); + if (!UMOFF_IS_NULL(dae_df->dae_rec_off)) { rc = umem_free(umm, dae_df->dae_rec_off); if (rc != 0) @@ -2313,8 +2329,7 @@ vos_dtx_post_handle(struct vos_container *cont, } if (!abort && dces != NULL) { - struct vos_tls *tls = vos_tls_get(false); - int j = 0; + int j = 0; D_ASSERT(cont->vc_pool->vp_sysdb == false); for (i = 0; i < count; i++) { @@ -2325,7 +2340,7 @@ vos_dtx_post_handle(struct vos_container *cont, if (j > 0) { cont->vc_dtx_committed_count += j; cont->vc_pool->vp_dtx_committed_count += j; - d_tm_inc_gauge(tls->vtl_committed, j); + d_tm_inc_gauge(vos_tls_get(cont->vc_pool->vp_sysdb)->vtl_committed, j); } } @@ -2951,7 +2966,6 @@ vos_dtx_set_flags(daos_handle_t coh, struct dtx_id dtis[], int count, uint32_t f int vos_dtx_aggregate(daos_handle_t coh) { - struct vos_tls *tls = vos_tls_get(false); struct vos_container *cont; struct vos_cont_df *cont_df; struct umem_instance *umm; @@ -3070,7 +3084,7 @@ vos_dtx_aggregate(daos_handle_t coh) cont->vc_dtx_committed_count -= count; cont->vc_pool->vp_dtx_committed_count -= count; - d_tm_dec_gauge(tls->vtl_committed, count); + d_tm_dec_gauge(vos_tls_get(cont->vc_pool->vp_sysdb)->vtl_committed, count); } DL_CDEBUG(rc != 0, DLOG_ERR, DB_IO, rc,