From fc052c92cd763922a7a2d6851ace0b51b137b159 Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Wed, 19 Feb 2025 15:45:24 +0800 Subject: [PATCH] DAOS-16876 dtx: misc fixes for DTX related logic 1. Do not clear dae_need_release flag on DTX entry in DRAM until related local transaction has been successfully committed. Otherwise, it will skip subsequent DTX commit or abort this entry. 2. Do not allow the case that some DTX entry does not exist in DRAM but related modification target still references it. Add assertion check during vos_dtx_check_availability() and vos_dtx_deregister_record(). 3. Replace daos_gettime_coarse() with daos_wallclock_secs() for DTX time. Then DTX cleanup and DTX aggregation logic can use them after restart. 4. Bypass MRU operation during DTX LRU array lookup, that is unnecessary and may hide some potential bugs. Signed-off-by: Fan Yong --- src/dtx/dtx_common.c | 2 +- src/vos/lru_array.h | 4 ++- src/vos/vos_dtx.c | 70 +++++++++++++++----------------------------- 3 files changed, 28 insertions(+), 48 deletions(-) diff --git a/src/dtx/dtx_common.c b/src/dtx/dtx_common.c index 8544579a537..65252ac67f2 100644 --- a/src/dtx/dtx_common.c +++ b/src/dtx/dtx_common.c @@ -180,7 +180,7 @@ dtx_free_dbca(struct dtx_batched_cont_args *dbca) static inline uint64_t dtx_sec2age(uint64_t sec) { - uint64_t cur = daos_gettime_coarse(); + uint64_t cur = daos_wallclock_secs(); if (unlikely(cur <= sec)) return 0; diff --git a/src/vos/lru_array.h b/src/vos/lru_array.h index e6092ea5bd9..b18a5f8daa3 100644 --- a/src/vos/lru_array.h +++ b/src/vos/lru_array.h @@ -1,5 +1,6 @@ /** * (C) Copyright 2020-2023 Intel Corporation. + * (C) Copyright 2025 Hewlett Packard Enterprise Development LP * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -216,7 +217,8 @@ lrua_lookup_idx(struct lru_array *array, uint32_t idx, uint64_t key, entry = &sub->ls_table[ent_idx]; if (entry->le_key == key) { - if (touch_mru && !array->la_evicting) { + if (touch_mru && !array->la_evicting && + !(array->la_flags & LRU_FLAG_EVICT_MANUAL)) { /** Only make mru if we are not evicting it */ lrua_move_to_mru(array, sub, entry, ent_idx); } diff --git a/src/vos/vos_dtx.c b/src/vos/vos_dtx.c index 1eeea28ade1..d7eef3201e7 100644 --- a/src/vos/vos_dtx.c +++ b/src/vos/vos_dtx.c @@ -34,8 +34,8 @@ D_ASSERT(dae->dae_dth->dth_ent == dae); \ dae->dae_dth->dth_ent = NULL; \ } \ - D_DEBUG(DB_TRACE, "Evicting lid "DF_DTI": lid=%lx\n", \ - DP_DTI(&DAE_XID(dae)), DAE_LID(dae) & DTX_LID_SOLO_MASK); \ + D_DEBUG(DB_IO, "Evicting DTX "DF_DTI": lid=%x\n", \ + DP_DTI(&DAE_XID(dae)), DAE_LID(dae)); \ d_list_del_init(&dae->dae_link); \ lrua_evictx(cont->vc_dtx_array, \ (DAE_LID(dae) & DTX_LID_SOLO_MASK) - DTX_LID_RESERVED, \ @@ -187,6 +187,7 @@ dtx_act_ent_cleanup(struct vos_container *cont, struct vos_dtx_act_ent *dae, D_FREE(dae->dae_records); dae->dae_rec_cap = 0; DAE_REC_CNT(dae) = 0; + dae->dae_need_release = 0; if (!keep_df) { dae->dae_df_off = UMOFF_NULL; @@ -825,9 +826,6 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab UMOFF_P(dbd_off), DP_UUID(cont->vc_id)); } - if (rc == 0) - dae->dae_need_release = 0; - return rc; } @@ -894,6 +892,14 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t */ if (dae->dae_committing) D_GOTO(out, rc = -DER_ALREADY); + } else { + struct dtx_handle *dth = vos_dth_get(cont->vc_pool->vp_sysdb); + + D_ASSERT(dtx_is_valid_handle(dth)); + D_ASSERT(dth->dth_ent != NULL); + D_ASSERT(dth->dth_solo); + + dae = dth->dth_ent; } /* Generate committed DTX entry when it is not required to keep the active DTX entry. */ @@ -903,22 +909,8 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t D_GOTO(out, rc = -DER_NOMEM); DCE_CMT_TIME(dce) = cmt_time; - if (dae != NULL) { - DCE_XID(dce) = DAE_XID(dae); - DCE_EPOCH(dce) = DAE_EPOCH(dae); - } else { - struct dtx_handle *dth = vos_dth_get(false); - - D_ASSERT(!cont->vc_pool->vp_sysdb); - D_ASSERT(dtx_is_valid_handle(dth)); - D_ASSERT(dth->dth_solo); - - dae = dth->dth_ent; - D_ASSERT(dae != NULL); - - DCE_XID(dce) = *dti; - DCE_EPOCH(dce) = dth->dth_epoch; - } + DCE_XID(dce) = DAE_XID(dae); + DCE_EPOCH(dce) = DAE_EPOCH(dae); d_iov_set(&riov, dce, sizeof(*dce)); rc = dbtree_upsert(cont->vc_dtx_committed_hdl, BTR_PROBE_EQ, @@ -1153,15 +1145,15 @@ vos_dtx_alloc(struct umem_instance *umm, struct dtx_handle *dth) DAE_INDEX(dae) = DTX_INDEX_INVAL; dae->dae_dth = dth; - D_DEBUG(DB_IO, "Allocated new lid DTX: "DF_DTI" lid=%lx, dae=%p\n", - DP_DTI(&dth->dth_xid), DAE_LID(dae) & DTX_LID_SOLO_MASK, dae); + D_DEBUG(DB_IO, "Allocated new DTX " DF_DTI ", lid=%x, epoch " DF_U64 ", dae=%p\n", + DP_DTI(&dth->dth_xid), DAE_LID(dae), DAE_EPOCH(dae), dae); d_iov_set(&kiov, &DAE_XID(dae), sizeof(DAE_XID(dae))); d_iov_set(&riov, dae, sizeof(*dae)); rc = dbtree_upsert(cont->vc_dtx_active_hdl, BTR_PROBE_EQ, DAOS_INTENT_UPDATE, &kiov, &riov, NULL); if (rc == 0) { - dae->dae_start_time = daos_gettime_coarse(); + dae->dae_start_time = daos_wallclock_secs(); d_list_add_tail(&dae->dae_link, &cont->vc_dtx_act_list); if (dth->dth_epoch_owner) d_list_add_tail(&dae->dae_order_link, &cont->vc_dtx_sorted_list); @@ -1309,17 +1301,8 @@ vos_dtx_check_availability(daos_handle_t coh, uint32_t entry, found = lrua_lookupx(cont->vc_dtx_array, (entry & DTX_LID_SOLO_MASK) - DTX_LID_RESERVED, epoch, &dae); - if (!found) { - D_ASSERTF(!(entry & DTX_LID_SOLO_FLAG), - "non-committed solo entry %lu must be there, epoch "DF_X64", boundary " - DF_X64"\n", entry & DTX_LID_SOLO_MASK, epoch, cont->vc_solo_dtx_epoch); - - D_DEBUG(DB_TRACE, - "Entry %d "DF_U64" not in lru array, it must be committed\n", - entry, epoch); - - return ALB_AVAILABLE_CLEAN; - } + D_ASSERTF(found, "Non-committed DTX must be in active table: lid=%x, epoch=" + DF_U64 ", boundary " DF_U64 "\n", entry, epoch, cont->vc_solo_dtx_epoch); if (intent == DAOS_INTENT_PURGE) { uint32_t age = d_hlc_age2sec(DAE_XID(dae).dti_hlc); @@ -1708,15 +1691,10 @@ vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh, if (!vos_dtx_is_normal_entry(entry)) return 0; - D_ASSERT(entry >= DTX_LID_RESERVED); - - found = lrua_lookupx(vos_hdl2cont(coh)->vc_dtx_array, entry - DTX_LID_RESERVED, - epoch, &dae); - if (!found) { - D_WARN("Could not find active DTX record for lid=%d, epoch=" - DF_U64"\n", entry, epoch); - return 0; - } + found = lrua_lookupx(vos_hdl2cont(coh)->vc_dtx_array, + (entry & DTX_LID_SOLO_MASK) - DTX_LID_RESERVED, epoch, &dae); + D_ASSERTF(found, "Could not find active DTX record for lid=%x, epoch=" DF_U64 "\n", + entry, epoch); /* * NOTE: If the record to be deregistered (for free or overwrite, and so on) is referenced @@ -2148,7 +2126,7 @@ vos_dtx_commit_internal(struct vos_container *cont, struct dtx_id dtis[], struct vos_dtx_blob_df *dbd; struct vos_dtx_blob_df *dbd_prev; umem_off_t dbd_off; - uint64_t cmt_time = daos_gettime_coarse(); + uint64_t cmt_time = daos_wallclock_secs(); int committed = 0; int rc = 0; int p = 0; @@ -3215,7 +3193,7 @@ vos_dtx_act_reindex(struct vos_container *cont) uint64_t min_eph = 0; /* The largest diff for above pairs 'max_eph - min_eph'. */ uint64_t diff = 0; - uint64_t start_time = daos_gettime_coarse(); + uint64_t start_time = daos_wallclock_secs(); int rc = 0; int i;