Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16876 dtx: misc fixes for DTX related logic #15942

Merged
merged 1 commit into from
Feb 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/dtx/dtx_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ dtx_free_dbca(struct dtx_batched_cont_args *dbca)
static inline uint64_t
dtx_sec2age(uint64_t sec)
{
uint64_t cur = daos_gettime_coarse();
uint64_t cur = daos_wallclock_secs();

if (unlikely(cur <= sec))
return 0;
Expand Down
4 changes: 3 additions & 1 deletion src/vos/lru_array.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2020-2023 Intel Corporation.
* (C) Copyright 2025 Hewlett Packard Enterprise Development LP
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -216,7 +217,8 @@ lrua_lookup_idx(struct lru_array *array, uint32_t idx, uint64_t key,

entry = &sub->ls_table[ent_idx];
if (entry->le_key == key) {
if (touch_mru && !array->la_evicting) {
if (touch_mru && !array->la_evicting &&
!(array->la_flags & LRU_FLAG_EVICT_MANUAL)) {
/** Only make mru if we are not evicting it */
lrua_move_to_mru(array, sub, entry, ent_idx);
}
Expand Down
70 changes: 24 additions & 46 deletions src/vos/vos_dtx.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
D_ASSERT(dae->dae_dth->dth_ent == dae); \
dae->dae_dth->dth_ent = NULL; \
} \
D_DEBUG(DB_TRACE, "Evicting lid "DF_DTI": lid=%lx\n", \
DP_DTI(&DAE_XID(dae)), DAE_LID(dae) & DTX_LID_SOLO_MASK); \
D_DEBUG(DB_IO, "Evicting DTX "DF_DTI": lid=%x\n", \
DP_DTI(&DAE_XID(dae)), DAE_LID(dae)); \
d_list_del_init(&dae->dae_link); \
lrua_evictx(cont->vc_dtx_array, \
(DAE_LID(dae) & DTX_LID_SOLO_MASK) - DTX_LID_RESERVED, \
Expand Down Expand Up @@ -187,6 +187,7 @@ dtx_act_ent_cleanup(struct vos_container *cont, struct vos_dtx_act_ent *dae,
D_FREE(dae->dae_records);
dae->dae_rec_cap = 0;
DAE_REC_CNT(dae) = 0;
dae->dae_need_release = 0;

if (!keep_df) {
dae->dae_df_off = UMOFF_NULL;
Expand Down Expand Up @@ -825,9 +826,6 @@ dtx_rec_release(struct vos_container *cont, struct vos_dtx_act_ent *dae, bool ab
UMOFF_P(dbd_off), DP_UUID(cont->vc_id));
}

if (rc == 0)
dae->dae_need_release = 0;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this the key change? The dae_need_release should be cleared after local tx committed instead of here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, originally we cleared such flag too early.

return rc;
}

Expand Down Expand Up @@ -894,6 +892,14 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t
*/
if (dae->dae_committing)
D_GOTO(out, rc = -DER_ALREADY);
} else {
struct dtx_handle *dth = vos_dth_get(cont->vc_pool->vp_sysdb);

D_ASSERT(dtx_is_valid_handle(dth));
D_ASSERT(dth->dth_ent != NULL);
D_ASSERT(dth->dth_solo);

dae = dth->dth_ent;
}

/* Generate committed DTX entry when it is not required to keep the active DTX entry. */
Expand All @@ -903,22 +909,8 @@ vos_dtx_commit_one(struct vos_container *cont, struct dtx_id *dti, daos_epoch_t
D_GOTO(out, rc = -DER_NOMEM);

DCE_CMT_TIME(dce) = cmt_time;
if (dae != NULL) {
DCE_XID(dce) = DAE_XID(dae);
DCE_EPOCH(dce) = DAE_EPOCH(dae);
} else {
struct dtx_handle *dth = vos_dth_get(false);

D_ASSERT(!cont->vc_pool->vp_sysdb);
D_ASSERT(dtx_is_valid_handle(dth));
D_ASSERT(dth->dth_solo);

dae = dth->dth_ent;
D_ASSERT(dae != NULL);

DCE_XID(dce) = *dti;
DCE_EPOCH(dce) = dth->dth_epoch;
}
DCE_XID(dce) = DAE_XID(dae);
DCE_EPOCH(dce) = DAE_EPOCH(dae);

d_iov_set(&riov, dce, sizeof(*dce));
rc = dbtree_upsert(cont->vc_dtx_committed_hdl, BTR_PROBE_EQ,
Expand Down Expand Up @@ -1153,15 +1145,15 @@ vos_dtx_alloc(struct umem_instance *umm, struct dtx_handle *dth)
DAE_INDEX(dae) = DTX_INDEX_INVAL;
dae->dae_dth = dth;

D_DEBUG(DB_IO, "Allocated new lid DTX: "DF_DTI" lid=%lx, dae=%p\n",
DP_DTI(&dth->dth_xid), DAE_LID(dae) & DTX_LID_SOLO_MASK, dae);
D_DEBUG(DB_IO, "Allocated new DTX " DF_DTI ", lid=%x, epoch " DF_U64 ", dae=%p\n",
DP_DTI(&dth->dth_xid), DAE_LID(dae), DAE_EPOCH(dae), dae);

d_iov_set(&kiov, &DAE_XID(dae), sizeof(DAE_XID(dae)));
d_iov_set(&riov, dae, sizeof(*dae));
rc = dbtree_upsert(cont->vc_dtx_active_hdl, BTR_PROBE_EQ,
DAOS_INTENT_UPDATE, &kiov, &riov, NULL);
if (rc == 0) {
dae->dae_start_time = daos_gettime_coarse();
dae->dae_start_time = daos_wallclock_secs();
d_list_add_tail(&dae->dae_link, &cont->vc_dtx_act_list);
if (dth->dth_epoch_owner)
d_list_add_tail(&dae->dae_order_link, &cont->vc_dtx_sorted_list);
Expand Down Expand Up @@ -1309,17 +1301,8 @@ vos_dtx_check_availability(daos_handle_t coh, uint32_t entry,

found = lrua_lookupx(cont->vc_dtx_array, (entry & DTX_LID_SOLO_MASK) - DTX_LID_RESERVED,
epoch, &dae);
if (!found) {
D_ASSERTF(!(entry & DTX_LID_SOLO_FLAG),
"non-committed solo entry %lu must be there, epoch "DF_X64", boundary "
DF_X64"\n", entry & DTX_LID_SOLO_MASK, epoch, cont->vc_solo_dtx_epoch);

D_DEBUG(DB_TRACE,
"Entry %d "DF_U64" not in lru array, it must be committed\n",
entry, epoch);

return ALB_AVAILABLE_CLEAN;
}
D_ASSERTF(found, "Non-committed DTX must be in active table: lid=%x, epoch="
DF_U64 ", boundary " DF_U64 "\n", entry, epoch, cont->vc_solo_dtx_epoch);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, I'm wondering why we need to tolerate this before? Is it because of the bug you fixed in this PR or other reason?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may because we wanted to make the system to be more robust at that time or related optimized/tolerated case has been removed. Anyway, consider current implementation, add assertion here will help us to detect corrupted DTX entry more early.


if (intent == DAOS_INTENT_PURGE) {
uint32_t age = d_hlc_age2sec(DAE_XID(dae).dti_hlc);
Expand Down Expand Up @@ -1708,15 +1691,10 @@ vos_dtx_deregister_record(struct umem_instance *umm, daos_handle_t coh,
if (!vos_dtx_is_normal_entry(entry))
return 0;

D_ASSERT(entry >= DTX_LID_RESERVED);

found = lrua_lookupx(vos_hdl2cont(coh)->vc_dtx_array, entry - DTX_LID_RESERVED,
epoch, &dae);
if (!found) {
D_WARN("Could not find active DTX record for lid=%d, epoch="
DF_U64"\n", entry, epoch);
return 0;
}
found = lrua_lookupx(vos_hdl2cont(coh)->vc_dtx_array,
(entry & DTX_LID_SOLO_MASK) - DTX_LID_RESERVED, epoch, &dae);
D_ASSERTF(found, "Could not find active DTX record for lid=%x, epoch=" DF_U64 "\n",
entry, epoch);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same question here, was there any other consideration on tolerating "!found" before? I'm worried that if deregister or check availability could be called before the in-memory tree is fully setup? (though that should not be tolerated as well).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Currently, we do not allow to access the DTX LRU array before its fully setup. So it is safe from this perspective.


/*
* NOTE: If the record to be deregistered (for free or overwrite, and so on) is referenced
Expand Down Expand Up @@ -2148,7 +2126,7 @@ vos_dtx_commit_internal(struct vos_container *cont, struct dtx_id dtis[],
struct vos_dtx_blob_df *dbd;
struct vos_dtx_blob_df *dbd_prev;
umem_off_t dbd_off;
uint64_t cmt_time = daos_gettime_coarse();
uint64_t cmt_time = daos_wallclock_secs();
int committed = 0;
int rc = 0;
int p = 0;
Expand Down Expand Up @@ -3215,7 +3193,7 @@ vos_dtx_act_reindex(struct vos_container *cont)
uint64_t min_eph = 0;
/* The largest diff for above pairs 'max_eph - min_eph'. */
uint64_t diff = 0;
uint64_t start_time = daos_gettime_coarse();
uint64_t start_time = daos_wallclock_secs();
int rc = 0;
int i;

Expand Down
Loading