From aee0149222acbe060b01ead109e68b9faa406e5a Mon Sep 17 00:00:00 2001 From: Ken Cain Date: Mon, 3 Feb 2025 10:37:55 -0500 Subject: [PATCH] DAOS-16768 pool: larger ABT ULT stack sizes (#15822) With this change, three ULTs in pool and container code launched via ds_pool_thread_collective() are changed to specify a larger ("deep") stack size of 64KiB rather than a default 16KiB stack size; i.e., the flags parameter is specified as DSS_ULT_DEEP_STACK. The three ULT function entrypoints are: cont_open_one, cont_snap_update_one, and update_vos_prop_on_targets. Before this change, intermittently in CI testing, shortly after daos_engine startup, a dmg pool list (pool query on the back end) would occasionally result in a segmentation fault in an engine, in these three particular areas of the code. Specifically, the faults occurred within ABT thread creation, inside ABTI_mem_pool_alloc(). This change is based on a guess that the stack size parameter may have some effect. Signed-off-by: Kenneth Cain --- src/container/srv_target.c | 11 ++++++----- src/pool/srv_target.c | 5 +++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/container/srv_target.c b/src/container/srv_target.c index 4ebe4aecd10..2718cbbc84a 100644 --- a/src/container/srv_target.c +++ b/src/container/srv_target.c @@ -1792,8 +1792,9 @@ ds_cont_tgt_open(uuid_t pool_uuid, uuid_t cont_hdl_uuid, DP_UUID(pool_uuid), DP_UUID(cont_uuid), DP_UUID(cont_hdl_uuid)); retry: - rc = ds_pool_thread_collective(pool_uuid, PO_COMP_ST_NEW | PO_COMP_ST_DOWN | - PO_COMP_ST_DOWNOUT, cont_open_one, &arg, 0); + rc = ds_pool_thread_collective(pool_uuid, + PO_COMP_ST_NEW | PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT, + cont_open_one, &arg, DSS_ULT_DEEP_STACK); if (rc != 0) { if (rc == -DER_AGAIN) { dss_sleep(50); @@ -2105,9 +2106,9 @@ ds_cont_tgt_snapshots_update(uuid_t pool_uuid, uuid_t cont_uuid, * the up targets in this scenario. The target property will be updated * upon initiating container aggregation. 
*/ - return ds_pool_thread_collective(pool_uuid, PO_COMP_ST_NEW | PO_COMP_ST_DOWN | - PO_COMP_ST_DOWNOUT | PO_COMP_ST_UP, - cont_snap_update_one, &args, 0); + return ds_pool_thread_collective( + pool_uuid, PO_COMP_ST_NEW | PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT | PO_COMP_ST_UP, + cont_snap_update_one, &args, DSS_ULT_DEEP_STACK); } void diff --git a/src/pool/srv_target.c b/src/pool/srv_target.c index bd6ec4f9c11..3dccac41e1b 100644 --- a/src/pool/srv_target.c +++ b/src/pool/srv_target.c @@ -2256,8 +2256,9 @@ ds_pool_tgt_prop_update(struct ds_pool *pool, struct pool_iv_prop *iv_prop) arg.uvp_checkpoint_props_changed = 1; } - ret = ds_pool_thread_collective(pool->sp_uuid, PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT | - PO_COMP_ST_NEW, update_vos_prop_on_targets, &arg, 0); + ret = ds_pool_thread_collective(pool->sp_uuid, + PO_COMP_ST_DOWN | PO_COMP_ST_DOWNOUT | PO_COMP_ST_NEW, + update_vos_prop_on_targets, &arg, DSS_ULT_DEEP_STACK); if (ret != 0) return ret;