diff --git a/src/bio/bio_xstream.c b/src/bio/bio_xstream.c index 00b9b16b19f..15a6b2270ca 100644 --- a/src/bio/bio_xstream.c +++ b/src/bio/bio_xstream.c @@ -293,11 +293,12 @@ bio_nvme_init(const char *nvme_conf, int numa_node, unsigned int mem_size, nvme_glb.bd_bs_opts.cluster_sz = DAOS_BS_CLUSTER_SZ; nvme_glb.bd_bs_opts.max_channel_ops = BIO_BS_MAX_CHANNEL_OPS; - env = getenv("VOS_BDEV_CLASS"); + d_agetenv_str(&env, "VOS_BDEV_CLASS"); if (env && strcasecmp(env, "AIO") == 0) { D_WARN("AIO device(s) will be used!\n"); nvme_glb.bd_bdev_class = BDEV_CLASS_AIO; } + d_freeenv_str(&env); if (numa_node > 0) { bio_numa_node = (unsigned int)numa_node; diff --git a/src/cart/crt_hg.c b/src/cart/crt_hg.c index 6c3805a78c9..9aca5a578a3 100644 --- a/src/cart/crt_hg.c +++ b/src/cart/crt_hg.c @@ -793,8 +793,7 @@ crt_hg_free_protocol_info(struct na_protocol_info *na_protocol_info) int crt_hg_init(void) { - int rc = 0; - char *env; + int rc = 0; if (crt_initialized()) { D_ERROR("CaRT already initialized.\n"); @@ -803,10 +802,8 @@ crt_hg_init(void) #define EXT_FAC DD_FAC(external) - env = getenv("HG_LOG_SUBSYS"); - if (!env) { - env = getenv("HG_LOG_LEVEL"); - if (!env) + if (!d_isenv_def("HG_LOG_SUBSYS")) { + if (!d_isenv_def("HG_LOG_LEVEL")) HG_Set_log_level("warning"); HG_Set_log_subsys("hg,na"); } diff --git a/src/cart/crt_init.c b/src/cart/crt_init.c index 8359afa4903..aa5716e71b5 100644 --- a/src/cart/crt_init.c +++ b/src/cart/crt_init.c @@ -18,6 +18,44 @@ static volatile int gdata_init_flag; struct crt_plugin_gdata crt_plugin_gdata; static bool g_prov_settings_applied[CRT_PROV_COUNT]; +/* List of the environment variables used in CaRT */ +static const char *crt_env_names[] = {"D_PROVIDER", + "D_INTERFACE", + "D_DOMAIN", + "D_PORT", + "CRT_PHY_ADDR_STR", + "D_LOG_STDERR_IN_LOG", + "D_LOG_SIZE", + "D_LOG_FILE", + "D_LOG_FILE_APPEND_PID", + "D_LOG_MASK", + "DD_MASK", + "DD_STDERR", + "DD_SUBSYS", + "CRT_TIMEOUT", + "CRT_ATTACH_INFO_PATH", + "OFI_PORT", + "OFI_INTERFACE", + "OFI_DOMAIN", + "CRT_CREDIT_EP_CTX", + "CRT_CTX_SHARE_ADDR", + "CRT_CTX_NUM", + "D_FI_CONFIG", + "FI_UNIVERSE_SIZE", + "CRT_ENABLE_MEM_PIN", + "FI_OFI_RXM_USE_SRX", + "D_LOG_FLUSH", + "CRT_MRC_ENABLE", + "CRT_SECONDARY_PROVIDER", + "D_PROVIDER_AUTH_KEY", + "D_PORT_AUTO_ADJUST", + "D_POLL_TIMEOUT", + "D_LOG_FILE_APPEND_RANK", + "D_QUOTA_RPCS", + "D_POST_INIT", + "D_POST_INCR", + "DAOS_SIGNAL_REGISTER"}; + static void crt_lib_init(void) __attribute__((__constructor__)); @@ -62,52 +100,20 @@ crt_lib_fini(void) static void dump_envariables(void) { - int i; - char *val; - char *envars[] = {"D_PROVIDER", - "D_INTERFACE", - "D_DOMAIN", - "D_PORT", - "CRT_PHY_ADDR_STR", - "D_LOG_STDERR_IN_LOG", - "D_LOG_SIZE", - "D_LOG_FILE", - "D_LOG_FILE_APPEND_PID", - "D_LOG_MASK", - "DD_MASK", - "DD_STDERR", - "DD_SUBSYS", - "CRT_TIMEOUT", - "CRT_ATTACH_INFO_PATH", - "OFI_PORT", - "OFI_INTERFACE", - "OFI_DOMAIN", - "CRT_CREDIT_EP_CTX", - "CRT_CTX_SHARE_ADDR", - "CRT_CTX_NUM", - "D_FI_CONFIG", - "FI_UNIVERSE_SIZE", - "CRT_ENABLE_MEM_PIN", - "FI_OFI_RXM_USE_SRX", - "D_LOG_FLUSH", - "CRT_MRC_ENABLE", - "CRT_SECONDARY_PROVIDER", - "D_PROVIDER_AUTH_KEY", - "D_PORT_AUTO_ADJUST", - "D_POLL_TIMEOUT", - "D_LOG_FILE_APPEND_RANK", - "D_QUOTA_RPCS", - "D_POST_INIT", - "D_POST_INCR", - "DAOS_SIGNAL_REGISTER"}; + int i; D_INFO("-- ENVARS: --\n"); - for (i = 0; i < ARRAY_SIZE(envars); i++) { - val = getenv(envars[i]); - if (strcmp(envars[i], "D_PROVIDER_AUTH_KEY") == 0 && val) - D_INFO("%s = %s\n", envars[i], "********"); + for (i = 0; i < ARRAY_SIZE(crt_env_names); i++) { + char *val = NULL; + + d_agetenv_str(&val, crt_env_names[i]); + if (val == NULL) + continue; + if (strcmp(crt_env_names[i], "D_PROVIDER_AUTH_KEY") == 0) + D_INFO("%s = %s\n", crt_env_names[i], "********"); else - D_INFO("%s = %s\n", envars[i], val); + D_INFO("%s = %s\n", crt_env_names[i], val); + d_freeenv_str(&val); } } @@ -596,41 +602,40 @@ crt_protocol_info_free(struct crt_protocol_info *protocol_info) int crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) { - char *provider_env; - char *interface_env; - char *domain_env; - char *auth_key_env; - char *tmp; - struct timeval now; - unsigned int seed; - const char *path; - bool server; - int rc = 0; - char *provider_str0 = NULL; - char *provider_str1 = NULL; - crt_provider_t primary_provider; - crt_provider_t secondary_provider; - crt_provider_t tmp_prov; - char *port_str, *port0, *port1; - char *iface0, *iface1, *domain0, *domain1; - char *auth_key0, *auth_key1; - int num_secondaries = 0; - bool port_auto_adjust = false; - int i; + char *provider; + char *provider_env = NULL; + char *interface; + char *interface_env = NULL; + char *domain; + char *domain_env = NULL; + char *auth_key; + char *auth_key_env = NULL; + struct timeval now; + unsigned int seed; + char *path; + bool server = flags & CRT_FLAG_BIT_SERVER; + int rc = 0; + char *provider_str0 = NULL; + char *provider_str1 = NULL; + crt_provider_t primary_provider; + crt_provider_t secondary_provider; + crt_provider_t tmp_prov; + char *port; + char *port_env = NULL; + char *port0 = NULL; + char *port1 = NULL; + char *iface0 = NULL; + char *iface1 = NULL; + char *domain0 = NULL; + char *domain1 = NULL; + char *auth_key0 = NULL; + char *auth_key1 = NULL; + int num_secondaries = 0; + bool port_auto_adjust = false; + int i; d_signal_register(); - server = flags & CRT_FLAG_BIT_SERVER; - port_str = NULL; - port0 = NULL; - port1 = NULL; - iface0 = NULL; - iface1 = NULL; - domain0 = NULL; - domain1 = NULL; - auth_key0 = NULL; - auth_key1 = NULL; - /* d_log_init is reference counted */ rc = d_log_init(); if (rc != 0) { @@ -677,7 +682,7 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) crt_gdata.cg_auto_swim_disable = (flags & CRT_FLAG_BIT_AUTO_SWIM_DISABLE) ? 1 : 0; - path = getenv("CRT_ATTACH_INFO_PATH"); + d_agetenv_str(&path, "CRT_ATTACH_INFO_PATH"); if (path != NULL && strlen(path) > 0) { rc = crt_group_config_path_set(path); if (rc != 0) @@ -687,55 +692,55 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) else D_DEBUG(DB_ALL, "set group_config_path as %s.\n", path); } + d_freeenv_str(&path); if (opt && opt->cio_auth_key) - auth_key_env = opt->cio_auth_key; - else - auth_key_env = getenv("D_PROVIDER_AUTH_KEY"); + auth_key = opt->cio_auth_key; + else { + d_agetenv_str(&auth_key_env, "D_PROVIDER_AUTH_KEY"); + auth_key = auth_key_env; + } if (opt && opt->cio_provider) - provider_env = opt->cio_provider; + provider = opt->cio_provider; else { - provider_env = getenv(CRT_PHY_ADDR_ENV); - - tmp = getenv("D_PROVIDER"); - if (tmp) - provider_env = tmp; + d_agetenv_str(&provider_env, "D_PROVIDER"); + if (provider_env == NULL) + d_agetenv_str(&provider_env, CRT_PHY_ADDR_ENV); + provider = provider_env; } if (opt && opt->cio_interface) - interface_env = opt->cio_interface; + interface = opt->cio_interface; else { - interface_env = getenv("OFI_INTERFACE"); - - tmp = getenv("D_INTERFACE"); - if (tmp) - interface_env = tmp; + d_agetenv_str(&interface_env, "D_INTERFACE"); + if (interface_env == NULL) { + d_agetenv_str(&interface_env, "OFI_INTERFACE"); + } + interface = interface_env; } if (opt && opt->cio_domain) - domain_env = opt->cio_domain; + domain = opt->cio_domain; else { - domain_env = getenv("OFI_DOMAIN"); - - tmp = getenv("D_DOMAIN"); - if (tmp) - domain_env = tmp; + d_agetenv_str(&domain_env, "D_DOMAIN"); + if (domain_env == NULL) + d_agetenv_str(&domain_env, "OFI_DOMAIN"); + domain = domain_env; } if (opt && opt->cio_port) - port_str = opt->cio_port; + port = opt->cio_port; else { - port_str = getenv("OFI_PORT"); - - tmp = getenv("D_PORT"); - if (tmp) - port_str = tmp; + d_agetenv_str(&port_env, "D_PORT"); + if (port_env == NULL) + d_agetenv_str(&port_env, "OFI_PORT"); + port = port_env; } d_getenv_bool("D_PORT_AUTO_ADJUST", &port_auto_adjust); - rc = __split_arg(provider_env, &provider_str0, &provider_str1); + rc = __split_arg(provider, &provider_str0, &provider_str1); if (rc != 0) D_GOTO(unlock, rc); @@ -743,20 +748,20 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) secondary_provider = crt_str_to_provider(provider_str1); if (primary_provider == CRT_PROV_UNKNOWN) { - D_ERROR("Requested provider %s not found\n", provider_env); + D_ERROR("Requested provider %s not found\n", provider); D_GOTO(unlock, rc = -DER_NONEXIST); } - rc = __split_arg(interface_env, &iface0, &iface1); + rc = __split_arg(interface, &iface0, &iface1); if (rc != 0) D_GOTO(unlock, rc); - rc = __split_arg(domain_env, &domain0, &domain1); + rc = __split_arg(domain, &domain0, &domain1); if (rc != 0) D_GOTO(unlock, rc); - rc = __split_arg(port_str, &port0, &port1); + rc = __split_arg(port, &port0, &port1); if (rc != 0) D_GOTO(unlock, rc); - rc = __split_arg(auth_key_env, &auth_key0, &auth_key1); + rc = __split_arg(auth_key, &auth_key0, &auth_key1); if (rc != 0) D_GOTO(unlock, rc); @@ -896,6 +901,11 @@ crt_init_opt(crt_group_id_t grpid, uint32_t flags, crt_init_options_t *opt) D_FREE(domain0); D_FREE(provider_str0); D_FREE(auth_key0); + d_freeenv_str(&port_env); + d_freeenv_str(&domain_env); + d_freeenv_str(&interface_env); + d_freeenv_str(&provider_env); + d_freeenv_str(&auth_key_env); if (rc != 0) { D_ERROR("failed, "DF_RC"\n", DP_RC(rc)); diff --git a/src/cart/utils/crt_utils.c b/src/cart/utils/crt_utils.c index 5e1a7582c90..6f7cee03c38 100644 --- a/src/cart/utils/crt_utils.c +++ b/src/cart/utils/crt_utils.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2019-2023 Intel Corporation. + * (C) Copyright 2019-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -101,11 +101,12 @@ write_completion_file(void) char *dir; char *completion_file = NULL; - dir = getenv("DAOS_TEST_SHARED_DIR"); + d_agetenv_str(&dir, "DAOS_TEST_SHARED_DIR"); D_ASSERTF(dir != NULL, "DAOS_TEST_SHARED_DIR must be set for --write_completion_file " "option.\n"); D_ASPRINTF(completion_file, "%s/test-servers-completed.txt.%d", dir, getpid()); + d_freeenv_str(&dir); D_ASSERTF(completion_file != NULL, "Error allocating completion_file string\n"); unlink(completion_file); @@ -412,12 +413,15 @@ crtu_dc_mgmt_net_cfg_rank_add(const char *name, crt_group_t *group, int crtu_dc_mgmt_net_cfg_setenv(const char *name) { - int rc; - char buf[SYS_INFO_BUF_SIZE]; - char *crt_timeout; - char *ofi_interface; - char *ofi_domain; - char *cli_srx_set; + int rc; + char *crt_phy_addr_str; + char *crt_ctx_share_addr = NULL; + char *cli_srx_set = NULL; + char *crt_timeout = NULL; + char *ofi_interface; + char *ofi_interface_env = NULL; + char *ofi_domain; + char *ofi_domain_env = NULL; struct dc_mgmt_sys_info crt_net_cfg_info = {0}; Mgmt__GetAttachInfoResp *crt_net_cfg_resp = NULL; @@ -433,29 +437,38 @@ crtu_dc_mgmt_net_cfg_setenv(const char *name) } /* These two are always set */ - D_INFO("setenv CRT_PHY_ADDR_STR=%s\n", crt_net_cfg_info.provider); - rc = d_setenv("CRT_PHY_ADDR_STR", crt_net_cfg_info.provider, 1); + crt_phy_addr_str = crt_net_cfg_info.provider; + D_INFO("setenv CRT_PHY_ADDR_STR=%s\n", crt_phy_addr_str); + rc = d_setenv("CRT_PHY_ADDR_STR", crt_phy_addr_str, 1); if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); - sprintf(buf, "%d", crt_net_cfg_info.crt_ctx_share_addr); - D_INFO("setenv CRT_CTX_SHARE_ADDR=%d\n", crt_net_cfg_info.crt_ctx_share_addr); - rc = d_setenv("CRT_CTX_SHARE_ADDR", buf, 1); + rc = asprintf(&crt_ctx_share_addr, "%d", crt_net_cfg_info.crt_ctx_share_addr); + if (rc < 0) { + crt_ctx_share_addr = NULL; + D_GOTO(cleanup, rc = -DER_NOMEM); + } + D_INFO("setenv CRT_CTX_SHARE_ADDR=%s\n", crt_ctx_share_addr); + rc = d_setenv("CRT_CTX_SHARE_ADDR", crt_ctx_share_addr, 1); if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); /* If the server has set this, the client must use the same value. */ if (crt_net_cfg_info.srv_srx_set != -1) { - sprintf(buf, "%d", crt_net_cfg_info.srv_srx_set); - rc = d_setenv("FI_OFI_RXM_USE_SRX", buf, 1); - D_INFO("setenv FI_OFI_RXM_USE_SRX=%d\n", crt_net_cfg_info.srv_srx_set); + rc = asprintf(&cli_srx_set, "%d", crt_net_cfg_info.srv_srx_set); + if (rc < 0) { + cli_srx_set = NULL; + D_GOTO(cleanup, rc = -DER_NOMEM); + } + D_INFO("setenv FI_OFI_RXM_USE_SRX=%s\n", cli_srx_set); + rc = d_setenv("FI_OFI_RXM_USE_SRX", cli_srx_set, 1); if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); - D_DEBUG(DB_MGMT, "Using server's value for FI_OFI_RXM_USE_SRX: %s\n", buf); + D_DEBUG(DB_MGMT, "Using server's value for FI_OFI_RXM_USE_SRX: %s\n", cli_srx_set); } else { /* Client may not set it if the server hasn't. */ - cli_srx_set = getenv("FI_OFI_RXM_USE_SRX"); + d_agetenv_str(&cli_srx_set, "FI_OFI_RXM_USE_SRX"); if (cli_srx_set) { D_ERROR("Client set FI_OFI_RXM_USE_SRX to %s, " "but server is unset!\n", cli_srx_set); @@ -464,47 +477,58 @@ crtu_dc_mgmt_net_cfg_setenv(const char *name) } /* Allow client env overrides for these three */ - crt_timeout = getenv("CRT_TIMEOUT"); + d_agetenv_str(&crt_timeout, "CRT_TIMEOUT"); if (!crt_timeout) { - sprintf(buf, "%d", crt_net_cfg_info.crt_timeout); - rc = d_setenv("CRT_TIMEOUT", buf, 1); - D_INFO("setenv CRT_TIMEOUT=%d\n", crt_net_cfg_info.crt_timeout); + rc = asprintf(&crt_timeout, "%d", crt_net_cfg_info.crt_timeout); + if (rc < 0) { + crt_timeout = NULL; + D_GOTO(cleanup, rc = -DER_NOMEM); + } + D_INFO("setenv CRT_TIMEOUT=%s\n", crt_timeout); + rc = d_setenv("CRT_TIMEOUT", crt_timeout, 1); if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); } else { D_DEBUG(DB_MGMT, "Using client provided CRT_TIMEOUT: %s\n", crt_timeout); } - ofi_interface = getenv("OFI_INTERFACE"); - if (!ofi_interface) { - rc = d_setenv("OFI_INTERFACE", crt_net_cfg_info.interface, 1); - D_INFO("Setting OFI_INTERFACE=%s\n", crt_net_cfg_info.interface); + d_agetenv_str(&ofi_interface_env, "OFI_INTERFACE"); + if (!ofi_interface_env) { + ofi_interface = crt_net_cfg_info.interface; + D_INFO("Setting OFI_INTERFACE=%s\n", ofi_interface); + rc = d_setenv("OFI_INTERFACE", ofi_interface, 1); if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); } else { + ofi_interface = ofi_interface_env; D_DEBUG(DB_MGMT, "Using client provided OFI_INTERFACE: %s\n", ofi_interface); } - ofi_domain = getenv("OFI_DOMAIN"); - if (!ofi_domain) { - rc = d_setenv("OFI_DOMAIN", crt_net_cfg_info.domain, 1); - D_INFO("Setting OFI_DOMAIN=%s\n", crt_net_cfg_info.domain); + d_agetenv_str(&ofi_domain_env, "OFI_DOMAIN"); + if (!ofi_domain_env) { + ofi_domain = crt_net_cfg_info.domain; + D_INFO("Setting OFI_DOMAIN=%s\n", ofi_domain); + rc = d_setenv("OFI_DOMAIN", ofi_domain, 1); if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); } else { + ofi_domain = ofi_domain_env; D_DEBUG(DB_MGMT, "Using client provided OFI_DOMAIN: %s\n", ofi_domain); } D_INFO("CaRT env setup with:\n" - "\tOFI_INTERFACE=%s, OFI_DOMAIN: %s, CRT_PHY_ADDR_STR: %s, " - "CRT_CTX_SHARE_ADDR: %s, CRT_TIMEOUT: %s\n", - getenv("OFI_INTERFACE"), getenv("OFI_DOMAIN"), - getenv("CRT_PHY_ADDR_STR"), - getenv("CRT_CTX_SHARE_ADDR"), getenv("CRT_TIMEOUT")); + "\tOFI_INTERFACE=%s, OFI_DOMAIN: %s, CRT_PHY_ADDR_STR: %s, " + "CRT_CTX_SHARE_ADDR: %s, CRT_TIMEOUT: %s\n", + ofi_interface, ofi_domain, crt_phy_addr_str, crt_ctx_share_addr, crt_timeout); cleanup: + d_freeenv_str(&ofi_domain_env); + d_freeenv_str(&ofi_interface_env); + d_freeenv_str(&crt_timeout); + d_freeenv_str(&cli_srx_set); + d_freeenv_str(&crt_ctx_share_addr); dc_put_attach_info(&crt_net_cfg_info, crt_net_cfg_resp); return rc; @@ -575,7 +599,7 @@ crtu_cli_start_basic(char *local_group_name, char *srv_group_name, if (*grp == NULL) D_GOTO(out, rc = -DER_INVAL); - grp_cfg_file = getenv("CRT_L_GRP_CFG"); + d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); /* load group info from a config file and * delete file upon return @@ -583,6 +607,7 @@ crtu_cli_start_basic(char *local_group_name, char *srv_group_name, rc = crtu_load_group_from_file(grp_cfg_file, *crt_ctx, *grp, -1, true); + d_freeenv_str(&grp_cfg_file); if (rc != 0) D_GOTO(out, rc); } @@ -644,7 +669,6 @@ crtu_srv_start_basic(char *srv_group_name, crt_context_t *crt_ctx, pthread_t *progress_thread, crt_group_t **grp, uint32_t *grp_size, crt_init_options_t *init_opt) { - char *env_self_rank; char *grp_cfg_file; char *my_uri; d_rank_t my_rank; @@ -653,8 +677,8 @@ crtu_srv_start_basic(char *srv_group_name, crt_context_t *crt_ctx, if (opts.assert_on_error) D_ASSERTF(opts.is_initialized == true, "crtu_test_init not called.\n"); - env_self_rank = getenv("CRT_L_RANK"); - my_rank = atoi(env_self_rank); + rc = d_getenv_uint32_t("CRT_L_RANK", &my_rank); + D_ASSERTF(rc == DER_SUCCESS, "Rank can not be retrieve: " DF_RC "\n", DP_RC(rc)); rc = d_log_init(); if (rc != 0) @@ -695,19 +719,19 @@ crtu_srv_start_basic(char *srv_group_name, crt_context_t *crt_ctx, D_GOTO(out, rc); } - grp_cfg_file = getenv("CRT_L_GRP_CFG"); - rc = crt_rank_uri_get(*grp, my_rank, 0, &my_uri); if (rc != 0) D_GOTO(out, rc); + D_FREE(my_uri); + + rc = d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); /* load group info from a config file and delete file upon return */ rc = crtu_load_group_from_file(grp_cfg_file, crt_ctx[0], *grp, my_rank, true); + d_freeenv_str(&grp_cfg_file); if (rc != 0) D_GOTO(out, rc); - D_FREE(my_uri); - rc = crt_group_size(NULL, grp_size); if (rc != 0) D_GOTO(out, rc); diff --git a/src/client/api/agent.c b/src/client/api/agent.c index 365ba908ef6..ede098eb165 100644 --- a/src/client/api/agent.c +++ b/src/client/api/agent.c @@ -12,14 +12,16 @@ char *dc_agent_sockpath; int dc_agent_init() { - char *path = NULL; - char *envpath = getenv(DAOS_AGENT_DRPC_DIR_ENV); + char *path = NULL; + char *envpath; - if (envpath) + d_agetenv_str(&envpath, DAOS_AGENT_DRPC_DIR_ENV); + if (envpath != NULL) D_ASPRINTF(path, "%s/%s", envpath, DAOS_AGENT_DRPC_SOCK_NAME); else D_STRNDUP_S(path, DEFAULT_DAOS_AGENT_DRPC_SOCK); + d_freeenv_str(&envpath); if (path == NULL) return -DER_NOMEM; diff --git a/src/client/api/job.c b/src/client/api/job.c index e4a330f455c..c184c770840 100644 --- a/src/client/api/job.c +++ b/src/client/api/job.c @@ -37,22 +37,24 @@ int dc_job_init(void) { char *jobid; - char *jobid_env = getenv(JOBID_ENV); + char *jobid_env; int err = 0; + d_agetenv_str(&jobid_env, JOBID_ENV); if (jobid_env == NULL) { D_STRNDUP_S(jobid_env, DEFAULT_JOBID_ENV); } else { char *tmp_env = jobid_env; D_STRNDUP(jobid_env, tmp_env, MAX_ENV_NAME); + d_freeenv_str(&tmp_env); } if (jobid_env == NULL) D_GOTO(out_err, err = -DER_NOMEM); dc_jobid_env = jobid_env; - jobid = getenv(dc_jobid_env); + d_agetenv_str(&jobid, dc_jobid_env); if (jobid == NULL) { err = craft_default_jobid(&jobid); if (err) @@ -61,6 +63,7 @@ dc_job_init(void) char *tmp_jobid = jobid; D_STRNDUP(jobid, tmp_jobid, MAX_JOBID_LEN); + d_freeenv_str(&tmp_jobid); if (jobid == NULL) D_GOTO(out_env, err = -DER_NOMEM); } diff --git a/src/client/dfuse/dfuse_main.c b/src/client/dfuse/dfuse_main.c index 1ef48600a6f..ee67c3f2fe1 100644 --- a/src/client/dfuse/dfuse_main.c +++ b/src/client/dfuse/dfuse_main.c @@ -521,7 +521,7 @@ main(int argc, char **argv) } } - if (!dfuse_info->di_foreground && getenv("PMIX_RANK")) { + if (!dfuse_info->di_foreground && d_isenv_def("PMIX_RANK")) { DFUSE_TRA_WARNING(dfuse_info, "Not running in background under orterun"); dfuse_info->di_foreground = true; diff --git a/src/client/dfuse/il/int_read.c b/src/client/dfuse/il/int_read.c index 24f6be3051d..346cd14c288 100644 --- a/src/client/dfuse/il/int_read.c +++ b/src/client/dfuse/il/int_read.c @@ -13,21 +13,15 @@ #include "ioil.h" static ssize_t -read_bulk(char *buff, size_t len, off_t position, struct fd_entry *entry, int *errcode) +read_bulksgl(d_sg_list_t *sgl, size_t len, off_t position, struct fd_entry *entry, int *errcode) { - daos_size_t read_size = 0; - d_iov_t iov = {}; - d_sg_list_t sgl = {}; + daos_size_t read_size = 0; daos_event_t ev; daos_handle_t eqh; int rc; DFUSE_TRA_DEBUG(entry->fd_dfsoh, "%#zx-%#zx", position, position + len - 1); - sgl.sg_nr = 1; - d_iov_set(&iov, (void *)buff, len); - sgl.sg_iovs = &iov; - rc = ioil_get_eqh(&eqh); if (rc == 0) { bool flag = false; @@ -39,8 +33,8 @@ read_bulk(char *buff, size_t len, off_t position, struct fd_entry *entry, int *e D_GOTO(out, rc = daos_der2errno(rc)); } - rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, - &read_size, &ev); + rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, sgl, position, &read_size, + &ev); if (rc) D_GOTO(out, rc); @@ -57,7 +51,7 @@ read_bulk(char *buff, size_t len, off_t position, struct fd_entry *entry, int *e } rc = ev.ev_error; } else { - rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, &read_size, + rc = dfs_read(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, sgl, position, &read_size, NULL); } out: @@ -72,29 +66,47 @@ read_bulk(char *buff, size_t len, off_t position, struct fd_entry *entry, int *e ssize_t ioil_do_pread(char *buff, size_t len, off_t position, struct fd_entry *entry, int *errcode) { - return read_bulk(buff, len, position, entry, errcode); + d_iov_t iov = {}; + d_sg_list_t sgl = {}; + + sgl.sg_nr = 1; + d_iov_set(&iov, (void *)buff, len); + sgl.sg_iovs = &iov; + + return read_bulksgl(&sgl, len, position, entry, errcode); } ssize_t ioil_do_preadv(const struct iovec *iov, int count, off_t position, struct fd_entry *entry, int *errcode) { - ssize_t bytes_read; - ssize_t total_read = 0; - int i; + d_iov_t *diov; + d_sg_list_t sgl = {}; + ssize_t total_read = 0; + int i; + int rc; + int new_count; + + D_ALLOC_ARRAY(diov, count); + if (diov == NULL) { + *errcode = ENOMEM; + return -1; + } - for (i = 0; i < count; i++) { - bytes_read = read_bulk(iov[i].iov_base, iov[i].iov_len, position, entry, errcode); + for (i = 0, new_count = 0; i < count; i++) { + /** See DAOS-15089. This is a workaround */ + if (iov[i].iov_len == 0) + continue; + d_iov_set(&diov[new_count++], iov[i].iov_base, iov[i].iov_len); + total_read += iov[i].iov_len; + } - if (bytes_read == -1) - return (ssize_t)-1; + sgl.sg_nr = new_count; + sgl.sg_iovs = diov; - if (bytes_read == 0) - return total_read; + rc = read_bulksgl(&sgl, total_read, position, entry, errcode); - position += bytes_read; - total_read += bytes_read; - } + D_FREE(diov); - return total_read; + return rc; } diff --git a/src/client/dfuse/il/int_write.c b/src/client/dfuse/il/int_write.c index 2de4b3a4460..c95e23e0909 100644 --- a/src/client/dfuse/il/int_write.c +++ b/src/client/dfuse/il/int_write.c @@ -14,21 +14,15 @@ #include "ioil.h" -ssize_t -ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *entry, int *errcode) +static ssize_t +ioil_do_writesgl(d_sg_list_t *sgl, size_t len, off_t position, struct fd_entry *entry, int *errcode) { - d_iov_t iov = {}; - d_sg_list_t sgl = {}; daos_event_t ev; daos_handle_t eqh; int rc; DFUSE_TRA_DEBUG(entry->fd_dfsoh, "%#zx-%#zx", position, position + len - 1); - sgl.sg_nr = 1; - d_iov_set(&iov, (void *)buff, len); - sgl.sg_iovs = &iov; - rc = ioil_get_eqh(&eqh); if (rc == 0) { bool flag = false; @@ -40,7 +34,7 @@ ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *en D_GOTO(out, rc = daos_der2errno(rc)); } - rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, &ev); + rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, sgl, position, &ev); if (rc) D_GOTO(out, rc); @@ -57,7 +51,7 @@ ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *en } rc = ev.ev_error; } else { - rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, &sgl, position, NULL); + rc = dfs_write(entry->fd_cont->ioc_dfs, entry->fd_dfsoh, sgl, position, NULL); } out: if (rc) { @@ -68,27 +62,50 @@ ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *en return len; } +ssize_t +ioil_do_writex(const char *buff, size_t len, off_t position, struct fd_entry *entry, int *errcode) +{ + d_iov_t iov = {}; + d_sg_list_t sgl = {}; + + sgl.sg_nr = 1; + d_iov_set(&iov, (void *)buff, len); + sgl.sg_iovs = &iov; + + return ioil_do_writesgl(&sgl, len, position, entry, errcode); +} + ssize_t ioil_do_pwritev(const struct iovec *iov, int count, off_t position, struct fd_entry *entry, int *errcode) { - ssize_t bytes_written; - ssize_t total_write = 0; - int i; + d_iov_t *diov; + d_sg_list_t sgl = {}; + size_t total_write = 0; + int i; + int rc; + int new_count; + + D_ALLOC_ARRAY(diov, count); + if (diov == NULL) { + *errcode = ENOMEM; + return -1; + } - for (i = 0; i < count; i++) { - bytes_written = - ioil_do_writex(iov[i].iov_base, iov[i].iov_len, position, entry, errcode); + for (i = 0, new_count = 0; i < count; i++) { + /** See DAOS-15089. This is a workaround */ + if (iov[i].iov_len == 0) + continue; + d_iov_set(&diov[new_count++], iov[i].iov_base, iov[i].iov_len); + total_write += iov[i].iov_len; + } - if (bytes_written == -1) - return (ssize_t)-1; + sgl.sg_nr = new_count; + sgl.sg_iovs = diov; - if (bytes_written == 0) - return total_write; + rc = ioil_do_writesgl(&sgl, total_write, position, entry, errcode); - position += bytes_written; - total_write += bytes_written; - } + D_FREE(diov); - return total_write; + return rc; } diff --git a/src/client/dfuse/pil4dfs/int_dfs.c b/src/client/dfuse/pil4dfs/int_dfs.c index 80ff5926442..b50caad76ef 100644 --- a/src/client/dfuse/pil4dfs/int_dfs.c +++ b/src/client/dfuse/pil4dfs/int_dfs.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2022-2023 Intel Corporation. + * (C) Copyright 2022-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -61,7 +61,7 @@ #define MAX_DAOS_MT (8) #define READ_DIR_BATCH_SIZE (96) -#define MAX_FD_DUP2ED (8) +#define MAX_FD_DUP2ED (16) #define MAX_MMAP_BLOCK (64) @@ -78,6 +78,14 @@ #define MAX_EQ 64 +/* the default min fd that will be used by DAOS */ +#define DAOS_MIN_FD 10 + +/* the number of low fd reserved */ +static uint16_t low_fd_count; +/* the list of low fd reserved */ +static int low_fd_list[DAOS_MIN_FD]; + /* In case of fork(), only the parent process could destroy daos env. */ static bool context_reset; static __thread daos_handle_t td_eqh; @@ -126,7 +134,7 @@ static _Atomic uint32_t daos_init_cnt; static bool report; static long int page_size; -static bool daos_inited; +static _Atomic bool daos_inited; static bool daos_debug_inited; static int num_dfs; static struct dfs_mt dfs_list[MAX_DAOS_MT]; @@ -185,7 +193,6 @@ struct mmap_obj { struct fd_dup2 { int fd_src, fd_dest; - bool dest_closed; }; /* Add the data structure for statx_timestamp and statx @@ -233,7 +240,7 @@ struct statx { #endif /* working dir of current process */ -static char cur_dir[DFS_MAX_PATH] = ""; +static char cur_dir[DFS_MAX_PATH + 1] = ""; static bool segv_handler_inited; /* Old segv handler */ struct sigaction old_segv; @@ -241,6 +248,7 @@ struct sigaction old_segv; /* the flag to indicate whether initlization is finished or not */ static bool hook_enabled; static bool hook_enabled_bak; +static pthread_mutex_t lock_reserve_fd; static pthread_mutex_t lock_dfs; static pthread_mutex_t lock_fd; static pthread_mutex_t lock_dirfd; @@ -403,6 +411,8 @@ static ssize_t (*next_pwrite)(int fd, const void *buf, size_t size, off_t offset static off_t (*libc_lseek)(int fd, off_t offset, int whence); static off_t (*pthread_lseek)(int fd, off_t offset, int whence); +static int new_fxstat(int vers, int fd, struct stat *buf); + static int (*next_fxstat)(int vers, int fd, struct stat *buf); static int (*next_fstat)(int fd, struct stat *buf); @@ -532,6 +542,8 @@ remove_dot_dot(char path[], int *len); static int remove_dot_and_cleanup(char szPath[], int len); +/* reference count of fake fd duplicated by real fd with dup2() */ +static int dup_ref_count[MAX_OPENED_FILE]; static struct file_obj *file_list[MAX_OPENED_FILE]; static struct dir_obj *dir_list[MAX_OPENED_DIR]; static struct mmap_obj mmap_list[MAX_MMAP_BLOCK]; @@ -548,7 +560,7 @@ find_next_available_dirfd(struct dir_obj *obj, int *new_fd); static int find_next_available_map(int *idx); static void -free_fd(int idx); +free_fd(int idx, bool closing_dup_fd); static void free_dirfd(int idx); static void @@ -600,13 +612,14 @@ query_dfs_mount(const char *path) static int discover_daos_mount_with_env(void) { - int idx, len_fs_root, rc; - char *fs_root = NULL; - char *pool = NULL; - char *container = NULL; + int idx, rc; + char *fs_root = NULL; + char *pool = NULL; + char *container = NULL; + size_t len_fs_root, len_pool, len_container; /* Add the mount if env DAOS_MOUNT_POINT is set. */ - fs_root = getenv("DAOS_MOUNT_POINT"); + rc = d_agetenv_str(&fs_root, "DAOS_MOUNT_POINT"); if (fs_root == NULL) /* env DAOS_MOUNT_POINT is undefined, return success (0) */ D_GOTO(out, rc = 0); @@ -633,31 +646,56 @@ discover_daos_mount_with_env(void) D_GOTO(out, rc = ENAMETOOLONG); } - pool = getenv("DAOS_POOL"); + d_agetenv_str(&pool, "DAOS_POOL"); if (pool == NULL) { D_FATAL("DAOS_POOL is not set.\n"); D_GOTO(out, rc = EINVAL); } - container = getenv("DAOS_CONTAINER"); + len_pool = strnlen(pool, DAOS_PROP_MAX_LABEL_BUF_LEN); + if (len_pool >= DAOS_PROP_MAX_LABEL_BUF_LEN) { + D_FATAL("DAOS_POOL is too long.\n"); + D_GOTO(out, rc = ENAMETOOLONG); + } + + rc = d_agetenv_str(&container, "DAOS_CONTAINER"); if (container == NULL) { D_FATAL("DAOS_CONTAINER is not set.\n"); D_GOTO(out, rc = EINVAL); } + len_container = strnlen(container, DAOS_PROP_MAX_LABEL_BUF_LEN); + if (len_container >= DAOS_PROP_MAX_LABEL_BUF_LEN) { + D_FATAL("DAOS_CONTAINER is too long.\n"); + D_GOTO(out, rc = ENAMETOOLONG); + } + D_STRNDUP(dfs_list[num_dfs].fs_root, fs_root, len_fs_root); if (dfs_list[num_dfs].fs_root == NULL) D_GOTO(out, rc = ENOMEM); - dfs_list[num_dfs].pool = pool; - dfs_list[num_dfs].cont = container; + D_STRNDUP(dfs_list[num_dfs].pool, pool, len_pool); + if (dfs_list[num_dfs].pool == NULL) + D_GOTO(free_fs_root, rc = ENOMEM); + + D_STRNDUP(dfs_list[num_dfs].cont, container, len_container); + if (dfs_list[num_dfs].cont == NULL) + D_GOTO(free_pool, rc = ENOMEM); + dfs_list[num_dfs].dfs_dir_hash = NULL; - dfs_list[num_dfs].len_fs_root = len_fs_root; + dfs_list[num_dfs].len_fs_root = (int)len_fs_root; atomic_init(&dfs_list[num_dfs].inited, 0); num_dfs++; - rc = 0; + D_GOTO(out, rc = 0); +free_pool: + D_FREE(dfs_list[num_dfs].pool); +free_fs_root: + D_FREE(dfs_list[num_dfs].fs_root); out: + d_freeenv_str(&container); + d_freeenv_str(&pool); + d_freeenv_str(&fs_root); return rc; } @@ -899,7 +937,7 @@ child_hdlr(void) int rc; /* daos is not initialized yet */ - if (!daos_inited) + if (atomic_load_relaxed(&daos_inited) == false) return; daos_eq_lib_reset_after_fork(); @@ -913,6 +951,58 @@ child_hdlr(void) context_reset = true; } +/* only free the reserved low fds when application exits or encounters error */ +static void +free_reserved_low_fd(void) +{ + int i; + + for (i = 0; i < low_fd_count; i++) + libc_close(low_fd_list[i]); + low_fd_count = 0; +} + +/* some applications especially bash scripts use specific low fds directly. + * It would be safer to avoid using such low fds (fd < DAOS_MIN_FD) in daos. + * We consume such low fds before any daos calls and close them only when + * application exits or encounters error. + */ + +static int +consume_low_fd(void) +{ + int rc = 0; + + if (atomic_load_relaxed(&daos_inited) == true) + return 0; + + D_MUTEX_LOCK(&lock_reserve_fd); + low_fd_count = 0; + low_fd_list[low_fd_count] = libc_open("/", O_PATH | O_DIRECTORY); + while (1) { + if (low_fd_list[low_fd_count] < 0) { + DS_ERROR(errno, "failed to reserve a low fd"); + goto err; + } else if (low_fd_list[low_fd_count] >= DAOS_MIN_FD) { + libc_close(low_fd_list[low_fd_count]); + break; + } else { + low_fd_count++; + } + low_fd_list[low_fd_count] = libc_open("/", O_RDONLY); + } + + D_MUTEX_UNLOCK(&lock_reserve_fd); + return rc; + +err: + rc = errno; + free_reserved_low_fd(); + D_MUTEX_UNLOCK(&lock_reserve_fd); + + return rc; +} + /** determine whether a path (both relative and absolute) is on DAOS or not. If yes, * returns parent object, item name, full path of parent dir, full absolute path, and * the pointer to struct dfs_mt. @@ -962,7 +1052,7 @@ query_path(const char *szInput, int *is_target_path, dfs_obj_t **parent, char *i if (strncmp(szInput, ".", 2) == 0) { /* special case for current work directory */ - pt_end = stpncpy(full_path_parse, cur_dir, DFS_MAX_PATH); + pt_end = stpncpy(full_path_parse, cur_dir, DFS_MAX_PATH + 1); len = (int)(pt_end - full_path_parse); if (len >= DFS_MAX_PATH) { D_DEBUG(DB_ANY, "full_path_parse[] is not large enough: %d (%s)\n", @@ -1005,24 +1095,37 @@ query_path(const char *szInput, int *is_target_path, dfs_obj_t **parent, char *i if (idx_dfs >= 0) { /* trying to avoid lock as much as possible */ - if (!daos_inited) { + if (atomic_load_relaxed(&daos_inited) == false) { /* daos_init() is expensive to call. We call it only when necessary. */ + + rc = consume_low_fd(); + if (rc) { + DS_ERROR(rc, "consume_low_fd() failed"); + *is_target_path = 0; + goto out_normal; + } + rc = daos_init(); if (rc) { DL_ERROR(rc, "daos_init() failed"); *is_target_path = 0; goto out_normal; } + if (eq_count_max) { - rc = daos_eq_create(&td_eqh); - if (rc) - DL_WARN(rc, "daos_eq_create() failed"); - main_eqh = td_eqh; - rc = pthread_atfork(NULL, NULL, &child_hdlr); - D_ASSERT(rc == 0); + D_MUTEX_LOCK(&lock_eqh); + if (daos_handle_is_inval(main_eqh)) { + rc = daos_eq_create(&td_eqh); + if (rc) + DL_WARN(rc, "daos_eq_create() failed"); + main_eqh = td_eqh; + rc = pthread_atfork(NULL, NULL, &child_hdlr); + D_ASSERT(rc == 0); + } + D_MUTEX_UNLOCK(&lock_eqh); } - daos_inited = true; + atomic_store_relaxed(&daos_inited, true); atomic_fetch_add_relaxed(&daos_init_cnt, 1); } @@ -1100,6 +1203,7 @@ query_path(const char *szInput, int *is_target_path, dfs_obj_t **parent, char *i D_GOTO(out_err, rc); } } else { + strncpy(*full_path, full_path_parse, len + 1); *is_target_path = 0; item_name[0] = '\0'; } @@ -1296,6 +1400,7 @@ find_next_available_fd(struct file_obj *obj, int *new_fd) new_obj->ref_count++; file_list[idx] = new_obj; } + dup_ref_count[idx] = 0; if (next_free_fd > last_fd) last_fd = next_free_fd; next_free_fd = -1; @@ -1315,6 +1420,24 @@ find_next_available_fd(struct file_obj *obj, int *new_fd) return 0; } +static void +inc_dup_ref_count(int fd) +{ + D_MUTEX_LOCK(&lock_fd); + dup_ref_count[fd - FD_FILE_BASE]++; + file_list[fd - FD_FILE_BASE]->ref_count++; + D_MUTEX_UNLOCK(&lock_fd); +} + +static void +dec_dup_ref_count(int fd) +{ + D_MUTEX_LOCK(&lock_fd); + dup_ref_count[fd - FD_FILE_BASE]--; + file_list[fd - FD_FILE_BASE]->ref_count--; + D_MUTEX_UNLOCK(&lock_fd); +} + static int find_next_available_dirfd(struct dir_obj *obj, int *new_dir_fd) { @@ -1399,7 +1522,7 @@ find_next_available_map(int *idx) /* May need to support duplicated fd as duplicated dirfd too. */ static void -free_fd(int idx) +free_fd(int idx, bool closing_dup_fd) { int i, rc; struct file_obj *saved_obj = NULL; @@ -1412,9 +1535,15 @@ free_fd(int idx) return; } + if (closing_dup_fd) + dup_ref_count[idx]--; file_list[idx]->ref_count--; if (file_list[idx]->ref_count == 0) saved_obj = file_list[idx]; + if (dup_ref_count[idx] > 0) { + D_MUTEX_UNLOCK(&lock_fd); + return; + } file_list[idx] = NULL; if (idx < next_free_fd) @@ -1506,7 +1635,7 @@ free_map(int idx) mmap_list[idx].addr = NULL; /* Need to call free_fd(). */ if (file_list[mmap_list[idx].fd - FD_FILE_BASE]->idx_mmap >= MAX_MMAP_BLOCK) - free_fd(mmap_list[idx].fd - FD_FILE_BASE); + free_fd(mmap_list[idx].fd - FD_FILE_BASE, false); mmap_list[idx].fd = -1; if (idx < next_free_map) @@ -1547,46 +1676,23 @@ get_fd_redirected(int fd) return fd_ret; } -/* This fd is a fake fd. There exists a associated kernel fd with dup2. - * Need to check whether fd is in fd_dup2_list[], set dest_closed true - * if yes. Otherwise, close the fake fd. - */ -static void -close_dup_fd_dest_fakefd(int fd) -{ - int i; - - if (fd < FD_FILE_BASE) - return; - - D_MUTEX_LOCK(&lock_fd_dup2ed); - if (num_fd_dup2ed > 0) { - for (i = 0; i < MAX_FD_DUP2ED; i++) { - if (fd_dup2_list[i].fd_dest == fd) { - fd_dup2_list[i].dest_closed = true; - D_MUTEX_UNLOCK(&lock_fd_dup2ed); - return; - } - } - } - D_MUTEX_UNLOCK(&lock_fd_dup2ed); - - free_fd(fd - FD_FILE_BASE); -} - /* This fd is a fd from kernel and it is associated with a fake fd. - * Need to 1) close(fd) 2) remove the entry in fd_dup2_list[] 3) close - * the fake fd if dest_closed is true. + * Need to 1) close(fd) 2) remove the entry in fd_dup2_list[] 3) decrease + * the dup reference count of the fake fd. */ + static int -close_dup_fd_src(int (*next_close)(int fd), int fd) +close_dup_fd(int (*next_close)(int fd), int fd, bool close_fd) { int i, rc, idx_dup = -1, fd_dest = -1; - /* close the fd from kernel */ - rc = next_close(fd); - if (rc != 0) - return (-1); + if (close_fd) { + /* close the fd from kernel */ + assert(fd < FD_FILE_BASE); + rc = next_close(fd); + if (rc != 0) + return (-1); + } /* remove the fd_dup entry */ D_MUTEX_LOCK(&lock_fd_dup2ed); @@ -1594,12 +1700,10 @@ close_dup_fd_src(int (*next_close)(int fd), int fd) for (i = 0; i < MAX_FD_DUP2ED; i++) { if (fd_dup2_list[i].fd_src == fd) { idx_dup = i; - if (fd_dup2_list[i].dest_closed) - fd_dest = fd_dup2_list[i].fd_dest; + fd_dest = fd_dup2_list[i].fd_dest; /* clear the value to free */ fd_dup2_list[i].fd_src = -1; fd_dup2_list[i].fd_dest = -1; - fd_dup2_list[i].dest_closed = false; num_fd_dup2ed--; break; } @@ -1613,8 +1717,7 @@ close_dup_fd_src(int (*next_close)(int fd), int fd) errno = EINVAL; return (-1); } - if (fd_dest > 0) - free_fd(fd_dest - FD_FILE_BASE); + free_fd(fd_dest - FD_FILE_BASE, true); return 0; } @@ -1628,7 +1731,6 @@ init_fd_dup2_list(void) for (i = 0; i < MAX_FD_DUP2ED; i++) { fd_dup2_list[i].fd_src = -1; fd_dup2_list[i].fd_dest = -1; - fd_dup2_list[i].dest_closed = false; } D_MUTEX_UNLOCK(&lock_fd_dup2ed); } @@ -1638,6 +1740,9 @@ allocate_dup2ed_fd(const int fd_src, const int fd_dest) { int i; + /* increase reference count of the fake fd */ + inc_dup_ref_count(fd_dest); + /* Not many applications use dup2(). Normally the number of fd duped is small. */ D_MUTEX_LOCK(&lock_fd_dup2ed); if (num_fd_dup2ed < MAX_FD_DUP2ED) { @@ -1645,7 +1750,6 @@ allocate_dup2ed_fd(const int fd_src, const int fd_dest) if (fd_dup2_list[i].fd_src == -1) { fd_dup2_list[i].fd_src = fd_src; fd_dup2_list[i].fd_dest = fd_dest; - fd_dup2_list[i].dest_closed = false; num_fd_dup2ed++; D_MUTEX_UNLOCK(&lock_fd_dup2ed); return i; @@ -1654,6 +1758,8 @@ allocate_dup2ed_fd(const int fd_src, const int fd_dest) } D_MUTEX_UNLOCK(&lock_fd_dup2ed); + /* decrease dup reference count in error */ + dec_dup_ref_count(fd_dest); DS_ERROR(EMFILE, "fd_dup2_list[] is out of space"); errno = EMFILE; return (-1); @@ -1695,7 +1801,7 @@ close_all_duped_fd(void) /* Only the main thread will call this function in the destruction phase */ for (i = 0; i < MAX_FD_DUP2ED; i++) { if (fd_dup2_list[i].fd_src >= 0) - close_dup_fd_src(libc_close, fd_dup2_list[i].fd_src); + close_dup_fd(libc_close, fd_dup2_list[i].fd_src, true); } num_fd_dup2ed = 0; } @@ -1765,7 +1871,7 @@ check_path_with_dirfd(int dirfd, char **full_path_out, const char *rel_path, int free(*full_path_out); *full_path_out = NULL; } - DS_ERROR(errno, "readlink() failed"); + D_DEBUG(DB_ANY, "readlink() failed: %d (%s)\n", errno, strerror(errno)); return (-1); } @@ -1780,8 +1886,8 @@ open_common(int (*real_open)(const char *pathname, int oflags, ...), const char mode_t mode_query = 0, mode_parent = 0; struct dfs_mt *dfs_mt; char item_name[DFS_MAX_NAME]; - char *parent_dir = NULL; - char *full_path = NULL; + char *parent_dir = NULL; + char *full_path = NULL; if (pathname == NULL) { errno = EFAULT; @@ -1839,14 +1945,15 @@ open_common(int (*real_open)(const char *pathname, int oflags, ...), const char } /* file/dir should be handled by DFS */ if (oflags & O_CREAT) { - rc = dfs_open(dfs_mt->dfs, parent, item_name, mode | S_IFREG, oflags, 0, 0, NULL, - &dfs_obj); + rc = dfs_open(dfs_mt->dfs, parent, item_name, mode | S_IFREG, oflags & (~O_APPEND), + 0, 0, NULL, &dfs_obj); mode_query = S_IFREG; } else if (!parent && (strncmp(item_name, "/", 2) == 0)) { - rc = dfs_lookup(dfs_mt->dfs, "/", oflags, &dfs_obj, &mode_query, NULL); + rc = + dfs_lookup(dfs_mt->dfs, "/", oflags & (~O_APPEND), &dfs_obj, &mode_query, NULL); } else { - rc = dfs_lookup_rel(dfs_mt->dfs, parent, item_name, oflags, &dfs_obj, &mode_query, - NULL); + rc = dfs_lookup_rel(dfs_mt->dfs, parent, item_name, oflags & (~O_APPEND), &dfs_obj, + &mode_query, NULL); } if (rc) @@ -1908,6 +2015,15 @@ open_common(int (*real_open)(const char *pathname, int oflags, ...), const char FREE(parent_dir); + if (oflags & O_APPEND) { + struct stat fstat; + + rc = new_fxstat(1, idx_fd + FD_FILE_BASE, &fstat); + if (rc != 0) + return (-1); + file_list[idx_fd]->offset = fstat.st_size; + } + return (idx_fd + FD_FILE_BASE); org_func: @@ -2011,10 +2127,10 @@ new_close_common(int (*next_close)(int fd), int fd) } else if (fd_directed >= FD_FILE_BASE) { /* This fd is a kernel fd. There was a duplicate fd created. */ if (fd < FD_FILE_BASE) - return close_dup_fd_src(next_close, fd); + return close_dup_fd(next_close, fd, true); /* This fd is a fake fd. There exists a associated kernel fd with dup2. */ - close_dup_fd_dest_fakefd(fd); + free_fd(fd - FD_FILE_BASE, false); return 0; } @@ -2498,6 +2614,17 @@ new_fxstatat(int ver, int dirfd, const char *path, struct stat *stat_buf, int fl return new_xstat(1, path, stat_buf); } + if (dirfd >= FD_FILE_BASE && dirfd < FD_DIR_BASE) { + if (path[0] == 0 && flags & AT_EMPTY_PATH) + /* same as fstat for a file. May need further work to handle flags */ + return new_fxstat(ver, dirfd, stat_buf); + else if (path[0] == 0) + error = ENOENT; + else + error = ENOTDIR; + goto out_err; + } + idx_dfs = check_path_with_dirfd(dirfd, &full_path, path, &error); if (error) goto out_err; @@ -2540,6 +2667,17 @@ new_fstatat(int dirfd, const char *__restrict path, struct stat *__restrict stat return new_xstat(1, path, stat_buf); } + if (dirfd >= FD_FILE_BASE && dirfd < FD_DIR_BASE) { + if (path[0] == 0 && flags & AT_EMPTY_PATH) + /* same as fstat for a file. May need further work to handle flags */ + return fstat(dirfd, stat_buf); + else if (path[0] == 0) + error = ENOENT; + else + error = ENOTDIR; + goto out_err; + } + idx_dfs = check_path_with_dirfd(dirfd, &full_path, path, &error); if (error) goto out_err; @@ -3955,6 +4093,8 @@ getcwd(char *buf, size_t size) if (buf == NULL) { size_t len; + if (size == 0) + size = PATH_MAX; len = strnlen(cur_dir, size); if (len >= size) { errno = ERANGE; @@ -4075,14 +4215,12 @@ faccessat(int dirfd, const char *path, int mode, int flags) int chdir(const char *path) { - int is_target_path, rc, len_str, errno_save; + int is_target_path, rc, len_str; dfs_obj_t *parent; - struct stat stat_buf; struct dfs_mt *dfs_mt; char item_name[DFS_MAX_NAME]; - char *parent_dir = NULL; - char *full_path = NULL; - bool is_root; + char *parent_dir = NULL; + char *full_path = NULL; if (next_chdir == NULL) { next_chdir = dlsym(RTLD_NEXT, "chdir"); @@ -4095,36 +4233,22 @@ chdir(const char *path) &full_path, &dfs_mt); if (rc) D_GOTO(out_err, rc); - if (!is_target_path) { - FREE(parent_dir); - rc = next_chdir(path); - errno_save = errno; - if (rc == 0) - update_cwd(); - errno = errno_save; - return rc; - } - if (!parent && (strncmp(item_name, "/", 2) == 0)) { - is_root = true; - rc = dfs_stat(dfs_mt->dfs, NULL, NULL, &stat_buf); - } else { - is_root = false; - rc = dfs_stat(dfs_mt->dfs, parent, item_name, &stat_buf); - } + rc = next_chdir(path); if (rc) - D_GOTO(out_err, rc); - if (!S_ISDIR(stat_buf.st_mode)) { - D_DEBUG(DB_ANY, "%s is not a directory: %d (%s)\n", path, ENOTDIR, - strerror(ENOTDIR)); - D_GOTO(out_err, rc = ENOTDIR); + D_GOTO(out_err, rc = errno); + + if (!is_target_path) { + strncpy(cur_dir, full_path, DFS_MAX_PATH); + if (cur_dir[DFS_MAX_PATH - 1] != 0) { + D_DEBUG(DB_ANY, "path is too long: %d (%s)\n", ENAMETOOLONG, + strerror(ENAMETOOLONG)); + D_GOTO(out_err, rc = ENAMETOOLONG); + } + D_GOTO(out, rc); } - if (is_root) - rc = dfs_access(dfs_mt->dfs, NULL, NULL, X_OK); - else - rc = dfs_access(dfs_mt->dfs, parent, item_name, X_OK); - if (rc) - D_GOTO(out_err, rc); + + /* assuming the path exists and it is backed by dfuse */ len_str = snprintf(cur_dir, DFS_MAX_PATH, "%s%s", dfs_mt->fs_root, full_path); if (len_str >= DFS_MAX_PATH) { D_DEBUG(DB_ANY, "path is too long: %d (%s)\n", ENAMETOOLONG, @@ -4132,6 +4256,7 @@ chdir(const char *path) D_GOTO(out_err, rc = ENAMETOOLONG); } +out: FREE(parent_dir); return 0; @@ -4144,6 +4269,7 @@ chdir(const char *path) int fchdir(int dirfd) { + int rc; char *pt_end = NULL; if (next_fchdir == NULL) { @@ -4156,6 +4282,15 @@ fchdir(int dirfd) if (dirfd < FD_DIR_BASE) return next_fchdir(dirfd); + /* assume dfuse is running. call chdir() to update cwd. */ + if (next_chdir == NULL) { + next_chdir = dlsym(RTLD_NEXT, "chdir"); + D_ASSERT(next_chdir != NULL); + } + rc = next_chdir(dir_list[dirfd - FD_DIR_BASE]->path); + if (rc) + return rc; + pt_end = stpncpy(cur_dir, dir_list[dirfd - FD_DIR_BASE]->path, DFS_MAX_PATH - 1); if ((long int)(pt_end - cur_dir) >= DFS_MAX_PATH - 1) { D_DEBUG(DB_ANY, "path is too long: %d (%s)\n", ENAMETOOLONG, @@ -4979,7 +5114,7 @@ dup(int oldfd) int dup2(int oldfd, int newfd) { - int fd, fd_directed, idx, rc, errno_save; + int fd, oldfd_directed, newfd_directed, fd_directed, idx, rc, errno_save; /* Need more work later. */ if (next_dup2 == NULL) { @@ -4995,16 +5130,27 @@ dup2(int oldfd, int newfd) else return newfd; } - if ((oldfd < FD_FILE_BASE) && (newfd < FD_FILE_BASE)) + oldfd_directed = query_fd_forward_dest(oldfd); + newfd_directed = query_fd_forward_dest(newfd); + if ((oldfd_directed < FD_FILE_BASE) && (oldfd < FD_FILE_BASE) && + (newfd_directed < FD_FILE_BASE) && (newfd < FD_FILE_BASE)) return next_dup2(oldfd, newfd); + if (oldfd_directed >= FD_FILE_BASE && oldfd < FD_FILE_BASE) + oldfd = oldfd_directed; + if (newfd >= FD_FILE_BASE) { DS_ERROR(ENOTSUP, "unimplemented yet for newfd >= FD_FILE_BASE"); errno = ENOTSUP; return -1; } fd_directed = query_fd_forward_dest(newfd); - if (fd_directed >= FD_FILE_BASE) { + if (fd_directed >= FD_FILE_BASE && newfd < FD_FILE_BASE && oldfd_directed < FD_FILE_BASE && + oldfd < FD_FILE_BASE) { + /* need to remove newfd from forward list and decrease refcount in file_list[] */ + close_dup_fd(libc_close, newfd, false); + return next_dup2(oldfd, newfd); + } else if (fd_directed >= FD_FILE_BASE) { DS_ERROR(ENOTSUP, "unimplemented yet for fd_directed >= FD_FILE_BASE"); errno = ENOTSUP; return -1; @@ -5015,13 +5161,22 @@ dup2(int oldfd, int newfd) else fd_directed = query_fd_forward_dest(oldfd); if (fd_directed >= FD_FILE_BASE) { - rc = close(newfd); - if (rc != 0 && errno != EBADF) - return -1; - fd = allocate_a_fd_from_kernel(); + int fd_tmp; + + fd_tmp = allocate_a_fd_from_kernel(); + if (fd_tmp < 0) { + /* failed to allocate an fd from kernel */ + errno_save = errno; + DS_ERROR(errno_save, "failed to get a fd from kernel"); + errno = errno_save; + return (-1); + } + /* rely on dup2() to get the desired fd */ + fd = next_dup2(fd_tmp, newfd); if (fd < 0) { /* failed to allocate an fd from kernel */ errno_save = errno; + close(fd_tmp); DS_ERROR(errno_save, "failed to get a fd from kernel"); errno = errno_save; return (-1); @@ -5031,6 +5186,9 @@ dup2(int oldfd, int newfd) errno = EBUSY; return (-1); } + rc = libc_close(fd_tmp); + if (rc != 0) + return -1; idx = allocate_dup2ed_fd(fd, fd_directed); if (idx >= 0) return fd; @@ -5515,11 +5673,12 @@ init_myhook(void) else daos_debug_inited = true; - env_log = getenv("D_IL_REPORT"); + d_agetenv_str(&env_log, "D_IL_REPORT"); if (env_log) { report = true; if (strncmp(env_log, "0", 2) == 0 || strncasecmp(env_log, "false", 6) == 0) report = false; + d_freeenv_str(&env_log); } /* Find dfuse mounts from /proc/mounts */ @@ -5542,6 +5701,10 @@ init_myhook(void) } update_cwd(); + rc = D_MUTEX_INIT(&lock_reserve_fd, NULL); + if (rc) + return; + rc = D_MUTEX_INIT(&lock_dfs, NULL); if (rc) return; @@ -5673,7 +5836,7 @@ close_all_fd(void) for (i = 0; i <= last_fd; i++) { if (file_list[i]) - free_fd(i); + free_fd(i, false); } } @@ -5723,6 +5886,7 @@ finalize_myhook(void) finalize_dfs(); D_MUTEX_DESTROY(&lock_eqh); + D_MUTEX_DESTROY(&lock_reserve_fd); D_MUTEX_DESTROY(&lock_dfs); D_MUTEX_DESTROY(&lock_dirfd); D_MUTEX_DESTROY(&lock_fd); @@ -5804,6 +5968,8 @@ finalize_dfs(void) for (i = 0; i < num_dfs; i++) { if (dfs_list[i].dfs_dir_hash == NULL) { D_FREE(dfs_list[i].fs_root); + D_FREE(dfs_list[i].pool); + D_FREE(dfs_list[i].cont); continue; } @@ -5835,11 +6001,14 @@ finalize_dfs(void) continue; } D_FREE(dfs_list[i].fs_root); + D_FREE(dfs_list[i].pool); + D_FREE(dfs_list[i].cont); } - if (daos_inited) { + if (atomic_load_relaxed(&daos_inited)) { uint32_t init_cnt, j; + free_reserved_low_fd(); init_cnt = atomic_load_relaxed(&daos_init_cnt); for (j = 0; j < init_cnt; j++) { rc = daos_fini(); diff --git a/src/client/pydaos/pydaos_shim.c b/src/client/pydaos/pydaos_shim.c index 5349b08d03a..1100ec7a368 100644 --- a/src/client/pydaos/pydaos_shim.c +++ b/src/client/pydaos/pydaos_shim.c @@ -106,7 +106,7 @@ __shim_handle__daos_init(PyObject *self, PyObject *args) rc = daos_init(); if ((rc == 0) && (use_glob_eq == 0)) { - override = getenv("PYDAOS_GLOB_EQ"); + d_agetenv_str(&override, "PYDAOS_GLOB_EQ"); if ((override == NULL) || strcmp(override, "0")) { use_glob_eq = 1; ret = daos_eq_create(&glob_eq); @@ -115,6 +115,7 @@ __shim_handle__daos_init(PyObject *self, PyObject *args) use_glob_eq = 0; } } + d_freeenv_str(&override); } return PyInt_FromLong(rc); diff --git a/src/common/debug.c b/src/common/debug.c index 5096e3ec92c..b586f0e50d3 100644 --- a/src/common/debug.c +++ b/src/common/debug.c @@ -104,14 +104,16 @@ unsigned int daos_io_bypass; static void io_bypass_init(void) { - char *str = getenv(DENV_IO_BYPASS); - char *tok; - char *saved_ptr; + char *str; + char *tok; + char *saved_ptr; + char *env; - if (!str) + d_agetenv_str(&env, DENV_IO_BYPASS); + if (env == NULL) return; - tok = strtok_r(str, ",", &saved_ptr); + tok = strtok_r(env, ",", &saved_ptr); while (tok) { struct io_bypass *iob; @@ -129,6 +131,7 @@ io_bypass_init(void) } tok = str; }; + d_freeenv_str(&env); } void @@ -162,17 +165,18 @@ daos_debug_init_ex(char *logfile, d_dbug_t logmask) } /* honor the env variable first */ - logfile = getenv(D_LOG_FILE_ENV); + rc = d_agetenv_str(&logfile, D_LOG_FILE_ENV); if (logfile == NULL || strlen(logfile) == 0) { flags |= DLOG_FLV_STDOUT; - logfile = NULL; + d_freeenv_str(&logfile); } else if (!strncmp(logfile, "/dev/null", 9)) { /* Don't set up logging or log to stdout if the log file is /dev/null */ - logfile = NULL; + d_freeenv_str(&logfile); } rc = d_log_init_adv("DAOS", logfile, flags, logmask, DLOG_CRIT, log_id_cb); + d_freeenv_str(&logfile); if (rc != 0) { D_PRINT_ERR("Failed to init DAOS debug log: "DF_RC"\n", DP_RC(rc)); diff --git a/src/common/misc.c b/src/common/misc.c index f7d6b1ddad0..afd8ed8fa38 100644 --- a/src/common/misc.c +++ b/src/common/misc.c @@ -684,13 +684,15 @@ daos_crt_init_opt_get(bool server, int ctx_nr) * 1) now sockets provider cannot create more than 16 contexts for SEP * 2) some problems if SEP communicates with regular EP. */ - addr_env = (crt_phy_addr_t)getenv(CRT_PHY_ADDR_ENV); + d_agetenv_str(&addr_env, CRT_PHY_ADDR_ENV); if (addr_env != NULL && strncmp(addr_env, CRT_SOCKET_PROV, strlen(CRT_SOCKET_PROV)) == 0) { D_INFO("for sockets provider force it to use regular EP.\n"); daos_crt_init_opt.cio_use_sep = 0; + d_freeenv_str(&addr_env); goto out; } + d_freeenv_str(&addr_env); daos_crt_init_opt.cio_use_sep = 1; diff --git a/src/control/server/ctl_storage_rpc.go b/src/control/server/ctl_storage_rpc.go index 178304b8eb2..3b6941ebbc4 100644 --- a/src/control/server/ctl_storage_rpc.go +++ b/src/control/server/ctl_storage_rpc.go @@ -114,7 +114,7 @@ func bdevScanToProtoResp(scan scanBdevsFn, bdevCfgs storage.TierConfigs) (*ctlpb } if bdevCfgs.HaveRealNVMe() { - // Update proto Ctrlrs with role info for offline display. + // Update proto Ctrlrs with role info and normal (DAOS) state for off-line display. for _, c := range pbCtrlrs { pciAddrStr, err := ctrlrToPciStr(c) if err != nil { @@ -133,6 +133,7 @@ func bdevScanToProtoResp(scan scanBdevsFn, bdevCfgs storage.TierConfigs) (*ctlpb RoleBits: uint32(bc.Bdev.DeviceRoles.OptionBits), Rank: uint32(ranklist.NilRank), }) + c.DevState = ctlpb.NvmeDevState_NORMAL } } diff --git a/src/control/server/ctl_storage_rpc_test.go b/src/control/server/ctl_storage_rpc_test.go index 56142173835..2dfc97cb310 100644 --- a/src/control/server/ctl_storage_rpc_test.go +++ b/src/control/server/ctl_storage_rpc_test.go @@ -482,7 +482,8 @@ func TestServer_bdevScan(t *testing.T) { Ctrlrs: proto.NvmeControllers{ func() *ctlpb.NvmeController { nc := &ctlpb.NvmeController{ - PciAddr: "050505:01:00.0", + PciAddr: "050505:01:00.0", + DevState: ctlpb.NvmeDevState_NORMAL, } nc.SmdDevices = []*ctlpb.SmdDevice{ {Rank: uint32(ranklist.NilRank)}, @@ -491,7 +492,8 @@ func TestServer_bdevScan(t *testing.T) { }(), func() *ctlpb.NvmeController { nc := &ctlpb.NvmeController{ - PciAddr: "050505:03:00.0", + PciAddr: "050505:03:00.0", + DevState: ctlpb.NvmeDevState_NORMAL, } nc.SmdDevices = []*ctlpb.SmdDevice{ {Rank: uint32(ranklist.NilRank)}, diff --git a/src/control/server/instance_storage_rpc.go b/src/control/server/instance_storage_rpc.go index 2634d3768b9..0b5ab937a39 100644 --- a/src/control/server/instance_storage_rpc.go +++ b/src/control/server/instance_storage_rpc.go @@ -9,6 +9,7 @@ package server import ( "context" "fmt" + "sort" "time" "github.com/pkg/errors" @@ -202,10 +203,6 @@ func scanEngineBdevsOverDrpc(ctx context.Context, engine Engine, pbReq *ctlpb.Sc return nil, errors.Errorf("smd %q has no ctrlr ref", sd.Uuid) } - if !sd.Ctrlr.IsScannable() { - engine.Debugf("smd %q skip ctrlr %+v with bad state", sd.Uuid, sd.Ctrlr) - continue - } addr := sd.Ctrlr.PciAddr if _, exists := seenCtrlrs[addr]; !exists { @@ -230,6 +227,12 @@ func scanEngineBdevsOverDrpc(ctx context.Context, engine Engine, pbReq *ctlpb.Sc Rank: engineRank.Uint32(), } + if !sd.Ctrlr.IsScannable() { + engine.Debugf("smd %q partial update of ctrlr %+v with bad state", + sd.Uuid, sd.Ctrlr) + continue + } + // Populate health if requested. healthUpdated := false if pbReq.Health && c.HealthStats == nil { @@ -265,8 +268,14 @@ func scanEngineBdevsOverDrpc(ctx context.Context, engine Engine, pbReq *ctlpb.Sc c.SmdDevices = append(c.SmdDevices, nsd) } - for _, c := range seenCtrlrs { - engine.Tracef("nvme ssd scanned: %+v", c) + var keys []string + for k := range seenCtrlrs { + keys = append(keys, k) + } + sort.Strings(keys) + for _, k := range keys { + c := seenCtrlrs[k] + engine.Tracef("bdev discovered: %+v", c) pbResp.Ctrlrs = append(pbResp.Ctrlrs, c) } @@ -334,36 +343,45 @@ func bdevScanEngine(ctx context.Context, engine Engine, req *ctlpb.ScanNvmeReq) return nil, err } - nrScannedBdevs, err := getEffCtrlrCount(resp.Ctrlrs) + // Compare number of VMD domain addresses rather than the number of backing devices found + // behind it as the domain is what is specified in the server config file. + nrBdevs, err := getEffCtrlrCount(resp.Ctrlrs) if err != nil { return nil, err } - if nrScannedBdevs == nrCfgBdevs { - return resp, nil - } // Retry once if engine provider scan returns unexpected number of controllers in case // engines claimed devices between when started state was checked and scan was executed. - if !isStarted { + if nrBdevs != nrCfgBdevs && !isStarted { engine.Debugf("retrying engine bdev scan as unexpected nr returned, want %d got %d", - nrCfgBdevs, nrScannedBdevs) + nrCfgBdevs, nrBdevs) resp, err = bdevScanEngineAssigned(ctx, engine, req, bdevCfgs, &isStarted) if err != nil { return nil, err } - nrScannedBdevs, err := getEffCtrlrCount(resp.Ctrlrs) + nrBdevs, err = getEffCtrlrCount(resp.Ctrlrs) if err != nil { return nil, err } - if nrScannedBdevs == nrCfgBdevs { - return resp, nil - } } - engine.Debugf("engine bdev scan returned unexpected nr, want %d got %d", nrCfgBdevs, - nrScannedBdevs) + if nrBdevs != nrCfgBdevs { + engine.Debugf("engine bdev scan returned unexpected nr, want %d got %d", + nrCfgBdevs, nrBdevs) + } + + // Filter devices in an unusable state from the response. + outCtrlrs := make([]*ctlpb.NvmeController, 0, len(resp.Ctrlrs)) + for _, c := range resp.Ctrlrs { + if c.IsScannable() { + outCtrlrs = append(outCtrlrs, c) + } else { + engine.Tracef("excluding bdev from scan results: %+v", c) + } + } + resp.Ctrlrs = outCtrlrs return resp, nil } diff --git a/src/control/server/instance_storage_rpc_test.go b/src/control/server/instance_storage_rpc_test.go index 8af143ec2fb..6a15b247e60 100644 --- a/src/control/server/instance_storage_rpc_test.go +++ b/src/control/server/instance_storage_rpc_test.go @@ -30,6 +30,8 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { withState := func(ctrlr *ctlpb.NvmeController, state ctlpb.NvmeDevState) *ctlpb.NvmeController { ctrlr.DevState = state ctrlr.HealthStats = nil + // scanEngineBdevsOverDrpc will always populate RoleBits in ctrlr.SmdDevices + ctrlr.SmdDevices = []*ctlpb.SmdDevice{{RoleBits: 7}} return ctrlr } withDevState := func(smd *ctlpb.SmdDevice, state ctlpb.NvmeDevState) *ctlpb.SmdDevice { @@ -137,15 +139,22 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { engStopped: true, provRes: &storage.BdevScanResponse{ Controllers: storage.NvmeControllers{ - &storage.NvmeController{PciAddr: "050505:01:00.0"}, - &storage.NvmeController{PciAddr: "050505:03:00.0"}, + &storage.NvmeController{ + PciAddr: "050505:01:00.0", + NvmeState: storage.NvmeStateNormal, + }, + &storage.NvmeController{ + PciAddr: "050505:03:00.0", + NvmeState: storage.NvmeStateNormal, + }, }, }, expResp: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ func() *ctlpb.NvmeController { nc := &ctlpb.NvmeController{ - PciAddr: "050505:01:00.0", + PciAddr: "050505:01:00.0", + DevState: ctlpb.NvmeDevState_NORMAL, } nc.SmdDevices = []*ctlpb.SmdDevice{ {Rank: uint32(ranklist.NilRank)}, @@ -154,7 +163,8 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { }(), func() *ctlpb.NvmeController { nc := &ctlpb.NvmeController{ - PciAddr: "050505:03:00.0", + PciAddr: "050505:03:00.0", + DevState: ctlpb.NvmeDevState_NORMAL, } nc.SmdDevices = []*ctlpb.SmdDevice{ {Rank: uint32(ranklist.NilRank)}, @@ -275,26 +285,39 @@ func TestIOEngineInstance_bdevScanEngine(t *testing.T) { }, "scan over drpc; only ctrlrs with valid states shown": { req: ctlpb.ScanNvmeReq{}, + bdevAddrs: []string{ + test.MockPCIAddr(1), test.MockPCIAddr(2), + test.MockPCIAddr(1), test.MockPCIAddr(2), + test.MockPCIAddr(5), + }, smdRes: &ctlpb.SmdDevResp{ Devices: proto.SmdDevices{ - withDevState(proto.MockSmdDevice(storage.MockNvmeController(1), 1), + withDevState(proto.MockSmdDevice( + storage.MockNvmeController(1), 1), ctlpb.NvmeDevState_UNPLUGGED), - withDevState(proto.MockSmdDevice(storage.MockNvmeController(2), 2), + withDevState(proto.MockSmdDevice( + storage.MockNvmeController(2), 2), ctlpb.NvmeDevState_UNKNOWN), - withDevState(proto.MockSmdDevice(storage.MockNvmeController(3), 3), + withDevState(proto.MockSmdDevice( + storage.MockNvmeController(3), 3), ctlpb.NvmeDevState_NORMAL), - withDevState(proto.MockSmdDevice(storage.MockNvmeController(4), 4), + withDevState(proto.MockSmdDevice( + storage.MockNvmeController(4), 4), ctlpb.NvmeDevState_NEW), - withDevState(proto.MockSmdDevice(storage.MockNvmeController(5), 5), + withDevState(proto.MockSmdDevice( + storage.MockNvmeController(5), 5), ctlpb.NvmeDevState_EVICTED), }, }, healthRes: healthRespWithUsage(), expResp: &ctlpb.ScanNvmeResp{ Ctrlrs: proto.NvmeControllers{ - withState(proto.MockNvmeController(3), ctlpb.NvmeDevState_NORMAL), - withState(proto.MockNvmeController(4), ctlpb.NvmeDevState_NEW), - withState(proto.MockNvmeController(5), ctlpb.NvmeDevState_EVICTED), + withState(proto.MockNvmeController(3), + ctlpb.NvmeDevState_NORMAL), + withState(proto.MockNvmeController(4), + ctlpb.NvmeDevState_NEW), + withState(proto.MockNvmeController(5), + ctlpb.NvmeDevState_EVICTED), }, State: new(ctlpb.ResponseState), }, diff --git a/src/engine/init.c b/src/engine/init.c index 23379878700..0f705e5366b 100644 --- a/src/engine/init.c +++ b/src/engine/init.c @@ -432,14 +432,15 @@ dss_init_state_set(enum dss_init_state state) static int abt_max_num_xstreams(void) { - char *env; + unsigned num_xstreams = 0; - env = getenv("ABT_MAX_NUM_XSTREAMS"); - if (env == NULL) - env = getenv("ABT_ENV_MAX_NUM_XSTREAMS"); - if (env != NULL) - return atoi(env); - return 0; + if (d_isenv_def("ABT_MAX_NUM_XSTREAMS")) + d_getenv_uint("ABT_MAX_NUM_XSTREAMS", &num_xstreams); + else + d_getenv_uint("ABT_ENV_MAX_NUM_XSTREAMS", &num_xstreams); + D_ASSERT(num_xstreams <= INT_MAX); + + return num_xstreams; } static int diff --git a/src/engine/srv.c b/src/engine/srv.c index df0733ed638..0afc13861d1 100644 --- a/src/engine/srv.c +++ b/src/engine/srv.c @@ -1048,13 +1048,14 @@ dss_xstreams_init(void) sched_relax_intvl); } - env = getenv("DAOS_SCHED_RELAX_MODE"); + d_agetenv_str(&env, "DAOS_SCHED_RELAX_MODE"); if (env) { sched_relax_mode = sched_relax_str2mode(env); if (sched_relax_mode == SCHED_RELAX_MODE_INVALID) { D_WARN("Invalid relax mode [%s]\n", env); sched_relax_mode = SCHED_RELAX_MODE_NET; } + d_freeenv_str(&env); } D_INFO("CPU relax mode is set to [%s]\n", sched_relax_mode2str(sched_relax_mode)); diff --git a/src/gurt/debug.c b/src/gurt/debug.c index 4fb112b7dcc..bc672c03067 100644 --- a/src/gurt/debug.c +++ b/src/gurt/debug.c @@ -380,7 +380,7 @@ debug_prio_err_load_env(void) char *env; int i; - env = getenv(DD_STDERR_ENV); + d_agetenv_str(&env, DD_STDERR_ENV); if (env == NULL) return; @@ -395,6 +395,7 @@ debug_prio_err_load_env(void) /* invalid DD_STDERR option */ if (d_dbglog_data.dd_prio_err == 0) D_PRINT_ERR("DD_STDERR = %s - invalid option\n", env); + d_freeenv_str(&env); } void @@ -415,7 +416,16 @@ d_log_sync_mask_ex(const char *log_mask, const char *dd_mask) void d_log_sync_mask(void) { - d_log_sync_mask_ex(getenv(D_LOG_MASK_ENV), getenv(DD_MASK_ENV)); + char *log_mask; + char *dd_mask; + + d_agetenv_str(&log_mask, D_LOG_MASK_ENV); + d_agetenv_str(&dd_mask, DD_MASK_ENV); + + d_log_sync_mask_ex(log_mask, dd_mask); + + d_freeenv_str(&dd_mask); + d_freeenv_str(&log_mask); } /** @@ -540,14 +550,15 @@ d_log_init(void) int flags = DLOG_FLV_LOGPID | DLOG_FLV_FAC | DLOG_FLV_TAG; int rc; - log_file = getenv(D_LOG_FILE_ENV); + d_agetenv_str(&log_file, D_LOG_FILE_ENV); if (log_file == NULL || strlen(log_file) == 0) { flags |= DLOG_FLV_STDOUT; - log_file = NULL; + d_freeenv_str(&log_file); } rc = d_log_init_adv("CaRT", log_file, flags, DLOG_WARN, DLOG_EMERG, NULL); + d_freeenv_str(&log_file); if (rc != DER_SUCCESS) { D_PRINT_ERR("d_log_init_adv failed, rc: %d.\n", rc); D_GOTO(out, rc); diff --git a/src/gurt/dlog.c b/src/gurt/dlog.c index 7cbce2fa7b6..2f1324463c0 100644 --- a/src/gurt/dlog.c +++ b/src/gurt/dlog.c @@ -847,20 +847,20 @@ d_log_open(char *tag, int maxfac_hint, int default_mask, int stderr_mask, if (pri != -1) mst.flush_pri = pri; - d_free_env_str(&env); + d_freeenv_str(&env); } d_agetenv_str(&env, D_LOG_TRUNCATE_ENV); if (env != NULL && atoi(env) > 0) truncate = 1; - d_free_env_str(&env); + d_freeenv_str(&env); d_agetenv_str(&env, D_LOG_SIZE_ENV); if (env != NULL) { log_size = d_getenv_size(env); if (log_size < LOG_SIZE_MIN) log_size = LOG_SIZE_MIN; - d_free_env_str(&env); + d_freeenv_str(&env); } d_agetenv_str(&env, D_LOG_FILE_APPEND_PID_ENV); @@ -875,12 +875,12 @@ d_log_open(char *tag, int maxfac_hint, int default_mask, int stderr_mask, "continuing.\n"); } } - d_free_env_str(&env); + d_freeenv_str(&env); d_agetenv_str(&env, D_LOG_FILE_APPEND_RANK_ENV); if (env && strcmp(env, "0") != 0) mst.append_rank = true; - d_free_env_str(&env); + d_freeenv_str(&env); /* quick sanity check (mst.tag is non-null if already open) */ if (d_log_xst.tag || !tag || @@ -918,7 +918,7 @@ d_log_open(char *tag, int maxfac_hint, int default_mask, int stderr_mask, d_agetenv_str(&env, D_LOG_STDERR_IN_LOG_ENV); if (env != NULL && atoi(env) > 0) merge_stderr = true; - d_free_env_str(&env); + d_freeenv_str(&env); if (!truncate) log_flags |= O_APPEND; @@ -1107,7 +1107,7 @@ bool d_logfac_is_enabled(const char *fac_name) rc = true; out: - d_free_env_str(&ddsubsys_env); + d_freeenv_str(&ddsubsys_env); return rc; } diff --git a/src/gurt/fault_inject.c b/src/gurt/fault_inject.c index 4ffbd2c40e4..f5225bab3dd 100644 --- a/src/gurt/fault_inject.c +++ b/src/gurt/fault_inject.c @@ -616,7 +616,7 @@ d_fault_inject_init(void) out: if (fp) fclose(fp); - d_free_env_str(&config_file); + d_freeenv_str(&config_file); return rc; } diff --git a/src/gurt/misc.c b/src/gurt/misc.c index 3b287ca73ff..d92055a905d 100644 --- a/src/gurt/misc.c +++ b/src/gurt/misc.c @@ -1120,7 +1120,7 @@ d_agetenv_str(char **str_val, const char *name) * \param[in,out] str_val Copy of an environment string value. */ void -d_free_env_str(char **str_val) +d_freeenv_str(char **str_val) { assert(str_val != NULL); diff --git a/src/gurt/tests/test_gurt.c b/src/gurt/tests/test_gurt.c index ebb9a0ec701..e9f8f435486 100644 --- a/src/gurt/tests/test_gurt.c +++ b/src/gurt/tests/test_gurt.c @@ -2140,7 +2140,7 @@ test_d_agetenv_str(void **state) assert_int_equal(rc, -DER_SUCCESS); assert_non_null(env); assert_string_equal(env, "bar"); - d_free_env_str(&env); + d_freeenv_str(&env); assert_null(env); getenv_return = ""; @@ -2148,7 +2148,7 @@ test_d_agetenv_str(void **state) assert_int_equal(rc, -DER_SUCCESS); assert_non_null(env); assert_string_equal(env, ""); - d_free_env_str(&env); + d_freeenv_str(&env); assert_null(env); getenv_return = NULL; diff --git a/src/include/gurt/common.h b/src/include/gurt/common.h index 1cf40fc3292..164421174a0 100644 --- a/src/include/gurt/common.h +++ b/src/include/gurt/common.h @@ -581,7 +581,7 @@ d_getenv_str(char *str_val, size_t str_size, const char *name); int d_agetenv_str(char **str_val, const char *name); void -d_free_env_str(char **str_val); +d_freeenv_str(char **str_val); int d_getenv_bool(const char *name, bool *bool_val); int diff --git a/src/mgmt/cli_mgmt.c b/src/mgmt/cli_mgmt.c index 57cf0faa723..78db4c699e4 100644 --- a/src/mgmt/cli_mgmt.c +++ b/src/mgmt/cli_mgmt.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2016-2023 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -449,13 +449,16 @@ _split_env(char *env, char **name, char **value) */ int dc_mgmt_net_cfg(const char *name) { - int rc; - char buf[SYS_INFO_BUF_SIZE]; - char *crt_timeout; - char *ofi_interface; - char *ofi_domain; - char *cli_srx_set; - struct dc_mgmt_sys_info info; + int rc; + char *crt_phy_addr_str; + char *crt_ctx_share_addr = NULL; + char *cli_srx_set = NULL; + char *crt_timeout = NULL; + char *ofi_interface; + char *ofi_interface_env = NULL; + char *ofi_domain = ""; + char *ofi_domain_env = NULL; + struct dc_mgmt_sys_info info; Mgmt__GetAttachInfoResp *resp; /* Query the agent for the CaRT network configuration parameters */ @@ -491,26 +494,34 @@ int dc_mgmt_net_cfg(const char *name) g_num_serv_ranks = resp->n_rank_uris; D_INFO("Setting number of server ranks to %d\n", g_num_serv_ranks); /* These two are always set */ - rc = d_setenv("CRT_PHY_ADDR_STR", info.provider, 1); + crt_phy_addr_str = info.provider; + rc = d_setenv("CRT_PHY_ADDR_STR", crt_phy_addr_str, 1); if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); - sprintf(buf, "%d", info.crt_ctx_share_addr); - rc = d_setenv("CRT_CTX_SHARE_ADDR", buf, 1); + rc = asprintf(&crt_ctx_share_addr, "%d", info.crt_ctx_share_addr); + if (rc < 0) { + crt_ctx_share_addr = NULL; + D_GOTO(cleanup, rc = -DER_NOMEM); + } + rc = d_setenv("CRT_CTX_SHARE_ADDR", crt_ctx_share_addr, 1); if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); /* If the server has set this, the client must use the same value. */ if (info.srv_srx_set != -1) { - sprintf(buf, "%d", info.srv_srx_set); - rc = d_setenv("FI_OFI_RXM_USE_SRX", buf, 1); + rc = asprintf(&cli_srx_set, "%d", info.srv_srx_set); + if (rc < 0) { + cli_srx_set = NULL; + D_GOTO(cleanup, rc = -DER_NOMEM); + } + rc = d_setenv("FI_OFI_RXM_USE_SRX", cli_srx_set, 1); if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); - D_INFO("Using server's value for FI_OFI_RXM_USE_SRX: %s\n", - buf); + D_INFO("Using server's value for FI_OFI_RXM_USE_SRX: %s\n", cli_srx_set); } else { /* Client may not set it if the server hasn't. */ - cli_srx_set = getenv("FI_OFI_RXM_USE_SRX"); + d_agetenv_str(&cli_srx_set, "FI_OFI_RXM_USE_SRX"); if (cli_srx_set) { D_ERROR("Client set FI_OFI_RXM_USE_SRX to %s, " "but server is unset!\n", cli_srx_set); @@ -519,21 +530,26 @@ int dc_mgmt_net_cfg(const char *name) } /* Allow client env overrides for these three */ - crt_timeout = getenv("CRT_TIMEOUT"); + d_agetenv_str(&crt_timeout, "CRT_TIMEOUT"); if (!crt_timeout) { - sprintf(buf, "%d", info.crt_timeout); - rc = d_setenv("CRT_TIMEOUT", buf, 1); + rc = asprintf(&crt_timeout, "%d", info.crt_timeout); + if (rc < 0) { + crt_timeout = NULL; + D_GOTO(cleanup, rc = -DER_NOMEM); + } + D_INFO("setenv CRT_TIMEOUT=%s\n", crt_timeout); + rc = d_setenv("CRT_TIMEOUT", crt_timeout, 1); if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); } else { - D_INFO("Using client provided CRT_TIMEOUT: %s\n", - crt_timeout); + D_DEBUG(DB_MGMT, "Using client provided CRT_TIMEOUT: %s\n", crt_timeout); } - ofi_interface = getenv("OFI_INTERFACE"); - ofi_domain = getenv("OFI_DOMAIN"); - if (!ofi_interface) { - rc = d_setenv("OFI_INTERFACE", info.interface, 1); + d_agetenv_str(&ofi_interface_env, "OFI_INTERFACE"); + d_agetenv_str(&ofi_domain_env, "OFI_DOMAIN"); + if (!ofi_interface_env) { + ofi_interface = info.interface; + rc = d_setenv("OFI_INTERFACE", ofi_interface, 1); if (rc != 0) D_GOTO(cleanup, rc = d_errno2der(errno)); @@ -541,31 +557,39 @@ int dc_mgmt_net_cfg(const char *name) * If we use the agent as the source, client env shouldn't be allowed to override * the domain. Otherwise we could get a mismatch between interface and domain. */ - if (ofi_domain) + ofi_domain = info.domain; + if (ofi_domain_env) D_WARN("Ignoring OFI_DOMAIN '%s' because OFI_INTERFACE is not set; using " "automatic configuration instead\n", ofi_domain); - rc = d_setenv("OFI_DOMAIN", info.domain, 1); - if (rc != 0) + rc = d_setenv("OFI_DOMAIN", ofi_domain, 1); + if (rc != 0) { D_GOTO(cleanup, rc = d_errno2der(errno)); + } } else { + ofi_interface = ofi_interface_env; D_INFO("Using client provided OFI_INTERFACE: %s\n", ofi_interface); /* If the client env didn't provide a domain, we can assume we don't need one. */ - if (ofi_domain) + if (ofi_domain_env) { + ofi_domain = ofi_domain_env; D_INFO("Using client provided OFI_DOMAIN: %s\n", ofi_domain); + } } - D_INFO("Network interface: %s, Domain: %s\n", getenv("OFI_INTERFACE"), - getenv("OFI_DOMAIN")); + D_INFO("Network interface: %s, Domain: %s\n", ofi_interface, ofi_domain); D_DEBUG(DB_MGMT, "CaRT initialization with:\n" "\tCRT_PHY_ADDR_STR: %s, " "CRT_CTX_SHARE_ADDR: %s, CRT_TIMEOUT: %s\n", - getenv("CRT_PHY_ADDR_STR"), - getenv("CRT_CTX_SHARE_ADDR"), getenv("CRT_TIMEOUT")); + crt_phy_addr_str, crt_ctx_share_addr, crt_timeout); cleanup: + d_freeenv_str(&ofi_domain_env); + d_freeenv_str(&ofi_interface_env); + d_freeenv_str(&crt_timeout); + d_freeenv_str(&cli_srx_set); + d_freeenv_str(&crt_ctx_share_addr); put_attach_info(&info, resp); return rc; @@ -585,14 +609,16 @@ int dc_mgmt_net_cfg_check(const char *name) /* Client may not set it if the server hasn't. */ if (info.srv_srx_set == -1) { - cli_srx_set = getenv("FI_OFI_RXM_USE_SRX"); + d_agetenv_str(&cli_srx_set, "FI_OFI_RXM_USE_SRX"); if (cli_srx_set) { D_ERROR("Client set FI_OFI_RXM_USE_SRX to %s, " "but server is unset!\n", cli_srx_set); + d_freeenv_str(&cli_srx_set); rc = -DER_INVAL; goto out; } } + rc = 0; out: put_attach_info(&info, resp); diff --git a/src/pool/srv_pool.c b/src/pool/srv_pool.c index 1bf8b82ded4..eee51bfc695 100644 --- a/src/pool/srv_pool.c +++ b/src/pool/srv_pool.c @@ -7147,12 +7147,14 @@ pool_svc_update_map(struct pool_svc *svc, crt_opcode_t opc, bool exclude_rank, D_GOTO(out, rc); } - env = getenv(REBUILD_ENV); + d_agetenv_str(&env, REBUILD_ENV); if ((env && !strcasecmp(env, REBUILD_ENV_DISABLED)) || daos_fail_check(DAOS_REBUILD_DISABLE)) { D_DEBUG(DB_TRACE, "Rebuild is disabled\n"); + d_freeenv_str(&env); D_GOTO(out, rc = 0); } + d_freeenv_str(&env); rc = ds_pool_iv_prop_fetch(svc->ps_pool, &prop); if (rc) diff --git a/src/rsvc/srv.c b/src/rsvc/srv.c index 7e90aa64d2d..97a03f1f013 100644 --- a/src/rsvc/srv.c +++ b/src/rsvc/srv.c @@ -1389,10 +1389,11 @@ ds_rsvc_get_md_cap(void) char *v; int n; - v = getenv(DAOS_MD_CAP_ENV); /* in MB */ + d_agetenv_str(&v, DAOS_MD_CAP_ENV); /* in MB */ if (v == NULL) return size_default; n = atoi(v); + d_freeenv_str(&v); if ((n << 20) < MINIMUM_DAOS_MD_CAP_SIZE) { D_ERROR("metadata capacity too low; using %zu MB\n", size_default >> 20); diff --git a/src/tests/ftest/cart/iv_server.c b/src/tests/ftest/cart/iv_server.c index bdc549cee19..1e823972362 100644 --- a/src/tests/ftest/cart/iv_server.c +++ b/src/tests/ftest/cart/iv_server.c @@ -1241,13 +1241,14 @@ int main(int argc, char **argv) return -1; } - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); if (env_self_rank == NULL) { printf("CRT_L_RANK was not set\n"); return -1; } my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); /* rank, num_attach_retries, is_server, assert_on_error */ crtu_test_init(my_rank, 20, true, true); @@ -1274,7 +1275,7 @@ int main(int argc, char **argv) init_work_contexts(); /* Load the group configuration file */ - grp_cfg_file = getenv("CRT_L_GRP_CFG"); + rc = d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); if (grp_cfg_file == NULL) { D_ERROR("CRT_L_GRP_CFG was not set\n"); assert(0); @@ -1288,6 +1289,7 @@ int main(int argc, char **argv) D_ERROR("Failed to load group file %s\n", grp_cfg_file); assert(0); } + d_freeenv_str(&grp_cfg_file); /* Start the server for myself */ DBG_PRINT("Server starting, self_rank=%d\n", my_rank); diff --git a/src/tests/ftest/cart/no_pmix_corpc_errors.c b/src/tests/ftest/cart/no_pmix_corpc_errors.c index be8886e6d22..c69e4d900bd 100644 --- a/src/tests/ftest/cart/no_pmix_corpc_errors.c +++ b/src/tests/ftest/cart/no_pmix_corpc_errors.c @@ -271,8 +271,9 @@ int main(int argc, char **argv) crtu_set_shutdown_delay(2); } - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); /* rank, num_attach_retries, is_server, assert_on_error */ crtu_test_init(my_rank, 20, true, true); @@ -326,7 +327,7 @@ int main(int argc, char **argv) } } - grp_cfg_file = getenv("CRT_L_GRP_CFG"); + d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); rc = crt_rank_self_set(my_rank, 1 /* group_version_min */); if (rc != 0) { @@ -351,6 +352,7 @@ int main(int argc, char **argv) DBG_PRINT("self_rank=%d uri=%s grp_cfg_file=%s\n", my_rank, my_uri, grp_cfg_file); + d_freeenv_str(&grp_cfg_file); D_FREE(my_uri); rc = crt_group_size(NULL, &grp_size); diff --git a/src/tests/ftest/cart/no_pmix_group_test.c b/src/tests/ftest/cart/no_pmix_group_test.c index ecc6881dc55..7290b478aa5 100644 --- a/src/tests/ftest/cart/no_pmix_group_test.c +++ b/src/tests/ftest/cart/no_pmix_group_test.c @@ -318,8 +318,9 @@ int main(int argc, char **argv) int num_attach_retries = 20; uint32_t primary_grp_version = 1; - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); /* When under valgrind bump expected timeouts to 60 seconds */ if (D_ON_VALGRIND) { @@ -382,7 +383,7 @@ int main(int argc, char **argv) } } - grp_cfg_file = getenv("CRT_L_GRP_CFG"); + d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); rc = crt_rank_self_set(my_rank, primary_grp_version); if (rc != 0) { @@ -407,6 +408,7 @@ int main(int argc, char **argv) DBG_PRINT("self_rank=%d uri=%s grp_cfg_file=%s\n", my_rank, my_uri, grp_cfg_file); + d_freeenv_str(&grp_cfg_file); D_FREE(my_uri); rc = crt_group_size(NULL, &grp_size); diff --git a/src/tests/ftest/cart/no_pmix_group_version.c b/src/tests/ftest/cart/no_pmix_group_version.c index 0f6aa9aedcc..f3fab3dce0c 100644 --- a/src/tests/ftest/cart/no_pmix_group_version.c +++ b/src/tests/ftest/cart/no_pmix_group_version.c @@ -268,8 +268,9 @@ int main(int argc, char **argv) int rc; int num_attach_retries = 20; - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); /* When under valgrind bump expected timeouts to 60 seconds */ if (D_ON_VALGRIND) { @@ -326,7 +327,7 @@ int main(int argc, char **argv) } } - grp_cfg_file = getenv("CRT_L_GRP_CFG"); + d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); rc = crt_rank_self_set(my_rank, 1 /* group_version_min */); if (rc != 0) { @@ -351,6 +352,7 @@ int main(int argc, char **argv) DBG_PRINT("self_rank=%d uri=%s grp_cfg_file=%s\n", my_rank, my_uri, grp_cfg_file); + d_freeenv_str(&grp_cfg_file); D_FREE(my_uri); rc = crt_group_size(NULL, &grp_size); diff --git a/src/tests/ftest/cart/no_pmix_launcher_client.c b/src/tests/ftest/cart/no_pmix_launcher_client.c index 4c913226f77..24999ee51c3 100644 --- a/src/tests/ftest/cart/no_pmix_launcher_client.c +++ b/src/tests/ftest/cart/no_pmix_launcher_client.c @@ -107,7 +107,7 @@ int main(int argc, char **argv) progress_function, &crt_ctx); assert(rc == 0); - grp_cfg_file = getenv("CRT_L_GRP_CFG"); + d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); DBG_PRINT("Client starting with cfg_file=%s\n", grp_cfg_file); /* load group info from a config file and delete file upon return */ @@ -116,6 +116,7 @@ int main(int argc, char **argv) D_ERROR("crtu_load_group_from_file() failed; rc=%d\n", rc); assert(0); } + d_freeenv_str(&grp_cfg_file); rc = crt_group_size(grp, &grp_size); if (rc != 0) { diff --git a/src/tests/ftest/cart/no_pmix_launcher_server.c b/src/tests/ftest/cart/no_pmix_launcher_server.c index eda6ac14a05..e19ce0810cc 100644 --- a/src/tests/ftest/cart/no_pmix_launcher_server.c +++ b/src/tests/ftest/cart/no_pmix_launcher_server.c @@ -33,8 +33,9 @@ int main(int argc, char **argv) uint32_t grp_size; int rc; - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); /* rank, num_attach_retries, is_server, assert_on_error */ crtu_test_init(my_rank, 20, true, true); @@ -83,7 +84,7 @@ int main(int argc, char **argv) } } - grp_cfg_file = getenv("CRT_L_GRP_CFG"); + d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); if (grp_cfg_file == NULL) { D_ERROR("CRT_L_GRP_CFG was not set\n"); assert(0); @@ -105,6 +106,7 @@ int main(int argc, char **argv) DBG_PRINT("self_rank=%d uri=%s grp_cfg_file=%s\n", my_rank, my_uri, grp_cfg_file); + d_freeenv_str(&grp_cfg_file); D_FREE(my_uri); rc = crt_group_size(NULL, &grp_size); diff --git a/src/tests/ftest/cart/test_corpc_exclusive.c b/src/tests/ftest/cart/test_corpc_exclusive.c index 5f563d4d4b0..b3d81d857f7 100644 --- a/src/tests/ftest/cart/test_corpc_exclusive.c +++ b/src/tests/ftest/cart/test_corpc_exclusive.c @@ -103,8 +103,9 @@ int main(void) membs.rl_nr = 3; membs.rl_ranks = memb_ranks; - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); /* rank, num_attach_retries, is_server, assert_on_error */ crtu_test_init(my_rank, 20, true, true); @@ -128,7 +129,7 @@ int main(void) assert(0); } - grp_cfg_file = getenv("CRT_L_GRP_CFG"); + d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); rc = crt_rank_self_set(my_rank, 1 /* group_version_min */); if (rc != 0) { @@ -146,6 +147,7 @@ int main(void) /* load group info from a config file and delete file upon return */ rc = crtu_load_group_from_file(grp_cfg_file, g_main_ctx, grp, my_rank, true); + d_freeenv_str(&grp_cfg_file); if (rc != 0) { D_ERROR("crtu_load_group_from_file() failed; rc=%d\n", rc); assert(0); diff --git a/src/tests/ftest/cart/test_corpc_prefwd.c b/src/tests/ftest/cart/test_corpc_prefwd.c index 92f43ed5fcd..8aa9480476e 100644 --- a/src/tests/ftest/cart/test_corpc_prefwd.c +++ b/src/tests/ftest/cart/test_corpc_prefwd.c @@ -125,8 +125,9 @@ int main(void) excluded_membs.rl_nr = 1; excluded_membs.rl_ranks = &excluded_ranks; - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); /* rank, num_attach_retries, is_server, assert_on_error */ crtu_test_init(my_rank, 20, true, true); @@ -150,7 +151,7 @@ int main(void) assert(0); } - grp_cfg_file = getenv("CRT_L_GRP_CFG"); + d_agetenv_str(&grp_cfg_file, "CRT_L_GRP_CFG"); rc = crt_rank_self_set(my_rank, 1 /* group_version_min */); if (rc != 0) { @@ -168,6 +169,7 @@ int main(void) /* load group info from a config file and delete file upon return */ rc = crtu_load_group_from_file(grp_cfg_file, g_main_ctx, grp, my_rank, true); + d_freeenv_str(&grp_cfg_file); if (rc != 0) { D_ERROR("crtu_load_group_from_file() failed; rc=%d\n", rc); assert(0); diff --git a/src/tests/ftest/cart/test_ep_cred_server.c b/src/tests/ftest/cart/test_ep_cred_server.c index 9f747aef6c2..29e66ae960d 100644 --- a/src/tests/ftest/cart/test_ep_cred_server.c +++ b/src/tests/ftest/cart/test_ep_cred_server.c @@ -73,8 +73,9 @@ main(int argc, char **argv) return rc; } - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); /* rank, num_attach_retries, is_server, assert_on_error */ crtu_test_init(my_rank, 40, true, true); diff --git a/src/tests/ftest/cart/test_group_np_srv.c b/src/tests/ftest/cart/test_group_np_srv.c index e950b03a998..1353be76983 100644 --- a/src/tests/ftest/cart/test_group_np_srv.c +++ b/src/tests/ftest/cart/test_group_np_srv.c @@ -151,8 +151,9 @@ int main(int argc, char **argv) return rc; } - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); /* rank, num_attach_retries, is_server, assert_on_error */ crtu_test_init(my_rank, 20, true, true); diff --git a/src/tests/ftest/cart/test_multisend_server.c b/src/tests/ftest/cart/test_multisend_server.c index a0b478d63cb..ee770ec9b4e 100644 --- a/src/tests/ftest/cart/test_multisend_server.c +++ b/src/tests/ftest/cart/test_multisend_server.c @@ -167,8 +167,9 @@ main(int argc, char **argv) return rc; } - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); /* rank, num_attach_retries, is_server, assert_on_error */ crtu_test_init(my_rank, 40, true, true); diff --git a/src/tests/ftest/cart/test_proto_server.c b/src/tests/ftest/cart/test_proto_server.c index 5f3470c15d7..5fb587b94df 100644 --- a/src/tests/ftest/cart/test_proto_server.c +++ b/src/tests/ftest/cart/test_proto_server.c @@ -83,8 +83,9 @@ main(int argc, char **argv) return rc; } - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); /* rank, num_attach_retries, is_server, assert_on_error */ crtu_test_init(my_rank, 40, true, true); diff --git a/src/tests/ftest/cart/test_rpc_to_ghost_rank.c b/src/tests/ftest/cart/test_rpc_to_ghost_rank.c index e3a6cf95083..0e7741d364b 100644 --- a/src/tests/ftest/cart/test_rpc_to_ghost_rank.c +++ b/src/tests/ftest/cart/test_rpc_to_ghost_rank.c @@ -507,8 +507,10 @@ int main(int argc, char **argv) return rc; } - env_self_rank = getenv("CRT_L_RANK"); + d_agetenv_str(&env_self_rank, "CRT_L_RANK"); my_rank = atoi(env_self_rank); + d_freeenv_str(&env_self_rank); + /* rank, num_attach_retries, is_server, assert_on_error */ crtu_test_init(my_rank, 20, true, true); diff --git a/src/tests/ftest/checksum/csum_error_logging.py b/src/tests/ftest/checksum/csum_error_logging.py index 38950af94a8..a512836a5ef 100644 --- a/src/tests/ftest/checksum/csum_error_logging.py +++ b/src/tests/ftest/checksum/csum_error_logging.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -62,17 +62,17 @@ def test_csum_error_logging(self): host_devices = get_dmg_smd_info(dmg.storage_query_list_devices, 'devices') for host, devices in host_devices.items(): for device in devices: - for entry in ('uuid', 'tgt_ids', 'role_bits', 'roles'): + for entry in ('uuid', 'tgt_ids', 'role_bits'): if entry not in device: self.fail( 'Missing {} info from dmg storage query list devices'.format(entry)) self.log.info( - 'Host %s device: uuid=%s, targets=%s, role=%s, role_bits=%s', - host, device['uuid'], device['tgt_ids'], device['roles'], device['role_bits']) + 'Host %s device: uuid=%s, targets=%s, role_bits=%s', + host, device['uuid'], device['tgt_ids'], device['role_bits']) if not device['tgt_ids']: self.log_step('Skipping device without targets on {}'.format(device['uuid'])) continue - if device['roles'] and not int(device['role_bits']) & 1: + if (int(device['role_bits']) > 0) and not int(device['role_bits']) & 1: self.log_step( 'Skipping {} device without data on {}'.format( device['role_bits'], device['uuid'])) diff --git a/src/tests/ftest/container/rf_enforcement.py b/src/tests/ftest/container/rf_enforcement.py index dcacf091398..aafb24bd841 100644 --- a/src/tests/ftest/container/rf_enforcement.py +++ b/src/tests/ftest/container/rf_enforcement.py @@ -40,9 +40,7 @@ def test_container_redundancy_factor_oclass_enforcement(self): :avocado: tags=all,full_regression :avocado: tags=vm - :avocado: tags=container - :avocado: tags=container_rf,cont_rf_oclass_enforcement - :avocado: tags=test_container_redundancy_factor_oclass_enforcement + :avocado: tags=container,container_rf,cont_rf_oclass_enforcement + :avocado: tags=ContRfEnforce,test_container_redundancy_factor_oclass_enforcement """ - self.mode = "cont_rf_enforcement" - self.execute_cont_rf_test() + self.execute_cont_rf_test(mode="cont_rf_enforcement") diff --git a/src/tests/ftest/daos_test/dfuse.py b/src/tests/ftest/daos_test/dfuse.py index 2af81e67427..39d7e4359f5 100644 --- a/src/tests/ftest/daos_test/dfuse.py +++ b/src/tests/ftest/daos_test/dfuse.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2021-2023 Intel Corporation. + (C) Copyright 2021-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -93,6 +93,8 @@ def run_test(self, il_lib=None): daos_test_env['D_LOG_MASK'] = 'INFO,IL=DEBUG' command = [self.daos_test, '--test-dir', mount_dir, '--io', '--stream'] + if use_dfuse: + command.append('--lowfd') if cache_mode != 'writeback': command.append('--metadata') diff --git a/src/tests/ftest/deployment/server_rank_failure.yaml b/src/tests/ftest/deployment/server_rank_failure.yaml index 5d422648eed..2f897404598 100644 --- a/src/tests/ftest/deployment/server_rank_failure.yaml +++ b/src/tests/ftest/deployment/server_rank_failure.yaml @@ -36,12 +36,9 @@ server_config: pool_size_ratio_80: size: 80% - control_method: dmg rebuild_timeout: 960 - svcn: 5 pool_size_value: size: 500G - control_method: dmg rebuild_timeout: 240 container: diff --git a/src/tests/ftest/dfuse/daos_build.py b/src/tests/ftest/dfuse/daos_build.py index 79289adc833..99f2823d50f 100644 --- a/src/tests/ftest/dfuse/daos_build.py +++ b/src/tests/ftest/dfuse/daos_build.py @@ -1,5 +1,5 @@ """ - (C) Copyright 2020-2023 Intel Corporation. + (C) Copyright 2020-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ @@ -26,7 +26,7 @@ def test_dfuse_daos_build_wb(self): Mount dfuse Checkout and build DAOS sources. - :avocado: tags=all,daily_regression + :avocado: tags=all,pr,daily_regression :avocado: tags=hw,medium :avocado: tags=daosio,dfuse,daos_cmd :avocado: tags=DaosBuild,test_dfuse_daos_build_wb @@ -58,9 +58,9 @@ def test_dfuse_daos_build_wt_il(self): Mount dfuse Checkout and build DAOS sources. - :avocado: tags=all,daily_regression + :avocado: tags=all,full_regression :avocado: tags=vm - :avocado: tags=daosio,dfuse + :avocado: tags=daosio,dfuse,il,dfs :avocado: tags=DaosBuild,test_dfuse_daos_build_wt_il """ self.run_build_test("writethrough", True, run_on_vms=True) @@ -106,7 +106,7 @@ def test_dfuse_daos_build_nocache(self): Mount dfuse Checkout and build DAOS sources. - :avocado: tags=all,daily_regression + :avocado: tags=all,full_regression :avocado: tags=hw,medium :avocado: tags=daosio,dfuse :avocado: tags=DaosBuild,test_dfuse_daos_build_nocache diff --git a/src/tests/ftest/erasurecode/multiple_failure.yaml b/src/tests/ftest/erasurecode/multiple_failure.yaml index 73ceb3bfdc0..78f132474b5 100644 --- a/src/tests/ftest/erasurecode/multiple_failure.yaml +++ b/src/tests/ftest/erasurecode/multiple_failure.yaml @@ -25,8 +25,6 @@ server_config: storage: auto pool: size: 93% - svcn: 5 - control_method: dmg container: type: POSIX control_method: daos diff --git a/src/tests/ftest/erasurecode/offline_rebuild.yaml b/src/tests/ftest/erasurecode/offline_rebuild.yaml index ebd4904ce2c..7d1a6e7aa08 100644 --- a/src/tests/ftest/erasurecode/offline_rebuild.yaml +++ b/src/tests/ftest/erasurecode/offline_rebuild.yaml @@ -32,8 +32,6 @@ server_config: storage: auto pool: size: 93% - svcn: 1 - control_method: dmg container: type: POSIX control_method: daos diff --git a/src/tests/ftest/erasurecode/offline_rebuild_single.yaml b/src/tests/ftest/erasurecode/offline_rebuild_single.yaml index 5c32aaa29ac..8b7fba9ae09 100644 --- a/src/tests/ftest/erasurecode/offline_rebuild_single.yaml +++ b/src/tests/ftest/erasurecode/offline_rebuild_single.yaml @@ -30,8 +30,6 @@ server_config: storage: auto pool: size: 93% - svcn: 1 - control_method: dmg pool_query_timeout: 30 container: type: POSIX diff --git a/src/tests/ftest/erasurecode/online_rebuild.yaml b/src/tests/ftest/erasurecode/online_rebuild.yaml index fbfa7493447..f5ea4768df7 100644 --- a/src/tests/ftest/erasurecode/online_rebuild.yaml +++ b/src/tests/ftest/erasurecode/online_rebuild.yaml @@ -31,8 +31,6 @@ server_config: storage: auto pool: size: 93% - svcn: 1 - control_method: dmg container: type: POSIX control_method: daos diff --git a/src/tests/ftest/erasurecode/online_rebuild_single.yaml b/src/tests/ftest/erasurecode/online_rebuild_single.yaml index 8b0b3f4baf0..7284a1f7134 100644 --- a/src/tests/ftest/erasurecode/online_rebuild_single.yaml +++ b/src/tests/ftest/erasurecode/online_rebuild_single.yaml @@ -30,8 +30,6 @@ server_config: storage: auto pool: size: 93% - svcn: 5 - control_method: dmg pool_query_timeout: 30 properties: rd_fac:2 container: diff --git a/src/tests/ftest/erasurecode/rebuild_disabled.yaml b/src/tests/ftest/erasurecode/rebuild_disabled.yaml index 55939c0b425..e970e043972 100644 --- a/src/tests/ftest/erasurecode/rebuild_disabled.yaml +++ b/src/tests/ftest/erasurecode/rebuild_disabled.yaml @@ -32,8 +32,6 @@ server_config: storage: auto pool: size: 93% - svcn: 1 - control_method: dmg pool_query_timeout: 30 container: type: POSIX diff --git a/src/tests/ftest/erasurecode/rebuild_disabled_single.yaml b/src/tests/ftest/erasurecode/rebuild_disabled_single.yaml index 8a2294753bc..b4154fcc369 100644 --- a/src/tests/ftest/erasurecode/rebuild_disabled_single.yaml +++ b/src/tests/ftest/erasurecode/rebuild_disabled_single.yaml @@ -32,8 +32,6 @@ server_config: storage: auto pool: size: 93% - svcn: 1 - control_method: dmg pool_query_timeout: 30 container: type: POSIX diff --git a/src/tests/ftest/rebuild/cascading_failures.yaml b/src/tests/ftest/rebuild/cascading_failures.yaml index 8b9b272b4bd..c5fbb7e28cd 100644 --- a/src/tests/ftest/rebuild/cascading_failures.yaml +++ b/src/tests/ftest/rebuild/cascading_failures.yaml @@ -15,9 +15,7 @@ server_config: scm_mount: /mnt/daos system_ram_reserved: 1 pool: - scm_size: 1073741824 - svcn: 2 - control_method: dmg + size: 1G pool_query_timeout: 30 properties: rd_fac:2 container: diff --git a/src/tests/ftest/rebuild/container_rf.py b/src/tests/ftest/rebuild/container_rf.py index 51037212556..c0daa01f6be 100644 --- a/src/tests/ftest/rebuild/container_rf.py +++ b/src/tests/ftest/rebuild/container_rf.py @@ -39,5 +39,4 @@ def test_rebuild_with_container_rf(self): :avocado: tags=container,rebuild,container_rf :avocado: tags=RbldContRfTest,test_rebuild_with_container_rf """ - self.mode = "cont_rf_with_rebuild" - self.execute_cont_rf_test() + self.execute_cont_rf_test(mode="cont_rf_with_rebuild") diff --git a/src/tests/ftest/rebuild/container_rf.yaml b/src/tests/ftest/rebuild/container_rf.yaml index eb436baa2d3..f1333dbbadc 100644 --- a/src/tests/ftest/rebuild/container_rf.yaml +++ b/src/tests/ftest/rebuild/container_rf.yaml @@ -1,5 +1,5 @@ hosts: - test_servers: 6 + test_servers: 7 test_clients: 1 timeout: 480 server_config: @@ -15,9 +15,8 @@ server_config: scm_mount: /mnt/daos system_ram_reserved: 1 pool: - scm_size: 1073741824 - svcn: 3 - control_method: dmg + size: 1G + svcn: 7 # To match number of servers pool_query_timeout: 30 container: control_method: daos diff --git a/src/tests/ftest/rebuild/delete_objects.yaml b/src/tests/ftest/rebuild/delete_objects.yaml index bb99fe48d70..e80a2aec9c4 100644 --- a/src/tests/ftest/rebuild/delete_objects.yaml +++ b/src/tests/ftest/rebuild/delete_objects.yaml @@ -16,9 +16,7 @@ server_config: system_ram_reserved: 1 pool: scm_size: 1073741824 - svcn: 2 debug: true - control_method: dmg pool_query_timeout: 30 properties: rd_fac:2 container: diff --git a/src/tests/ftest/rebuild/read_array.yaml b/src/tests/ftest/rebuild/read_array.yaml index 6bb9e559a17..b7a64dfc3e2 100644 --- a/src/tests/ftest/rebuild/read_array.yaml +++ b/src/tests/ftest/rebuild/read_array.yaml @@ -19,9 +19,7 @@ server_config: scm_mount: /mnt/daos system_ram_reserved: 1 pool: - scm_size: 1073741824 - svcn: 2 - control_method: dmg + size: 1G pool_query_timeout: 30 properties: rd_fac:2 container: diff --git a/src/tests/ftest/rebuild/widely_striped.yaml b/src/tests/ftest/rebuild/widely_striped.yaml index fdeb6090d11..40853a14b20 100644 --- a/src/tests/ftest/rebuild/widely_striped.yaml +++ b/src/tests/ftest/rebuild/widely_striped.yaml @@ -18,15 +18,14 @@ testparams: pool: scm_size: 10G nvme_size: 60G - svcn: 5 - control_method: dmg rebuild_timeout: 240 pool_query_timeout: 60 + properties: rd_fac:2 container: type: POSIX control_method: daos oclass: RP_3G1 - properties: "rd_fac:2" + properties: rd_fac:2 mdtest: api: DFS client_processes: diff --git a/src/tests/ftest/rebuild/with_ior.yaml b/src/tests/ftest/rebuild/with_ior.yaml index d58b65f9ea0..44e02c61397 100644 --- a/src/tests/ftest/rebuild/with_ior.yaml +++ b/src/tests/ftest/rebuild/with_ior.yaml @@ -29,8 +29,7 @@ server_config: pool: scm_size: 6G - svcn: 3 - control_method: dmg + properties: rd_fac:1 pool_query_timeout: 30 container: diff --git a/src/tests/ftest/util/container_rf_test_base.py b/src/tests/ftest/util/container_rf_test_base.py index cf89ea6355c..37dbc8a397f 100644 --- a/src/tests/ftest/util/container_rf_test_base.py +++ b/src/tests/ftest/util/container_rf_test_base.py @@ -1,11 +1,10 @@ """ - (C) Copyright 2019-2023 Intel Corporation. + (C) Copyright 2019-2024 Intel Corporation. SPDX-License-Identifier: BSD-2-Clause-Patent """ import re -from daos_utils import DaosCommand from general_utils import DaosTestError from rebuild_test_base import RebuildTestBase @@ -15,21 +14,6 @@ class ContRedundancyFactor(RebuildTestBase): :avocado: recursive """ - - def __init__(self, *args, **kwargs): - """Initialize a CascadingFailures object.""" - super().__init__(*args, **kwargs) - self.mode = None - self.daos_cmd = None - - def create_test_container(self): - """Create a container and write objects.""" - self.log.info( - "==>(1)Create pool and container with redundant factor," - " start background IO object write") - self.container.create() - self.container.write_objects(self.inputs.rank.value[0], self.inputs.object_class.value) - def verify_rank_has_objects(self): """Verify the first rank to be excluded has at least one object.""" rank_list = self.container.get_target_rank_lists(" before rebuild") @@ -61,8 +45,7 @@ def verify_cont_rf_healthstatus(self, expected_rf, expected_health): actual_rf = None actual_health = None - cont_props = self.daos_cmd.container_get_prop( - pool=self.pool.uuid, cont=self.container.uuid, properties=["rd_fac", "status"]) + cont_props = self.container.get_prop(properties=["rd_fac", "status"]) for cont_prop in cont_props["response"]: if cont_prop["name"] == "rd_fac": actual_rf = cont_prop["value"] @@ -144,19 +127,20 @@ def create_test_container_and_write_obj(self, negative_test=False): self.fail("#Negative test, container redundancy factor " "test failed, return error RC: -1003 not found") - def execute_cont_rf_test(self, create_container=True): + def execute_cont_rf_test(self, create_container=True, mode=None): """Execute the rebuild test steps for container rd_fac test. Args: create_container (bool, optional): should the test create a container. Defaults to True. + mode (str): either "cont_rf_with_rebuild" or "cont_rf_enforcement" """ # Get the test params and var self.setup_test_pool() - self.daos_cmd = DaosCommand(self.bin) if create_container: self.setup_test_container() oclass = self.inputs.object_class.value + # Negative testing pertains to RF enforcement when creating objects - not rebuild negative_test = True rd_fac = ''.join(self.container.properties.value.split(":")) rf_match = re.search(r"rd_fac([0-9]+)", rd_fac) @@ -171,7 +155,8 @@ def execute_cont_rf_test(self, create_container=True): self.create_test_pool() # Create a container and write objects self.create_test_container_and_write_obj(negative_test) - if self.mode == "cont_rf_with_rebuild": + + if mode == "cont_rf_with_rebuild": num_of_ranks = len(self.inputs.rank.value) if num_of_ranks > rf_num: expect_cont_status = "UNCLEAN" @@ -196,7 +181,7 @@ def execute_cont_rf_test(self, create_container=True): self.log.info("==>(7)Check for container data if the container is healthy.") self.verify_container_data() self.log.info("Test passed") - elif self.mode == "cont_rf_enforcement": + elif mode == "cont_rf_enforcement": self.log.info("Container rd_fac test passed") else: - self.fail("#Unsupported container_rf test mode") + self.fail(f"Unsupported container_rf test mode: {mode}") diff --git a/src/tests/ftest/util/test_utils_pool.py b/src/tests/ftest/util/test_utils_pool.py index 1f27ea9dd69..48e79d61994 100644 --- a/src/tests/ftest/util/test_utils_pool.py +++ b/src/tests/ftest/util/test_utils_pool.py @@ -377,7 +377,7 @@ def create(self): self.pool = TestPool(self.context, DmgCommand(self.bin)) If it wants to use --nsvc option, it needs to set the value to - svcn.value. Otherwise, 1 is used. If it wants to use --group, it needs + svcn.value. If it wants to use --group, it needs to set groupname.value. If it wants to use --user, it needs to set username.value. If it wants to add other options, directly set it to self.dmg.action_command. Refer dmg_utils.py pool_create method for diff --git a/src/tests/suite/daos_checksum.c b/src/tests/suite/daos_checksum.c index f574635c306..12757f5ce4d 100644 --- a/src/tests/suite/daos_checksum.c +++ b/src/tests/suite/daos_checksum.c @@ -2885,7 +2885,7 @@ run_daos_checksum_test(int rank, int size, int *sub_tests, int sub_tests_size) } if (sub_tests_size == 0) { - if (getenv("DAOS_CSUM_TEST_ALL_TYPE")) { + if (d_isenv_def("DAOS_CSUM_TEST_ALL_TYPE")) { for (i = DAOS_PROP_CO_CSUM_OFF + 1; i <= DAOS_PROP_CO_CSUM_ADLER32; i++) { dts_csum_prop_type = i; diff --git a/src/tests/suite/daos_nvme_recovery.c b/src/tests/suite/daos_nvme_recovery.c index a9c2f2a0a82..54581412902 100644 --- a/src/tests/suite/daos_nvme_recovery.c +++ b/src/tests/suite/daos_nvme_recovery.c @@ -87,9 +87,10 @@ nvme_fault_reaction(void **state, int mode) daos_size_t nvme_size; /* Use the SCM size if set with environment */ - env = getenv("POOL_SCM_SIZE"); + d_agetenv_str(&env, "POOL_SCM_SIZE"); if (env) { size_gb = atoi(env); + d_freeenv_str(&env); if (size_gb != 0) scm_size = (daos_size_t)size_gb << 30; } diff --git a/src/tests/suite/daos_test_common.c b/src/tests/suite/daos_test_common.c index 7de287713ce..ac7e5c2015a 100644 --- a/src/tests/suite/daos_test_common.c +++ b/src/tests/suite/daos_test_common.c @@ -70,9 +70,10 @@ test_setup_pool_create(void **state, struct test_pool *ipool, daos_size_t nvme_size; d_rank_list_t *rank_list = NULL; - env = getenv("POOL_SCM_SIZE"); + d_agetenv_str(&env, "POOL_SCM_SIZE"); if (env) { size_gb = atoi(env); + d_freeenv_str(&env); if (size_gb != 0) outpool->pool_size = (daos_size_t)size_gb << 30; @@ -85,9 +86,10 @@ test_setup_pool_create(void **state, struct test_pool *ipool, * Set env POOL_NVME_SIZE to overwrite the default NVMe size. */ nvme_size = outpool->pool_size * 4; - env = getenv("POOL_NVME_SIZE"); + d_agetenv_str(&env, "POOL_NVME_SIZE"); if (env) { size_gb = atoi(env); + d_freeenv_str(&env); nvme_size = (daos_size_t)size_gb << 30; } diff --git a/src/tests/suite/dfs_test.c b/src/tests/suite/dfs_test.c index 217f30ad178..29e37a0b759 100644 --- a/src/tests/suite/dfs_test.c +++ b/src/tests/suite/dfs_test.c @@ -168,14 +168,16 @@ main(int argc, char **argv) } /** if writing XML, force all ranks other than rank 0 to use stdout to avoid conflicts */ - cmocka_message_output = getenv("CMOCKA_MESSAGE_OUTPUT"); + d_agetenv_str(&cmocka_message_output, "CMOCKA_MESSAGE_OUTPUT"); if (rank != 0 && cmocka_message_output && strcasecmp(cmocka_message_output, "xml") == 0) { + d_freeenv_str(&cmocka_message_output); rc = d_setenv("CMOCKA_MESSAGE_OUTPUT", "stdout", 1); if (rc) { print_message("d_setenv() failed with %d\n", rc); return -1; } } + d_freeenv_str(&cmocka_message_output); nr_failed = run_specified_tests(tests, rank, size, NULL, 0); diff --git a/src/tests/suite/dfuse_test.c b/src/tests/suite/dfuse_test.c index bcff68d5ecf..28cc137f45e 100644 --- a/src/tests/suite/dfuse_test.c +++ b/src/tests/suite/dfuse_test.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2021-2023 Intel Corporation. + * (C) Copyright 2021-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -34,7 +34,7 @@ /* Tests can be run by specifying the appropriate argument for a test or all will be run if no test * is specified. */ -static const char *all_tests = "ismd"; +static const char *all_tests = "ismdl"; static void print_usage() @@ -47,6 +47,7 @@ print_usage() print_message("dfuse_test -s|--stream\n"); print_message("dfuse_test -m|--metadata\n"); print_message("dfuse_test -d|--directory\n"); + print_message("dfuse_test -l|--lowfd\n"); print_message("Default runs all tests\n=============\n"); print_message("\n=============================\n"); } @@ -146,6 +147,21 @@ do_openat(void **state) assert_return_code(rc, errno); assert_int_equal(stbuf.st_size, stbuf0.st_size); + /* cornercase: fd for a regular file is passed into fstatat(). Path is empty. */ + rc = fstatat(fd, "", &stbuf0, AT_EMPTY_PATH); + assert_return_code(rc, errno); + assert_int_equal(stbuf.st_size, stbuf0.st_size); + + /* expected to fail */ + rc = fstatat(fd, "", &stbuf0, 0); + assert_int_equal(rc, -1); + assert_int_equal(errno, ENOENT); + + /* expected to fail */ + rc = fstatat(fd, "entry", &stbuf0, 0); + assert_int_equal(rc, -1); + assert_int_equal(errno, ENOTDIR); + rc = close(fd); assert_return_code(rc, errno); @@ -494,6 +510,67 @@ do_directory(void **state) assert_return_code(rc, errno); } +#define MIN_DAOS_FD 10 +/* + * Check whether daos network context uses low fds 0~9. + */ +void +do_lowfd(void **state) +{ + int fd; + int rc; + int i; + bool pil4dfs_loaded = false; + char *env_ldpreload; + char fd_path[64]; + char *path; + + env_ldpreload = getenv("LD_PRELOAD"); + if (env_ldpreload == NULL) + return; + + if (strstr(env_ldpreload, "libpil4dfs.so")) + pil4dfs_loaded = true; + else + /* libioil cannot pass this test since low fds are only temporarily blocked */ + return; + + /* first time access a dir on DFS mount to trigger daos_init() */ + fd = open(test_dir, O_PATH | O_DIRECTORY); + assert_return_code(fd, errno); + + rc = close(fd); + assert_return_code(rc, errno); + + /* open the root dir and print fd */ + fd = open("/", O_PATH | O_DIRECTORY); + assert_return_code(fd, errno); + printf("fd = %d\n", fd); + rc = close(fd); + assert_return_code(rc, errno); + if (pil4dfs_loaded) + assert_true(fd >= MIN_DAOS_FD); + + /* now check whether daos uses low fds */ + path = malloc(PATH_MAX); + assert_non_null(path); + for (i = 0; i < MIN_DAOS_FD; i++) { + snprintf(fd_path, sizeof(fd_path) - 1, "/proc/self/fd/%d", i); + rc = readlink(fd_path, path, PATH_MAX - 1); + /* libioil only temporarily block low fds during daos_init(). + * libpil4dfs blocks low fds before daos_init() and does not free + * them until applications end. + */ + if (!pil4dfs_loaded && rc == -1 && errno == ENOENT) + continue; + assert_true(rc > 0); + path[rc] = 0; + assert_true(strstr(path, "socket:") == NULL); + assert_true(strstr(path, "anon_inode:") == NULL); + } + free(path); +} + static int run_specified_tests(const char *tests, int *sub_tests, int sub_tests_size) { @@ -543,6 +620,15 @@ run_specified_tests(const char *tests, int *sub_tests, int sub_tests_size) }; nr_failed += cmocka_run_group_tests(readdir_tests, NULL, NULL); break; + case 'l': + printf("\n\n================="); + printf("dfuse low fd tests"); + printf("=====================\n"); + const struct CMUnitTest lowfd_tests[] = { + cmocka_unit_test(do_lowfd), + }; + nr_failed += cmocka_run_group_tests(lowfd_tests, NULL, NULL); + break; default: assert_true(0); @@ -568,9 +654,10 @@ main(int argc, char **argv) {"stream", no_argument, NULL, 's'}, {"metadata", no_argument, NULL, 'm'}, {"directory", no_argument, NULL, 'd'}, + {"lowfd", no_argument, NULL, 'l'}, {NULL, 0, NULL, 0}}; - while ((opt = getopt_long(argc, argv, "aM:imsd", long_options, &index)) != -1) { + while ((opt = getopt_long(argc, argv, "aM:imsdl", long_options, &index)) != -1) { if (strchr(all_tests, opt) != NULL) { tests[ntests] = opt; ntests++; diff --git a/src/utils/self_test/self_test.c b/src/utils/self_test/self_test.c index 7c9d6e592d5..a3157dc01f2 100644 --- a/src/utils/self_test/self_test.c +++ b/src/utils/self_test/self_test.c @@ -1,5 +1,5 @@ /* - * (C) Copyright 2016-2022 Intel Corporation. + * (C) Copyright 2016-2024 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -1827,18 +1827,16 @@ int main(int argc, char *argv[]) } if (use_daos_agent_vars == false) { - char *env; char *attach_path; + char *attach_path_env = NULL; - env = getenv("CRT_PHY_ADDR_STR"); - if (env == NULL) { + if (!d_isenv_def("CRT_PHY_ADDR_STR")) { printf("Error: provider (CRT_PHY_ADDR_STR) is not set\n"); printf("Example: export CRT_PHY_ADDR_STR='ofi+tcp'\n"); D_GOTO(cleanup, ret = -DER_INVAL); } - env = getenv("OFI_INTERFACE"); - if (env == NULL) { + if (!d_isenv_def("OFI_INTERFACE")) { printf("Error: interface (OFI_INTERFACE) is not set\n"); printf("Example: export OFI_INTERFACE=eth0\n"); D_GOTO(cleanup, ret = -DER_INVAL); @@ -1847,14 +1845,17 @@ int main(int argc, char *argv[]) if (attach_info_path) attach_path = attach_info_path; else { - attach_path = getenv("CRT_ATTACH_INFO_PATH"); + d_agetenv_str(&attach_path_env, "CRT_ATTACH_INFO_PATH"); + attach_path = attach_path_env; if (!attach_path) attach_path = "/tmp"; } + D_ASSERT(attach_path != NULL); printf("Warning: running without daos_agent connection (-u option); " "Using attachment file %s/%s.attach_info_tmp instead\n", attach_path, dest_name ? dest_name : default_dest_name); + d_freeenv_str(&attach_path_env); } /******************** Parse message sizes argument ********************/ diff --git a/src/vos/vos_common.c b/src/vos/vos_common.c index cf28bf0d573..ea4cdd45c12 100644 --- a/src/vos/vos_common.c +++ b/src/vos/vos_common.c @@ -987,7 +987,7 @@ vos_self_init(const char *db_path, bool use_sys_db, int tgt_id) goto failed; } - evt_mode = getenv("DAOS_EVTREE_MODE"); + rc = d_agetenv_str(&evt_mode, "DAOS_EVTREE_MODE"); if (evt_mode) { if (strcasecmp("soff", evt_mode) == 0) { vos_evt_feats &= ~EVT_FEATS_SUPPORTED; @@ -996,6 +996,7 @@ vos_self_init(const char *db_path, bool use_sys_db, int tgt_id) vos_evt_feats &= ~EVT_FEATS_SUPPORTED; vos_evt_feats |= EVT_FEAT_SORT_DIST_EVEN; } + d_freeenv_str(&evt_mode); } switch (vos_evt_feats & EVT_FEATS_SUPPORTED) { case EVT_FEAT_SORT_SOFF: