Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-16971 cart: add RPC origin address #15820

Merged
merged 8 commits into from
Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/cart/crt_bulk.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2022 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -183,7 +184,8 @@ crt_bulk_transfer(struct crt_bulk_desc *bulk_desc, crt_bulk_cb_t complete_cb,

rc = crt_hg_bulk_transfer(bulk_desc, complete_cb, arg, opid, false);
if (rc != 0)
DL_ERROR(rc, "crt_hg_bulk_transfer() failed");
DL_ERROR(rc, "%p:%s crt_hg_bulk_transfer() failed", bulk_desc->bd_rpc,
crt_req_origin_addr_get(bulk_desc->bd_rpc));

out:
return rc;
Expand All @@ -203,7 +205,8 @@ crt_bulk_bind_transfer(struct crt_bulk_desc *bulk_desc,

rc = crt_hg_bulk_transfer(bulk_desc, complete_cb, arg, opid, true);
if (rc != 0)
D_ERROR("crt_hg_bulk_transfer() failed, rc: %d.\n", rc);
DL_ERROR(rc, "%p:%s crt_hg_bulk_transfer() failed.", bulk_desc->bd_rpc,
crt_req_origin_addr_get(bulk_desc->bd_rpc));

out:
return rc;
Expand Down
42 changes: 40 additions & 2 deletions src/cart/crt_hg.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -1454,8 +1455,8 @@ crt_hg_reply_send_cb(const struct hg_cb_info *hg_cbinfo)
* see CART-146 for details
*/
if (hg_ret != HG_SUCCESS)
D_WARN("hg_cbinfo->ret: " DF_HG_RC ", opc: %#x.\n",
DP_HG_RC(hg_ret), opc);
D_WARN("hg_cbinfo->ret: " DF_HG_RC ", opc: %#x from %s.\n", DP_HG_RC(hg_ret), opc,
crt_rpc_priv_get_origin_addr(rpc_priv));

/* corresponding to the crt_req_addref in crt_hg_reply_send */
RPC_DECREF(rpc_priv);
Expand Down Expand Up @@ -1867,3 +1868,40 @@ crt_hg_bulk_transfer(struct crt_bulk_desc *bulk_desc, crt_bulk_cb_t complete_cb,
out:
return rc;
}

char *
crt_rpc_priv_get_origin_addr(struct crt_rpc_priv *rpc_priv)
{
const struct hg_info *hg_info;
char addr[48];
hg_size_t addr_size = 48;
int rc;

if (rpc_priv->crp_orig_uri != NULL)
return rpc_priv->crp_orig_uri;

hg_info = HG_Get_info(rpc_priv->crp_hg_hdl);
if (hg_info == NULL)
return "NOINFO";

rc = HG_Addr_to_string(hg_info->hg_class, addr, (hg_size_t *)&addr_size, hg_info->addr);
if (rc != 0)
return "NONE";

D_ALLOC(rpc_priv->crp_orig_uri, addr_size);
if (rpc_priv->crp_orig_uri == NULL)
return "NOMEM";

memcpy(rpc_priv->crp_orig_uri, addr, addr_size);

return rpc_priv->crp_orig_uri;
}

char *
crt_req_origin_addr_get(crt_rpc_t *rpc_pub)
{
struct crt_rpc_priv *rpc_priv;

rpc_priv = container_of(rpc_pub, struct crt_rpc_priv, crp_pub);
return crt_rpc_priv_get_origin_addr(rpc_priv);
}
19 changes: 11 additions & 8 deletions src/cart/crt_internal.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -36,10 +37,11 @@
break; \
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
D_TRACE_DEBUG(mask, (rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
D_TRACE_DEBUG(mask, (rpc), \
"[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d orig=%s] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, (rpc)->crp_req_hdr.cch_rpcid, \
(rpc)->crp_pub.cr_ep.ep_rank, (rpc)->crp_pub.cr_ep.ep_tag, \
##__VA_ARGS__); \
crt_rpc_priv_get_origin_addr((rpc)), ##__VA_ARGS__); \
} while (0)

/* Log an error with an RPC descriptor */
Expand All @@ -49,10 +51,10 @@
char *_opc; \
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
D_TRACE_ERROR((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
D_TRACE_ERROR((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d orig=%s] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, (rpc)->crp_req_hdr.cch_rpcid, \
(rpc)->crp_pub.cr_ep.ep_rank, (rpc)->crp_pub.cr_ep.ep_tag, \
##__VA_ARGS__); \
crt_rpc_priv_get_origin_addr((rpc)), ##__VA_ARGS__); \
} while (0)

/* Log a warning with an RPC descriptor */
Expand All @@ -62,10 +64,10 @@
char *_opc; \
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
D_TRACE_WARN((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
D_TRACE_WARN((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d orig=%s] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, (rpc)->crp_req_hdr.cch_rpcid, \
(rpc)->crp_pub.cr_ep.ep_rank, (rpc)->crp_pub.cr_ep.ep_tag, \
##__VA_ARGS__); \
crt_rpc_priv_get_origin_addr((rpc)), ##__VA_ARGS__); \
} while (0)

/* Log an info message with an RPC descriptor */
Expand All @@ -75,11 +77,12 @@
char *_opc; \
\
crt_opc_decode((rpc)->crp_pub.cr_opc, &_module, &_opc); \
D_TRACE_INFO((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d] " fmt, \
D_TRACE_INFO((rpc), "[opc=%#x (%s:%s) rpcid=%#lx rank:tag=%d:%d orig=%s] " fmt, \
(rpc)->crp_pub.cr_opc, _module, _opc, (rpc)->crp_req_hdr.cch_rpcid, \
(rpc)->crp_pub.cr_ep.ep_rank, (rpc)->crp_pub.cr_ep.ep_tag, \
##__VA_ARGS__); \
crt_rpc_priv_get_origin_addr((rpc)), ##__VA_ARGS__); \
} while (0)

/**
* If \a cond is false, this is equivalent to an RPC_ERROR (i.e., \a mask is
* ignored). If \a cond is true, this is equivalent to an RPC_TRACE.
Expand Down
3 changes: 3 additions & 0 deletions src/cart/crt_internal_fns.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -71,4 +72,6 @@ void
crt_trigger_event_cbs(d_rank_t rank, uint64_t incarnation, enum crt_event_source src,
enum crt_event_type type);

char *
crt_rpc_priv_get_origin_addr(struct crt_rpc_priv *rpc_priv);
#endif /* __CRT_INTERNAL_FNS_H__ */
8 changes: 6 additions & 2 deletions src/cart/crt_rpc.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -578,6 +579,9 @@ crt_rpc_priv_free(struct crt_rpc_priv *rpc_priv)

RPC_TRACE(DB_TRACE, rpc_priv, "destroying\n");

if (rpc_priv->crp_orig_uri != NULL)
D_FREE(rpc_priv->crp_orig_uri);

D_FREE(rpc_priv);
}

Expand Down Expand Up @@ -1596,8 +1600,8 @@ crt_reply_send(crt_rpc_t *req)
RPC_TRACE(DB_ALL, rpc_priv, "reply_send\n");
rc = crt_hg_reply_send(rpc_priv);
if (rc != 0)
D_ERROR("crt_hg_reply_send failed, rc: %d,opc: %#x.\n",
rc, rpc_priv->crp_pub.cr_opc);
D_ERROR("crt_hg_reply_send failed, rc: %d,opc: %#x.: %s\n", rc,
rpc_priv->crp_pub.cr_opc, crt_req_origin_addr_get(req));
}

rpc_priv->crp_reply_pending = 0;
Expand Down
2 changes: 2 additions & 0 deletions src/cart/crt_rpc.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -154,6 +155,7 @@ struct crt_rpc_priv {
hg_addr_t crp_hg_addr; /* target na address */
struct crt_hg_hdl *crp_hdl_reuse; /* reused hg_hdl */
crt_phy_addr_t crp_tgt_uri; /* target uri address */
char *crp_orig_uri; /* where the RPC comes from */
crt_rpc_t *crp_ul_req; /* uri lookup request */

uint32_t crp_ul_retry; /* uri lookup retry counter */
Expand Down
10 changes: 10 additions & 0 deletions src/include/cart/api.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -2375,6 +2376,15 @@ int crt_context_quota_limit_get(crt_context_t crt_ctx, crt_quota_type_t quota, i
int
crt_req_get_proto_ver(crt_rpc_t *req);

/**
* Get the rpc origin address.
*
* \param[in] rpc pointer to RPC request
* \return the origin address of the RPC
*/
char *
crt_req_origin_addr_get(crt_rpc_t *rpc);

/** @}
*/

Expand Down
48 changes: 27 additions & 21 deletions src/object/srv_obj.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/**
* (C) Copyright 2016-2024 Intel Corporation.
* (C) Copyright 2025 Google LLC
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -182,9 +183,8 @@ obj_rw_reply(crt_rpc_t *rpc, int status, uint64_t epoch, bool release_input,
orwo->orw_epoch = max(epoch, orwo->orw_epoch);
}

D_DEBUG(DB_IO, "rpc %p opc %d send reply, pmv %d, epoch "DF_X64
", status %d\n", rpc, opc_get(rpc->cr_opc),
ioc->ioc_map_ver, orwo->orw_epoch, status);
D_DEBUG(DB_IO, "rpc %p opc %d send reply, pmv %d, epoch " DF_X64 ", status %d\n", rpc,
opc_get(rpc->cr_opc), ioc->ioc_map_ver, orwo->orw_epoch, status);

if (!ioc->ioc_lost_reply) {
if (release_input)
Expand Down Expand Up @@ -241,11 +241,13 @@ obj_bulk_comp_cb(const struct crt_bulk_cb_info *cb_info)
struct crt_bulk_desc *bulk_desc;
crt_rpc_t *rpc;

if (cb_info->bci_rc != 0)
D_ERROR("bulk transfer failed: %d\n", cb_info->bci_rc);

bulk_desc = cb_info->bci_bulk_desc;
rpc = bulk_desc->bd_rpc;

if (cb_info->bci_rc != 0)
D_ERROR("rpc: %p:%s bulk transfer failed: %d\n", rpc, crt_req_origin_addr_get(rpc),
cb_info->bci_rc);

arg = (struct obj_bulk_args *)cb_info->bci_arg;
/**
* Note: only one thread will access arg.result, so
Expand Down Expand Up @@ -5776,17 +5778,19 @@ ds_obj_coll_punch_handler(crt_rpc_t *rpc)
max_ver = version;

DL_CDEBUG(rc != 0 && rc != -DER_INPROGRESS && rc != -DER_TX_RESTART, DLOG_ERR, DB_IO, rc,
"(%s) handled collective punch RPC %p for obj "DF_UOID" on XS %u/%u in "DF_UUID"/"
DF_UUID"/"DF_UUID" with epc "DF_X64", pmv %u/%u, dti "DF_DTI", bulk_tgt_sz %u, "
"(%s) handled collective punch RPC %p:%s for obj " DF_UOID
" on XS %u/%u in " DF_UUID "/" DF_UUID "/" DF_UUID " with epc " DF_X64
", pmv %u/%u, dti " DF_DTI ", bulk_tgt_sz %u, "
"bulk_tgt_nr %u, tgt_nr %u, forward width %u, forward depth %u, flags %x",
(ocpi->ocpi_flags & ORF_LEADER) ? "leader" :
(ocpi->ocpi_tgts.ca_count == 1 ? "non-leader" : "relay-engine"), rpc,
DP_UOID(ocpi->ocpi_oid), dmi->dmi_xs_id, dmi->dmi_tgt_id,
DP_UUID(ocpi->ocpi_po_uuid), DP_UUID(ocpi->ocpi_co_hdl),
DP_UUID(ocpi->ocpi_co_uuid), ocpi->ocpi_epoch,
ocpi->ocpi_map_ver, max_ver, DP_DTI(&ocpi->ocpi_xid), ocpi->ocpi_bulk_tgt_sz,
ocpi->ocpi_bulk_tgt_nr, (unsigned int)ocpi->ocpi_tgts.ca_count,
ocpi->ocpi_disp_width, ocpi->ocpi_disp_depth, ocpi->ocpi_flags);
(ocpi->ocpi_flags & ORF_LEADER)
? "leader"
: (ocpi->ocpi_tgts.ca_count == 1 ? "non-leader" : "relay-engine"),
rpc, crt_req_origin_addr_get(rpc), DP_UOID(ocpi->ocpi_oid), dmi->dmi_xs_id,
dmi->dmi_tgt_id, DP_UUID(ocpi->ocpi_po_uuid), DP_UUID(ocpi->ocpi_co_hdl),
DP_UUID(ocpi->ocpi_co_uuid), ocpi->ocpi_epoch, ocpi->ocpi_map_ver, max_ver,
DP_DTI(&ocpi->ocpi_xid), ocpi->ocpi_bulk_tgt_sz, ocpi->ocpi_bulk_tgt_nr,
(unsigned int)ocpi->ocpi_tgts.ca_count, ocpi->ocpi_disp_width,
ocpi->ocpi_disp_depth, ocpi->ocpi_flags);

obj_punch_complete(rpc, rc, max_ver);

Expand Down Expand Up @@ -5918,11 +5922,13 @@ ds_obj_coll_query_handler(crt_rpc_t *rpc)
rc = dtx_leader_end(dlh, ioc.ioc_coh, rc);

out:
D_DEBUG(DB_IO, "Handled collective query RPC %p %s forwarding for obj "DF_UOID
" on rank %u XS %u/%u epc "DF_X64" pmv %u, with dti "DF_DTI", dct_nr %u, "
"forward width %u, forward depth %u\n: "DF_RC"\n", rpc,
ocqi->ocqi_tgts.ca_count <= 1 ? "without" : "with", DP_UOID(ocqi->ocqi_oid),
myrank, dmi->dmi_xs_id, tgt_id, ocqi->ocqi_epoch, ocqi->ocqi_map_ver,
D_DEBUG(DB_IO,
"Handled collective query RPC %p:%s %s forwarding for obj " DF_UOID
" on rank %u XS %u/%u epc " DF_X64 " pmv %u, with dti " DF_DTI ", dct_nr %u, "
"forward width %u, forward depth %u\n: " DF_RC "\n",
rpc, crt_req_origin_addr_get(rpc),
ocqi->ocqi_tgts.ca_count <= 1 ? "without" : "with", DP_UOID(ocqi->ocqi_oid), myrank,
dmi->dmi_xs_id, tgt_id, ocqi->ocqi_epoch, ocqi->ocqi_map_ver,
DP_DTI(&ocqi->ocqi_xid), (unsigned int)ocqi->ocqi_tgts.ca_count,
ocqi->ocqi_disp_width, ocqi->ocqi_disp_depth, DP_RC(rc));

Expand Down