Skip to content

Commit

Permalink
trace: Add tracepoints to RMA read and write operations
Browse files Browse the repository at this point in the history
A previous commit implemented the RMA iwrite, iwriteInline, and iread
API functions. This commit adds NVTX and LTTng tracepoints to these
functions.

Signed-off-by: Michael Axtmann <[email protected]>
  • Loading branch information
maxtmann committed Sep 9, 2024
1 parent cb61d87 commit 4e36a0d
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 0 deletions.
10 changes: 10 additions & 0 deletions include/nccl_ofi_tracepoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,16 @@
NCCL_OFI_TRACE_FLUSH_NVTX(request, nccl_req); \
} while(0)

/*
 * Trace an RMA read request.
 *
 * Fires the LTTng-UST tracepoint nccl_ofi_plugin:Read with the two
 * arguments and then expands NCCL_OFI_TRACE_READ_NVTX with the same
 * arguments. The do/while(0) wrapper makes the expansion usable as a
 * single statement.
 *
 * request  - first value forwarded to both tracing back ends
 * nccl_req - second value forwarded to both tracing back ends
 */
#define NCCL_OFI_TRACE_READ(request, nccl_req)                          \
    do {                                                                \
        lttng_ust_tracepoint(nccl_ofi_plugin, Read, request, nccl_req); \
        NCCL_OFI_TRACE_READ_NVTX(request, nccl_req);                    \
    } while (0)

/*
 * Trace an RMA write request.
 *
 * Fires the LTTng-UST tracepoint nccl_ofi_plugin:Write with the two
 * arguments and then expands NCCL_OFI_TRACE_WRITE_NVTX with the same
 * arguments. The do/while(0) wrapper makes the expansion usable as a
 * single statement.
 *
 * request  - first value forwarded to both tracing back ends
 * nccl_req - second value forwarded to both tracing back ends
 */
#define NCCL_OFI_TRACE_WRITE(request, nccl_req)                          \
    do {                                                                 \
        lttng_ust_tracepoint(nccl_ofi_plugin, Write, request, nccl_req); \
        NCCL_OFI_TRACE_WRITE_NVTX(request, nccl_req);                    \
    } while (0)

#define NCCL_OFI_TRACE_PENDING_INSERT(request) do { \
lttng_ust_tracepoint(nccl_ofi_plugin, Pending_queue_insert, request); \
NCCL_OFI_TRACE_PENDING_INSERT_NVTX(request); \
Expand Down
26 changes: 26 additions & 0 deletions include/tracing_impl/lttng.h
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,32 @@ LTTNG_UST_TRACEPOINT_EVENT(
)
)

/*
 * LTTng-UST event nccl_ofi_plugin:Read.
 *
 * Takes two pointer arguments and records each one as a 64-bit integer
 * field displayed in hexadecimal:
 *   request  - recorded as field "request"
 *   nccl_req - recorded as field "nccl_req"
 */
LTTNG_UST_TRACEPOINT_EVENT(
    nccl_ofi_plugin,
    Read,
    LTTNG_UST_TP_ARGS(void *, request, void *, nccl_req),
    LTTNG_UST_TP_FIELDS(
        lttng_ust_field_integer_hex(uint64_t, request, (uint64_t)request)
        lttng_ust_field_integer_hex(uint64_t, nccl_req, (uint64_t)nccl_req)
    )
)

/*
 * LTTng-UST event nccl_ofi_plugin:Write.
 *
 * Takes two pointer arguments and records each one as a 64-bit integer
 * field displayed in hexadecimal:
 *   request  - recorded as field "request"
 *   nccl_req - recorded as field "nccl_req"
 */
LTTNG_UST_TRACEPOINT_EVENT(
    nccl_ofi_plugin,
    Write,
    LTTNG_UST_TP_ARGS(void *, request, void *, nccl_req),
    LTTNG_UST_TP_FIELDS(
        lttng_ust_field_integer_hex(uint64_t, request, (uint64_t)request)
        lttng_ust_field_integer_hex(uint64_t, nccl_req, (uint64_t)nccl_req)
    )
)


LTTNG_UST_TRACEPOINT_EVENT(
nccl_ofi_plugin,
Expand Down
10 changes: 10 additions & 0 deletions include/tracing_impl/nvtx.h
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,14 @@ static inline void nvtx_end(nvtxRangeId_t id) {
nvtx_mark_domain(NULL, "Flush", 0xA52A2A); \
} while(0)

/*
 * NVTX back end for the Read trace point.
 *
 * Calls nvtx_mark_domain() with NULL as its first argument, the label
 * "Read", and the value 0xFF00FF (presumably an RGB colour - confirm
 * against nvtx_mark_domain()). Neither request nor nccl_req is
 * referenced in the expansion.
 */
#define NCCL_OFI_TRACE_READ_NVTX(request, nccl_req) \
    do {                                            \
        nvtx_mark_domain(NULL, "Read", 0xFF00FF);   \
    } while (0)

/*
 * NVTX back end for the Write trace point.
 *
 * Calls nvtx_mark_domain() with NULL as its first argument, the label
 * "Write", and the value 0xFF00FF (presumably an RGB colour - confirm
 * against nvtx_mark_domain()). Neither request nor nccl_req is
 * referenced in the expansion.
 */
#define NCCL_OFI_TRACE_WRITE_NVTX(request, nccl_req) \
    do {                                             \
        nvtx_mark_domain(NULL, "Write", 0xFF00FF);   \
    } while (0)

/*
 * NVTX back end for the pending-queue insert trace point.
 *
 * Calls nvtx_mark_domain() with NULL as its first argument, the label
 * "Pending_insert", and the value 0xFF8C00 (presumably an RGB colour -
 * confirm against nvtx_mark_domain()). The request argument is not
 * referenced in the expansion.
 */
#define NCCL_OFI_TRACE_PENDING_INSERT_NVTX(request)         \
    do {                                                    \
        nvtx_mark_domain(NULL, "Pending_insert", 0xFF8C00); \
    } while (0)
Expand All @@ -210,6 +218,8 @@ static inline void nvtx_end(nvtxRangeId_t id) {
/*
 * Empty variants of the NVTX trace macros: each accepts any arguments and
 * expands to nothing, so a use of the macro leaves no code behind.
 * NOTE(review): presumably these are the definitions selected when NVTX
 * tracing is not compiled in; the enclosing conditional is outside this
 * view - confirm.
 */
#define NCCL_OFI_TRACE_RECV_SEGMENT_COMPLETE_NVTX(...)
#define NCCL_OFI_TRACE_EAGER_RECV_NVTX(...)
#define NCCL_OFI_TRACE_FLUSH_NVTX(...)
#define NCCL_OFI_TRACE_READ_NVTX(...)
#define NCCL_OFI_TRACE_WRITE_NVTX(...)
#define NCCL_OFI_TRACE_PENDING_INSERT_NVTX(...)
#define NCCL_OFI_TRACE_PENDING_REMOVE_NVTX(...)

Expand Down
5 changes: 5 additions & 0 deletions src/nccl_ofi_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -3691,6 +3691,8 @@ static int rma_read(nccl_net_ofi_recv_comm_t *recv_comm, void* dest, size_t size
*/
(r_comm->num_inflight_reqs)++;

NCCL_OFI_TRACE_READ(req, base_req);

/* Try posting RMA read */

ret = receive_progress(req, true);
Expand Down Expand Up @@ -5304,6 +5306,8 @@ static int rma_write_impl(nccl_net_ofi_send_comm_t *send_comm, void* src, size_t
*/
(s_comm->num_inflight_reqs)++;

NCCL_OFI_TRACE_WRITE(req, base_req);

/* Try posting RMA write with write_inline interface */

ret = send_progress(req);
Expand All @@ -5314,6 +5318,7 @@ static int rma_write_impl(nccl_net_ofi_send_comm_t *send_comm, void* src, size_t
NCCL_OFI_WARN("Failed to nccl_ofi_deque_insert_back: %d", ret);
goto error;
}
NCCL_OFI_TRACE_PENDING_INSERT(req);
} else if (OFI_UNLIKELY(ret != 0)) {
ret = -ENOTSUP;
goto error;
Expand Down

0 comments on commit 4e36a0d

Please sign in to comment.