diff --git a/examples/meson.build b/examples/meson.build
index 3139311a0..c7e667cd6 100644
--- a/examples/meson.build
+++ b/examples/meson.build
@@ -40,6 +40,13 @@ executable(
   include_directories: [incdir, internal_incdir]
 )
 
+executable(
+    'mi-mctp-ae',
+    ['mi-mctp-ae.c'],
+    dependencies: libnvme_mi_dep,
+    include_directories: [incdir, internal_incdir]
+)
+
 if libdbus_dep.found()
   executable(
     'mi-conf',
diff --git a/examples/mi-mctp-ae.c b/examples/mi-mctp-ae.c
new file mode 100644
index 000000000..084acde93
--- /dev/null
+++ b/examples/mi-mctp-ae.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: LGPL-2.1-or-later
+/**
+ * This file is part of libnvme.
+ */
+
+/**
+ * mi-mctp-ae: open an MI connection over MCTP, supporting asynchronous
+ * event messages
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h> // for usleep
+
+#include <libnvme-mi.h>
+
+#include "log.h" /* for DEFAULT_LOGLEVEL */
+
+// Function to print the byte array
+static void print_byte_array(void *data, size_t len)
+{
+	uint8_t *byte_data = (uint8_t *)data;
+
+	for (size_t i = 0; i < len; ++i)
+		printf("%02X ", byte_data[i]);
+	printf("\n");
+}
+
+static void print_event_info(struct nvme_mi_event *event)
+{
+	printf("aeoi: %02X\n", event->aeoi);
+	printf("aeocidi: %04X\n", event->aeocidi);
+	printf("aessi: %02X\n", event->aessi);
+
+	printf("specific_info: ");
+	if (event->spec_info_len && event->spec_info)
+		print_byte_array(event->spec_info, event->spec_info_len);
+	else
+		printf("EMPTY\n");
+
+	printf("vendor_specific_info: ");
+	if (event->vend_spec_info_len && event->vend_spec_info)
+		print_byte_array(event->vend_spec_info, event->vend_spec_info_len);
+	else
+		printf("EMPTY\n");
+}
+
+static enum nvme_mi_aem_handler_next_action aem_handler(nvme_mi_ep_t ep,
+		size_t num_events, void *userdata)
+{
+	uint32_t *count = (uint32_t *)userdata;
+
+	(*count)++;
+
+	printf("Received notification #%u with %zu events:\n", *count, num_events);
+	for (size_t i = 0; i < num_events; i++) {
+		struct nvme_mi_event *event = nvme_mi_aem_get_next_event(ep);
+
+		if (event == NULL)
+			printf("Unexpected NULL event\n");
+		else {
+			printf("Event:\n");
+			print_event_info(event);
+			printf("\n");
+		}
+	}
+
+	return NVME_MI_AEM_HNA_ACK;
+}
+
+int main(int argc, char **argv)
+{
+	nvme_root_t root;
+	nvme_mi_ep_t ep;
+	bool usage = true;
+	uint8_t eid = 0;
+	int rc = 0, net = 0;
+	struct nvme_mi_aem_callbacks aem_cb_info = {0};
+	uint32_t notification_counter = 0;
+	bool enabled[256] = {0};
+
+	const uint8_t AEM_FD_INDEX = 0;
+	const uint8_t STD_IN_FD_INDEX = 1;
+
+	if (argc > 3) {
+		usage = false;
+		net = atoi(argv[1]);
+		eid = atoi(argv[2]) & 0xff;
+		argv += 2;
+		argc -= 2;
+
+		int event_count = argc - 1;
+
+		for (int i = 0; i < event_count; i++) {
+			int event = atoi(argv[1 + i]);
+
+			if (event >= 0 && event < 256)
+				aem_cb_info.enabled[event] = true;
+		}
+	}
+
+	if (usage) {
+		fprintf(stderr,
+			"usage: %s <net> <eid> [AE #s separated by spaces]\n",
+			argv[0]);
+		return EXIT_FAILURE;
+	}
+
+	root = nvme_mi_create_root(stderr, DEFAULT_LOGLEVEL);
+	if (!root)
+		err(EXIT_FAILURE, "can't create NVMe root");
+
+	ep = nvme_mi_open_mctp(root, net, eid);
+	if (!ep)
+		errx(EXIT_FAILURE, "can't open MCTP endpoint %d:%d", net, eid);
+
+	aem_cb_info.aem_handler = aem_handler;
+
+	rc = nvme_mi_aem_get_enabled(ep, enabled, sizeof(enabled));
+	if (rc)
+		errx(EXIT_FAILURE, "Can't query enabled aems: %d (%d)", rc, errno);
+	printf("The following events are now enabled:\n");
+	for (size_t i = 0; i < sizeof(enabled); i++) {
+		if (enabled[i])
+			printf("Event: %zu\n", i);
+	}
+
+	rc = nvme_mi_aem_enable(ep, true, true, true, 1, 100,
+				&aem_cb_info, &notification_counter);
+	if (rc)
+		errx(EXIT_FAILURE, "Can't enable aem: %d (%d)", rc, errno);
+
+	rc = nvme_mi_aem_get_enabled(ep, enabled, sizeof(enabled));
+	if (rc)
+		errx(EXIT_FAILURE, "Can't query enabled aems: %d (%d)", rc, errno);
+
+	struct pollfd fds[2];
+
+	fds[AEM_FD_INDEX].fd = nvme_mi_aem_get_fd(ep);
+	if (fds[AEM_FD_INDEX].fd < 0)
+		errx(EXIT_FAILURE, "Can't get aem fd");
+
+	fds[STD_IN_FD_INDEX].fd = STDIN_FILENO;
+
+	fds[AEM_FD_INDEX].events = POLLIN;
+	fds[STD_IN_FD_INDEX].events = POLLIN;
+
+	printf("Press any key to exit\n");
+	while (1) {
+		int poll_timeout = 500; // Timeout in milliseconds
+
+		rc = poll(fds, 2, poll_timeout);
+
+		if (rc == -1) {
+			perror("poll");
+			break;
+		} else if (rc == 0) {
+			// No data within poll_timeout milliseconds; poll again
+		} else {
+			// Time to do the work
+			if (fds[AEM_FD_INDEX].revents & POLLIN) {
+				rc = nvme_mi_aem_process(ep, &notification_counter);
+				if (rc)
+					errx(EXIT_FAILURE,
+					     "nvme_mi_aem_process failed with: %d (%d)",
+					     rc, errno);
+			}
+			if (fds[STD_IN_FD_INDEX].revents & POLLIN)
+				break; // we are done
+		}
+	}
+
+	// Cleanup
+	nvme_mi_aem_disable(ep);
+	nvme_mi_close(ep);
+	nvme_mi_free_root(root);
+
+	return rc ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/src/libnvme-mi.map b/src/libnvme-mi.map
index 0fb378290..9ddbc02cf 100644
--- a/src/libnvme-mi.map
+++ b/src/libnvme-mi.map
@@ -1,6 +1,12 @@
 LIBNVME_MI_1_12 {
 	global:
 		nvme_mi_mi_xfer;
+		nvme_mi_aem_get_fd;
+		nvme_mi_aem_enable;
+		nvme_mi_aem_process;
+		nvme_mi_aem_disable;
+		nvme_mi_aem_get_next_event;
+		nvme_mi_aem_get_enabled;
 };
 
 LIBNVME_MI_1_11 {
diff --git a/src/nvme/mi-mctp.c b/src/nvme/mi-mctp.c
index a4124f04d..d3e993d86 100644
--- a/src/nvme/mi-mctp.c
+++ b/src/nvme/mi-mctp.c
@@ -13,6 +13,7 @@
 #include
 #include
+#include <fcntl.h>	/* fcntl(): non-blocking purge of the AEM socket */
 #include
 #include
 #include
@@ -65,6 +66,7 @@ struct sockaddr_mctp {
 	__u8		__smctp_pad1;
 };
 
+
 #define MCTP_NET_ANY	0x0
 #define MCTP_ADDR_NULL	0x00
 
@@ -84,6 +86,9 @@ struct nvme_mi_transport_mctp {
 	int	sd;
 	void	*resp_buf;
 	size_t	resp_buf_size;
+	int	sd_aem;
+	void	*resp_buf_aem;
+	size_t	resp_buf_aem_size;
 };
 
 static int ioctl_tag(int sd, unsigned long req, struct mctp_ioc_tag_ctl *ctl)
@@ -220,6 +225,200 @@ static bool nvme_mi_mctp_resp_is_mpr(void *buf, size_t len,
 	return true;
 }
 
+static int nvme_mi_mctp_aem_fd(struct nvme_mi_ep *ep)
+{
+	struct nvme_mi_transport_mctp *mctp;
+
+	if (ep->transport != &nvme_mi_transport_mctp) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (!ep->transport_data) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	mctp = ep->transport_data;
+	return mctp->sd_aem;
+}
+
+static int nvme_mi_mctp_aem_purge(struct nvme_mi_ep *ep)
+{
+	char buffer[1024];
+	ssize_t bytes_read;
+
+	struct nvme_mi_transport_mctp *mctp = ep->transport_data;
+
+	// Set the socket to non-blocking mode
+	int flags = fcntl(mctp->sd_aem, F_GETFL, 0);
+
+	fcntl(mctp->sd_aem, F_SETFL, flags | O_NONBLOCK);
+
+	// Read until there is no more data
+	while ((bytes_read = recv(mctp->sd_aem, buffer, sizeof(buffer), 0)) > 0)
+		;
+
+	// Check for errors other than EAGAIN or EWOULDBLOCK
+	if (bytes_read < 0 && errno != EAGAIN && errno != EWOULDBLOCK)
+		perror("recv");
+
+	// Restore the socket to blocking mode
+	fcntl(mctp->sd_aem, F_SETFL, flags);
+
+	return 0;
+}
+
+static int nvme_mi_mctp_aem_read(struct nvme_mi_ep *ep,
+				 struct nvme_mi_resp *resp)
+{
+	ssize_t len, resp_len, resp_hdr_len, resp_data_len;
+	struct sockaddr_mctp src_addr = { 0 };
+	struct nvme_mi_transport_mctp *mctp;
+	struct iovec resp_iov[1];
+	struct msghdr resp_msg;
+	int rc, errno_save, timeout;
+	struct pollfd pollfds[1];
+	__le32 mic;
+
+	if (ep->transport != &nvme_mi_transport_mctp) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	/* we need enough space for at least a generic (/error) response */
+	if (resp->hdr_len < sizeof(struct nvme_mi_msg_hdr)) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	mctp = ep->transport_data;
+
+	resp_len = resp->hdr_len + resp->data_len + sizeof(mic);
+	if (resp_len > mctp->resp_buf_aem_size) {
+		void *tmp = realloc(mctp->resp_buf_aem, resp_len);
+
+		if (!tmp) {
+			errno_save = errno;
+			nvme_msg(ep->root, LOG_ERR,
+				 "Failure allocating response buffer: %m\n");
+			errno = errno_save;
+			rc = -1;
+			goto out;
+		}
+		mctp->resp_buf_aem = tmp;
+		mctp->resp_buf_aem_size = resp_len;
+	}
+
+	/* offset by one: the MCTP message type is excluded from the buffer */
+	resp_iov[0].iov_base = mctp->resp_buf_aem + 1;
+	resp_iov[0].iov_len = resp_len - 1;
+
+	memset(&resp_msg, 0, sizeof(resp_msg));
+	resp_msg.msg_iov = resp_iov;
+	resp_msg.msg_iovlen = 1;
+	resp_msg.msg_name = &src_addr;
+	resp_msg.msg_namelen = sizeof(src_addr);
+
+	pollfds[0].fd = mctp->sd_aem;
+	pollfds[0].events = POLLIN;
+	timeout = 1;
+retry:
+	rc = ops.poll(pollfds, 1, timeout);
+	if (rc < 0) {
+		if (errno == EINTR)
+			goto retry;
+		errno_save = errno;
+		nvme_msg(ep->root, LOG_ERR,
+			 "Failed polling on MCTP socket: %m");
+		errno = errno_save;
+		goto out;
+	}
+
+	if (rc == 0) {
+		nvme_msg(ep->root, LOG_DEBUG, "Timeout on MCTP socket");
+		errno = ETIMEDOUT;
+		rc = -1;
+		goto out;
+	}
+
+	rc = -1;
+	len = ops.recvmsg(mctp->sd_aem, &resp_msg, MSG_DONTWAIT);
+
+	if (len < 0) {
+		errno_save = errno;
+		nvme_msg(ep->root, LOG_ERR,
+			 "Failure receiving MCTP message: %m\n");
+		errno = errno_save;
+		goto out;
+	}
+
+	if (len == 0) {
+		nvme_msg(ep->root, LOG_WARNING, "No data from MCTP endpoint\n");
+		errno = EIO;
+		goto out;
+	}
+
+	if (resp_msg.msg_namelen < sizeof(src_addr)) {
+		nvme_msg(ep->root, LOG_WARNING, "Unexpected src address length\n");
+		errno = EIO;
+		goto out;
+	}
+
+	if (mctp->eid != src_addr.smctp_addr.s_addr) {
+		//This does not belong to the endpoint we're monitoring
+		errno = EWOULDBLOCK;
+		goto out;
+	}
+
+	/* Re-add the type byte, so we can work on aligned lengths from here */
+	((uint8_t *)mctp->resp_buf_aem)[0] = MCTP_TYPE_NVME | MCTP_TYPE_MIC;
+	len += 1;
+
+	/* The smallest response data is 8 bytes: generic 4-byte message header
+	 * plus four bytes of error data (excluding MIC). Ensure we have enough.
+	 */
+	if (len < 8 + sizeof(mic)) {
+		nvme_msg(ep->root, LOG_ERR,
+			 "Invalid MCTP response: too short (%zd bytes, need %zu)\n",
+			 len, 8 + sizeof(mic));
+		errno = EPROTO;
+		goto out;
+	}
+
+	/* Start unpacking the linear resp buffer into the split header + data
+	 * + MIC.
+	 */
+
+	/* MIC is always at the tail */
+	memcpy(&mic, mctp->resp_buf_aem + len - sizeof(mic), sizeof(mic));
+	len -= sizeof(mic);
+
+	/* we expect resp->hdr_len bytes, but we may have less */
+	resp_hdr_len = resp->hdr_len;
+	if (resp_hdr_len > len)
+		resp_hdr_len = len;
+	memcpy(resp->hdr, mctp->resp_buf_aem, resp_hdr_len);
+	resp->hdr_len = resp_hdr_len;
+	len -= resp_hdr_len;
+
+	/* any remaining bytes are the data payload */
+	resp_data_len = resp->data_len;
+	if (resp_data_len > len)
+		resp_data_len = len;
+	memcpy(resp->data, mctp->resp_buf_aem + resp_hdr_len, resp_data_len);
+	resp->data_len = resp_data_len;
+
+	resp->mic = le32_to_cpu(mic);
+
+	rc = 0;
+
+out:
+	return rc;
+}
+
 static int nvme_mi_mctp_submit(struct nvme_mi_ep *ep,
 			       struct nvme_mi_req *req,
 			       struct nvme_mi_resp *resp)
@@ -433,7 +632,9 @@ static void nvme_mi_mctp_close(struct nvme_mi_ep *ep)
 
 	mctp = ep->transport_data;
 	close(mctp->sd);
+	close(mctp->sd_aem);
 	free(mctp->resp_buf);
+	free(mctp->resp_buf_aem);
 	free(ep->transport_data);
 }
 
@@ -459,12 +660,16 @@ static const struct nvme_mi_transport nvme_mi_transport_mctp = {
 	.submit = nvme_mi_mctp_submit,
 	.close = nvme_mi_mctp_close,
 	.desc_ep = nvme_mi_mctp_desc_ep,
+	.aem_read = nvme_mi_mctp_aem_read,
+	.aem_fd = nvme_mi_mctp_aem_fd,
+	.aem_purge = nvme_mi_mctp_aem_purge,
 };
 
 nvme_mi_ep_t nvme_mi_open_mctp(nvme_root_t root, unsigned int netid, __u8 eid)
 {
 	struct nvme_mi_transport_mctp *mctp;
 	struct nvme_mi_ep *ep;
+	struct sockaddr_mctp addr;
 	int errno_save;
 
 	ep = nvme_mi_init_ep(root);
@@ -479,6 +684,7 @@ nvme_mi_ep_t nvme_mi_open_mctp(nvme_root_t root, unsigned int netid, __u8 eid)
 
 	memset(mctp, 0, sizeof(*mctp));
 	mctp->sd = -1;
+	mctp->sd_aem = -1;
 
 	mctp->resp_buf_size = 4096;
 	mctp->resp_buf = malloc(mctp->resp_buf_size);
@@ -487,13 +693,43 @@ nvme_mi_ep_t nvme_mi_open_mctp(nvme_root_t root, unsigned int netid, __u8 eid)
 		goto err_free_mctp;
 	}
 
+	mctp->resp_buf_aem_size = 4096;
+	mctp->resp_buf_aem = malloc(mctp->resp_buf_aem_size);
+	if (!mctp->resp_buf_aem) {
+		errno_save = errno;
+		goto err_free_rspbuf;
+	}
+
 	mctp->net = netid;
 	mctp->eid = eid;
 
 	mctp->sd = ops.socket(AF_MCTP, SOCK_DGRAM, 0);
 	if (mctp->sd < 0) {
 		errno_save = errno;
-		goto err_free_rspbuf;
+		goto err_free_aem_rspbuf;
+	}
+
+	//Skip this for now for test coverage
+	//because it messes with the test_peer SD
+	if (!(mctp->net == 0 && mctp->eid == 0)) {
+		mctp->sd_aem = ops.socket(AF_MCTP, SOCK_DGRAM, 0);
+		if (mctp->sd_aem < 0) {
+			errno_save = errno;
+			goto err_close_sd;
+		}
+
+		memset(&addr, 0, sizeof(addr));
+		addr.smctp_family = AF_MCTP;
+		addr.smctp_network = mctp->net;
+		addr.smctp_addr.s_addr = MCTP_ADDR_ANY; //mctp->eid;
+		addr.smctp_type = MCTP_TYPE_NVME | MCTP_TYPE_MIC;
+		addr.smctp_tag = MCTP_TAG_OWNER;
+
+		if (bind(mctp->sd_aem, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
+			errno_save = errno;
+			close(mctp->sd_aem);
+			goto err_close_sd;
+		}
 	}
 
 	ep->transport = &nvme_mi_transport_mctp;
@@ -508,13 +744,18 @@ nvme_mi_ep_t nvme_mi_open_mctp(nvme_root_t root, unsigned int netid, __u8 eid)
 
 	return ep;
 
+err_close_sd:
+	close(mctp->sd);
+err_free_aem_rspbuf:
+	free(mctp->resp_buf_aem);
 err_free_rspbuf:
 	free(mctp->resp_buf);
 err_free_mctp:
 	free(mctp);
 err_close_ep:
 	/* the ep->transport is not set yet, so this will not call back
-	 * into nvme_mi_mctp_close() */
+	 * into nvme_mi_mctp_close()
+	 */
 	nvme_mi_close(ep);
 	errno = errno_save;
 	return NULL;
diff --git a/src/nvme/mi.c b/src/nvme/mi.c
index 4640b0a92..be70ba4fd 100644
--- a/src/nvme/mi.c
+++ b/src/nvme/mi.c
@@ -21,6 +21,12 @@
 #include "mi.h"
 #include "private.h"
+#define NUM_ENABLES (256u)
+
+static int nvme_mi_get_async_message(nvme_mi_ep_t ep,
+				     struct nvme_mi_aem_msg *aem_msg,
+				     size_t *aem_msg_len);
+
 static const int default_timeout = 1000; /* milliseconds; endpoints may override */
 
@@ -414,6 +420,59 @@ static int nvme_mi_verify_resp_mic(struct nvme_mi_resp *resp)
 	return resp->mic != ~crc;
 }
 
+int nvme_mi_async_read(nvme_mi_ep_t ep, struct nvme_mi_resp *resp)
+{
+	int rc = ep->transport->aem_read(ep, resp);
+
+	if (nvme_mi_ep_has_quirk(ep, NVME_QUIRK_MIN_INTER_COMMAND_TIME))
+		nvme_mi_record_resp_time(ep);
+
+	if (rc && errno == EWOULDBLOCK) {
+		//Sometimes we might get owned tag data from the wrong endpoint.
+		//This isn't an error, but we shouldn't process it here
+		resp->data_len = 0; //No data to process
+		return 0;
+	} else if (rc) {
+		nvme_msg(ep->root, LOG_INFO, "transport failure\n");
+		return rc;
+	}
+
+	if (ep->transport->mic_enabled) {
+		rc = nvme_mi_verify_resp_mic(resp);
+		if (rc) {
+			nvme_msg(ep->root, LOG_WARNING, "crc mismatch\n");
+			errno = EBADMSG;
+			return -1;
+		}
+	}
+
+	//TODO: There's a bunch of overlap with the nvme_mi_submit. Maybe we make common helpers
+
+	/* basic response checks */
+	if (resp->hdr_len < sizeof(struct nvme_mi_msg_hdr)) {
+		nvme_msg(ep->root, LOG_DEBUG,
+			 "Bad response header len: %zd\n", resp->hdr_len);
+		errno = EPROTO;
+		return -1;
+	}
+
+	if (resp->hdr->type != NVME_MI_MSGTYPE_NVME) {
+		nvme_msg(ep->root, LOG_DEBUG,
+			 "Invalid message type 0x%02x\n", resp->hdr->type);
+		errno = EPROTO;
+		return -1;
+	}
+
+	if (!(resp->hdr->nmp & ~(NVME_MI_ROR_REQ << 7))) {
+		nvme_msg(ep->root, LOG_DEBUG,
+			 "ROR value in response indicates a response\n");
+		errno = EIO;
+		return -1;
+	}
+
+	return 0;
+}
+
 int nvme_mi_submit(nvme_mi_ep_t ep, struct nvme_mi_req *req,
 		   struct nvme_mi_resp *resp)
 {
@@ -429,11 +488,6 @@ int nvme_mi_submit(nvme_mi_ep_t ep, struct nvme_mi_req *req,
 		return -1;
 	}
 
-	if (req->data_len & 0x3) {
-		errno = EINVAL;
-		return -1;
-	}
-
 	if (resp->hdr_len < sizeof(struct nvme_mi_msg_hdr)) {
 		errno = EINVAL;
 		return -1;
@@ -626,6 +680,28 @@ static int nvme_mi_control_parse_status(struct nvme_mi_resp *resp, __u16 *cpsr)
 	return control_resp->status;
 }
 
+static int nvme_mi_get_async_message(nvme_mi_ep_t ep,
+				     struct nvme_mi_aem_msg *aem_msg,
+				     size_t *aem_msg_len)
+{
+	struct nvme_mi_resp resp;
+
+	memset(&resp, 0, sizeof(resp));
+	resp.hdr = &aem_msg->hdr;
+	resp.hdr_len = sizeof(struct nvme_mi_msg_hdr);
+	resp.data = &aem_msg->occ_list_hdr;
+	resp.data_len = *aem_msg_len;
+
+	int rc = nvme_mi_async_read(ep, &resp);
+
+	if (rc)
+		return rc;
+
+	*aem_msg_len = resp.data_len;
+	return 0;
+}
+
+
 int nvme_mi_admin_xfer(nvme_mi_ctrl_t ctrl,
 		       struct nvme_mi_admin_req_hdr *admin_req,
 		       size_t req_data_size,
@@ -1798,8 +1874,9 @@ int nvme_mi_mi_subsystem_health_status_poll(nvme_mi_ep_t ep, bool clear,
 	return 0;
 }
 
-int nvme_mi_mi_config_get(nvme_mi_ep_t ep, __u32 dw0, __u32 dw1,
-			  __u32 *nmresp)
+int nvme_mi_mi_config_set_get_ex(nvme_mi_ep_t ep, __u8 opcode, __u32 dw0,
+				 __u32 dw1, void *data_out, size_t data_out_len,
+				 void *data_in, size_t *data_in_len, __u32 *nmresp)
 {
 	struct nvme_mi_mi_resp_hdr resp_hdr;
 	struct nvme_mi_mi_req_hdr req_hdr;
@@ -1810,17 +1887,21 @@ int nvme_mi_mi_config_get(nvme_mi_ep_t ep, __u32 dw0, __u32 dw1,
 	memset(&req_hdr, 0, sizeof(req_hdr));
 	req_hdr.hdr.type = NVME_MI_MSGTYPE_NVME;
 	req_hdr.hdr.nmp = (NVME_MI_ROR_REQ << 7) | (NVME_MI_MT_MI << 3);
-	req_hdr.opcode = nvme_mi_mi_opcode_configuration_get;
+	req_hdr.opcode = opcode;
 	req_hdr.cdw0 = cpu_to_le32(dw0);
 	req_hdr.cdw1 = cpu_to_le32(dw1);
 
 	memset(&req, 0, sizeof(req));
 	req.hdr = &req_hdr.hdr;
 	req.hdr_len = sizeof(req_hdr);
+	req.data = data_out;
+	req.data_len = data_out_len;
 
 	memset(&resp, 0, sizeof(resp));
 	resp.hdr = &resp_hdr.hdr;
 	resp.hdr_len = sizeof(resp_hdr);
+	resp.data = data_in;
+	resp.data_len = *data_in_len;
 
 	rc = nvme_mi_submit(ep, &req, &resp);
 	if (rc)
@@ -1829,46 +1910,122 @@
 	if (resp_hdr.status)
 		return resp_hdr.status;
 
-	*nmresp = resp_hdr.nmresp[0] |
-		  resp_hdr.nmresp[1] << 8 |
-		  resp_hdr.nmresp[2] << 16;
+	*data_in_len = resp.data_len;
+
+	if (nmresp) {
+		*nmresp = resp_hdr.nmresp[0] |
+			  resp_hdr.nmresp[1] << 8 |
+			  resp_hdr.nmresp[2] << 16;
+	}
 
 	return 0;
 }
 
-int nvme_mi_mi_config_set(nvme_mi_ep_t ep, __u32 dw0, __u32 dw1)
+int nvme_mi_mi_config_get(nvme_mi_ep_t ep, __u32 dw0, __u32 dw1,
+			  __u32 *nmresp)
 {
-	struct nvme_mi_mi_resp_hdr resp_hdr;
-	struct nvme_mi_mi_req_hdr req_hdr;
-	struct nvme_mi_resp resp;
-	struct nvme_mi_req req;
-	int rc;
+	size_t data_in_len = 0;
 
-	memset(&req_hdr, 0, sizeof(req_hdr));
-	req_hdr.hdr.type = NVME_MI_MSGTYPE_NVME;
-	req_hdr.hdr.nmp = (NVME_MI_ROR_REQ << 7) | (NVME_MI_MT_MI << 3);
-	req_hdr.opcode = nvme_mi_mi_opcode_configuration_set;
-	req_hdr.cdw0 = cpu_to_le32(dw0);
-	req_hdr.cdw1 = cpu_to_le32(dw1);
-
-	memset(&req, 0, sizeof(req));
-	req.hdr = &req_hdr.hdr;
-	req.hdr_len = sizeof(req_hdr);
-
-	memset(&resp, 0, sizeof(resp));
-	resp.hdr = &resp_hdr.hdr;
-	resp.hdr_len = sizeof(resp_hdr);
+	return nvme_mi_mi_config_set_get_ex(ep,
+					    nvme_mi_mi_opcode_configuration_get,
+					    dw0,
+					    dw1,
+					    NULL,
+					    0,
+					    NULL,
+					    &data_in_len,
+					    nmresp);
+}
 
-	rc = nvme_mi_submit(ep, &req, &resp);
+int nvme_mi_mi_config_set(nvme_mi_ep_t ep, __u32 dw0, __u32 dw1)
+{
+	size_t data_in_len = 0;
+
+	return nvme_mi_mi_config_set_get_ex(
+		ep,
+		nvme_mi_mi_opcode_configuration_set,
+		dw0,
+		dw1,
+		NULL,
+		0,
+		NULL,
+		&data_in_len,
+		NULL);
+}
+
+int nvme_mi_mi_config_get_async_event(nvme_mi_ep_t ep,
+				      __u8 *aeelver,
+				      struct ae_supported_list_t *list,
+				      size_t *list_num_bytes)
+{
+	__u32 dw0 = NVME_MI_CONFIG_AE;
+	__u32 aeelvertemp = 0;
+
+	int rc = nvme_mi_mi_config_set_get_ex(
+		ep,
+		nvme_mi_mi_opcode_configuration_get,
+		dw0,
+		0,
+		NULL,
+		0,
+		list,
+		list_num_bytes,
+		&aeelvertemp);
 	if (rc)
 		return rc;
 
-	if (resp_hdr.status)
-		return resp_hdr.status;
+	*aeelver = 0x000F & aeelvertemp;
 
 	return 0;
 }
 
+int nvme_mi_mi_config_set_async_event(nvme_mi_ep_t ep,
+				      bool envfa,
+				      bool empfa,
+				      bool encfa,
+				      __u8 aemd,
+				      __u8 aerd,
+				      struct ae_enable_list_t *enable_list,
+				      size_t enable_list_length,
+				      struct nvme_mi_ae_occ_list_hdr *occ_list,
+				      size_t *occ_list_num_bytes)
+{
+	__u32 dw0 = ((__u32)envfa << 26) |
+		    ((__u32)empfa << 25) |
+		    ((__u32)encfa << 24) |
+		    ((__u32)aemd << 16) |
+		    ((__u32)aerd << 8) | NVME_MI_CONFIG_AE;
+
+	//Basic checks here on lengths
+	if (enable_list_length < sizeof(struct ae_enable_list_t) ||
+	    (sizeof(struct ae_enable_list_t) +
+	     enable_list->hdr.numaee * sizeof(struct ae_enable_item_t) >
+	     enable_list_length)) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	//Some very basic header checks
+	if (enable_list->hdr.aeelhl != sizeof(struct ae_enable_list_header_t) ||
+	    enable_list->hdr.aeelver != 0) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	return nvme_mi_mi_config_set_get_ex(ep,
+					    nvme_mi_mi_opcode_configuration_set,
+					    dw0,
+					    0,
+					    enable_list,
+					    enable_list_length,
+					    occ_list,
+					    occ_list_num_bytes,
+					    NULL);
+}
+
 void nvme_mi_close(nvme_mi_ep_t ep)
 {
 	struct nvme_mi_ctrl *ctrl, *tmp;
@@ -1974,3 +2131,434 @@ const char *nvme_mi_status_to_string(int status)
 	return s;
 }
+
+static int validate_enabled_list(struct ae_supported_list_t *list, size_t len)
+{
+	if (list->hdr.aeslver != 0) {
+		errno = EPROTO;
+		return -1;
+	} else if (list->hdr.aeslhl != sizeof(struct ae_supported_list_t)) {
+		errno = EPROTO;
+		return -1;
+	} else if (list->hdr.aest > len ||
+		   list->hdr.aest !=
+		   list->hdr.aeslhl + list->hdr.numaes * sizeof(struct ae_supported_item_t)) {
+		errno = EPROTO;
+		return -1;
+	}
+	return 0;
+}
+
+static int validate_occ_list_update_ctx(
+	struct nvme_mi_ae_occ_list_hdr *occ_header,
+	size_t len,
+	struct nvme_mi_aem_ctx *ctx,
+	bool check_generation_num)
+{
+	//Make sure header fields have valid data
+	if (len < sizeof(*occ_header)) {
+		errno = EPROTO;
+		goto err_cleanup;
+	} else if (occ_header->aelver != 0 ||
+		   occ_header->aeolhl != sizeof(*occ_header)) {
+		//Make sure header is the right version and length
+		errno = EPROTO;
+		goto err_cleanup;
+	} else if (occ_header->aeolli.aeoltl > len) {
+		//Full length is bigger than the data that was received
+		errno = EPROTO;
+		goto err_cleanup;
+	} else if (check_generation_num &&
+		   ctx->last_generation_num == (int)occ_header->aemti.aemgn) {
+		//This is a duplicate and shouldn't be parsed.
+		//Let's just act like there are no updates
+		occ_header->numaeo = 0;
+	} else if (check_generation_num) {
+		ctx->last_generation_num = occ_header->aemti.aemgn;
+	}
+
+	//Header is fine. Let's go through the data
+	//First, we should update our context appropriately
+	ctx->occ_header = occ_header;
+
+	//Data starts after header
+	ctx->list_current = (struct nvme_mi_ae_occ_data *)(occ_header + 1);
+	ctx->list_current_index = 0;
+	ctx->list_start = ctx->list_current;
+
+	struct nvme_mi_ae_occ_data *current = ctx->list_current;
+	size_t bytes_so_far = ctx->occ_header->aeolhl;
+
+	for (int i = 0; i < occ_header->numaeo; i++) {
+		//Validate this item
+		if (current->aelhlen != sizeof(*current)) {
+			errno = EPROTO;
+			goto err_cleanup;
+		} else if (!ctx->callbacks.enabled[current->aeoui.aeoi]) {
+			//This is unexpected as this AE shouldn't be enabled
+			errno = EPROTO;
+			goto err_cleanup;
+		}
+
+		//Okay, check data lengths, including this header and the specific data(s)
+		bytes_so_far += sizeof(*current) + current->aeosil + current->aeovsil;
+		if (bytes_so_far > occ_header->aeolli.aeoltl) {
+			errno = EPROTO;
+			goto err_cleanup;
+		}
+
+		//Advance past this item's variable-length data to the next item
+		current = (struct nvme_mi_ae_occ_data *)
+			((uint8_t *)current + sizeof(*current) +
+			 current->aeosil + current->aeovsil);
+	}
+
+	return 0;
+
+err_cleanup:
+	return -1;
+}
+
+int nvme_mi_aem_get_fd(nvme_mi_ep_t ep)
+{
+	if (!ep || !ep->aem_ctx || !ep->transport)
+		return -1;
+
+	return ep->transport->aem_fd(ep);
+}
+
+static void reset_list_info(struct nvme_mi_aem_ctx *ctx)
+{
+	//Reset context information
+	ctx->list_current_index = -1;
+	ctx->list_start = NULL;
+	ctx->list_current = NULL;
+	ctx->occ_header = NULL;
+}
+
+static int aem_sync(nvme_mi_ep_t ep,
+		    bool envfa,
+		    bool empfa,
+		    bool encfa,
+		    __u8 aemd,
+		    __u8 aerd,
+		    struct ae_enable_item_t *items,
+		    __u8 num_items,
+		    struct nvme_mi_ae_occ_list_hdr *resp,
+		    size_t *resp_len)
+{
+	size_t msg_len =
+		sizeof(struct ae_enable_list_header_t) +
+		num_items * sizeof(struct ae_enable_item_t);
+
+	struct ae_enable_list_header_t *request = malloc(msg_len);
+
+	if (!request)
+		return -1;
+
+	request->aeelhl = sizeof(struct ae_enable_list_header_t);
+	request->numaee = num_items;
+	request->aeelver = 0;
+	request->aeetl = msg_len;
+
+	//Data follows header
+	struct ae_enable_item_t *msg_items = (struct ae_enable_item_t *)(request + 1);
+
+	//Let's be explicit about what's enabled and what's not
+	for (int i = 0; i < num_items;
+	     i++) {
+		msg_items[i] = items[i];
+		msg_items[i].aeel = sizeof(msg_items[i]);
+	}
+
+	//Send it
+	int rc = nvme_mi_mi_config_set_async_event(ep,
+						   envfa,
+						   empfa,
+						   encfa,
+						   aemd,
+						   aerd,
+						   (struct ae_enable_list_t *)request,
+						   msg_len,
+						   resp,
+						   resp_len);
+
+	free(request);
+	return rc;
+}
+
+static int aem_disable_enabled(nvme_mi_ep_t ep)
+{
+	bool already_enabled[NUM_ENABLES] = {false};
+	uint8_t response_buffer[4096] = {0};
+	size_t response_len = sizeof(response_buffer);
+	struct nvme_mi_ae_occ_list_hdr *response =
+		(struct nvme_mi_ae_occ_list_hdr *)response_buffer;
+
+	// First, let's figure out if anything is already enabled that we need to
+	// disable
+	int rc = nvme_mi_aem_get_enabled(ep, already_enabled, sizeof(already_enabled));
+
+	if (rc)
+		return rc;
+
+	int sync_data_count = 0;
+
+	//Add the enabled items to the list of things to disable
+	struct ae_enable_item_t sync_data[NUM_ENABLES] = {0};
+
+	for (int i = 0; i < NUM_ENABLES; i++) {
+		if (already_enabled[i]) {
+			sync_data[sync_data_count].aeei.aeeid = i;
+			sync_data[sync_data_count].aeei.aee = false;
+			sync_data_count++;
+		}
+	}
+
+	rc = aem_sync(ep, false, false, false, 1, 0,
+		      sync_data, sync_data_count, response, &response_len);
+
+	if (rc)
+		return rc;
+
+	//Now, allow a purge of the aem fd because we could have
+	//received some events during this process
+	rc = ep->transport->aem_purge(ep);
+
+	return rc;
+}
+
+int nvme_mi_aem_enable(nvme_mi_ep_t ep,
+		       bool envfa,
+		       bool empfa,
+		       bool encfa,
+		       __u8 aemd,
+		       __u8 aerd,
+		       struct nvme_mi_aem_callbacks *callbacks,
+		       void *userdata)
+{
+	if (!ep || !callbacks || !callbacks->aem_handler)
+		return -1;
+
+	int rc = 0;
+	int sync_data_count = 0;
+	struct ae_enable_item_t sync_data[NUM_ENABLES] = {0};
+
+	uint8_t response_buffer[4096] = {0};
+	size_t response_len = sizeof(response_buffer);
+	struct nvme_mi_ae_occ_list_hdr *response =
+		(struct nvme_mi_ae_occ_list_hdr *)response_buffer;
+
+	//It's possible we're already enabled
+	if (!ep->aem_ctx)
+		ep->aem_ctx = malloc(sizeof(*ep->aem_ctx));
+	if (!ep->aem_ctx)
+		return -1;
+
+	memset(ep->aem_ctx, 0, sizeof(*ep->aem_ctx));
+	ep->aem_ctx->last_generation_num = -1; //Invalid
+	reset_list_info(ep->aem_ctx);
+	ep->aem_ctx->callbacks = *callbacks;
+
+	rc = aem_disable_enabled(ep);
+	if (rc)
+		goto cleanup_ctx;
+
+	//Now, let's do a fresh enable of what's asked
+	for (int i = 0; i < NUM_ENABLES; i++) {
+		if (callbacks->enabled[i]) {
+			sync_data[sync_data_count].aeei.aeeid = i;
+			sync_data[sync_data_count].aeei.aee = true;
+			sync_data_count++;
+		}
+	}
+
+	rc = aem_sync(ep, envfa, empfa, encfa, aemd, aerd,
+		      sync_data, sync_data_count, response, &response_len);
+
+	if (rc)
+		goto cleanup_ctx;
+
+	//Parse the response and fire events
+	rc = validate_occ_list_update_ctx(response,
+					  response_len,
+					  ep->aem_ctx,
+					  false /*generation # shouldn't matter*/);
+	if (rc)
+		goto cleanup_ctx;
+
+	if (response->numaeo) {
+		//Return value unused here
+		callbacks->aem_handler(ep, response->numaeo, userdata);
+	}
+
+cleanup_ctx:
+	// Clear these because they won't point to valid memory anymore
+	reset_list_info(ep->aem_ctx);
+
+	if (rc) {
+		free(ep->aem_ctx);
+		ep->aem_ctx = NULL;
+	}
+	return rc;
+}
+
+int nvme_mi_aem_get_enabled(nvme_mi_ep_t ep,
+			    bool enabled[256],
+			    size_t len)
+{
+	if (!ep || !enabled || len != 256)
+		return -1;
+
+	int rc = 0;
+
+	unsigned char aeelver;
+	size_t ae_list_bytes = NUM_ENABLES * sizeof(struct ae_supported_list_t);
+	struct ae_supported_list_t *enabled_list = malloc(ae_list_bytes);
+
+	if (!enabled_list)
+		return -1;
+
+	rc = nvme_mi_mi_config_get_async_event(
+		ep, &aeelver, enabled_list, &ae_list_bytes);
+	if (rc)
+		goto cleanup;
+
+	rc = validate_enabled_list(enabled_list, ae_list_bytes);
+	if (rc)
+		goto cleanup;
+
+	memset(enabled, 0, 256 * sizeof(*enabled));
+
+	struct ae_enable_item_t *items = (struct ae_enable_item_t *)(enabled_list + 1);
+
+	for (int i = 0; i < enabled_list->hdr.numaes; i++)
+		enabled[items[i].aeei.aeeid] = items[i].aeei.aee;
+
+cleanup:
+	free(enabled_list);
+	return rc;
+}
+
+int nvme_mi_aem_disable(nvme_mi_ep_t ep)
+{
+	if (!ep)
+		return -1;
+
+	int rc = aem_disable_enabled(ep);
+
+	if (ep->aem_ctx)
+		free(ep->aem_ctx);
+	ep->aem_ctx = NULL;
+
+	return rc;
+}
+
+/* When inside an aem_handler, call this and the returned struct will be
+ * populated with the next event's information. Returns NULL at the end of
+ * parsing, or on error. spec_info and vend_spec_info must be copied to
+ * persist, as they will not be valid after the aem_handler has returned.
+ */
+struct nvme_mi_event *nvme_mi_aem_get_next_event(nvme_mi_ep_t ep)
+{
+	if (!ep || !ep->aem_ctx ||
+	    !ep->aem_ctx->list_current ||
+	    ep->aem_ctx->list_current_index == -1 ||
+	    !ep->aem_ctx->occ_header) {
+		return NULL;
+	}
+
+	if (ep->aem_ctx->occ_header->numaeo <= ep->aem_ctx->list_current_index)
+		return NULL;
+
+	struct nvme_mi_aem_ctx *aem_ctx = ep->aem_ctx;
+	struct nvme_mi_ae_occ_data *current = aem_ctx->list_current;
+
+	aem_ctx->event.aeoi = current->aeoui.aeoi;
+	aem_ctx->event.aessi = current->aeoui.aessi;
+	aem_ctx->event.aeocidi = current->aeoui.aeocidi;
+	aem_ctx->event.spec_info_len = current->aeosil;
+	aem_ctx->event.vend_spec_info_len = current->aeovsil;
+	//Now the pointers
+	aem_ctx->event.spec_info = ((uint8_t *)current + current->aelhlen);
+	aem_ctx->event.vend_spec_info =
+		((uint8_t *)aem_ctx->event.spec_info + aem_ctx->event.spec_info_len);
+
+	//Let's grab the next item (if there is any).
+	aem_ctx->list_current_index++;
+	aem_ctx->list_current =
+		(struct nvme_mi_ae_occ_data *)
+		((uint8_t *)aem_ctx->event.vend_spec_info + aem_ctx->event.vend_spec_info_len);
+
+	return &aem_ctx->event;
+}
+
+/* POLLIN has indicated events. This function reads and processes them.
+ * A callback will likely be invoked.
+ */
+int nvme_mi_aem_process(nvme_mi_ep_t ep, void *userdata)
+{
+	int rc = 0;
+	uint8_t response_buffer[4096];
+	struct nvme_mi_aem_msg *response = (struct nvme_mi_aem_msg *)response_buffer;
+	size_t response_len = sizeof(response_buffer) - sizeof(struct nvme_mi_aem_msg);
+
+	if (!ep || !ep->aem_ctx)
+		return -1;
+
+	memset(response_buffer, 0, sizeof(response_buffer));
+
+	//Reset context information
+	reset_list_info(ep->aem_ctx);
+
+	rc = nvme_mi_get_async_message(ep, response, &response_len);
+	if (rc)
+		goto cleanup;
+
+	if (!response_len) {
+		//If no error and response length zero, we've likely received an owned
+		//tag message from a different endpoint than this path is responsible
+		//for monitoring.
+		goto cleanup;
+	}
+
+	//Parse the response and fire events
+	rc = validate_occ_list_update_ctx(&response->occ_list_hdr,
+					  response_len,
+					  ep->aem_ctx,
+					  true /*Ensure unique generation number*/);
+	if (rc)
+		goto cleanup;
+
+	if (response->occ_list_hdr.numaeo) {
+		enum nvme_mi_aem_handler_next_action action =
+			ep->aem_ctx->callbacks.aem_handler(ep,
+							   response->occ_list_hdr.numaeo,
+							   userdata);
+
+		reset_list_info(ep->aem_ctx);
+
+		if (action == NVME_MI_AEM_HNA_ACK) {
+			response_len = sizeof(response_buffer);
+
+			rc = aem_ack(ep, &response->occ_list_hdr, &response_len);
+			if (rc)
+				goto cleanup;
+
+			//The Ack is not guaranteed to have data
+			if (response_len && response->occ_list_hdr.numaeo) {
+				rc = validate_occ_list_update_ctx(&response->occ_list_hdr,
+								  response_len,
+								  ep->aem_ctx,
+								  true);
+				//Callbacks based on ack
+				if (rc == 0 && response->occ_list_hdr.numaeo) {
+					//Return value unused here
+					ep->aem_ctx->callbacks.aem_handler(ep,
+									   response->occ_list_hdr.numaeo,
+									   userdata);
+				}
+			}
+		}
+	} else {
+		//This is unexpected unless we have duplicates. But those shouldn't be acked
+	}
+
+cleanup:
+	reset_list_info(ep->aem_ctx);
+	return rc;
+}
diff --git a/src/nvme/mi.h b/src/nvme/mi.h
index 825de2748..77873d8f7 100644
--- a/src/nvme/mi.h
+++ b/src/nvme/mi.h
@@ -89,6 +89,7 @@
 #include "types.h"
 #include "tree.h"
+#include <stdint.h> /* uint*_t types used by the AEM API below */
 
 /**
  * NVME_MI_MSGTYPE_NVME - MCTP message type for NVMe-MI messages.
@@ -207,6 +208,7 @@ struct nvme_mi_msg_resp {
 	__u8	rsvd0[3];
 };
 
+
 /**
  * enum nvme_mi_mi_opcode - Operation code for supported NVMe-MI commands.
  * @nvme_mi_mi_opcode_mi_data_read: Read NVMe-MI Data Structure
@@ -281,7 +283,8 @@ enum nvme_mi_dtyp {
  *				status poll. Only for Set ops.
  * @NVME_MI_CONFIG_MCTP_MTU: MCTP maximum transmission unit size of port
  *			     specified in dw 0
- *
+ * @NVME_MI_CONFIG_AE: Asynchronous Events configuration
+ *
  * Configuration parameters for the MI Get/Set Configuration commands.
 *
 * See &nvme_mi_mi_config_get() and &nvme_mi_config_set().
@@ -290,6 +292,7 @@ enum nvme_mi_config_id {
 	NVME_MI_CONFIG_SMBUS_FREQ = 0x1,
 	NVME_MI_CONFIG_HEALTH_STATUS_CHANGE = 0x2,
 	NVME_MI_CONFIG_MCTP_MTU = 0x3,
+	NVME_MI_CONFIG_AE = 0x4,
 };
 
 /**
@@ -307,6 +310,173 @@ enum nvme_mi_config_smbus_freq {
 	NVME_MI_CONFIG_SMBUS_FREQ_1MHz = 0x3,
 };
 
+/* Asynchronous Event Message definitions */
+
+/**
+ * struct ae_supported_list_header_t - Asynchronous Event Supported List Header.
+ * @numaes: Number of AE supported data structures that follow the header
+ * @aeslver: AE Supported List Version (should be zero)
+ * @aest: AE Supported list length (including this header)
+ * @aeslhl: AE Supported list header length (should be 5)
+ *
+ * This header precedes a number, (&numaes), of AE supported data structures
+ */
+struct ae_supported_list_header_t {
+	__u8 numaes;
+	__u8 aeslver; //Should be zero
+	__le16 aest;
+	__u8 aeslhl; //Should be 5
+} __attribute__((packed));
+_Static_assert(sizeof(struct ae_supported_list_header_t) == 5,
+	       "size_of_ae_supported_list_header_t_is_not_5_bytes");
+
+
+/**
+ * struct ae_supported_item_t - AE Supported List Item
+ * @aesl: AE supported list item length
+ * @aesi: AE supported info
+ *
+ * A single entry of AE supported information
+ */
+struct ae_supported_item_t {
+	__u8 aesl; //Length of this item. Set to 3
+	struct {
+		__u16 aeis : 8; //Identifier of supported ae
+		__u16 reserved : 7;
+		__u16 aese : 1; //AE Support Enabled
+	} __attribute__((packed)) aesi;
+} __attribute__((packed));
+_Static_assert(sizeof(struct ae_supported_item_t) == 3,
+	       "size_of_ae_supported_item_t_is_not_3_bytes");
+
+/**
+ * struct ae_supported_list_t - AE Supported List received with GET CONFIG Asynchronous Event
+ * @hdr: AE supported list header
+ *
+ * Following this header should be hdr.numaes entries of ae_supported_item_t structures
+ */
+struct ae_supported_list_t {
+	struct ae_supported_list_header_t hdr;
+} __attribute__((packed));
+
+/**
+ * struct ae_enable_item_t - AE Enabled item entry
+ * @aeel: AE Enable Length (length of this structure which is 3)
+ * @aeei: AE Enable Info
+ *
+ * A single entry of AE enable information, sent with SET CONFIG Asynchronous Event
+ */
+struct ae_enable_item_t {
+	__u8 aeel;
+	struct {
+		__u16 aeeid : 8; //AE identifier
+		__u16 reserved : 7;
+		__u16 aee : 1; //AE enabled bit
+	} __attribute__((packed)) aeei;
+};
+_Static_assert(sizeof(struct ae_enable_item_t) == 3,
+	       "size_of_ae_enable_item_t_is_not_3_bytes");
+
+/**
+ * struct ae_enable_list_header_t - AE Enable list header
+ * @numaee: Number of AE enable items following the header
+ * @aeelver: Version of the AE enable list (zero)
+ * @aeetl: Total length of the AE enable list including header and items
+ * @aeelhl: Header length of this header (5)
+ */
+struct ae_enable_list_header_t {
+	__u8 numaee;
+	__u8 aeelver;
+	__le16 aeetl;
+	__u8 aeelhl;
+} __attribute__((packed));
+_Static_assert(sizeof(struct ae_enable_list_header_t) == 5,
+	       "size_of_ae_enable_list_header_t_is_not_5_bytes");
+
+/**
+ * struct ae_enable_list_t - AE enable list sent with SET CONFIG Asynchronous Event
+ * @hdr: AE enable list header
+ *
+ * Following this header should be hdr.numaee entries of ae_enable_item_t structures
+ */
+struct ae_enable_list_t {
+	struct ae_enable_list_header_t hdr;
+} __attribute__((packed));
+
+/**
+ * struct nvme_mi_ae_occ_data - AEM Message definition.
+ * @aelhlen: AE Occurrence Header Length
+ * @aeosil: AE Occurrence Specific Info Length
+ * @aeovsil: AE Occurrence Vendor Specific Info Length
+ * @aeoui: AE Occurrence Unique ID made up of other subfields
+ *
+ * A single entry of ae occurrence data that comes with an nvme_aem_msg.
+ * Following this structure is variable length AEOSI (occurrence specific
+ * info) and variable length AEVSI (vendor specific info). The length of
+ * AEOSI is specified by aeosil and the length of AEVSI is specified by
+ * aeovsil. Neither field is mandatory, and each shall be omitted if its
+ * length parameter is set to zero.
+ */
+struct nvme_mi_ae_occ_data {
+	__u8 aelhlen;
+	__u8 aeosil;
+	__u8 aeovsil;
+	struct {
+		__u8 aeoi;
+		__u32 aeocidi;
+		__u8 aessi;
+	} __attribute__((packed)) aeoui;
+} __attribute__((packed));
+_Static_assert(sizeof(struct nvme_mi_ae_occ_data) == 9,
+	       "size_of_nvme_mi_ae_occ_data_is_not_9_bytes");
+
+/**
+ * struct nvme_mi_ae_occ_list_hdr - AE occurrence list header
+ * @numaeo: Number of AE Occurrence Data Structures
+ * @aelver: AE Occurrence List Version Number
+ * @aeolli: AE Occurrence List Length Info (AEOLLI)
+ * @aeolhl: AE Occurrence List Header Length (shall be set to 7)
+ * @aemti: AEM Transmission Info
+ *
+ * The header for the occurrence list. numaeo defines how many
+ * nvme_mi_ae_occ_data structures (including variable payloads) are included.
+ * Following this header is each of the numaeo occurrence data structures.
+ */
+struct nvme_mi_ae_occ_list_hdr {
+	__u8 numaeo;
+	__u8 aelver;
+	struct {
+		unsigned int aeoltl : 23;
+		unsigned int overflow : 1;
+	} __attribute__((packed)) aeolli;
+	__u8 aeolhl;
+	struct {
+		unsigned int aemrc : 3;
+		unsigned int aemgn : 5;
+	} __attribute__((packed)) aemti;
+} __attribute__((packed));
+_Static_assert(sizeof(struct nvme_mi_ae_occ_list_hdr) == 7,
+	       "size_of_nvme_mi_ae_occ_list_hdr_is_not_7_bytes");
+
+
+/**
+ * struct nvme_mi_aem_msg - AEM Message definition.
+ * @hdr: the general response message header
+ * @occ_list_hdr: ae occurrence list header.
+ *
+ * Every ae message will start with one of these. The occ_list_hdr will define
+ * information about how many ae occ data entries are included. Each entry is
+ * defined by the nvme_mi_ae_occ_data structure which will follow the
+ * occ_list_hdr. Each nvme_mi_ae_occ_data structure has a fixed length header
+ * but a variable length payload due to occurrence specific and vendor specific
+ * info. For this reason, do not index the nvme_mi_ae_occ_data structures by
+ * array or fixed offset.
+ */
+struct nvme_mi_aem_msg {
+	struct nvme_mi_msg_hdr hdr;
+	struct nvme_mi_ae_occ_list_hdr occ_list_hdr;
+} __attribute__((packed));
+
 /* Admin command definitions */
 
 /**
@@ -1020,6 +1190,89 @@ static inline int nvme_mi_mi_config_set_mctp_mtu(nvme_mi_ep_t ep, __u8 port,
 	return nvme_mi_mi_config_set(ep, dw0, mtu);
 }
 
+
+/**
+ * nvme_mi_mi_config_get_async_event - get configuration: Asynchronous Event
+ * @ep: endpoint for MI communication
+ * @aeelver: Asynchronous Event Enable List Version Number
+ * @list: AE Supported list header and list contents
+ * @list_num_bytes: number of bytes in the list header and contents buffer.
+ *		    This will be populated with the returned size of the list
+ *		    and contents if successful.
+ *
+ * Performs an MI Configuration Get to query the currently enabled
+ * Asynchronous Events. On success, populates @aeelver and @list with the
+ * current info.
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_mi_mi_config_get_async_event(nvme_mi_ep_t ep,
+				      __u8 *aeelver,
+				      struct ae_supported_list_t *list,
+				      size_t *list_num_bytes);
+
+/**
+ * nvme_mi_mi_config_set_async_event - set configuration: Asynchronous Event
+ * @ep: endpoint for MI communication
+ * @envfa: Enable SR-IOV Virtual Functions AE
+ * @empfa: Enable SR-IOV Physical Functions AE
+ * @encfa: Enable PCI Functions AE.
+ * @aemd: AEM Delay Interval (for Sync only)
+ * @aerd: AEM Retry Delay (for Sync only; time in 100s of ms)
+ * @enable_list: ae_enable_list_t structure containing header and items
+ *		 of events to be enabled or disabled. This is taken as a delta
+ *		 change from the current configuration.
+ * @enable_list_length: Length of the enable_list in bytes including header
+ *			and data. Meant to catch overrun issues.
+ * @occ_list: Pointer to populate with the occurrence list (header and data)
+ * @occ_list_num_bytes: Total length of provided occ_list buffer in bytes.
+ *			Will be updated with the received size if successful.
+ *
+ * Performs an MI Configuration Set, to ACK (sent after an AEM) or Sync (at
+ * any time, to enable or disable Asynchronous Events).
+ *
+ * On success, populates @occ_list. See TP6035a for details on how occ_list
+ * is populated in ACK versus Sync conditions.
+ *
+ * Return: The nvme command status if a response was received (see
+ * &enum nvme_status_field) or -1 with errno set otherwise.
+ */
+int nvme_mi_mi_config_set_async_event(nvme_mi_ep_t ep,
+				      bool envfa,
+				      bool empfa,
+				      bool encfa,
+				      __u8 aemd,
+				      __u8 aerd,
+				      struct ae_enable_list_t *enable_list,
+				      size_t enable_list_length,
+				      struct nvme_mi_ae_occ_list_hdr *occ_list,
+				      size_t *occ_list_num_bytes);
+
+static inline int aem_ack(nvme_mi_ep_t ep,
+			  struct nvme_mi_ae_occ_list_hdr *occ_list,
+			  size_t *occ_list_num_bytes)
+{
+	//An AEM Ack is defined as a SET CONFIG AE with no AE enable items
+	struct ae_enable_list_t list = {0};
+
+	list.hdr.aeelhl = sizeof(struct ae_enable_list_header_t);
+	list.hdr.aeelver = 0;
+	list.hdr.aeetl = sizeof(struct ae_enable_list_header_t);
+	list.hdr.numaee = 0;
+
+	return nvme_mi_mi_config_set_async_event(ep,
+						 false,
+						 false,
+						 false,
+						 0,
+						 0,
+						 &list,
+						 sizeof(list),
+						 occ_list,
+						 occ_list_num_bytes);
+}
+
 /* Admin channel functions */
 
 /**
@@ -3086,4 +3339,157 @@ int nvme_mi_admin_format_nvm(nvme_mi_ctrl_t ctrl,
 int nvme_mi_admin_sanitize_nvm(nvme_mi_ctrl_t ctrl,
 			       struct nvme_sanitize_nvm_args *args);
 
+/**
+ * enum nvme_mi_aem_handler_next_action - Next action for the AEM state machine handler
+ * @NVME_MI_AEM_HNA_ACK: Send an ack for the AEM
+ * @NVME_MI_AEM_HNA_NONE: No further action
+ *
+ * Used as return value for the AE callback generated when calling nvme_mi_aem_process
+ */
+enum nvme_mi_aem_handler_next_action {
+	NVME_MI_AEM_HNA_ACK,
+	NVME_MI_AEM_HNA_NONE,
+};
+
+/**
+ * struct nvme_mi_event - AE event information structure
+ * @aeoi: Event identifier
+ * @aessi: Event occurrence scope info
+ * @aeocidi: Event occurrence scope ID info
+ * @spec_info: Specific info buffer
+ * @spec_info_len: Length of specific info buffer
+ * @vend_spec_info: Vendor specific info buffer
+ * @vend_spec_info_len: Length of vendor specific info buffer
+ *
+ * Application callbacks for nvme_mi_aem_process will be able to call
+ * nvme_mi_aem_get_next_event, which returns a pointer to such a structure
+ * for the next event the application should parse
+ */
+struct nvme_mi_event {
+	uint8_t aeoi;
+	uint8_t aessi;
+	uint32_t aeocidi;
+	void *spec_info;
+	size_t spec_info_len;
+	void *vend_spec_info;
+	size_t vend_spec_info_len;
+};
+
+/**
+ * nvme_mi_aem_get_next_event() - Get details for the next event to parse
+ * @ep: The endpoint with the event
+ *
+ * When inside an aem_handler, call this and the returned struct pointer
+ * will provide details of event information. Returns NULL when parsing is
+ * complete. spec_info and vend_spec_info must be copied to persist, as they
+ * will not be valid after the aem_handler has returned.
+ *
+ * Return: Pointer to the next nvme_mi_event, or NULL if this is the last one
+ */
+struct nvme_mi_event *nvme_mi_aem_get_next_event(nvme_mi_ep_t ep);
+
+
+/**
+ * struct nvme_mi_aem_callbacks - Provided for nvme_mi_aem_enable
+ * @aem_handler: Callback function for application processing of events
+ * @enabled: Array indicating which AEs should be enabled on the endpoint
+ *
+ * Application callbacks for nvme_mi_aem_process will be able to call
+ * nvme_mi_aem_get_next_event, which returns a pointer to event information
+ * for the next event the application should parse
+ */
+struct nvme_mi_aem_callbacks {
+	/*
+	 * This is called from inside nvme_mi_aem_process when a payload has
+	 * been validated and can be parsed. The application may call
+	 * nvme_mi_aem_get_next_event from inside the callback to parse event
+	 * data.
+	 */
+	enum nvme_mi_aem_handler_next_action (*aem_handler)(
+		nvme_mi_ep_t ep,
+		size_t num_events,
+		void *userdata);
+
+	bool enabled[256];
+};
+
+/**
+ * nvme_mi_aem_get_fd() - Returns the pollable fd for AEM data available
+ * @ep: The endpoint being monitored for asynchronous data
+ *
+ * The returned descriptor can be polled by the application to determine
+ * whether a call to nvme_mi_aem_process() is required (when poll() returns
+ * > 0).
+ *
+ * Return: The fd value or -1 if error
+ */
+int nvme_mi_aem_get_fd(nvme_mi_ep_t ep);
+
+/**
+ * nvme_mi_aem_enable() - Enable AE on the provided endpoint
+ * @ep: Endpoint to enable AEs
+ * @envfa: Enable SR-IOV virtual functions AE
+ * @empfa: Enable SR-IOV physical functions AE
+ * @encfa: Enable PCIe functions AE
+ * @aemd: AEM Delay (time in seconds from when event happens to AEM being batched and sent)
+ * @aerd: AEM Retry Delay (time in 100s of ms between AEM retries from the endpoint)
+ * @callbacks: AE configuration, including which events are enabled and the callback function
+ * @userdata: Application provided context pointer for callback function
+ *
+ * This function is called to enable AE on the endpoint. The endpoint will
+ * provide the initial state (if any) of enabled AEs and the application can
+ * parse those via the aem_handler fn pointer in @callbacks. These can be
+ * obtained in the callback by calling nvme_mi_aem_get_next_event().
+ *
+ * The application should poll the fd that can be obtained from
+ * nvme_mi_aem_get_fd() and then call nvme_mi_aem_process() when poll()
+ * indicates data is available.
+ *
+ * A call to nvme_mi_aem_process() will grab AEM data and call the aem_handler
+ * fn pointer. At this point the application can call
+ * nvme_mi_aem_get_next_event() to get information for each triggered event.
+ *
+ * Return: 0 on success, nonzero on error; errno may be read for further details
+ */
+int nvme_mi_aem_enable(nvme_mi_ep_t ep,
+		       bool envfa,
+		       bool empfa,
+		       bool encfa,
+		       __u8 aemd,
+		       __u8 aerd,
+		       struct nvme_mi_aem_callbacks *callbacks,
+		       void *userdata);
+
+
+/**
+ * nvme_mi_aem_get_enabled() - Return information on which AEs are enabled
+ * @ep: Endpoint to check enabled status
+ * @enabled: buffer, indexed by AE event ID, of enabled state
+ * @len: length of @enabled. Must be 256 bytes.
+ *
+ * Return: 0 on success, nonzero on error; errno may be read for further details
+ */
+int nvme_mi_aem_get_enabled(nvme_mi_ep_t ep,
+			    bool enabled[256],
+			    size_t len);
+
+/**
+ * nvme_mi_aem_disable() - Disable AE on the provided endpoint
+ * @ep: Endpoint to disable AEs
+ *
+ * Return: 0 on success, nonzero on error; errno may be read for further details
+ */
+int nvme_mi_aem_disable(nvme_mi_ep_t ep);
+
+/**
+ * nvme_mi_aem_process() - Process AEM on the provided endpoint
+ * @ep: Endpoint to process
+ * @userdata: Application provided context pointer for callback function
+ *
+ * Call this if poll() indicates data is available on the fd provided by
+ * nvme_mi_aem_get_fd().
+ *
+ * This will call the fn pointer, aem_handler, provided with
+ * nvme_mi_aem_callbacks, and the application can call
+ * nvme_mi_aem_get_next_event() from within this callback to get aem event
+ * data. The callback function should return NVME_MI_AEM_HNA_ACK for normal
+ * operation.
+ *
+ * Return: 0 on success, nonzero on error; errno may be read for further details
+ */
+int nvme_mi_aem_process(nvme_mi_ep_t ep, void *userdata);
+
 #endif /* _LIBNVME_MI_MI_H */
diff --git a/src/nvme/private.h b/src/nvme/private.h
index 48ddedc2a..488f81f42 100644
--- a/src/nvme/private.h
+++ b/src/nvme/private.h
@@ -244,6 +244,20 @@ struct nvme_mi_transport {
 	void (*close)(struct nvme_mi_ep *ep);
 	int (*desc_ep)(struct nvme_mi_ep *ep, char *buf, size_t len);
 	int (*check_timeout)(struct nvme_mi_ep *ep, unsigned int timeout);
+	int (*aem_fd)(struct nvme_mi_ep *ep);
+	int (*aem_read)(struct nvme_mi_ep *ep,
+			struct nvme_mi_resp *resp);
+	int (*aem_purge)(struct nvme_mi_ep *ep);
+};
+
+struct nvme_mi_aem_ctx {
+	struct nvme_mi_ae_occ_list_hdr *occ_header;
+	struct nvme_mi_ae_occ_data *list_start;
+	struct nvme_mi_ae_occ_data *list_current;
+	int list_current_index;
+	struct nvme_mi_aem_callbacks callbacks;
+	int last_generation_num;
+	struct nvme_mi_event event;
 };
 
 /* quirks */
@@ -270,6 +284,8 @@ struct nvme_mi_ep {
 	unsigned int inter_command_us;
 	struct timespec last_resp_time;
 	bool last_resp_time_valid;
+
+	struct nvme_mi_aem_ctx *aem_ctx;
 };
 
 struct nvme_mi_ctrl {
diff --git a/test/mi.c b/test/mi.c
index 6126d8795..677f4fdc5 100644
--- a/test/mi.c
+++ b/test/mi.c
@@ -95,6 +95,10 @@ static const struct nvme_mi_transport test_transport = {
 	.submit = test_transport_submit,
 	.close = test_transport_close,
 	.desc_ep = test_transport_desc_ep,
+	//TODO: wire up the AEM hooks for the test transport
+	.aem_fd = NULL,
+	.aem_purge = NULL,
+	.aem_read = NULL,
 };
 
 static void test_set_transport_callback(nvme_mi_ep_t ep, test_submit_cb cb,
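
To eventually exercise these hooks from the unit tests, the three NULL callbacks above could be backed by a plain pipe instead of an AF_MCTP socket. The following is a minimal sketch only, under stated assumptions: the names test_transport_aem_fd/purge/read and aem_pipe_fds are hypothetical and not part of this patch, and a real test would wire them into test_transport above and feed a canned AEM payload into the pipe's write end.

#include <errno.h>
#include <fcntl.h>
#include <unistd.h>

/* Hypothetical pipe standing in for the AEM socket: the read end is handed
 * out via .aem_fd so poll()-based consumers work unchanged, and a test makes
 * it readable by writing into aem_pipe_fds[1]. */
static int aem_pipe_fds[2] = { -1, -1 };

static int test_transport_aem_fd(struct nvme_mi_ep *ep)
{
	if (aem_pipe_fds[0] < 0) {
		if (pipe(aem_pipe_fds))
			return -1;
		/* non-blocking, so a purge can drain without hanging */
		fcntl(aem_pipe_fds[0], F_SETFL, O_NONBLOCK);
	}
	return aem_pipe_fds[0];
}

static int test_transport_aem_purge(struct nvme_mi_ep *ep)
{
	char buf[64];

	/* read() returns -1/EAGAIN once the pipe is drained */
	while (read(aem_pipe_fds[0], buf, sizeof(buf)) > 0)
		;
	return 0;
}

static int test_transport_aem_read(struct nvme_mi_ep *ep,
				   struct nvme_mi_resp *resp)
{
	/* A fuller stub would copy a canned message into resp->hdr and
	 * resp->data; failing with EWOULDBLOCK mimics "traffic for another
	 * endpoint", which nvme_mi_async_read() treats as benign. */
	errno = EWOULDBLOCK;
	return -1;
}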