From 9aa13e072d6e0bd5eaddf06c782d9c2c5877d73c Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 10 Oct 2024 16:18:49 +0000 Subject: [PATCH 01/34] DAOS-15682 dfuse: Fail on concurrent read. Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 43ce9328230..063ff27c3b0 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -270,7 +270,7 @@ chunk_cb(struct dfuse_event *ev) } } -/* Submut a read to dfs. +/* Submit a read to dfs. * * Returns true on success. */ @@ -408,6 +408,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) if (cd->complete) { ev = cd->ev; } else { + D_ASSERT(cd->reqs[slot] == 0); cd->reqs[slot] = req; cd->ohs[slot] = oh; } From 1d4019f1dd183809b4e9e0d8573893ff86d27da1 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 11 Oct 2024 10:34:52 +0000 Subject: [PATCH 02/34] Try and fix issue. Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 42 +++++++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 063ff27c3b0..e8bcc247cdd 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -28,15 +28,10 @@ dfuse_cb_read_complete(struct dfuse_event *ev) } } - if (ev->de_len == 0) { + if (ev->de_len == 0) DFUSE_TRA_DEBUG(oh, "%#zx-%#zx requested (EOF)", ev->de_req_position, ev->de_req_position + ev->de_req_len - 1); - - DFUSE_REPLY_BUFQ(oh, ev->de_req, ev->de_iov.iov_buf, ev->de_len); - D_GOTO(release, 0); - } - - if (ev->de_len == ev->de_req_len) + else if (ev->de_len == ev->de_req_len) DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", ev->de_req_position, ev->de_req_position + ev->de_req_len - 1); else @@ -146,6 +141,7 @@ struct read_chunk_data { struct dfuse_event *ev; fuse_req_t reqs[8]; struct dfuse_obj_hdl *ohs[8]; + bool slot_done[8]; d_list_t list; uint64_t bucket; struct dfuse_eq *eqt; @@ -226,6 +222,7 @@ chunk_cb(struct dfuse_event *ev) do { int i; + req = 0; D_MUTEX_LOCK(&rc_lock); @@ -239,8 +236,9 @@ chunk_cb(struct dfuse_event *ev) cd->complete = true; for (i = 0; i < 8; i++) { if (cd->reqs[i]) { - req = cd->reqs[i]; - cd->reqs[i] = 0; + req = cd->reqs[i]; + cd->reqs[i] = 0; + cd->slot_done[i] = true; break; } } @@ -396,6 +394,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) rcb = chunk_fetch(req, oh, cd, slot); } else { struct dfuse_event *ev = NULL; + bool sd = false; /* Now check if this read request is complete or not yet, if it isn't then just * save req in the right slot however if it is then reply here. After the call to @@ -407,10 +406,27 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) D_MUTEX_LOCK(&rc_lock); if (cd->complete) { ev = cd->ev; + if (!cd->slot_done[slot]) { + /* DAOS-16686: Reply to each slot more than once if requested, but + * only track the first reply for knowing when to free the buffer. + */ + cd->slot_done[slot] = true; + sd = false; + DFUSE_TRA_WARNING(oh, "concurrent read, met"); + } } else { - D_ASSERT(cd->reqs[slot] == 0); - cd->reqs[slot] = req; - cd->ohs[slot] = oh; + if (cd->reqs[slot] != 0) { + /* Handle concurrent reads of the same slot, this wasn't expected + * but can happen so for now reject it at this level and have read + * perform a seperate I/O for this slot. + * TODO: Handle DAOS-16686 here. + */ + rcb = false; + DFUSE_TRA_WARNING(oh, "concurrent read, un-met"); + } else { + cd->reqs[slot] = req; + cd->ohs[slot] = oh; + } } D_MUTEX_UNLOCK(&rc_lock); @@ -425,7 +441,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) position + K128 - 1); DFUSE_REPLY_BUFQ(oh, req, ev->de_iov.iov_buf + (slot * K128), K128); } - if (atomic_fetch_add_relaxed(&cd->exited, 1) == 7) { + if (sd && atomic_fetch_add_relaxed(&cd->exited, 1) == 7) { d_slab_release(cd->eqt->de_read_slab, cd->ev); D_FREE(cd); } From 122faf03f426a4308b6b3229903f3b8a8790feef Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 11 Oct 2024 11:59:12 +0000 Subject: [PATCH 03/34] First stab at a fix. Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 9 ++++ src/client/dfuse/dfuse_core.c | 4 ++ src/client/dfuse/ops/read.c | 87 ++++++++++++++++++++++++++--------- 3 files changed, 79 insertions(+), 21 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index e3b3c0d7d0e..2029ca4346b 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -401,6 +401,13 @@ struct dfuse_event { d_iov_t de_iov; d_sg_list_t de_sgl; d_list_t de_list; + + /* Position in a list of events, this will either be off ie->ie_open_reads or + * de->de_read_slaves + */ + d_list_t de_read_list; + /* List of slave events */ + d_list_t de_read_slaves; struct dfuse_eq *de_eqt; union { struct dfuse_obj_hdl *de_oh; @@ -1013,6 +1020,8 @@ struct dfuse_inode_entry { /* Entry on the evict list */ d_list_t ie_evict_entry; + d_list_t ie_open_reads; + struct read_chunk_core *ie_chunk; }; diff --git a/src/client/dfuse/dfuse_core.c b/src/client/dfuse/dfuse_core.c index 4f654fa3209..247382acaab 100644 --- a/src/client/dfuse/dfuse_core.c +++ b/src/client/dfuse/dfuse_core.c @@ -1254,6 +1254,7 @@ dfuse_ie_init(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie) atomic_init(&ie->ie_linear_read, true); atomic_fetch_add_relaxed(&dfuse_info->di_inode_count, 1); D_INIT_LIST_HEAD(&ie->ie_evict_entry); + D_INIT_LIST_HEAD(&ie->ie_open_reads); D_RWLOCK_INIT(&ie->ie_wlock, 0); } @@ -1317,6 +1318,9 @@ dfuse_read_event_size(void *arg, size_t size) ev->de_sgl.sg_nr = 1; } + /* D_INIT_LIST_HEAD(&ev->de_read_list); */ + D_INIT_LIST_HEAD(&ev->de_read_slaves); + rc = daos_event_init(&ev->de_ev, ev->de_eqt->de_eq, NULL); if (rc != -DER_SUCCESS) { return false; diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index e8bcc247cdd..cb98320b2f9 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -7,8 +7,19 @@ #include "dfuse_common.h" #include "dfuse.h" +/* Global lock for all chunk read operations. Each inode has a struct read_chunk_core * entry + * which is checked for NULL and set whilst holding this lock. Each read_chunk_core then has + * a list of read_chunk_data and again, this lock protects all lists on all inodes. This avoids + * the need for a per-inode lock which for many files would consume considerable memory but does + * mean there is potentially some lock contention. The lock however is only held for list + * manipulation, no dfs or kernel calls are made whilst holding the lock. + * + * Also used for ie_open_reads list. + */ +static pthread_mutex_t rc_lock = PTHREAD_MUTEX_INITIALIZER; + static void -dfuse_cb_read_complete(struct dfuse_event *ev) +cb_read_helper(struct dfuse_event *ev, void *buff) { struct dfuse_obj_hdl *oh = ev->de_oh; @@ -40,9 +51,36 @@ dfuse_cb_read_complete(struct dfuse_event *ev) ev->de_req_position + ev->de_len, ev->de_req_position + ev->de_req_len - 1); - DFUSE_REPLY_BUFQ(oh, ev->de_req, ev->de_iov.iov_buf, ev->de_len); + DFUSE_REPLY_BUFQ(oh, ev->de_req, buff, ev->de_len); release: daos_event_fini(&ev->de_ev); +} + +static void +dfuse_cb_read_complete(struct dfuse_event *ev) +{ + struct dfuse_event *evs, *evn; + + D_MUTEX_LOCK(&rc_lock); + d_list_del(&ev->de_read_list); + D_MUTEX_UNLOCK(&rc_lock); + + d_list_for_each_entry(evs, &ev->de_read_slaves, de_read_list) { + DFUSE_TRA_WARNING(ev, "concurrent network read %p", evs); + evs->de_len = ev->de_len; + evs->de_ev.ev_error = ev->de_ev.ev_error; + d_list_del(&evs->de_read_list); + cb_read_helper(evs, ev->de_iov.iov_buf); + } + + cb_read_helper(ev, ev->de_iov.iov_buf); + + d_list_for_each_entry_safe(evs, evn, &ev->de_read_slaves, de_read_list) { + d_list_del(&evs->de_read_list); + d_slab_restock(evs->de_eqt->de_read_slab); + d_slab_release(evs->de_eqt->de_read_slab, ev); + } + d_slab_restock(ev->de_eqt->de_read_slab); d_slab_release(ev->de_eqt->de_read_slab, ev); } @@ -156,15 +194,6 @@ struct read_chunk_core { d_list_t entries; }; -/* Global lock for all chunk read operations. Each inode has a struct read_chunk_core * entry - * which is checked for NULL and set whilst holding this lock. Each read_chunk_core then has - * a list of read_chunk_data and again, this lock protects all lists on all inodes. This avoids - * the need for a per-inode lock which for many files would consume considerable memory but does - * mean there is potentially some lock contention. The lock however is only held for list - * manipulation, no dfs or kernel calls are made whilst holding the lock. - */ -static pthread_mutex_t rc_lock = PTHREAD_MUTEX_INITIALIZER; - static void chunk_free(struct read_chunk_data *cd) { @@ -418,7 +447,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) if (cd->reqs[slot] != 0) { /* Handle concurrent reads of the same slot, this wasn't expected * but can happen so for now reject it at this level and have read - * perform a seperate I/O for this slot. + * perform a separate I/O for this slot. * TODO: Handle DAOS-16686 here. */ rcb = false; @@ -506,8 +535,10 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct eqt = pick_eqt(dfuse_info); ev = d_slab_acquire(eqt->de_read_slab); - if (ev == NULL) - D_GOTO(err, rc = ENOMEM); + if (ev == NULL) { + DFUSE_REPLY_ERR_RAW(oh, req, ENOMEM); + return; + } if (oh->doh_ie->ie_truncated && position + len < oh->doh_ie->ie_stat.st_size && ((oh->doh_ie->ie_start_off == 0 && oh->doh_ie->ie_end_off == 0) || @@ -544,9 +575,29 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct DFUSE_IE_WFLUSH(oh->doh_ie); + /* Not check for outstanding read which matches */ + D_MUTEX_LOCK(&rc_lock); + { + struct dfuse_event *evc; + + /* Check for concurrent overlapping reads and if so then add request to current op + */ + d_list_for_each_entry(evc, &oh->doh_ie->ie_open_reads, de_read_list) { + if (ev->de_req_position == evc->de_req_position && + ev->de_req_len <= evc->de_req_position) { + d_list_add(&ev->de_read_list, &evc->de_read_slaves); + D_MUTEX_UNLOCK(&rc_lock); + return; + } + } + d_list_add_tail(&ev->de_read_list, &oh->doh_ie->ie_open_reads); + } + D_MUTEX_UNLOCK(&rc_lock); + rc = dfs_read(oh->doh_dfs, oh->doh_obj, &ev->de_sgl, position, &ev->de_len, &ev->de_ev); if (rc != 0) { - D_GOTO(err, rc); + ev->de_ev.ev_error = rc; + dfuse_cb_read_complete(ev); return; } @@ -557,12 +608,6 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct d_slab_restock(eqt->de_read_slab); return; -err: - DFUSE_REPLY_ERR_RAW(oh, req, rc); - if (ev) { - daos_event_fini(&ev->de_ev); - d_slab_release(eqt->de_read_slab, ev); - } } static void From f74f053b11d68ef5576a5df384f341fd52b54749 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 11 Oct 2024 15:17:59 +0000 Subject: [PATCH 04/34] Fix invalid free and leak. Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index cb98320b2f9..5f723bcc92e 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -67,9 +67,8 @@ dfuse_cb_read_complete(struct dfuse_event *ev) d_list_for_each_entry(evs, &ev->de_read_slaves, de_read_list) { DFUSE_TRA_WARNING(ev, "concurrent network read %p", evs); - evs->de_len = ev->de_len; + evs->de_len = min(ev->de_len, evs->de_req_len); evs->de_ev.ev_error = ev->de_ev.ev_error; - d_list_del(&evs->de_read_list); cb_read_helper(evs, ev->de_iov.iov_buf); } @@ -78,7 +77,7 @@ dfuse_cb_read_complete(struct dfuse_event *ev) d_list_for_each_entry_safe(evs, evn, &ev->de_read_slaves, de_read_list) { d_list_del(&evs->de_read_list); d_slab_restock(evs->de_eqt->de_read_slab); - d_slab_release(evs->de_eqt->de_read_slab, ev); + d_slab_release(evs->de_eqt->de_read_slab, evs); } d_slab_restock(ev->de_eqt->de_read_slab); @@ -452,6 +451,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) */ rcb = false; DFUSE_TRA_WARNING(oh, "concurrent read, un-met"); + D_FREE(cd); } else { cd->reqs[slot] = req; cd->ohs[slot] = oh; From 6ad750b8732c805f0bb0c07b705026cb23ab99d7 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 11 Oct 2024 16:51:50 +0000 Subject: [PATCH 05/34] Fix a logging line. Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 5f723bcc92e..604fc1b8dec 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -66,7 +66,7 @@ dfuse_cb_read_complete(struct dfuse_event *ev) D_MUTEX_UNLOCK(&rc_lock); d_list_for_each_entry(evs, &ev->de_read_slaves, de_read_list) { - DFUSE_TRA_WARNING(ev, "concurrent network read %p", evs); + DFUSE_TRA_WARNING(ev->de_oh, "concurrent network read %p", evs); evs->de_len = min(ev->de_len, evs->de_req_len); evs->de_ev.ev_error = ev->de_ev.ev_error; cb_read_helper(evs, ev->de_iov.iov_buf); From 578be24eaf6997e328707cde55931ae4dad9ef36 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 14 Oct 2024 15:06:23 +0000 Subject: [PATCH 06/34] Add some debugging. Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/open.c | 3 ++- src/client/dfuse/ops/read.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/client/dfuse/ops/open.c b/src/client/dfuse/ops/open.c index 772e92e1c13..91b3a8b7dd6 100644 --- a/src/client/dfuse/ops/open.c +++ b/src/client/dfuse/ops/open.c @@ -203,11 +203,12 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) } } } - DFUSE_TRA_DEBUG(oh, "il_calls %d, caching %d,", il_calls, oh->doh_caching); if (il_calls != 0) { atomic_fetch_sub_relaxed(&oh->doh_ie->ie_il_count, 1); } oc = atomic_fetch_sub_relaxed(&oh->doh_ie->ie_open_count, 1); + DFUSE_TRA_DEBUG(oh, "il_calls %d, caching %d, open count %d", il_calls, oh->doh_caching, + oc - 1); if (oc == 1) { if (read_chunk_close(oh->doh_ie)) oh->doh_linear_read = true; diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 604fc1b8dec..6a42a56fd98 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -66,7 +66,7 @@ dfuse_cb_read_complete(struct dfuse_event *ev) D_MUTEX_UNLOCK(&rc_lock); d_list_for_each_entry(evs, &ev->de_read_slaves, de_read_list) { - DFUSE_TRA_WARNING(ev->de_oh, "concurrent network read %p", evs); + DFUSE_TRA_WARNING(ev->de_oh, "concurrent network read %p", evs->de_oh); evs->de_len = min(ev->de_len, evs->de_req_len); evs->de_ev.ev_error = ev->de_ev.ev_error; cb_read_helper(evs, ev->de_iov.iov_buf); @@ -222,6 +222,7 @@ read_chunk_close(struct dfuse_inode_entry *ie) if (cd->complete) { chunk_free(cd); } else { + DFUSE_TRA_DEBUG(ie, "Abandoning %p", cd); cd->exiting = true; } } @@ -422,7 +423,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) rcb = chunk_fetch(req, oh, cd, slot); } else { struct dfuse_event *ev = NULL; - bool sd = false; + bool sd = true; /* Now check if this read request is complete or not yet, if it isn't then just * save req in the right slot however if it is then reply here. After the call to @@ -451,7 +452,6 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) */ rcb = false; DFUSE_TRA_WARNING(oh, "concurrent read, un-met"); - D_FREE(cd); } else { cd->reqs[slot] = req; cd->ohs[slot] = oh; @@ -583,8 +583,12 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct /* Check for concurrent overlapping reads and if so then add request to current op */ d_list_for_each_entry(evc, &oh->doh_ie->ie_open_reads, de_read_list) { + DFUSE_TRA_DEBUG(oh, "Checking %p %ld %ld", evc->de_oh, ev->de_req_position, + evc->de_req_position); if (ev->de_req_position == evc->de_req_position && - ev->de_req_len <= evc->de_req_position) { + ev->de_req_len <= evc->de_req_len) { + DFUSE_TRA_DEBUG(oh, "Match, making slave of %p", evc->de_oh); + d_list_add(&ev->de_read_list, &evc->de_read_slaves); D_MUTEX_UNLOCK(&rc_lock); return; From e7719126abf962e87f7a69e2d532ce36dec3c603 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 17 Oct 2024 10:31:16 +0000 Subject: [PATCH 07/34] Track duplicate reads. This avoids a crash but there's still a memory leak Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 6a42a56fd98..a38d04cad20 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -413,6 +413,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) if (++cd->entered < 8) { /* Put on front of list for efficient searching */ + DFUSE_TRA_DEBUG(oh, "at front of list"); d_list_add(&cd->list, &cc->entries); } @@ -423,7 +424,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) rcb = chunk_fetch(req, oh, cd, slot); } else { struct dfuse_event *ev = NULL; - bool sd = true; + int exited; /* Now check if this read request is complete or not yet, if it isn't then just * save req in the right slot however if it is then reply here. After the call to @@ -435,16 +436,21 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) D_MUTEX_LOCK(&rc_lock); if (cd->complete) { ev = cd->ev; + DFUSE_TRA_DEBUG(oh, "Checking for done %d", cd->slot_done[slot]); + if (!cd->slot_done[slot]) { /* DAOS-16686: Reply to each slot more than once if requested, but * only track the first reply for knowing when to free the buffer. */ cd->slot_done[slot] = true; - sd = false; + atomic_fetch_sub_relaxed(&cd->exited, 1); DFUSE_TRA_WARNING(oh, "concurrent read, met"); } } else { - if (cd->reqs[slot] != 0) { + if (cd->reqs[slot] == 0) { + cd->reqs[slot] = req; + cd->ohs[slot] = oh; + } else { /* Handle concurrent reads of the same slot, this wasn't expected * but can happen so for now reject it at this level and have read * perform a separate I/O for this slot. @@ -452,9 +458,6 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) */ rcb = false; DFUSE_TRA_WARNING(oh, "concurrent read, un-met"); - } else { - cd->reqs[slot] = req; - cd->ohs[slot] = oh; } } D_MUTEX_UNLOCK(&rc_lock); @@ -470,7 +473,9 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) position + K128 - 1); DFUSE_REPLY_BUFQ(oh, req, ev->de_iov.iov_buf + (slot * K128), K128); } - if (sd && atomic_fetch_add_relaxed(&cd->exited, 1) == 7) { + exited = atomic_fetch_add_relaxed(&cd->exited, 1); + DFUSE_TRA_DEBUG(oh, "Checking for done, exited %d", exited); + if (exited == 7) { d_slab_release(cd->eqt->de_read_slab, cd->ev); D_FREE(cd); } From 6c407e7c3bf1959d6d2ca0a35cb78de96b089168 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 17 Oct 2024 10:52:23 +0000 Subject: [PATCH 08/34] Fix logic. Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index a38d04cad20..1478df4a1a2 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -438,13 +438,14 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) ev = cd->ev; DFUSE_TRA_DEBUG(oh, "Checking for done %d", cd->slot_done[slot]); - if (!cd->slot_done[slot]) { + if (cd->slot_done[slot]) { /* DAOS-16686: Reply to each slot more than once if requested, but * only track the first reply for knowing when to free the buffer. */ - cd->slot_done[slot] = true; atomic_fetch_sub_relaxed(&cd->exited, 1); DFUSE_TRA_WARNING(oh, "concurrent read, met"); + } else { + cd->slot_done[slot] = true; } } else { if (cd->reqs[slot] == 0) { From 7f892f3b0c14e5f65ea9b9aaedfa5649b6317294 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 17 Oct 2024 17:34:07 +0000 Subject: [PATCH 09/34] Rework to support blocking on network requests. Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 235 +++++++++++++++--------------------- 1 file changed, 96 insertions(+), 139 deletions(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 1478df4a1a2..9d26d204513 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -66,7 +66,7 @@ dfuse_cb_read_complete(struct dfuse_event *ev) D_MUTEX_UNLOCK(&rc_lock); d_list_for_each_entry(evs, &ev->de_read_slaves, de_read_list) { - DFUSE_TRA_WARNING(ev->de_oh, "concurrent network read %p", evs->de_oh); + DFUSE_TRA_DEBUG(ev->de_oh, "concurrent network read %p", evs->de_oh); evs->de_len = min(ev->de_len, evs->de_req_len); evs->de_ev.ev_error = ev->de_ev.ev_error; cb_read_helper(evs, ev->de_iov.iov_buf); @@ -174,33 +174,29 @@ pick_eqt(struct dfuse_info *dfuse_info) #define CHUNK_SIZE (1024 * 1024) +#define MAX_REQ_COUNT 64 + struct read_chunk_data { struct dfuse_event *ev; - fuse_req_t reqs[8]; - struct dfuse_obj_hdl *ohs[8]; bool slot_done[8]; d_list_t list; uint64_t bucket; struct dfuse_eq *eqt; int rc; int entered; - ATOMIC int exited; - bool exiting; bool complete; + int idx; + struct { + int slot; + fuse_req_t req; + struct dfuse_obj_hdl *oh; + } reqs[MAX_REQ_COUNT]; }; struct read_chunk_core { d_list_t entries; }; -static void -chunk_free(struct read_chunk_data *cd) -{ - d_list_del(&cd->list); - d_slab_release(cd->eqt->de_read_slab, cd->ev); - D_FREE(cd); -} - /* Called when the last open file handle on a inode is closed. This needs to free everything which * is complete and for anything that isn't flag it for deletion in the callback. * @@ -219,12 +215,10 @@ read_chunk_close(struct dfuse_inode_entry *ie) rcb = true; d_list_for_each_entry_safe(cd, cdn, &ie->ie_chunk->entries, list) { - if (cd->complete) { - chunk_free(cd); - } else { - DFUSE_TRA_DEBUG(ie, "Abandoning %p", cd); - cd->exiting = true; - } + D_ASSERT(cd->complete); + d_list_del(&cd->list); + d_slab_release(cd->eqt->de_read_slab, cd->ev); + D_FREE(cd); } D_FREE(ie->ie_chunk); out: @@ -236,8 +230,6 @@ static void chunk_cb(struct dfuse_event *ev) { struct read_chunk_data *cd = ev->de_cd; - fuse_req_t req; - bool done = false; cd->rc = ev->de_ev.ev_error; @@ -249,51 +241,32 @@ chunk_cb(struct dfuse_event *ev) daos_event_fini(&ev->de_ev); - do { - int i; - - req = 0; - - D_MUTEX_LOCK(&rc_lock); - - if (cd->exiting) { - chunk_free(cd); - D_MUTEX_UNLOCK(&rc_lock); - return; - } + /* Mark as complete so no more get put on list */ + D_MUTEX_LOCK(&rc_lock); + cd->complete = true; + D_MUTEX_UNLOCK(&rc_lock); - cd->complete = true; - for (i = 0; i < 8; i++) { - if (cd->reqs[i]) { - req = cd->reqs[i]; - cd->reqs[i] = 0; - cd->slot_done[i] = true; - break; - } - } + for (int i = 0; i < cd->idx; i++) { + int slot; + fuse_req_t req; + size_t position; - D_MUTEX_UNLOCK(&rc_lock); + slot = cd->reqs[i].slot; + req = cd->reqs[i].req; - if (req) { - size_t position = (cd->bucket * CHUNK_SIZE) + (i * K128); + DFUSE_TRA_DEBUG(cd->reqs[i].oh, "Replying idx %d/%d for %ld[%d]", i, cd->idx, + cd->bucket, slot); - if (cd->rc != 0) { - DFUSE_REPLY_ERR_RAW(cd->ohs[i], req, cd->rc); - } else { - DFUSE_TRA_DEBUG(cd->ohs[i], "%#zx-%#zx read", position, - position + K128 - 1); - DFUSE_REPLY_BUFQ(cd->ohs[i], req, ev->de_iov.iov_buf + (i * K128), - K128); - } + position = (cd->bucket * CHUNK_SIZE) + (slot * K128); - if (atomic_fetch_add_relaxed(&cd->exited, 1) == 7) - done = true; + if (cd->rc != 0) { + DFUSE_REPLY_ERR_RAW(cd->reqs[i].oh, req, cd->rc); + } else { + DFUSE_TRA_DEBUG(cd->reqs[i].oh, "%#zx-%#zx read", position, + position + K128 - 1); + DFUSE_REPLY_BUFQ(cd->reqs[i].oh, req, ev->de_iov.iov_buf + (slot * K128), + K128); } - } while (req && !done); - - if (done) { - d_slab_release(cd->eqt->de_read_slab, cd->ev); - D_FREE(cd); } } @@ -302,14 +275,13 @@ chunk_cb(struct dfuse_event *ev) * Returns true on success. */ static bool -chunk_fetch(fuse_req_t req, struct dfuse_obj_hdl *oh, struct read_chunk_data *cd, int slot) +chunk_fetch(fuse_req_t req, struct dfuse_inode_entry *ie, struct read_chunk_data *cd) { - struct dfuse_info *dfuse_info = fuse_req_userdata(req); - struct dfuse_inode_entry *ie = oh->doh_ie; - struct dfuse_event *ev; - struct dfuse_eq *eqt; - int rc; - daos_off_t position = cd->bucket * CHUNK_SIZE; + struct dfuse_info *dfuse_info = fuse_req_userdata(req); + struct dfuse_event *ev; + struct dfuse_eq *eqt; + int rc; + daos_off_t position = cd->bucket * CHUNK_SIZE; eqt = pick_eqt(dfuse_info); @@ -326,10 +298,8 @@ chunk_fetch(fuse_req_t req, struct dfuse_obj_hdl *oh, struct read_chunk_data *cd ev->de_len = 0; ev->de_complete_cb = chunk_cb; - cd->ev = ev; - cd->eqt = eqt; - cd->reqs[slot] = req; - cd->ohs[slot] = oh; + cd->ev = ev; + cd->eqt = eqt; rc = dfs_read(ie->ie_dfs->dfs_ns, ie->ie_obj, &ev->de_sgl, position, &ev->de_len, &ev->de_ev); @@ -347,6 +317,7 @@ chunk_fetch(fuse_req_t req, struct dfuse_obj_hdl *oh, struct read_chunk_data *cd err: daos_event_fini(&ev->de_ev); d_slab_release(eqt->de_read_slab, ev); + cd->ev = NULL; cd->rc = rc; return false; } @@ -359,7 +330,6 @@ static bool chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) { struct dfuse_inode_entry *ie = oh->doh_ie; - struct read_chunk_core *cc; struct read_chunk_data *cd; off_t last; uint64_t bucket; @@ -393,12 +363,9 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) goto err; D_INIT_LIST_HEAD(&ie->ie_chunk->entries); } - cc = ie->ie_chunk; - d_list_for_each_entry(cd, &cc->entries, list) + d_list_for_each_entry(cd, &ie->ie_chunk->entries, list) if (cd->bucket == bucket) { - /* Remove from list to re-add again later. */ - d_list_del(&cd->list); goto found; } @@ -409,78 +376,72 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) cd->bucket = bucket; submit = true; + d_list_add(&cd->list, &ie->ie_chunk->entries); + found: - if (++cd->entered < 8) { - /* Put on front of list for efficient searching */ - DFUSE_TRA_DEBUG(oh, "at front of list"); - d_list_add(&cd->list, &cc->entries); +#if 0 + /* Keep track of which slots have been requested, but allow for multiple reads per + * slot */ + if (!cd->slot_done[slot]) { + cd->entered++; + cd->slot_done[slot] = true; + } else { + DFUSE_TRA_DEBUG(oh, "concurrent read on %ld[%d] complete %d", bucket, slot, + cd->complete); } - D_MUTEX_UNLOCK(&rc_lock); + if (cd->entered < 8) { + /* Put on front of list for efficient searching */ + DFUSE_TRA_DEBUG(oh, "%d slots requested, putting on front of list", cd->entered); + d_list_add(&cd->list, &ie->ie_chunk->entries); + } +#endif if (submit) { + cd->reqs[0].req = req; + cd->reqs[0].oh = oh; + cd->reqs[0].slot = slot; + cd->idx++; + + D_MUTEX_UNLOCK(&rc_lock); + DFUSE_TRA_DEBUG(oh, "submit for bucket %ld[%d]", bucket, slot); - rcb = chunk_fetch(req, oh, cd, slot); - } else { - struct dfuse_event *ev = NULL; - int exited; - /* Now check if this read request is complete or not yet, if it isn't then just - * save req in the right slot however if it is then reply here. After the call to - * DFUSE_REPLY_* then no reference is held on either the open file or the inode so - * at that point they could be closed. - */ - rcb = true; + rcb = chunk_fetch(req, ie, cd); + } else if (cd->complete) { + D_MUTEX_UNLOCK(&rc_lock); - D_MUTEX_LOCK(&rc_lock); - if (cd->complete) { - ev = cd->ev; - DFUSE_TRA_DEBUG(oh, "Checking for done %d", cd->slot_done[slot]); - - if (cd->slot_done[slot]) { - /* DAOS-16686: Reply to each slot more than once if requested, but - * only track the first reply for knowing when to free the buffer. - */ - atomic_fetch_sub_relaxed(&cd->exited, 1); - DFUSE_TRA_WARNING(oh, "concurrent read, met"); - } else { - cd->slot_done[slot] = true; - } + if (cd->rc != 0) { + /* Don't pass fuse an error here, rather return false and + * the read will be tried over the network. + */ + rcb = false; } else { - if (cd->reqs[slot] == 0) { - cd->reqs[slot] = req; - cd->ohs[slot] = oh; - } else { - /* Handle concurrent reads of the same slot, this wasn't expected - * but can happen so for now reject it at this level and have read - * perform a separate I/O for this slot. - * TODO: Handle DAOS-16686 here. - */ - rcb = false; - DFUSE_TRA_WARNING(oh, "concurrent read, un-met"); - } + DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", position, position + K128 - 1); + DFUSE_REPLY_BUFQ(oh, req, cd->ev->de_iov.iov_buf + (slot * K128), K128); + rcb = true; } + + } else if (cd->idx < MAX_REQ_COUNT) { + DFUSE_TRA_DEBUG(oh, "Using idx %d for %ld[%d]", cd->idx, bucket, slot); + + cd->reqs[cd->idx].req = req; + cd->reqs[cd->idx].oh = oh; + cd->reqs[cd->idx].slot = slot; + cd->idx++; + D_MUTEX_UNLOCK(&rc_lock); + rcb = true; + } else { D_MUTEX_UNLOCK(&rc_lock); - if (ev) { - if (cd->rc != 0) { - /* Don't pass fuse an error here, rather return false and the read - * will be tried over the network. - */ - rcb = false; - } else { - DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", position, - position + K128 - 1); - DFUSE_REPLY_BUFQ(oh, req, ev->de_iov.iov_buf + (slot * K128), K128); - } - exited = atomic_fetch_add_relaxed(&cd->exited, 1); - DFUSE_TRA_DEBUG(oh, "Checking for done, exited %d", exited); - if (exited == 7) { - d_slab_release(cd->eqt->de_read_slab, cd->ev); - D_FREE(cd); - } - } + /* Handle concurrent reads of the same slot, this wasn't + * expected but can happen so for now reject it at this + * level and have read perform a separate I/O for this slot. + * TODO: Handle DAOS-16686 here. + */ + rcb = false; + DFUSE_TRA_WARNING(oh, "Too many outstanding reads"); } return rcb; @@ -589,12 +550,8 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct /* Check for concurrent overlapping reads and if so then add request to current op */ d_list_for_each_entry(evc, &oh->doh_ie->ie_open_reads, de_read_list) { - DFUSE_TRA_DEBUG(oh, "Checking %p %ld %ld", evc->de_oh, ev->de_req_position, - evc->de_req_position); if (ev->de_req_position == evc->de_req_position && ev->de_req_len <= evc->de_req_len) { - DFUSE_TRA_DEBUG(oh, "Match, making slave of %p", evc->de_oh); - d_list_add(&ev->de_read_list, &evc->de_read_slaves); D_MUTEX_UNLOCK(&rc_lock); return; From 6e286d136ce3eb687e412564ed78902643036b60 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 18 Oct 2024 08:21:42 +0000 Subject: [PATCH 10/34] Bump array size and add stats. Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 3 +++ src/client/dfuse/ops/read.c | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index 2029ca4346b..29f32add9b0 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -481,6 +481,9 @@ struct dfuse_pool { ACTION(RENAME) \ ACTION(OPEN) \ ACTION(PRE_READ) \ + ACTION(CON_READ) \ + ACTION(CON_READ_BUCKET) \ + ACTION(CON_READ_BC) \ ACTION(READ) \ ACTION(WRITE) \ ACTION(STATFS) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 9d26d204513..b7897d098ea 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -69,6 +69,7 @@ dfuse_cb_read_complete(struct dfuse_event *ev) DFUSE_TRA_DEBUG(ev->de_oh, "concurrent network read %p", evs->de_oh); evs->de_len = min(ev->de_len, evs->de_req_len); evs->de_ev.ev_error = ev->de_ev.ev_error; + DFUSE_IE_STAT_ADD(ev->de_oh->doh_ie, DS_CON_READ); cb_read_helper(evs, ev->de_iov.iov_buf); } @@ -174,7 +175,7 @@ pick_eqt(struct dfuse_info *dfuse_info) #define CHUNK_SIZE (1024 * 1024) -#define MAX_REQ_COUNT 64 +#define MAX_REQ_COUNT 256 struct read_chunk_data { struct dfuse_event *ev; @@ -258,7 +259,7 @@ chunk_cb(struct dfuse_event *ev) cd->bucket, slot); position = (cd->bucket * CHUNK_SIZE) + (slot * K128); - + DFUSE_IE_STAT_ADD(ev->de_oh->doh_ie, DS_CON_READ_BUCKET); if (cd->rc != 0) { DFUSE_REPLY_ERR_RAW(cd->reqs[i].oh, req, cd->rc); } else { @@ -418,6 +419,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) */ rcb = false; } else { + DFUSE_IE_STAT_ADD(oh->doh_ie, DS_CON_READ_BC); DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", position, position + K128 - 1); DFUSE_REPLY_BUFQ(oh, req, cd->ev->de_iov.iov_buf + (slot * K128), K128); rcb = true; From b2a21c32b527a1090d07cea6a6446fb54aee26e7 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 18 Oct 2024 10:16:05 +0000 Subject: [PATCH 11/34] Fix a segv in the stats. Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 6 +++--- src/client/dfuse/ops/read.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index 29f32add9b0..0731ef44837 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -481,10 +481,10 @@ struct dfuse_pool { ACTION(RENAME) \ ACTION(OPEN) \ ACTION(PRE_READ) \ - ACTION(CON_READ) \ - ACTION(CON_READ_BUCKET) \ - ACTION(CON_READ_BC) \ ACTION(READ) \ + ACTION(READ_CON) \ + ACTION(READ_BUCKET) \ + ACTION(READ_BUCKET_M) \ ACTION(WRITE) \ ACTION(STATFS) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index b7897d098ea..c6ae144b028 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -69,7 +69,7 @@ dfuse_cb_read_complete(struct dfuse_event *ev) DFUSE_TRA_DEBUG(ev->de_oh, "concurrent network read %p", evs->de_oh); evs->de_len = min(ev->de_len, evs->de_req_len); evs->de_ev.ev_error = ev->de_ev.ev_error; - DFUSE_IE_STAT_ADD(ev->de_oh->doh_ie, DS_CON_READ); + DFUSE_IE_STAT_ADD(ev->de_oh->doh_ie, DS_READ_CON); cb_read_helper(evs, ev->de_iov.iov_buf); } @@ -259,7 +259,7 @@ chunk_cb(struct dfuse_event *ev) cd->bucket, slot); position = (cd->bucket * CHUNK_SIZE) + (slot * K128); - DFUSE_IE_STAT_ADD(ev->de_oh->doh_ie, DS_CON_READ_BUCKET); + DFUSE_IE_STAT_ADD(cd->reqs[i].oh->doh_ie, DS_READ_BUCKET); if (cd->rc != 0) { DFUSE_REPLY_ERR_RAW(cd->reqs[i].oh, req, cd->rc); } else { @@ -419,7 +419,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) */ rcb = false; } else { - DFUSE_IE_STAT_ADD(oh->doh_ie, DS_CON_READ_BC); + DFUSE_IE_STAT_ADD(oh->doh_ie, DS_READ_BUCKET_M); DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", position, position + K128 - 1); DFUSE_REPLY_BUFQ(oh, req, cd->ev->de_iov.iov_buf + (slot * K128), K128); rcb = true; From b6855917dbd24afd5eba7fce2dca4ccdee8619e9 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 18 Oct 2024 11:59:03 +0000 Subject: [PATCH 12/34] Track EOF better in reads. Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 1 + src/client/dfuse/ops/read.c | 55 ++++++++++++++++++++++++++--------- src/tests/ftest/dfuse/read.py | 4 ++- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index 0731ef44837..8cbe45f3785 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -482,6 +482,7 @@ struct dfuse_pool { ACTION(OPEN) \ ACTION(PRE_READ) \ ACTION(READ) \ + ACTION(READ_EOF_M) \ ACTION(READ_CON) \ ACTION(READ_BUCKET) \ ACTION(READ_BUCKET_M) \ diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index c6ae144b028..167e6f2607d 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -18,6 +18,12 @@ */ static pthread_mutex_t rc_lock = PTHREAD_MUTEX_INITIALIZER; +struct read_chunk_core { + d_list_t entries; + size_t file_size; + bool seen_eof; +}; + static void cb_read_helper(struct dfuse_event *ev, void *buff) { @@ -39,17 +45,28 @@ cb_read_helper(struct dfuse_event *ev, void *buff) } } - if (ev->de_len == 0) - DFUSE_TRA_DEBUG(oh, "%#zx-%#zx requested (EOF)", ev->de_req_position, - ev->de_req_position + ev->de_req_len - 1); - else if (ev->de_len == ev->de_req_len) + if (ev->de_len == ev->de_req_len) { DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", ev->de_req_position, ev->de_req_position + ev->de_req_len - 1); - else - DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read %#zx-%#zx not read (truncated)", - ev->de_req_position, ev->de_req_position + ev->de_len - 1, - ev->de_req_position + ev->de_len, - ev->de_req_position + ev->de_req_len - 1); + } else { + struct dfuse_inode_entry *ie = oh->doh_ie; + + if (ie->ie_chunk) { + ie->ie_chunk->seen_eof = true; + if (ev->de_len == 0) + ie->ie_chunk->file_size = ev->de_req_position; + else + ie->ie_chunk->file_size = ev->de_req_position + ev->de_len - 1; + } + if (ev->de_len == 0) + DFUSE_TRA_DEBUG(oh, "%#zx-%#zx requested (EOF)", ev->de_req_position, + ev->de_req_position + ev->de_req_len - 1); + else + DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read %#zx-%#zx not read (truncated)", + ev->de_req_position, ev->de_req_position + ev->de_len - 1, + ev->de_req_position + ev->de_len, + ev->de_req_position + ev->de_req_len - 1); + } DFUSE_REPLY_BUFQ(oh, ev->de_req, buff, ev->de_len); release: @@ -194,10 +211,6 @@ struct read_chunk_data { } reqs[MAX_REQ_COUNT]; }; -struct read_chunk_core { - d_list_t entries; -}; - /* Called when the last open file handle on a inode is closed. This needs to free everything which * is complete and for anything that isn't flag it for deletion in the callback. * @@ -419,6 +432,8 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) */ rcb = false; } else { + oh->doh_linear_read_pos = max(oh->doh_linear_read_pos, position + K128); + DFUSE_IE_STAT_ADD(oh->doh_ie, DS_READ_BUCKET_M); DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", position, position + K128 - 1); DFUSE_REPLY_BUFQ(oh, req, cd->ev->de_iov.iov_buf + (slot * K128), K128); @@ -462,13 +477,26 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct struct dfuse_eq *eqt; int rc; struct dfuse_event *ev; + bool reached_eof = false; DFUSE_IE_STAT_ADD(oh->doh_ie, DS_READ); if (oh->doh_linear_read_eof && position == oh->doh_linear_read_pos) { + reached_eof = true; + } else if (oh->doh_ie->ie_chunk && oh->doh_ie->ie_chunk->seen_eof) { + if (position >= oh->doh_ie->ie_chunk->file_size) + reached_eof = true; + } + + if (reached_eof) { DFUSE_TRA_DEBUG(oh, "Returning EOF early without round trip %#zx", position); oh->doh_linear_read_eof = false; +#if 0 + /* Release uses this to set the bit on the directory so do not turn it off here + * but I do need to check why it was set before. + */ oh->doh_linear_read = false; +#endif if (oh->doh_readahead) { D_MUTEX_LOCK(&oh->doh_readahead->dra_lock); @@ -483,6 +511,7 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct DFUSE_IE_STAT_ADD(oh->doh_ie, DS_PRE_READ); } } + DFUSE_IE_STAT_ADD(oh->doh_ie, DS_READ_EOF_M); DFUSE_REPLY_BUFQ(oh, req, NULL, 0); return; } diff --git a/src/tests/ftest/dfuse/read.py b/src/tests/ftest/dfuse/read.py index 607b8f99f9f..71a13a14044 100644 --- a/src/tests/ftest/dfuse/read.py +++ b/src/tests/ftest/dfuse/read.py @@ -23,7 +23,9 @@ def test_dfuse_pre_read(self): Read one large file entirely using pre-read. Read a second smaller file to ensure that the first file leaves the flag enabled. - :avocado: tags=all,full_regression + TODO: Check this with 128k I/O size and non-128K I/O size. + + :avocado: tags=all,daily_regression :avocado: tags=vm :avocado: tags=dfuse :avocado: tags=DFusePreReadTest,test_dfuse_pre_read From 4fcedda9ee05ba73607f94d5ab79c1810c277731 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 11 Nov 2024 11:02:14 +0000 Subject: [PATCH 13/34] Fixup after merge Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 6 ++-- src/client/dfuse/file.c | 2 +- src/client/dfuse/ops/open.c | 10 +----- src/client/dfuse/ops/read.c | 67 +++++++++++++------------------------ 4 files changed, 28 insertions(+), 57 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index cc6884cd379..6be2c67a776 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -1026,12 +1026,12 @@ struct dfuse_inode_entry { d_list_t ie_evict_entry; d_list_t ie_open_reads; - - struct read_chunk_core *ie_chunk; }; struct active_inode { d_list_t chunks; + size_t file_size; + bool seen_eof; pthread_spinlock_t lock; }; @@ -1147,7 +1147,7 @@ dfuse_cache_evict_dir(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *i * Returns true if feature was used. */ bool -read_chunk_close(struct dfuse_inode_entry *ie); +read_chunk_close(struct active_inode *active); /* Metadata caching functions. */ diff --git a/src/client/dfuse/file.c b/src/client/dfuse/file.c index c06b7f7fd81..a2a3aa10d8c 100644 --- a/src/client/dfuse/file.c +++ b/src/client/dfuse/file.c @@ -66,7 +66,7 @@ active_oh_decref(struct dfuse_obj_hdl *oh) if (oc != 1) goto out; - rcb = read_chunk_close(oh->doh_ie); + rcb = read_chunk_close(oh->doh_ie->ie_active); ah_free(oh->doh_ie); out: diff --git a/src/client/dfuse/ops/open.c b/src/client/dfuse/ops/open.c index fbfa8d9468e..447c5213b72 100644 --- a/src/client/dfuse/ops/open.c +++ b/src/client/dfuse/ops/open.c @@ -148,7 +148,6 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) struct dfuse_inode_entry *ie = NULL; int rc; uint32_t il_calls; - uint32_t oc; /* Perform the opposite of what the ioctl call does, always change the open handle count * but the inode only tracks number of open handles with non-zero ioctl counts @@ -213,18 +212,11 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) } } } + DFUSE_TRA_DEBUG(oh, "il_calls %d, caching %d,", il_calls, oh->doh_caching); if (il_calls != 0) { atomic_fetch_sub_relaxed(&oh->doh_ie->ie_il_count, 1); } - oc = atomic_fetch_sub_relaxed(&oh->doh_ie->ie_open_count, 1); - DFUSE_TRA_DEBUG(oh, "il_calls %d, caching %d, open count %d", il_calls, oh->doh_caching, - oc - 1); - if (oc == 1) { - if (read_chunk_close(oh->doh_ie)) - oh->doh_linear_read = true; - } - if (oh->doh_evict_on_close) { ie = oh->doh_ie; atomic_fetch_add_relaxed(&ie->ie_ref, 1); diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 7ec072c4715..2265b226b3c 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -16,13 +16,6 @@ * * Also used for ie_open_reads list. */ -static pthread_mutex_t rc_lock = PTHREAD_MUTEX_INITIALIZER; - -struct read_chunk_core { - d_list_t entries; - size_t file_size; - bool seen_eof; -}; static void cb_read_helper(struct dfuse_event *ev, void *buff) @@ -51,13 +44,12 @@ cb_read_helper(struct dfuse_event *ev, void *buff) } else { struct dfuse_inode_entry *ie = oh->doh_ie; - if (ie->ie_chunk) { - ie->ie_chunk->seen_eof = true; - if (ev->de_len == 0) - ie->ie_chunk->file_size = ev->de_req_position; - else - ie->ie_chunk->file_size = ev->de_req_position + ev->de_len - 1; - } + ie->ie_active->seen_eof = true; + if (ev->de_len == 0) + ie->ie_active->file_size = ev->de_req_position; + else + ie->ie_active->file_size = ev->de_req_position + ev->de_len - 1; + if (ev->de_len == 0) DFUSE_TRA_DEBUG(oh, "%#zx-%#zx requested (EOF)", ev->de_req_position, ev->de_req_position + ev->de_req_len - 1); @@ -78,9 +70,9 @@ dfuse_cb_read_complete(struct dfuse_event *ev) { struct dfuse_event *evs, *evn; - D_MUTEX_LOCK(&rc_lock); + D_SPIN_LOCK(&ev->de_oh->doh_ie->ie_active->lock); d_list_del(&ev->de_read_list); - D_MUTEX_UNLOCK(&rc_lock); + D_SPIN_UNLOCK(&ev->de_oh->doh_ie->ie_active->lock); d_list_for_each_entry(evs, &ev->de_read_slaves, de_read_list) { DFUSE_TRA_DEBUG(ev->de_oh, "concurrent network read %p", evs->de_oh); @@ -218,26 +210,20 @@ struct read_chunk_data { * Returns true if the feature was used. */ bool -read_chunk_close(struct dfuse_inode_entry *ie) +read_chunk_close(struct active_inode *active) { struct read_chunk_data *cd, *cdn; - bool rcb = false; - - D_SPIN_LOCK(&ie->ie_active->lock); - if (d_list_empty(&ie->ie_active->chunks)) - goto out; - rcb = true; + if (d_list_empty(&active->chunks)) + return false; - d_list_for_each_entry_safe(cd, cdn, &ie->ie_active->chunks, list) { + d_list_for_each_entry_safe(cd, cdn, &active->chunks, list) { D_ASSERT(cd->complete); d_list_del(&cd->list); d_slab_release(cd->eqt->de_read_slab, cd->ev); D_FREE(cd); } -out: - D_SPIN_UNLOCK(&ie->ie_active->lock); - return rcb; + return true; } static void @@ -371,8 +357,6 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) DFUSE_TRA_DEBUG(oh, "read bucket %#zx-%#zx last %#zx size %#zx bucket %ld slot %d", position, position + len - 1, last, ie->ie_stat.st_size, bucket, slot); - - d_list_for_each_entry(cd, &ie->ie_chunk->entries, list) D_SPIN_LOCK(&ie->ie_active->lock); d_list_for_each_entry(cd, &ie->ie_active->chunks, list) @@ -388,7 +372,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) cd->bucket = bucket; submit = true; - d_list_add(&cd->list, &ie->ie_chunk->entries); + d_list_add(&cd->list, &ie->ie_active->chunks); found: @@ -409,12 +393,6 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) d_list_add(&cd->list, &ie->ie_chunk->entries); } #endif - if (++cd->entered < 8) { - /* Put on front of list for efficient searching */ - d_list_add(&cd->list, &ie->ie_active->chunks); - } - - D_SPIN_UNLOCK(&ie->ie_active->lock); if (submit) { cd->reqs[0].req = req; @@ -429,10 +407,11 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) */ DFUSE_TRA_DEBUG(oh, "submit for bucket %ld[%d]", bucket, slot); + D_SPIN_UNLOCK(&ie->ie_active->lock); rcb = chunk_fetch(req, ie, cd); } else if (cd->complete) { - D_MUTEX_UNLOCK(&rc_lock); + D_SPIN_UNLOCK(&ie->ie_active->lock); if (cd->rc != 0) { /* Don't pass fuse an error here, rather return false and @@ -455,10 +434,10 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) cd->reqs[cd->idx].oh = oh; cd->reqs[cd->idx].slot = slot; cd->idx++; - D_MUTEX_UNLOCK(&rc_lock); + D_SPIN_UNLOCK(&ie->ie_active->lock); rcb = true; } else { - D_MUTEX_UNLOCK(&rc_lock); + D_SPIN_UNLOCK(&ie->ie_active->lock); /* Handle concurrent reads of the same slot, this wasn't * expected but can happen so for now reject it at this @@ -491,8 +470,8 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct if (oh->doh_linear_read_eof && position == oh->doh_linear_read_pos) { reached_eof = true; - } else if (oh->doh_ie->ie_chunk && oh->doh_ie->ie_chunk->seen_eof) { - if (position >= oh->doh_ie->ie_chunk->file_size) + } else if (oh->doh_ie->ie_active->seen_eof) { + if (position >= oh->doh_ie->ie_active->file_size) reached_eof = true; } @@ -582,7 +561,7 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct DFUSE_IE_WFLUSH(oh->doh_ie); /* Not check for outstanding read which matches */ - D_MUTEX_LOCK(&rc_lock); + D_SPIN_LOCK(&oh->doh_ie->ie_active->lock); { struct dfuse_event *evc; @@ -592,13 +571,13 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct if (ev->de_req_position == evc->de_req_position && ev->de_req_len <= evc->de_req_len) { d_list_add(&ev->de_read_list, &evc->de_read_slaves); - D_MUTEX_UNLOCK(&rc_lock); + D_SPIN_UNLOCK(&oh->doh_ie->ie_active->lock); return; } } d_list_add_tail(&ev->de_read_list, &oh->doh_ie->ie_open_reads); } - D_MUTEX_UNLOCK(&rc_lock); + D_SPIN_UNLOCK(&oh->doh_ie->ie_active->lock); rc = dfs_read(oh->doh_dfs, oh->doh_obj, &ev->de_sgl, position, &ev->de_len, &ev->de_ev); if (rc != 0) { From 018449e3b94c332cb177ecd377300979e9e9d890 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 11 Nov 2024 11:07:46 +0000 Subject: [PATCH 14/34] Move active read list to active. Test-tag: dfuse Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 3 +-- src/client/dfuse/dfuse_core.c | 1 - src/client/dfuse/file.c | 1 + src/client/dfuse/ops/read.c | 15 ++++++++------- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index 6be2c67a776..f28eaf40169 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -1024,14 +1024,13 @@ struct dfuse_inode_entry { /* Entry on the evict list */ d_list_t ie_evict_entry; - - d_list_t ie_open_reads; }; struct active_inode { d_list_t chunks; size_t file_size; bool seen_eof; + d_list_t open_reads; pthread_spinlock_t lock; }; diff --git a/src/client/dfuse/dfuse_core.c b/src/client/dfuse/dfuse_core.c index 17877d29761..47e6ecfc102 100644 --- a/src/client/dfuse/dfuse_core.c +++ b/src/client/dfuse/dfuse_core.c @@ -1254,7 +1254,6 @@ dfuse_ie_init(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie) atomic_init(&ie->ie_linear_read, true); atomic_fetch_add_relaxed(&dfuse_info->di_inode_count, 1); D_INIT_LIST_HEAD(&ie->ie_evict_entry); - D_INIT_LIST_HEAD(&ie->ie_open_reads); D_RWLOCK_INIT(&ie->ie_wlock, 0); } diff --git a/src/client/dfuse/file.c b/src/client/dfuse/file.c index a2a3aa10d8c..651797feb0a 100644 --- a/src/client/dfuse/file.c +++ b/src/client/dfuse/file.c @@ -38,6 +38,7 @@ active_ie_init(struct dfuse_inode_entry *ie) goto out; } D_INIT_LIST_HEAD(&ie->ie_active->chunks); + D_INIT_LIST_HEAD(&ie->ie_active->open_reads); out: D_MUTEX_UNLOCK(&alock); return rc; diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 2265b226b3c..60374775b3c 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -459,6 +459,7 @@ void dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct fuse_file_info *fi) { struct dfuse_obj_hdl *oh = (struct dfuse_obj_hdl *)fi->fh; + struct active_inode *active = oh->doh_ie->ie_active; struct dfuse_info *dfuse_info = fuse_req_userdata(req); bool mock_read = false; struct dfuse_eq *eqt; @@ -470,8 +471,8 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct if (oh->doh_linear_read_eof && position == oh->doh_linear_read_pos) { reached_eof = true; - } else if (oh->doh_ie->ie_active->seen_eof) { - if (position >= oh->doh_ie->ie_active->file_size) + } else if (active->seen_eof) { + if (position >= active->file_size) reached_eof = true; } @@ -561,23 +562,23 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct DFUSE_IE_WFLUSH(oh->doh_ie); /* Not check for outstanding read which matches */ - D_SPIN_LOCK(&oh->doh_ie->ie_active->lock); + D_SPIN_LOCK(&active->lock); { struct dfuse_event *evc; /* Check for concurrent overlapping reads and if so then add request to current op */ - d_list_for_each_entry(evc, &oh->doh_ie->ie_open_reads, de_read_list) { + d_list_for_each_entry(evc, &active->open_reads, de_read_list) { if (ev->de_req_position == evc->de_req_position && ev->de_req_len <= evc->de_req_len) { d_list_add(&ev->de_read_list, &evc->de_read_slaves); - D_SPIN_UNLOCK(&oh->doh_ie->ie_active->lock); + D_SPIN_UNLOCK(&active->lock); return; } } - d_list_add_tail(&ev->de_read_list, &oh->doh_ie->ie_open_reads); + d_list_add_tail(&ev->de_read_list, &active->open_reads); } - D_SPIN_UNLOCK(&oh->doh_ie->ie_active->lock); + D_SPIN_UNLOCK(&active->lock); rc = dfs_read(oh->doh_dfs, oh->doh_obj, &ev->de_sgl, position, &ev->de_len, &ev->de_ev); if (rc != 0) { From 221c849131346ca33cb2ab2a3e8fe920dd3c59da Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 12 Nov 2024 13:01:16 +0000 Subject: [PATCH 15/34] Rebase and iterate on comments. Test-tag: dfuse Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 4 +-- src/client/dfuse/dfuse_core.c | 1 - src/client/dfuse/ops/read.c | 58 +++++++++++++++-------------------- 3 files changed, 27 insertions(+), 36 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index f28eaf40169..bc468bcda76 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -401,8 +401,8 @@ struct dfuse_event { d_sg_list_t de_sgl; d_list_t de_list; - /* Position in a list of events, this will either be off ie->ie_open_reads or - * de->de_read_slaves + /* Position in a list of events, this will either be off active->open_reads or + * de->de_read_slaves. */ d_list_t de_read_list; /* List of slave events */ diff --git a/src/client/dfuse/dfuse_core.c b/src/client/dfuse/dfuse_core.c index 47e6ecfc102..f679cf22d5d 100644 --- a/src/client/dfuse/dfuse_core.c +++ b/src/client/dfuse/dfuse_core.c @@ -1318,7 +1318,6 @@ dfuse_read_event_size(void *arg, size_t size) ev->de_sgl.sg_nr = 1; } - /* D_INIT_LIST_HEAD(&ev->de_read_list); */ D_INIT_LIST_HEAD(&ev->de_read_slaves); rc = daos_event_init(&ev->de_ev, ev->de_eqt->de_eq, NULL); diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 60374775b3c..748dd27a531 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -7,16 +7,6 @@ #include "dfuse_common.h" #include "dfuse.h" -/* Global lock for all chunk read operations. Each inode has a struct read_chunk_core * entry - * which is checked for NULL and set whilst holding this lock. Each read_chunk_core then has - * a list of read_chunk_data and again, this lock protects all lists on all inodes. This avoids - * the need for a per-inode lock which for many files would consume considerable memory but does - * mean there is potentially some lock contention. The lock however is only held for list - * manipulation, no dfs or kernel calls are made whilst holding the lock. - * - * Also used for ie_open_reads list. - */ - static void cb_read_helper(struct dfuse_event *ev, void *buff) { @@ -174,12 +164,10 @@ pick_eqt(struct dfuse_info *dfuse_info) * * This code is entered when caching is enabled and reads are correctly size/aligned and not in the * last CHUNK_SIZE of a file. When open then the inode contains a single read_chunk_core pointer - * and this contains a list of read_chunk_data entries, one for each bucket. Buckets where all - * slots have been requested are remove from the list and closed when the last request is completed. + * and this contains a list of read_chunk_data entries, one for each bucket. * - * TODO: Currently there is no code to remove partially read buckets from the list so reading - * one slot every chunk would leave the entire file contents in memory until close and mean long - * list traversal times. + * TODO: Currently there is no code to remove buckets from the list so all buckets will remain in + * memory until close. */ #define CHUNK_SIZE (1024 * 1024) @@ -245,6 +233,13 @@ chunk_cb(struct dfuse_event *ev) /* Mark as complete so no more get put on list */ D_SPIN_LOCK(&ia->lock); cd->complete = true; + + /* Mark the slot as replied to. There's a race here as the slot hasn't been replied to + * however references are dropped by the DFUSE_REPLY macros below so an extra ref on active + * would be required. + */ + for (int i = 0; i < cd->idx; i++) + cd->slot_done[cd->reqs[i].slot] = true; D_SPIN_UNLOCK(&ia->lock); for (int i = 0; i < cd->idx; i++) { @@ -337,6 +332,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) int slot; bool submit = false; bool rcb; + bool all_done = true; if (len != K128) return false; @@ -361,6 +357,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) d_list_for_each_entry(cd, &ie->ie_active->chunks, list) if (cd->bucket == bucket) { + d_list_del(&cd->list); goto found; } @@ -376,23 +373,15 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) found: -#if 0 - /* Keep track of which slots have been requested, but allow for multiple reads per - * slot */ - if (!cd->slot_done[slot]) { - cd->entered++; - cd->slot_done[slot] = true; - } else { - DFUSE_TRA_DEBUG(oh, "concurrent read on %ld[%d] complete %d", bucket, slot, - cd->complete); + for (int i = 0; i < 8; i++) { + if (!cd->slot_done[i]) + all_done = false; } - if (cd->entered < 8) { - /* Put on front of list for efficient searching */ - DFUSE_TRA_DEBUG(oh, "%d slots requested, putting on front of list", cd->entered); - d_list_add(&cd->list, &ie->ie_chunk->entries); - } -#endif + if (all_done) + d_list_add(&cd->list, &ie->ie_active->chunks); + else + d_list_add_tail(&cd->list, &ie->ie_active->chunks); if (submit) { cd->reqs[0].req = req; @@ -411,6 +400,8 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) rcb = chunk_fetch(req, ie, cd); } else if (cd->complete) { + cd->slot_done[slot] = true; + D_SPIN_UNLOCK(&ie->ie_active->lock); if (cd->rc != 0) { @@ -561,13 +552,14 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct DFUSE_IE_WFLUSH(oh->doh_ie); - /* Not check for outstanding read which matches */ + /* Check for open matching reads, if there are multiple readers of the same file offset + * then chain future requests off the first one to avoid extra network round-trips. This + * can and does happen even with caching enabled if there are multiple client processes. + */ D_SPIN_LOCK(&active->lock); { struct dfuse_event *evc; - /* Check for concurrent overlapping reads and if so then add request to current op - */ d_list_for_each_entry(evc, &active->open_reads, de_read_list) { if (ev->de_req_position == evc->de_req_position && ev->de_req_len <= evc->de_req_len) { From f4956ac1df8fb99ec28134115e42a054ac18aacb Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 12 Nov 2024 13:20:18 +0000 Subject: [PATCH 16/34] fix: remove an extra list operation. Test-tag: dfuse Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 748dd27a531..b89067c47af 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -369,8 +369,6 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) cd->bucket = bucket; submit = true; - d_list_add(&cd->list, &ie->ie_active->chunks); - found: for (int i = 0; i < 8; i++) { @@ -470,12 +468,7 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct if (reached_eof) { DFUSE_TRA_DEBUG(oh, "Returning EOF early without round trip %#zx", position); oh->doh_linear_read_eof = false; -#if 0 - /* Release uses this to set the bit on the directory so do not turn it off here - * but I do need to check why it was set before. - */ oh->doh_linear_read = false; -#endif if (oh->doh_readahead) { D_MUTEX_LOCK(&oh->doh_readahead->dra_lock); From 0733c0ba5b9c8ec6bb1b54dfb738f18ef8a7eed1 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Wed, 13 Nov 2024 09:12:13 +0000 Subject: [PATCH 17/34] Back out test and stat changes. Test-tag: dfuse Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 4 ---- src/client/dfuse/ops/read.c | 6 +----- src/tests/ftest/dfuse/read.py | 4 +--- 3 files changed, 2 insertions(+), 12 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index bc468bcda76..a571952930b 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -481,10 +481,6 @@ struct dfuse_pool { ACTION(OPEN) \ ACTION(PRE_READ) \ ACTION(READ) \ - ACTION(READ_EOF_M) \ - ACTION(READ_CON) \ - ACTION(READ_BUCKET) \ - ACTION(READ_BUCKET_M) \ ACTION(WRITE) \ ACTION(STATFS) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index b89067c47af..9dd54baf318 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -68,7 +68,6 @@ dfuse_cb_read_complete(struct dfuse_event *ev) DFUSE_TRA_DEBUG(ev->de_oh, "concurrent network read %p", evs->de_oh); evs->de_len = min(ev->de_len, evs->de_req_len); evs->de_ev.ev_error = ev->de_ev.ev_error; - DFUSE_IE_STAT_ADD(ev->de_oh->doh_ie, DS_READ_CON); cb_read_helper(evs, ev->de_iov.iov_buf); } @@ -102,7 +101,7 @@ dfuse_readahead_reply(fuse_req_t req, size_t len, off_t position, struct dfuse_o } if (((position % K128) == 0) && ((len % K128) == 0)) { - DFUSE_TRA_INFO(oh, "allowing out-of-order pre read"); + DFUSE_TRA_DEBUG(oh, "allowing out-of-order pre read"); /* Do not closely track the read position in this case, just the maximum, * later checks will determine if the file is read to the end. */ @@ -254,7 +253,6 @@ chunk_cb(struct dfuse_event *ev) cd->bucket, slot); position = (cd->bucket * CHUNK_SIZE) + (slot * K128); - DFUSE_IE_STAT_ADD(cd->reqs[i].oh->doh_ie, DS_READ_BUCKET); if (cd->rc != 0) { DFUSE_REPLY_ERR_RAW(cd->reqs[i].oh, req, cd->rc); } else { @@ -410,7 +408,6 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) } else { oh->doh_linear_read_pos = max(oh->doh_linear_read_pos, position + K128); - DFUSE_IE_STAT_ADD(oh->doh_ie, DS_READ_BUCKET_M); DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", position, position + K128 - 1); DFUSE_REPLY_BUFQ(oh, req, cd->ev->de_iov.iov_buf + (slot * K128), K128); rcb = true; @@ -483,7 +480,6 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct DFUSE_IE_STAT_ADD(oh->doh_ie, DS_PRE_READ); } } - DFUSE_IE_STAT_ADD(oh->doh_ie, DS_READ_EOF_M); DFUSE_REPLY_BUFQ(oh, req, NULL, 0); return; } diff --git a/src/tests/ftest/dfuse/read.py b/src/tests/ftest/dfuse/read.py index 71a13a14044..607b8f99f9f 100644 --- a/src/tests/ftest/dfuse/read.py +++ b/src/tests/ftest/dfuse/read.py @@ -23,9 +23,7 @@ def test_dfuse_pre_read(self): Read one large file entirely using pre-read. Read a second smaller file to ensure that the first file leaves the flag enabled. - TODO: Check this with 128k I/O size and non-128K I/O size. - - :avocado: tags=all,daily_regression + :avocado: tags=all,full_regression :avocado: tags=vm :avocado: tags=dfuse :avocado: tags=DFusePreReadTest,test_dfuse_pre_read From 3a2bbd193652c1a5a2b40bf3e74fab11c1660763 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 14 Nov 2024 09:34:27 +0000 Subject: [PATCH 18/34] Try and solve patchelf problem. Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- site_scons/prereq_tools/base.py | 4 +++- src/client/dfuse/ops/read.c | 6 ++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index ed18cc18b76..8e847c639d8 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -206,7 +206,9 @@ def run_commands(self, commands, subdir=None, env=None): retval = True else: print(f"RUN: {' '.join(cmd)}") - if subprocess.call(cmd, shell=False, cwd=subdir, env=passed_env['ENV']) != 0: + rc = subprocess.call(cmd, shell=False, cwd=subdir, env=passed_env['ENV']) + if rc != 0: + print(f"Command failed with {rc}") retval = False break return retval diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 9dd54baf318..5b5962d7ba5 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -332,6 +332,12 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) bool rcb; bool all_done = true; + if (ie->ie_dfs->dfc_data_timeout == 0) + return false; + + if (atomic_load_relaxed(&oh->doh_ie->ie_open_write_count) != 0) + return false; + if (len != K128) return false; From 2308febf68d36bdf0502478f7bd5eaa2df94b343 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 15 Nov 2024 14:25:48 +0000 Subject: [PATCH 19/34] Change failure mode. Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- site_scons/prereq_tools/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index 8e847c639d8..c5db583422f 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -1420,7 +1420,10 @@ def _patch_rpaths(self): full_lib = os.path.join(path, lib) cmd = ['patchelf', '--set-rpath', ':'.join(rpath), full_lib] if not RUNNER.run_commands([cmd]): - print(f'Skipped patching {full_lib}') + if lib == 'libspdk.so': + print(f'Skipped patching {full_lib}') + else: + raise BuildFailure(f"Failed to patch {lib}") def build(self, env, needed_libs): """Build the component, if necessary From 6fd17dfe85b744fa79ff460dc81e7a9868afdcd8 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 19 Nov 2024 18:29:52 +0000 Subject: [PATCH 20/34] add strace. Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- site_scons/prereq_tools/base.py | 4 +++- utils/scripts/install-el8.sh | 1 + utils/scripts/install-leap15.sh | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index c5db583422f..463d74e5185 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -1418,7 +1418,9 @@ def _patch_rpaths(self): # Assume every file in bin can be patched continue full_lib = os.path.join(path, lib) - cmd = ['patchelf', '--set-rpath', ':'.join(rpath), full_lib] + # pylint: disable=line-too-long + cmd = ['strace', '-e', 'file,mmap,read,write,close', + 'patchelf', '--set-rpath', ':'.join(rpath), full_lib] if not RUNNER.run_commands([cmd]): if lib == 'libspdk.so': print(f'Skipped patching {full_lib}') diff --git a/utils/scripts/install-el8.sh b/utils/scripts/install-el8.sh index 472f88c9925..a1bced856bb 100755 --- a/utils/scripts/install-el8.sh +++ b/utils/scripts/install-el8.sh @@ -62,6 +62,7 @@ dnf --nodocs install \ python3-devel \ python3-pip \ sg3_utils \ + strace \ sudo \ systemd \ valgrind-devel \ diff --git a/utils/scripts/install-leap15.sh b/utils/scripts/install-leap15.sh index 0eb8ef44fee..02e5f782034 100755 --- a/utils/scripts/install-leap15.sh +++ b/utils/scripts/install-leap15.sh @@ -58,6 +58,7 @@ dnf --nodocs install \ python3-devel \ scons \ sg3_utils \ + strace \ sudo \ valgrind-devel \ which \ From f8ac3f4700426ba269343ebb97ddcdeb1f19af6c Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Wed, 20 Nov 2024 11:53:39 +0000 Subject: [PATCH 21/34] Do not pre-fetch or keep cache for writeable files. Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 10 +++++----- src/client/dfuse/ops/open.c | 16 +++++++++------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index 30b865c0a69..3c89fe93bed 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -1023,11 +1023,11 @@ struct dfuse_inode_entry { }; struct active_inode { - d_list_t chunks; - size_t file_size; - bool seen_eof; - d_list_t open_reads; - pthread_spinlock_t lock; + d_list_t chunks; + size_t file_size; + bool seen_eof; + d_list_t open_reads; + pthread_spinlock_t lock; struct dfuse_pre_read *readahead; }; diff --git a/src/client/dfuse/ops/open.c b/src/client/dfuse/ops/open.c index 2cb6a12a2bf..fc2ca06b817 100644 --- a/src/client/dfuse/ops/open.c +++ b/src/client/dfuse/ops/open.c @@ -67,13 +67,15 @@ dfuse_cb_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) * which pre-existed in the container. */ - /* TODO: This probably wants reflowing to not reference ie_open_count */ - if (atomic_load_relaxed(&ie->ie_open_count) > 0 || - ((ie->ie_dcache_last_update.tv_sec != 0) && - dfuse_dcache_get_valid(ie, ie->ie_dfs->dfc_data_timeout))) { - fi_out.keep_cache = 1; - } else { - prefetch = true; + if (!oh->doh_writeable) { + /* TODO: This probably wants reflowing to not reference ie_open_count */ + if (atomic_load_relaxed(&ie->ie_open_count) > 0 || + ((ie->ie_dcache_last_update.tv_sec != 0) && + dfuse_dcache_get_valid(ie, ie->ie_dfs->dfc_data_timeout))) { + fi_out.keep_cache = 1; + } else { + prefetch = true; + } } } else if (ie->ie_dfs->dfc_data_otoc) { /* Open to close caching, this allows the use of shared mmap */ From 94534062e7904aa0f55303e78206acaea523ed35 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Wed, 20 Nov 2024 16:36:35 +0000 Subject: [PATCH 22/34] Stat and re-fresh the files. Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- site_scons/prereq_tools/base.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index 463d74e5185..d3bec1fd785 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -1409,6 +1409,33 @@ def _patch_rpaths(self): norigin.append(os.path.normpath(path)) break + for folder in self.key_words.get("patch_rpath", []): + path = os.path.join(comp_path, folder) + files = os.listdir(path) + for lib in files: + if folder != 'bin' and not lib.endswith(".so"): + # Assume every file in bin can be patched + continue + full_lib = os.path.join(path, lib) + # pylint: disable=line-too-long + cmd = ['stat', full_lib] + RUNNER.run_commands([cmd]) + + cmd = ['daos', 'filesystem', 'evict', path] + RUNNER.run_commands([cmd]) + + for folder in self.key_words.get("patch_rpath", []): + path = os.path.join(comp_path, folder) + files = os.listdir(path) + for lib in files: + if folder != 'bin' and not lib.endswith(".so"): + # Assume every file in bin can be patched + continue + full_lib = os.path.join(path, lib) + # pylint: disable=line-too-long + cmd = ['stat', full_lib] + RUNNER.run_commands([cmd]) + rpath += norigin for folder in self.key_words.get("patch_rpath", []): path = os.path.join(comp_path, folder) From ffe786c0466bd6b2570c3758ccce5b73319a55b0 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 21 Nov 2024 09:57:06 +0000 Subject: [PATCH 23/34] Fix build not to fail. Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- site_scons/prereq_tools/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index d3bec1fd785..fdca1b17e64 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -1422,7 +1422,10 @@ def _patch_rpaths(self): RUNNER.run_commands([cmd]) cmd = ['daos', 'filesystem', 'evict', path] - RUNNER.run_commands([cmd]) + try: + RUNNER.run_commands([cmd]) + except FileNotFoundError: + pass for folder in self.key_words.get("patch_rpath", []): path = os.path.join(comp_path, folder) From 1a8630c39564abaefa1a129a49381e7130939254 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 21 Nov 2024 15:23:36 +0000 Subject: [PATCH 24/34] Stat the file, not the link Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- site_scons/prereq_tools/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index fdca1b17e64..1f3283e45ec 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -1418,7 +1418,7 @@ def _patch_rpaths(self): continue full_lib = os.path.join(path, lib) # pylint: disable=line-too-long - cmd = ['stat', full_lib] + cmd = ['stat', '-L', full_lib] RUNNER.run_commands([cmd]) cmd = ['daos', 'filesystem', 'evict', path] @@ -1436,7 +1436,7 @@ def _patch_rpaths(self): continue full_lib = os.path.join(path, lib) # pylint: disable=line-too-long - cmd = ['stat', full_lib] + cmd = ['stat', '-L', full_lib] RUNNER.run_commands([cmd]) rpath += norigin From a79ad9d8f01073830fac3e880d6bad424058f9e9 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 25 Nov 2024 16:55:12 +0000 Subject: [PATCH 25/34] Disable matching reads and chunk code to debug failure. Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 93ee23d1a43..559435d6fb1 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -385,6 +385,8 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) if ((position % K128) != 0) return false; + return false; + last = D_ALIGNUP(position + len - 1, CHUNK_SIZE); if (last > oh->doh_ie->ie_stat.st_size) @@ -575,6 +577,7 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct */ D_SPIN_LOCK(&active->lock); { +#if 0 struct dfuse_event *evc; d_list_for_each_entry(evc, &active->open_reads, de_read_list) { @@ -585,6 +588,7 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct return; } } +#endif d_list_add_tail(&ev->de_read_list, &active->open_reads); } D_SPIN_UNLOCK(&active->lock); From 6d64edea34f914751e17b39396ee1b8504d8f34a Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Mon, 25 Nov 2024 19:40:11 +0000 Subject: [PATCH 26/34] Disalbe the file-size EOF optimisation. Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 559435d6fb1..4002b549e35 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -506,9 +506,11 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct if (oh->doh_linear_read_eof && position == oh->doh_linear_read_pos) { reached_eof = true; +#if 0 } else if (active->seen_eof) { if (position >= active->file_size) reached_eof = true; +#endif } if (reached_eof) { From 073248870de2f956eff829751799930e60c9f50c Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Tue, 26 Nov 2024 17:09:27 +0000 Subject: [PATCH 27/34] Re-enable feature. Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 4002b549e35..97f181720dd 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -373,11 +373,13 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) bool rcb; bool all_done = true; +#if 0 if (ie->ie_dfs->dfc_data_timeout == 0) return false; if (atomic_load_relaxed(&oh->doh_ie->ie_open_write_count) != 0) return false; +#endif if (len != K128) return false; @@ -385,8 +387,6 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) if ((position % K128) != 0) return false; - return false; - last = D_ALIGNUP(position + len - 1, CHUNK_SIZE); if (last > oh->doh_ie->ie_stat.st_size) From c4b0d02794680a2705ab14852fd634f62f861eb9 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 28 Nov 2024 09:37:17 +0000 Subject: [PATCH 28/34] Back out build debug. Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- site_scons/prereq_tools/base.py | 43 +++------------------------------ utils/scripts/install-el8.sh | 1 - utils/scripts/install-leap15.sh | 1 - 3 files changed, 3 insertions(+), 42 deletions(-) diff --git a/site_scons/prereq_tools/base.py b/site_scons/prereq_tools/base.py index 1f3283e45ec..ed18cc18b76 100644 --- a/site_scons/prereq_tools/base.py +++ b/site_scons/prereq_tools/base.py @@ -206,9 +206,7 @@ def run_commands(self, commands, subdir=None, env=None): retval = True else: print(f"RUN: {' '.join(cmd)}") - rc = subprocess.call(cmd, shell=False, cwd=subdir, env=passed_env['ENV']) - if rc != 0: - print(f"Command failed with {rc}") + if subprocess.call(cmd, shell=False, cwd=subdir, env=passed_env['ENV']) != 0: retval = False break return retval @@ -1409,36 +1407,6 @@ def _patch_rpaths(self): norigin.append(os.path.normpath(path)) break - for folder in self.key_words.get("patch_rpath", []): - path = os.path.join(comp_path, folder) - files = os.listdir(path) - for lib in files: - if folder != 'bin' and not lib.endswith(".so"): - # Assume every file in bin can be patched - continue - full_lib = os.path.join(path, lib) - # pylint: disable=line-too-long - cmd = ['stat', '-L', full_lib] - RUNNER.run_commands([cmd]) - - cmd = ['daos', 'filesystem', 'evict', path] - try: - RUNNER.run_commands([cmd]) - except FileNotFoundError: - pass - - for folder in self.key_words.get("patch_rpath", []): - path = os.path.join(comp_path, folder) - files = os.listdir(path) - for lib in files: - if folder != 'bin' and not lib.endswith(".so"): - # Assume every file in bin can be patched - continue - full_lib = os.path.join(path, lib) - # pylint: disable=line-too-long - cmd = ['stat', '-L', full_lib] - RUNNER.run_commands([cmd]) - rpath += norigin for folder in self.key_words.get("patch_rpath", []): path = os.path.join(comp_path, folder) @@ -1448,14 +1416,9 @@ def _patch_rpaths(self): # Assume every file in bin can be patched continue full_lib = os.path.join(path, lib) - # pylint: disable=line-too-long - cmd = ['strace', '-e', 'file,mmap,read,write,close', - 'patchelf', '--set-rpath', ':'.join(rpath), full_lib] + cmd = ['patchelf', '--set-rpath', ':'.join(rpath), full_lib] if not RUNNER.run_commands([cmd]): - if lib == 'libspdk.so': - print(f'Skipped patching {full_lib}') - else: - raise BuildFailure(f"Failed to patch {lib}") + print(f'Skipped patching {full_lib}') def build(self, env, needed_libs): """Build the component, if necessary diff --git a/utils/scripts/install-el8.sh b/utils/scripts/install-el8.sh index a1bced856bb..472f88c9925 100755 --- a/utils/scripts/install-el8.sh +++ b/utils/scripts/install-el8.sh @@ -62,7 +62,6 @@ dnf --nodocs install \ python3-devel \ python3-pip \ sg3_utils \ - strace \ sudo \ systemd \ valgrind-devel \ diff --git a/utils/scripts/install-leap15.sh b/utils/scripts/install-leap15.sh index 02e5f782034..0eb8ef44fee 100755 --- a/utils/scripts/install-leap15.sh +++ b/utils/scripts/install-leap15.sh @@ -58,7 +58,6 @@ dnf --nodocs install \ python3-devel \ scons \ sg3_utils \ - strace \ sudo \ valgrind-devel \ which \ From 974c2eebe1da7fa5b4b216bcc3dd7bf34d43662c Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 28 Nov 2024 09:52:38 +0000 Subject: [PATCH 29/34] Back out matching read changes, they're in a different PR. Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- src/client/dfuse/dfuse.h | 10 ---- src/client/dfuse/dfuse_core.c | 2 - src/client/dfuse/file.c | 1 - src/client/dfuse/ops/open.c | 16 +++--- src/client/dfuse/ops/read.c | 103 ++++++---------------------------- 5 files changed, 23 insertions(+), 109 deletions(-) diff --git a/src/client/dfuse/dfuse.h b/src/client/dfuse/dfuse.h index f925a35aad7..6f1560450ad 100644 --- a/src/client/dfuse/dfuse.h +++ b/src/client/dfuse/dfuse.h @@ -399,13 +399,6 @@ struct dfuse_event { d_iov_t de_iov; d_sg_list_t de_sgl; d_list_t de_list; - - /* Position in a list of events, this will either be off active->open_reads or - * de->de_read_slaves. - */ - d_list_t de_read_list; - /* List of slave events */ - d_list_t de_read_slaves; struct dfuse_eq *de_eqt; union { struct dfuse_obj_hdl *de_oh; @@ -1024,9 +1017,6 @@ struct dfuse_inode_entry { struct active_inode { d_list_t chunks; - size_t file_size; - bool seen_eof; - d_list_t open_reads; pthread_spinlock_t lock; struct dfuse_pre_read *readahead; }; diff --git a/src/client/dfuse/dfuse_core.c b/src/client/dfuse/dfuse_core.c index 56b49af721e..8161556c3b1 100644 --- a/src/client/dfuse/dfuse_core.c +++ b/src/client/dfuse/dfuse_core.c @@ -1322,8 +1322,6 @@ dfuse_read_event_size(void *arg, size_t size) ev->de_sgl.sg_nr = 1; } - D_INIT_LIST_HEAD(&ev->de_read_slaves); - rc = daos_event_init(&ev->de_ev, ev->de_eqt->de_eq, NULL); if (rc != -DER_SUCCESS) { return false; diff --git a/src/client/dfuse/file.c b/src/client/dfuse/file.c index e3d7d2ff80e..6a86134d628 100644 --- a/src/client/dfuse/file.c +++ b/src/client/dfuse/file.c @@ -41,7 +41,6 @@ active_ie_init(struct dfuse_inode_entry *ie, bool *preread) goto out; } D_INIT_LIST_HEAD(&ie->ie_active->chunks); - D_INIT_LIST_HEAD(&ie->ie_active->open_reads); if (preread && *preread) { D_ALLOC_PTR(ie->ie_active->readahead); if (ie->ie_active->readahead) { diff --git a/src/client/dfuse/ops/open.c b/src/client/dfuse/ops/open.c index fc2ca06b817..2cb6a12a2bf 100644 --- a/src/client/dfuse/ops/open.c +++ b/src/client/dfuse/ops/open.c @@ -67,15 +67,13 @@ dfuse_cb_open(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) * which pre-existed in the container. */ - if (!oh->doh_writeable) { - /* TODO: This probably wants reflowing to not reference ie_open_count */ - if (atomic_load_relaxed(&ie->ie_open_count) > 0 || - ((ie->ie_dcache_last_update.tv_sec != 0) && - dfuse_dcache_get_valid(ie, ie->ie_dfs->dfc_data_timeout))) { - fi_out.keep_cache = 1; - } else { - prefetch = true; - } + /* TODO: This probably wants reflowing to not reference ie_open_count */ + if (atomic_load_relaxed(&ie->ie_open_count) > 0 || + ((ie->ie_dcache_last_update.tv_sec != 0) && + dfuse_dcache_get_valid(ie, ie->ie_dfs->dfc_data_timeout))) { + fi_out.keep_cache = 1; + } else { + prefetch = true; } } else if (ie->ie_dfs->dfc_data_otoc) { /* Open to close caching, this allows the use of shared mmap */ diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 97f181720dd..057ac1adaba 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -8,7 +8,7 @@ #include "dfuse.h" static void -cb_read_helper(struct dfuse_event *ev, void *buff) +dfuse_cb_read_complete(struct dfuse_event *ev) { struct dfuse_obj_hdl *oh = ev->de_oh; @@ -28,57 +28,26 @@ cb_read_helper(struct dfuse_event *ev, void *buff) } } - if (ev->de_len == ev->de_req_len) { - DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", ev->de_req_position, + if (ev->de_len == 0) { + DFUSE_TRA_DEBUG(oh, "%#zx-%#zx requested (EOF)", ev->de_req_position, ev->de_req_position + ev->de_req_len - 1); - } else { - struct dfuse_inode_entry *ie = oh->doh_ie; - - ie->ie_active->seen_eof = true; - if (ev->de_len == 0) - ie->ie_active->file_size = ev->de_req_position; - else - ie->ie_active->file_size = ev->de_req_position + ev->de_len - 1; - - if (ev->de_len == 0) - DFUSE_TRA_DEBUG(oh, "%#zx-%#zx requested (EOF)", ev->de_req_position, - ev->de_req_position + ev->de_req_len - 1); - else - DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read %#zx-%#zx not read (truncated)", - ev->de_req_position, ev->de_req_position + ev->de_len - 1, - ev->de_req_position + ev->de_len, - ev->de_req_position + ev->de_req_len - 1); - } - - DFUSE_REPLY_BUFQ(oh, ev->de_req, buff, ev->de_len); -release: - daos_event_fini(&ev->de_ev); -} - -static void -dfuse_cb_read_complete(struct dfuse_event *ev) -{ - struct dfuse_event *evs, *evn; - D_SPIN_LOCK(&ev->de_oh->doh_ie->ie_active->lock); - d_list_del(&ev->de_read_list); - D_SPIN_UNLOCK(&ev->de_oh->doh_ie->ie_active->lock); - - d_list_for_each_entry(evs, &ev->de_read_slaves, de_read_list) { - DFUSE_TRA_DEBUG(ev->de_oh, "concurrent network read %p", evs->de_oh); - evs->de_len = min(ev->de_len, evs->de_req_len); - evs->de_ev.ev_error = ev->de_ev.ev_error; - cb_read_helper(evs, ev->de_iov.iov_buf); + DFUSE_REPLY_BUFQ(oh, ev->de_req, ev->de_iov.iov_buf, ev->de_len); + D_GOTO(release, 0); } - cb_read_helper(ev, ev->de_iov.iov_buf); - - d_list_for_each_entry_safe(evs, evn, &ev->de_read_slaves, de_read_list) { - d_list_del(&evs->de_read_list); - d_slab_restock(evs->de_eqt->de_read_slab); - d_slab_release(evs->de_eqt->de_read_slab, evs); - } + if (ev->de_len == ev->de_req_len) + DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", ev->de_req_position, + ev->de_req_position + ev->de_req_len - 1); + else + DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read %#zx-%#zx not read (truncated)", + ev->de_req_position, ev->de_req_position + ev->de_len - 1, + ev->de_req_position + ev->de_len, + ev->de_req_position + ev->de_req_len - 1); + DFUSE_REPLY_BUFQ(oh, ev->de_req, ev->de_iov.iov_buf, ev->de_len); +release: + daos_event_fini(&ev->de_ev); d_slab_restock(ev->de_eqt->de_read_slab); d_slab_release(ev->de_eqt->de_read_slab, ev); } @@ -373,14 +342,6 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) bool rcb; bool all_done = true; -#if 0 - if (ie->ie_dfs->dfc_data_timeout == 0) - return false; - - if (atomic_load_relaxed(&oh->doh_ie->ie_open_write_count) != 0) - return false; -#endif - if (len != K128) return false; @@ -500,20 +461,10 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct struct dfuse_eq *eqt; int rc; struct dfuse_event *ev; - bool reached_eof = false; DFUSE_IE_STAT_ADD(oh->doh_ie, DS_READ); if (oh->doh_linear_read_eof && position == oh->doh_linear_read_pos) { - reached_eof = true; -#if 0 - } else if (active->seen_eof) { - if (position >= active->file_size) - reached_eof = true; -#endif - } - - if (reached_eof) { DFUSE_TRA_DEBUG(oh, "Returning EOF early without round trip %#zx", position); oh->doh_linear_read_eof = false; oh->doh_linear_read = false; @@ -573,28 +524,6 @@ dfuse_cb_read(fuse_req_t req, fuse_ino_t ino, size_t len, off_t position, struct DFUSE_IE_WFLUSH(oh->doh_ie); - /* Check for open matching reads, if there are multiple readers of the same file offset - * then chain future requests off the first one to avoid extra network round-trips. This - * can and does happen even with caching enabled if there are multiple client processes. - */ - D_SPIN_LOCK(&active->lock); - { -#if 0 - struct dfuse_event *evc; - - d_list_for_each_entry(evc, &active->open_reads, de_read_list) { - if (ev->de_req_position == evc->de_req_position && - ev->de_req_len <= evc->de_req_len) { - d_list_add(&ev->de_read_list, &evc->de_read_slaves); - D_SPIN_UNLOCK(&active->lock); - return; - } - } -#endif - d_list_add_tail(&ev->de_read_list, &active->open_reads); - } - D_SPIN_UNLOCK(&active->lock); - rc = dfs_read(oh->doh_dfs, oh->doh_obj, &ev->de_sgl, position, &ev->de_len, &ev->de_ev); if (rc != 0) { ev->de_ev.ev_error = rc; From db9213f2ef5aa7e03f5f8157a18d81aaa61c7409 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 28 Nov 2024 10:38:04 +0000 Subject: [PATCH 30/34] Make number of slots vcariable. Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 185 +++++++++++++++++------------------- 1 file changed, 86 insertions(+), 99 deletions(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 057ac1adaba..27b742503c1 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -181,8 +181,6 @@ pick_eqt(struct dfuse_info *dfuse_info) #define CHUNK_SIZE (1024 * 1024) -#define MAX_REQ_COUNT 256 - struct read_chunk_data { struct dfuse_event *ev; bool slot_done[8]; @@ -193,12 +191,14 @@ struct read_chunk_data { int rc; int entered; bool complete; - int idx; - struct { - int slot; - fuse_req_t req; - struct dfuse_obj_hdl *oh; - } reqs[MAX_REQ_COUNT]; + d_list_t req_list; +}; + +struct read_chunk_req { + d_list_t req_list; + struct dfuse_obj_hdl *oh; + fuse_req_t req; + int slot; }; /* Called when the last open file handle on a inode is closed. This needs to free everything which @@ -228,6 +228,8 @@ chunk_cb(struct dfuse_event *ev) { struct read_chunk_data *cd = ev->de_cd; struct active_inode *ia = cd->ia; + struct read_chunk_req *cr; + struct read_chunk_req *crn; cd->rc = ev->de_ev.ev_error; @@ -245,87 +247,32 @@ chunk_cb(struct dfuse_event *ev) /* Mark the slot as replied to. There's a race here as the slot hasn't been replied to * however references are dropped by the DFUSE_REPLY macros below so an extra ref on active - * would be required. + * would be required. The danger is that the bucket gets put on the end of the list rather + * than the start. */ - for (int i = 0; i < cd->idx; i++) - cd->slot_done[cd->reqs[i].slot] = true; - D_SPIN_UNLOCK(&ia->lock); + d_list_for_each_entry(cr, &cd->req_list, req_list) + cd->slot_done[cr->slot] = true; - for (int i = 0; i < cd->idx; i++) { - int slot; - fuse_req_t req; - size_t position; + D_SPIN_UNLOCK(&ia->lock); - slot = cd->reqs[i].slot; - req = cd->reqs[i].req; + d_list_for_each_entry_safe(cr, crn, &cd->req_list, req_list) { + size_t position; - DFUSE_TRA_DEBUG(cd->reqs[i].oh, "Replying idx %d/%d for %ld[%d]", i, cd->idx, - cd->bucket, slot); + DFUSE_TRA_DEBUG(cr->oh, "Replying for %ld[%d]", cd->bucket, cr->slot); - position = (cd->bucket * CHUNK_SIZE) + (slot * K128); + position = (cd->bucket * CHUNK_SIZE) + (cr->slot * K128); if (cd->rc != 0) { - DFUSE_REPLY_ERR_RAW(cd->reqs[i].oh, req, cd->rc); + DFUSE_REPLY_ERR_RAW(cr->oh, cr->req, cd->rc); } else { - DFUSE_TRA_DEBUG(cd->reqs[i].oh, "%#zx-%#zx read", position, - position + K128 - 1); - DFUSE_REPLY_BUFQ(cd->reqs[i].oh, req, ev->de_iov.iov_buf + (slot * K128), + DFUSE_TRA_DEBUG(cr->oh, "%#zx-%#zx read", position, position + K128 - 1); + DFUSE_REPLY_BUFQ(cr->oh, cr->req, ev->de_iov.iov_buf + (cr->slot * K128), K128); } + d_list_del(&cr->req_list); + D_FREE(cr); } } -/* Submit a read to dfs. - * - * Returns true on success. - */ -static bool -chunk_fetch(fuse_req_t req, struct dfuse_inode_entry *ie, struct read_chunk_data *cd) -{ - struct dfuse_info *dfuse_info = fuse_req_userdata(req); - struct dfuse_event *ev; - struct dfuse_eq *eqt; - int rc; - daos_off_t position = cd->bucket * CHUNK_SIZE; - - eqt = pick_eqt(dfuse_info); - - ev = d_slab_acquire(eqt->de_read_slab); - if (ev == NULL) { - cd->rc = ENOMEM; - return false; - } - - ev->de_iov.iov_len = CHUNK_SIZE; - ev->de_req = req; - ev->de_cd = cd; - ev->de_sgl.sg_nr = 1; - ev->de_len = 0; - ev->de_complete_cb = chunk_cb; - - cd->ev = ev; - cd->eqt = eqt; - - rc = dfs_read(ie->ie_dfs->dfs_ns, ie->ie_obj, &ev->de_sgl, position, &ev->de_len, - &ev->de_ev); - if (rc != 0) - goto err; - - /* Send a message to the async thread to wake it up and poll for events */ - sem_post(&eqt->de_sem); - - /* Now ensure there are more descriptors for the next request */ - d_slab_restock(eqt->de_read_slab); - - return true; - -err: - daos_event_fini(&ev->de_ev); - d_slab_release(eqt->de_read_slab, ev); - cd->ev = NULL; - cd->rc = rc; - return false; -} - /* Try and do a bulk read. * * Returns true if it was able to handle the read. @@ -335,6 +282,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) { struct dfuse_inode_entry *ie = oh->doh_ie; struct read_chunk_data *cd; + struct read_chunk_req *cr; off_t last; uint64_t bucket; int slot; @@ -373,6 +321,13 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) if (cd == NULL) goto err; + D_ALLOC_PTR(cr); + if (cr == NULL) { + D_FREE(cd); + goto err; + } + + D_INIT_LIST_HEAD(&cd->req_list); cd->ia = ie->ie_active; cd->bucket = bucket; submit = true; @@ -390,10 +345,25 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) d_list_add_tail(&cd->list, &ie->ie_active->chunks); if (submit) { - cd->reqs[0].req = req; - cd->reqs[0].oh = oh; - cd->reqs[0].slot = slot; - cd->idx++; + struct dfuse_info *dfuse_info = fuse_req_userdata(req); + struct dfuse_eq *eqt; + struct dfuse_event *ev; + int rc; + + /* Overwrite position here to the start of the bucket */ + position = cd->bucket * CHUNK_SIZE; + + eqt = pick_eqt(dfuse_info); + + ev = d_slab_acquire(eqt->de_read_slab); + if (ev == NULL) { + d_list_del(&cd->list); + D_FREE(cr); + D_FREE(cd); + goto err; + } + + d_list_add(&cr->req_list, &cd->req_list); /* Now check if this read request is complete or not yet, if it isn't then just * save req in the right slot however if it is then reply here. After the call to @@ -404,7 +374,31 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) DFUSE_TRA_DEBUG(oh, "submit for bucket %ld[%d]", bucket, slot); D_SPIN_UNLOCK(&ie->ie_active->lock); - rcb = chunk_fetch(req, ie, cd); + cd->eqt = eqt; + cd->ev = ev; + + cr->req = req; + cr->oh = oh; + cr->slot = slot; + + ev->de_iov.iov_len = CHUNK_SIZE; + ev->de_req = req; + ev->de_cd = cd; + ev->de_sgl.sg_nr = 1; + ev->de_len = 0; + ev->de_complete_cb = chunk_cb; + + rc = dfs_read(ie->ie_dfs->dfs_ns, ie->ie_obj, &ev->de_sgl, position, &ev->de_len, + &ev->de_ev); + if (rc == 0) { + /* Send a message to the async thread to wake it up and poll for events */ + sem_post(&eqt->de_sem); + } else { + ev->de_ev.ev_error = rc; + chunk_cb(ev); + } + + rcb = true; } else if (cd->complete) { cd->slot_done[slot] = true; @@ -422,26 +416,19 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) DFUSE_REPLY_BUFQ(oh, req, cd->ev->de_iov.iov_buf + (slot * K128), K128); rcb = true; } + } else { + rcb = false; - } else if (cd->idx < MAX_REQ_COUNT) { - DFUSE_TRA_DEBUG(oh, "Using idx %d for %ld[%d]", cd->idx, bucket, slot); + D_ALLOC_PTR(cr); + if (cr) { + cr->req = req; + cr->oh = oh; + cr->slot = slot; + d_list_add_tail(&cr->req_list, &cd->req_list); + rcb = true; + } - cd->reqs[cd->idx].req = req; - cd->reqs[cd->idx].oh = oh; - cd->reqs[cd->idx].slot = slot; - cd->idx++; - D_SPIN_UNLOCK(&ie->ie_active->lock); - rcb = true; - } else { D_SPIN_UNLOCK(&ie->ie_active->lock); - - /* Handle concurrent reads of the same slot, this wasn't - * expected but can happen so for now reject it at this - * level and have read perform a separate I/O for this slot. - * TODO: Handle DAOS-16686 here. - */ - rcb = false; - DFUSE_TRA_WARNING(oh, "Too many outstanding reads"); } return rcb; From 342978e024234629b3effd6d262f8362ab0462d8 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 28 Nov 2024 11:44:30 +0000 Subject: [PATCH 31/34] Fix a compile warning. Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 27b742503c1..eecc4f1037d 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -282,7 +282,7 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) { struct dfuse_inode_entry *ie = oh->doh_ie; struct read_chunk_data *cd; - struct read_chunk_req *cr; + struct read_chunk_req *cr = NULL; off_t last; uint64_t bucket; int slot; From 82854c0499d03880c133556a9a6c2b8ef6653c5e Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Thu, 28 Nov 2024 16:51:39 +0000 Subject: [PATCH 32/34] Handle truncated reads and avoid crash. Skip-unit-tests: true Skip-func-test-vm: true Test-tag: DaosBuild Skip-fault-injection-test: true Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index eecc4f1037d..95ba4586e00 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -234,7 +234,6 @@ chunk_cb(struct dfuse_event *ev) cd->rc = ev->de_ev.ev_error; if (cd->rc == 0 && (ev->de_len != CHUNK_SIZE)) { - cd->rc = EIO; DS_WARN(cd->rc, "Unexpected short read bucket %ld (%#zx) expected %i got %zi", cd->bucket, cd->bucket * CHUNK_SIZE, CHUNK_SIZE, ev->de_len); } @@ -256,19 +255,26 @@ chunk_cb(struct dfuse_event *ev) D_SPIN_UNLOCK(&ia->lock); d_list_for_each_entry_safe(cr, crn, &cd->req_list, req_list) { - size_t position; + size_t position = (cd->bucket * CHUNK_SIZE) + (cr->slot * K128); + size_t len; DFUSE_TRA_DEBUG(cr->oh, "Replying for %ld[%d]", cd->bucket, cr->slot); - position = (cd->bucket * CHUNK_SIZE) + (cr->slot * K128); + /* Delete from the list before replying as there's no reference held otherwise */ + d_list_del(&cr->req_list); + if (cd->rc != 0) { DFUSE_REPLY_ERR_RAW(cr->oh, cr->req, cd->rc); } else { - DFUSE_TRA_DEBUG(cr->oh, "%#zx-%#zx read", position, position + K128 - 1); + if ((((cr->slot + 1) * K128) - 1) >= ev->de_len) + len = max(ev->de_len - (cr->slot * K128), 0); + else + len = K128; + + DFUSE_TRA_DEBUG(cr->oh, "%#zx-%#zx read", position, position + len - 1); DFUSE_REPLY_BUFQ(cr->oh, cr->req, ev->de_iov.iov_buf + (cr->slot * K128), - K128); + len); } - d_list_del(&cr->req_list); D_FREE(cr); } } @@ -410,10 +416,17 @@ chunk_read(fuse_req_t req, size_t len, off_t position, struct dfuse_obj_hdl *oh) */ rcb = false; } else { - oh->doh_linear_read_pos = max(oh->doh_linear_read_pos, position + K128); + size_t read_len; + + if ((((slot + 1) * K128) - 1) >= cd->ev->de_len) + read_len = max(cd->ev->de_len - (slot * K128), 0); + else + read_len = K128; + + oh->doh_linear_read_pos = max(oh->doh_linear_read_pos, position + read_len); - DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", position, position + K128 - 1); - DFUSE_REPLY_BUFQ(oh, req, cd->ev->de_iov.iov_buf + (slot * K128), K128); + DFUSE_TRA_DEBUG(oh, "%#zx-%#zx read", position, position + read_len - 1); + DFUSE_REPLY_BUFQ(oh, req, cd->ev->de_iov.iov_buf + (slot * K128), read_len); rcb = true; } } else { From 3f0e0f392690a7adefb6887f8b41329e91534d68 Mon Sep 17 00:00:00 2001 From: Di Wang Date: Mon, 16 Dec 2024 18:44:04 +0000 Subject: [PATCH 33/34] DAOS-16686 dfuse: avoid possible corruption Comments from Ashely In chunk_cb(), there's no reference on cd held here after the last call to DFUSE_REPLY../. so the list needs to be spliced onto the stack before the list is iterated. Required-githooks: true Signed-off-by: Di Wang --- src/client/dfuse/ops/read.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 95ba4586e00..084e1b75cce 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -229,6 +229,7 @@ chunk_cb(struct dfuse_event *ev) struct read_chunk_data *cd = ev->de_cd; struct active_inode *ia = cd->ia; struct read_chunk_req *cr; + d_list_t tmp_list = cd->req_list; struct read_chunk_req *crn; cd->rc = ev->de_ev.ev_error; @@ -254,7 +255,7 @@ chunk_cb(struct dfuse_event *ev) D_SPIN_UNLOCK(&ia->lock); - d_list_for_each_entry_safe(cr, crn, &cd->req_list, req_list) { + d_list_for_each_entry_safe(cr, crn, &tmp_list, req_list) { size_t position = (cd->bucket * CHUNK_SIZE) + (cr->slot * K128); size_t len; From c7a870377d632454fd99c97dc7cdb4042b01d444 Mon Sep 17 00:00:00 2001 From: Ashley Pittman Date: Fri, 3 Jan 2025 10:06:35 +0000 Subject: [PATCH 34/34] Fix list handling. Signed-off-by: Ashley Pittman --- src/client/dfuse/ops/read.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/client/dfuse/ops/read.c b/src/client/dfuse/ops/read.c index 084e1b75cce..c2deff3a784 100644 --- a/src/client/dfuse/ops/read.c +++ b/src/client/dfuse/ops/read.c @@ -1,5 +1,5 @@ /** - * (C) Copyright 2016-2024 Intel Corporation. + * (C) Copyright 2016-2025 Intel Corporation. * * SPDX-License-Identifier: BSD-2-Clause-Patent */ @@ -229,7 +229,7 @@ chunk_cb(struct dfuse_event *ev) struct read_chunk_data *cd = ev->de_cd; struct active_inode *ia = cd->ia; struct read_chunk_req *cr; - d_list_t tmp_list = cd->req_list; + d_list_t tmp_list = D_LIST_HEAD_INIT(tmp_list); struct read_chunk_req *crn; cd->rc = ev->de_ev.ev_error; @@ -253,6 +253,8 @@ chunk_cb(struct dfuse_event *ev) d_list_for_each_entry(cr, &cd->req_list, req_list) cd->slot_done[cr->slot] = true; + d_list_splice_init(&cd->req_list, &tmp_list); + D_SPIN_UNLOCK(&ia->lock); d_list_for_each_entry_safe(cr, crn, &tmp_list, req_list) {