-
Notifications
You must be signed in to change notification settings - Fork 309
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAOS-14073 dfuse: Move writeback caching from kernel to dfuse. #12729
Changes from all commits
1917a8d
841ce5f
377a6b2
9cc1c45
6c7fe85
03b8de6
4255d3c
bd347ba
cfdc60c
fa2eaa2
d59c603
25683d8
020767f
355d94b
65e2455
f4d6026
d8dc076
d6696f4
83431d3
6a88f68
cf2b80f
f213f33
1a3d84b
5f551ed
29340c9
0c77d4e
3d2a2cb
96d8129
071aae8
2d85959
faa8429
8923436
3d2e1df
a606e40
aec576a
c113aa8
ad68c6b
e9730d4
0b6be3d
1bdb650
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -62,9 +62,8 @@ dfuse_show_flags(void *handle, unsigned int cap, unsigned int want) | |
DFUSE_TRA_WARNING(handle, "Unknown requested flags %#x", want); | ||
} | ||
|
||
/* Called on filesystem init. It has the ability to both observe configuration | ||
* options, but also to modify them. As we do not use the FUSE command line | ||
* parsing this is where we apply tunables. | ||
/* Called on filesystem init. It has the ability to both observe configuration options, but also to | ||
* modify them. As we do not use the FUSE command line parsing this is where we apply tunables. | ||
*/ | ||
static void | ||
dfuse_fuse_init(void *arg, struct fuse_conn_info *conn) | ||
|
@@ -76,8 +75,8 @@ dfuse_fuse_init(void *arg, struct fuse_conn_info *conn) | |
DFUSE_TRA_INFO(dfuse_info, "Proto %d %d", conn->proto_major, conn->proto_minor); | ||
|
||
/* These are requests dfuse makes to the kernel, but are then capped by the kernel itself, | ||
* for max_read zero means "as large as possible" which is what we want, but then dfuse | ||
* does not know how large to pre-allocate any buffers. | ||
* for max_read zero means "as large as possible" which is what we want, but then dfuse does | ||
* not know how large to pre-allocate any buffers. | ||
*/ | ||
DFUSE_TRA_INFO(dfuse_info, "max read %#x", conn->max_read); | ||
DFUSE_TRA_INFO(dfuse_info, "max write %#x", conn->max_write); | ||
|
@@ -91,16 +90,12 @@ dfuse_fuse_init(void *arg, struct fuse_conn_info *conn) | |
conn->want |= FUSE_CAP_READDIRPLUS; | ||
conn->want |= FUSE_CAP_READDIRPLUS_AUTO; | ||
|
||
conn->time_gran = 1; | ||
|
||
if (dfuse_info->di_wb_cache) | ||
conn->want |= FUSE_CAP_WRITEBACK_CACHE; | ||
|
||
#ifdef FUSE_CAP_CACHE_SYMLINKS | ||
conn->want |= FUSE_CAP_CACHE_SYMLINKS; | ||
#endif | ||
dfuse_show_flags(dfuse_info, conn->capable, conn->want); | ||
|
||
conn->time_gran = 1; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. IIRC this has something to do with ms or ns time? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. This isn't a logic change, just grouping the conn->want code together makes it appear new. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, I missed that |
||
conn->max_background = 16; | ||
conn->congestion_threshold = 8; | ||
|
||
|
@@ -170,6 +165,8 @@ df_ll_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | |
DFUSE_IE_STAT_ADD(inode, DS_GETATTR); | ||
} | ||
|
||
DFUSE_IE_WFLUSH(inode); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. more locking on the getattr (and open) path sounds going in the wrong direction for me, performance wise There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. True, although this should uncontented as it's just flushing the inode. The alternative would be to use atomics to track the in-flight count. That would be extra code but allow this to be a single atomic which wouldn't need to flush the pipeline. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Or... we could remove this flush but then the returned file-size might be "incorrect" with respect to committed writes that have been submitted by the kernel which seems dangerous. We'd at least want to detect this and set a attr_timeout of 0 which again would mean we needed to keep track of the in-flight count using atomics. |
||
|
||
if (inode->ie_dfs->dfc_attr_timeout && | ||
(atomic_load_relaxed(&inode->ie_open_write_count) == 0) && | ||
(atomic_load_relaxed(&inode->ie_il_count) == 0)) { | ||
|
@@ -209,6 +206,8 @@ df_ll_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, int to_set, | |
DFUSE_IE_STAT_ADD(inode, DS_SETATTR); | ||
} | ||
|
||
DFUSE_IE_WFLUSH(inode); | ||
|
||
if (inode->ie_dfs->dfs_ops->setattr) | ||
inode->ie_dfs->dfs_ops->setattr(req, inode, attr, to_set); | ||
else | ||
|
@@ -541,6 +540,34 @@ df_ll_statfs(fuse_req_t req, fuse_ino_t ino) | |
DFUSE_REPLY_ERR_RAW(dfuse_info, req, rc); | ||
} | ||
|
||
/* FUSE flush handler: run DFUSE_IE_WFLUSH on the inode behind the open handle, then answer the
 * request with DFUSE_REPLY_ZERO.
 *
 * fi must not be NULL (asserted); fi->fh is read as the struct dfuse_obj_hdl for the open file.
 * The ino argument is not consulted, the inode is taken from the handle's doh_ie instead.
 */
static void | ||
dfuse_cb_flush(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | ||
{ | ||
struct dfuse_obj_hdl *oh; | ||
struct dfuse_inode_entry *inode; | ||
|
||
D_ASSERT(fi != NULL); | ||
oh = (struct dfuse_obj_hdl *)fi->fh; | ||
inode = oh->doh_ie; | ||
|
||
/* NOTE(review): DFUSE_IE_WFLUSH is defined elsewhere; presumably it waits for writes already
 * in flight on this inode before the reply is sent - confirm against the macro definition.
 */
DFUSE_IE_WFLUSH(inode); | ||
DFUSE_REPLY_ZERO(inode, req); | ||
} | ||
|
||
/* Sync handler, registered as the fsync member in the ACTION list: run DFUSE_IE_WFLUSH on the
 * inode behind the open handle, then answer the request with DFUSE_REPLY_ZERO.
 *
 * fi must not be NULL (asserted); fi->fh is read as the struct dfuse_obj_hdl for the open file.
 * Neither ino nor datasync is consulted, so the same work is done whatever datasync holds.
 */
static void | ||
dfuse_cb_fdatasync(fuse_req_t req, fuse_ino_t ino, int datasync, struct fuse_file_info *fi) | ||
{ | ||
struct dfuse_obj_hdl *oh; | ||
struct dfuse_inode_entry *inode; | ||
|
||
D_ASSERT(fi != NULL); | ||
oh = (struct dfuse_obj_hdl *)fi->fh; | ||
inode = oh->doh_ie; | ||
|
||
/* NOTE(review): DFUSE_IE_WFLUSH is defined elsewhere; presumably it waits for writes already
 * in flight on this inode before the reply is sent - confirm against the macro definition.
 */
DFUSE_IE_WFLUSH(inode); | ||
DFUSE_REPLY_ZERO(inode, req); | ||
} | ||
|
||
/* dfuse ops that are used for accessing dfs mounts */ | ||
const struct dfuse_inode_ops dfuse_dfs_ops = { | ||
.lookup = dfuse_cb_lookup, | ||
|
@@ -598,7 +625,9 @@ const struct dfuse_inode_ops dfuse_pool_ops = { | |
ACTION(write_buf, dfuse_cb_write, true) \ | ||
ACTION(read, dfuse_cb_read, false) \ | ||
ACTION(readlink, dfuse_cb_readlink, false) \ | ||
ACTION(ioctl, dfuse_cb_ioctl, false) | ||
ACTION(ioctl, dfuse_cb_ioctl, false) \ | ||
ACTION(flush, dfuse_cb_flush, true) \ | ||
ACTION(fsync, dfuse_cb_fdatasync, true) | ||
|
||
#define SET_MEMBER(member, fn, ...) ops.member = fn; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -145,6 +145,8 @@ dfuse_cb_release(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | |
|
||
DFUSE_TRA_DEBUG(oh, "Closing %d", oh->doh_caching); | ||
|
||
DFUSE_IE_WFLUSH(oh->doh_ie); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is in open.c but actually the function is release which maps to close(), this line is what causes flush-on-close which we do want to keep, and won't affect the performance of open at all. |
||
|
||
if (oh->doh_readahead) { | ||
struct dfuse_event *ev; | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this a best-effort flush? It could be very racy, of course, depending on when other writes acquire (or try to acquire) the lock.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The semantics will depend on what rwlocks do, but it'll flush at least all writes which have already started. I don't know what it will do for writes that are issued after the flush starts but before active writes are complete. However, it's probably rare that a process is calling flush and write on the same fd at the same time, so I don't think this is a big issue.
It is per-inode rather than per-file though, so this could happen across processes.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can change this macro to only check for files, though; there's no lock for non-files.