-
Notifications
You must be signed in to change notification settings - Fork 308
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DAOS-15338 dfuse: Configure fuse to allow writes to populate cache. #13927
base: master
Are you sure you want to change the base?
Changes from 55 commits
05122bd
e8c52a1
21449e7
5257102
10dae00
20a5f62
1ae23d5
f9a75f4
82b2d5e
30f73a3
8fe67e8
6e657e0
ffc647e
43bad1f
1f9f676
16af3f0
28a0d45
bf7628a
20a352c
836953c
5632d8d
69539ae
8737908
d5f92ef
a2044e2
64ce894
a46c404
6b32a7e
99c6a12
faecce6
1adef98
47368a1
579c8c1
006271a
6fd51c0
24d41d9
a2130c2
767bb43
200ca13
bc21d4f
2c49c4f
2a9a8a1
8997763
845fc1f
a48cc28
d544a2d
085b30a
e45d179
adc532f
8e9a03f
bd84ad4
f40a391
973ec34
1a48745
7039849
52124db
bce5f4e
fc82b21
288e2f6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -410,6 +410,7 @@ struct dfuse_event { | |||||
union { | ||||||
size_t de_req_len; | ||||||
size_t de_readahead_len; | ||||||
struct dfuse_info *de_di; | ||||||
}; | ||||||
void (*de_complete_cb)(struct dfuse_event *ev); | ||||||
struct stat de_attr; | ||||||
|
@@ -456,6 +457,7 @@ struct dfuse_pool { | |||||
ACTION(CREATE) \ | ||||||
ACTION(MKNOD) \ | ||||||
ACTION(FGETATTR) \ | ||||||
ACTION(PRE_GETATTR) \ | ||||||
ACTION(GETATTR) \ | ||||||
ACTION(FSETATTR) \ | ||||||
ACTION(SETATTR) \ | ||||||
|
@@ -534,6 +536,9 @@ struct dfuse_cont { | |||||
double dfc_dentry_dir_timeout; | ||||||
double dfc_ndentry_timeout; | ||||||
double dfc_data_timeout; | ||||||
|
||||||
double dfc_dentry_inval_time; | ||||||
|
||||||
bool dfc_data_otoc; | ||||||
bool dfc_direct_io_disable; | ||||||
bool dfc_wb_cache; | ||||||
|
@@ -550,9 +555,6 @@ struct dfuse_cont { | |||||
#define DFUSE_IE_STAT_ADD(_ie, _stat) \ | ||||||
atomic_fetch_add_relaxed(&(_ie)->ie_dfs->dfs_stat_value[(_stat)], 1) | ||||||
|
||||||
void | ||||||
dfuse_set_default_cont_cache_values(struct dfuse_cont *dfc); | ||||||
|
||||||
/* Connect to a container via a label | ||||||
* Called either for labels on the command line or via dfuse_cont_get_handle() if opening via uuid | ||||||
* | ||||||
|
@@ -748,7 +750,7 @@ dfuse_loop(struct dfuse_info *dfuse_info); | |||||
_Static_assert(IS_OH(_oh), "Param is not open handle"); \ | ||||||
(_oh)->doh_ie = NULL; \ | ||||||
__rc = fuse_reply_err(req, 0); \ | ||||||
if (__rc != 0) \ | ||||||
if ((__rc != 0) && (__rc != -ENOENT)) \ | ||||||
DS_ERROR(-__rc, "fuse_reply_err() error"); \ | ||||||
} while (0) | ||||||
|
||||||
|
@@ -998,6 +1000,36 @@ struct dfuse_inode_entry { | |||||
/** File has been unlinked from daos */ | ||||||
bool ie_unlinked; | ||||||
|
||||||
/* Data cache metadata, list known size/mtime for file, if these have been updated then | ||||||
* the data cache should be dropped. | ||||||
* | ||||||
* When the last fd on a file is closed and all writes are completed then dfuse will launch | ||||||
* a stat to get the update size/mtime for the inode. Future opens should block on this | ||||||
* stat in order to know if the file has been updated. | ||||||
* | ||||||
* Access is controlled via atomics and semaphore, when a decision to make the stat is taken | ||||||
* then active in increased, and the sem is posted. | ||||||
* | ||||||
* Future accesses of the inode should check active, if the value is 0 then there is nothing | ||||||
* to do. | ||||||
* If active is positive then it should increate active, wait on the semaphore, decrease | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
* active and then post the semaphore if active != 0; | ||||||
* | ||||||
* After active is 0, (or the semaphore has been waited on) then the new stat structure is | ||||||
* valid. | ||||||
* | ||||||
* The release() code to initialize stat is atomic as it's only triggered by the last | ||||||
* release on a inode. It could race with open() where the inode is known in advance | ||||||
* or create() where it is not. Open will flush the stat before setting keep_cache. | ||||||
*/ | ||||||
struct { | ||||||
struct stat stat; | ||||||
bool valid; | ||||||
ATOMIC uint32_t active; | ||||||
struct timespec last_update; | ||||||
sem_t sem; | ||||||
} ie_dc; | ||||||
|
||||||
/* Lock for writes, shared locks are held during write-back reads, exclusive lock is | ||||||
* acquired and released to flush outstanding writes for getattr, close and forget. | ||||||
*/ | ||||||
|
@@ -1013,6 +1045,8 @@ struct dfuse_inode_entry { | |||||
d_list_t ie_evict_entry; | ||||||
}; | ||||||
|
||||||
void | ||||||
dfuse_ie_cs_flush(struct dfuse_inode_entry *ie); | ||||||
/* Flush write-back cache writes to a inode. It does this by waiting for and then releasing an | ||||||
* exclusive lock on the inode. Writes take a shared lock so this will block until all pending | ||||||
* writes are complete. | ||||||
|
@@ -1126,6 +1160,11 @@ dfuse_mcache_get_valid(struct dfuse_inode_entry *ie, double max_age, double *tim | |||||
bool | ||||||
dfuse_dentry_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout); | ||||||
|
||||||
void | ||||||
dfuse_dc_cache_set_time(struct dfuse_inode_entry *ie); | ||||||
bool | ||||||
dfuse_dc_cache_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout); | ||||||
|
||||||
/* inval.c */ | ||||||
|
||||||
int | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -752,7 +752,7 @@ dfuse_cont_get_cache(struct dfuse_info *dfuse_info, struct dfuse_cont *dfc) | |
* dentries which represent directories and are therefore referenced much | ||
* more often during path-walk activities are set to five seconds. | ||
*/ | ||
void | ||
static void | ||
dfuse_set_default_cont_cache_values(struct dfuse_cont *dfc) | ||
{ | ||
dfc->dfc_attr_timeout = 1; | ||
|
@@ -876,13 +876,29 @@ dfuse_cont_open(struct dfuse_info *dfuse_info, struct dfuse_pool *dfp, const cha | |
uuid_copy(dfc->dfc_uuid, c_info.ci_uuid); | ||
|
||
if (dfuse_info->di_caching) { | ||
double timeout; | ||
|
||
rc = dfuse_cont_get_cache(dfuse_info, dfc); | ||
if (rc == ENODATA) { | ||
DFUSE_TRA_INFO(dfc, "Using default caching values"); | ||
dfuse_set_default_cont_cache_values(dfc); | ||
} else if (rc != 0) { | ||
D_GOTO(err_umount, rc); | ||
} | ||
|
||
/* Set the timeout for invalidate. This is the duration after which inodes | ||
* will be evicted from the kernel. Data timeout is considered here as | ||
* well as attrs are used to decide if keeping the data in cache is | ||
* correct. Data timeout can be set to True/-1 so cap this duration at | ||
* ten minutes or nothing would ever get evicted. | ||
*/ | ||
timeout = max(dfc->dfc_attr_timeout, dfc->dfc_data_timeout); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would have expected min instead of max for L895 and L899, but I am probably missing something. |
||
|
||
timeout = min(timeout, 10 * 60); | ||
|
||
timeout = max(timeout, dfc->dfc_dentry_timeout); | ||
|
||
dfc->dfc_dentry_inval_time = timeout + 3; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why +3? |
||
} | ||
|
||
rc = ival_add_cont_buckets(dfc); | ||
|
@@ -1048,6 +1064,51 @@ dfuse_mcache_get_valid(struct dfuse_inode_entry *ie, double max_age, double *tim | |
return use; | ||
} | ||
|
||
/* Set a timer to mark cache entry as valid */ | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. minor: "timer"-->"time"? |
||
void | ||
dfuse_dc_cache_set_time(struct dfuse_inode_entry *ie) | ||
{ | ||
struct timespec now; | ||
|
||
clock_gettime(CLOCK_MONOTONIC_COARSE, &now); | ||
ie->ie_dc.last_update = now; | ||
} | ||
|
||
bool | ||
dfuse_dc_cache_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout) | ||
{ | ||
bool use = false; | ||
struct timespec now; | ||
struct timespec left; | ||
double time_left; | ||
|
||
D_ASSERT(max_age != -1); | ||
D_ASSERT(max_age >= 0); | ||
Comment on lines
+1085
to
+1086
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor issue, if "max_age >= 0", then the first assertion always is true. Do we need the first assertion? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. True, we don't. I've used this check elsewhere in the code though, for some cache types (data) then -1 is a valid cache age so the reason for having two checks here is that if it does fail we know why, was the cache incorrectly initialised/loaded in which case it'll be -1 or was a specific age incorrectly calculated in which case it's be a different negative value. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree. Thank you for your clarification! |
||
|
||
if (ie->ie_dc.last_update.tv_sec == 0) | ||
return false; | ||
|
||
clock_gettime(CLOCK_MONOTONIC_COARSE, &now); | ||
|
||
left.tv_sec = now.tv_sec - ie->ie_dc.last_update.tv_sec; | ||
left.tv_nsec = now.tv_nsec - ie->ie_dc.last_update.tv_nsec; | ||
if (left.tv_nsec < 0) { | ||
left.tv_sec--; | ||
left.tv_nsec += 1000000000; | ||
} | ||
time_left = max_age - (left.tv_sec + ((double)left.tv_nsec / 1000000000)); | ||
if (time_left > 0) { | ||
use = true; | ||
|
||
DFUSE_TRA_DEBUG(ie, "Allowing cache use, time remaining: %.1lf", time_left); | ||
|
||
if (timeout) | ||
*timeout = time_left; | ||
} | ||
|
||
return use; | ||
} | ||
|
||
bool | ||
dfuse_dentry_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout) | ||
{ | ||
|
@@ -1251,6 +1312,7 @@ dfuse_ie_init(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie) | |
atomic_init(&ie->ie_open_count, 0); | ||
atomic_init(&ie->ie_open_write_count, 0); | ||
atomic_init(&ie->ie_il_count, 0); | ||
sem_init(&ie->ie_dc.sem, 0, 0); | ||
atomic_init(&ie->ie_linear_read, true); | ||
atomic_fetch_add_relaxed(&dfuse_info->di_inode_count, 1); | ||
D_INIT_LIST_HEAD(&ie->ie_evict_entry); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
/** | ||
* (C) Copyright 2016-2023 Intel Corporation. | ||
* (C) Copyright 2016-2024 Intel Corporation. | ||
* | ||
* SPDX-License-Identifier: BSD-2-Clause-Patent | ||
*/ | ||
|
@@ -54,6 +54,12 @@ dfuse_show_flags(void *handle, unsigned int cap, unsigned int want) | |
#ifdef FUSE_CAP_EXPLICIT_INVAL_DATA | ||
SHOW_FLAG(handle, cap, want, FUSE_CAP_EXPLICIT_INVAL_DATA); | ||
#endif | ||
#ifdef FUSE_CAP_EXPIRE_ONLY | ||
SHOW_FLAG(handle, cap, want, FUSE_CAP_EXPIRE_ONLY); | ||
#endif | ||
#ifdef FUSE_CAP_SETXATTR_EXT | ||
SHOW_FLAG(handle, cap, want, FUSE_CAP_SETXATTR_EXT); | ||
#endif | ||
|
||
if (cap) | ||
DFUSE_TRA_WARNING(handle, "Unknown capability flags %#x", cap); | ||
|
@@ -90,6 +96,12 @@ dfuse_fuse_init(void *arg, struct fuse_conn_info *conn) | |
conn->want |= FUSE_CAP_READDIRPLUS; | ||
conn->want |= FUSE_CAP_READDIRPLUS_AUTO; | ||
|
||
#ifdef FUSE_CAP_EXPLICIT_INVAL_DATA | ||
/* DAOS-15338 Do not let the kernel evict data on mtime changes */ | ||
conn->want |= FUSE_CAP_EXPLICIT_INVAL_DATA; | ||
conn->want &= ~FUSE_CAP_AUTO_INVAL_DATA; | ||
#endif | ||
|
||
#ifdef FUSE_CAP_CACHE_SYMLINKS | ||
conn->want |= FUSE_CAP_CACHE_SYMLINKS; | ||
#endif | ||
|
@@ -164,6 +176,22 @@ df_ll_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi) | |
DFUSE_IE_STAT_ADD(inode, DS_GETATTR); | ||
} | ||
|
||
/* Check for stat-after-write/close. On close a stat is performed so the first getattr | ||
* call can use the result of that. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably dummy questions:
|
||
*/ | ||
if (inode->ie_dfs->dfc_attr_timeout) { | ||
double timeout; | ||
dfuse_ie_cs_flush(inode); | ||
|
||
if (dfuse_dc_cache_get_valid(inode, inode->ie_dfs->dfc_attr_timeout, &timeout)) { | ||
if (inode->ie_dc.valid) { | ||
DFUSE_IE_STAT_ADD(inode, DS_PRE_GETATTR); | ||
inode->ie_dc.valid = false; | ||
DFUSE_REPLY_ATTR_FORCE(inode, req, timeout); | ||
return; | ||
} | ||
} | ||
} | ||
DFUSE_IE_WFLUSH(inode); | ||
|
||
if (inode->ie_dfs->dfc_attr_timeout && | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
im not sure what "Data cache metadata" means
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the information used to determine if the data cache is to be considered valid - it's data about the data cache.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yea maybe reword to something like "metadata for the data-cache"
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can re-push with that, any other changes whilst I'm at it?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this is just a minor comment that does not require a repush. there is a bigger concern on the stat after close that is more fundamental and needs to be discuss more before any repush..