Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DAOS-15338 dfuse: Configure fuse to allow writes to populate cache. #13927

Open
wants to merge 59 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 55 commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
05122bd
DAOS-15338 dfuse: add a test for read caching.
ashleypittman Mar 4, 2024
e8c52a1
Check for flag before logging it.
ashleypittman Mar 4, 2024
21449e7
Add a ftest for this.
ashleypittman Mar 5, 2024
5257102
Fix the test tags.
ashleypittman Mar 5, 2024
10dae00
Try loading dfuse before access.
ashleypittman Mar 5, 2024
20a5f62
Setup a container.
ashleypittman Mar 5, 2024
1ae23d5
Change the pool setup.
ashleypittman Mar 5, 2024
f9a75f4
Turn on dfuse debugging.
ashleypittman Mar 5, 2024
82b2d5e
Use a larger write size.
ashleypittman Mar 5, 2024
30f73a3
Skip-build-ubuntu20-rpm: true
ashleypittman Mar 5, 2024
8fe67e8
Debug the test.
ashleypittman Mar 5, 2024
6e657e0
Fix spelling.
ashleypittman Mar 5, 2024
ffc647e
Re-attach the data before parsing it.
ashleypittman Mar 5, 2024
43bad1f
check the number of reads.
ashleypittman Mar 8, 2024
1f9f676
Fix json key.
ashleypittman Mar 8, 2024
16af3f0
Make new code into a dfuse method.
ashleypittman Mar 8, 2024
28a0d45
Tidy up.
ashleypittman Mar 8, 2024
bf7628a
Remove date checks.
ashleypittman Mar 18, 2024
20a352c
Start adding back in the eviction code.
ashleypittman Mar 21, 2024
836953c
Further tweak build
ashleypittman Mar 25, 2024
5632d8d
Save my changes.
ashleypittman Mar 26, 2024
69539ae
Merge branch 'master' into amd/dfuse-read-cache
ashleypittman Apr 4, 2024
8737908
Hotpatch Jenkinsfile.
ashleypittman Apr 4, 2024
d5f92ef
Enpty commit for testing.
ashleypittman Apr 5, 2024
a2044e2
Merge branch 'master' into amd/dfuse-read-cache
ashleypittman Apr 9, 2024
64ce894
Merge branch 'master' into amd/dfuse-read-cache
ashleypittman Apr 11, 2024
a46c404
Drop the data cache if metadata has changesd.
ashleypittman Apr 11, 2024
6b32a7e
Fix lint.
ashleypittman Apr 11, 2024
99c6a12
Silence a new warning.
ashleypittman Apr 11, 2024
faecce6
Back out NLT metadata changes.
ashleypittman Apr 11, 2024
1adef98
Merge branch 'master' into amd/dfuse-read-cache
ashleypittman Apr 15, 2024
47368a1
Reply with the new inode data.
ashleypittman Apr 16, 2024
579c8c1
Only use the cached value once.
ashleypittman Apr 16, 2024
006271a
Merge branch 'master' into amd/dfuse-read-cache
ashleypittman Apr 16, 2024
6fd51c0
Fir clang-format and run tests.
ashleypittman Apr 16, 2024
24d41d9
Run cachning test
ashleypittman Apr 18, 2024
a2130c2
Add debugging to test.
ashleypittman Apr 18, 2024
767bb43
Fix caching check test.
ashleypittman Apr 18, 2024
200ca13
Use the new fuse version.
ashleypittman Apr 18, 2024
bc21d4f
Bump timeouts.
ashleypittman Apr 18, 2024
2c49c4f
Merge branch 'master' into amd/dfuse-read-cache
ashleypittman Apr 22, 2024
2a9a8a1
Do not run the complex rename test with caching.
ashleypittman Apr 22, 2024
8997763
Merge branch 'master' into amd/dfuse-read-cache
ashleypittman Apr 23, 2024
845fc1f
Fix flake.
ashleypittman Apr 23, 2024
a48cc28
Minor changes.
ashleypittman Apr 23, 2024
d544a2d
Merge branch 'master' into amd/dfuse-read-cache
ashleypittman Apr 25, 2024
085b30a
Reset default cache settings.
ashleypittman Apr 25, 2024
e45d179
DAOS-15745 dfuse: Add the pre_read metrics whilst holding reference.
ashleypittman Apr 26, 2024
adc532f
Merge branch 'master' into amd/dfuse-read-cache and run with new libf…
ashleypittman Apr 29, 2024
8e9a03f
Tune down a debug message.
ashleypittman Apr 30, 2024
bd84ad4
Fix a bug where reads weren't going into cache.
ashleypittman Apr 30, 2024
f40a391
Fix flake and run more tests.
ashleypittman Apr 30, 2024
973ec34
Merge branch 'master' into amd/dfuse-read-cache
ashleypittman May 2, 2024
1a48745
Tidy up test code.
ashleypittman May 2, 2024
7039849
Run with features.
ashleypittman May 2, 2024
52124db
Merge branch 'master' into amd/dfuse-read-cache
ashleypittman Sep 26, 2024
bce5f4e
Fix merge.
ashleypittman Sep 26, 2024
fc82b21
Bump test runtime.
ashleypittman Sep 30, 2024
288e2f6
Update spelling.
ashleypittman Sep 30, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 43 additions & 4 deletions src/client/dfuse/dfuse.h
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,7 @@ struct dfuse_event {
union {
size_t de_req_len;
size_t de_readahead_len;
struct dfuse_info *de_di;
};
void (*de_complete_cb)(struct dfuse_event *ev);
struct stat de_attr;
Expand Down Expand Up @@ -456,6 +457,7 @@ struct dfuse_pool {
ACTION(CREATE) \
ACTION(MKNOD) \
ACTION(FGETATTR) \
ACTION(PRE_GETATTR) \
ACTION(GETATTR) \
ACTION(FSETATTR) \
ACTION(SETATTR) \
Expand Down Expand Up @@ -534,6 +536,9 @@ struct dfuse_cont {
double dfc_dentry_dir_timeout;
double dfc_ndentry_timeout;
double dfc_data_timeout;

double dfc_dentry_inval_time;

bool dfc_data_otoc;
bool dfc_direct_io_disable;
bool dfc_wb_cache;
Expand All @@ -550,9 +555,6 @@ struct dfuse_cont {
#define DFUSE_IE_STAT_ADD(_ie, _stat) \
atomic_fetch_add_relaxed(&(_ie)->ie_dfs->dfs_stat_value[(_stat)], 1)

void
dfuse_set_default_cont_cache_values(struct dfuse_cont *dfc);

/* Connect to a container via a label
* Called either for labels on the command line or via dfuse_cont_get_handle() if opening via uuid
*
Expand Down Expand Up @@ -748,7 +750,7 @@ dfuse_loop(struct dfuse_info *dfuse_info);
_Static_assert(IS_OH(_oh), "Param is not open handle"); \
(_oh)->doh_ie = NULL; \
__rc = fuse_reply_err(req, 0); \
if (__rc != 0) \
if ((__rc != 0) && (__rc != -ENOENT)) \
DS_ERROR(-__rc, "fuse_reply_err() error"); \
} while (0)

Expand Down Expand Up @@ -998,6 +1000,36 @@ struct dfuse_inode_entry {
/** File has been unlinked from daos */
bool ie_unlinked;

/* Data cache metadata, list known size/mtime for file, if these have been updated then
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

im not sure what "Data cache metadata" means

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the information used to determine if the data cache is to be considered valid - it's data about the data cache.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yea maybe reword to something like "metadata for the data-cache"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can re-push with that, any other changes whilst I'm at it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is just a minor comment that does not require a repush. there is a bigger concern on the stat after close that is more fundamental and needs to be discuss more before any repush..

* the data cache should be dropped.
*
* When the last fd on a file is closed and all writes are completed then dfuse will launch
* a stat to get the update size/mtime for the inode. Future opens should block on this
* stat in order to know if the file has been updated.
*
* Access is controlled via atomics and semaphore, when a decision to make the stat is taken
* then active in increased, and the sem is posted.
*
* Future accesses of the inode should check active, if the value is 0 then there is nothing
* to do.
* If active is positive then it should increate active, wait on the semaphore, decrease
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* If active is positive then it should increate active, wait on the semaphore, decrease
* If active is positive then it should increase active, wait on the semaphore, decrease

* active and then post the semaphore if active != 0;
*
* After active is 0, (or the semaphore has been waited on) then the new stat structure is
* valid.
*
* The release() code to initialize stat is atomic as it's only triggered by the last
* release on a inode. It could race with open() where the inode is known in advance
* or create() where it is not. Open will flush the stat before setting keep_cache.
*/
struct {
struct stat stat;
bool valid;
ATOMIC uint32_t active;
struct timespec last_update;
sem_t sem;
} ie_dc;

/* Lock for writes, shared locks are held during write-back reads, exclusive lock is
* acquired and released to flush outstanding writes for getattr, close and forget.
*/
Expand All @@ -1013,6 +1045,8 @@ struct dfuse_inode_entry {
d_list_t ie_evict_entry;
};

void
dfuse_ie_cs_flush(struct dfuse_inode_entry *ie);
/* Flush write-back cache writes to a inode. It does this by waiting for and then releasing an
* exclusive lock on the inode. Writes take a shared lock so this will block until all pending
* writes are complete.
Expand Down Expand Up @@ -1126,6 +1160,11 @@ dfuse_mcache_get_valid(struct dfuse_inode_entry *ie, double max_age, double *tim
bool
dfuse_dentry_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout);

void
dfuse_dc_cache_set_time(struct dfuse_inode_entry *ie);
bool
dfuse_dc_cache_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout);

/* inval.c */

int
Expand Down
64 changes: 63 additions & 1 deletion src/client/dfuse/dfuse_core.c
Original file line number Diff line number Diff line change
Expand Up @@ -752,7 +752,7 @@ dfuse_cont_get_cache(struct dfuse_info *dfuse_info, struct dfuse_cont *dfc)
* dentries which represent directories and are therefore referenced much
* more often during path-walk activities are set to five seconds.
*/
void
static void
dfuse_set_default_cont_cache_values(struct dfuse_cont *dfc)
{
dfc->dfc_attr_timeout = 1;
Expand Down Expand Up @@ -876,13 +876,29 @@ dfuse_cont_open(struct dfuse_info *dfuse_info, struct dfuse_pool *dfp, const cha
uuid_copy(dfc->dfc_uuid, c_info.ci_uuid);

if (dfuse_info->di_caching) {
double timeout;

rc = dfuse_cont_get_cache(dfuse_info, dfc);
if (rc == ENODATA) {
DFUSE_TRA_INFO(dfc, "Using default caching values");
dfuse_set_default_cont_cache_values(dfc);
} else if (rc != 0) {
D_GOTO(err_umount, rc);
}

/* Set the timeout for invalidate. This is the duration after which inodes
* will be evicted from the kernel. Data timeout is considered here as
* well as attrs are used to decide if keeping the data in cache is
* correct. Data timeout can be set to True/-1 so cap this duration at
* ten minutes or nothing would ever get evicted.
*/
timeout = max(dfc->dfc_attr_timeout, dfc->dfc_data_timeout);
Copy link
Contributor

@knard38 knard38 May 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have expected min instead of max for L895 and L899, but I am probably missing something.


timeout = min(timeout, 10 * 60);

timeout = max(timeout, dfc->dfc_dentry_timeout);

dfc->dfc_dentry_inval_time = timeout + 3;
Copy link
Contributor

@knard38 knard38 May 3, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why +3?
A named constant could help to understand the purpose of this addition.

}

rc = ival_add_cont_buckets(dfc);
Expand Down Expand Up @@ -1048,6 +1064,51 @@ dfuse_mcache_get_valid(struct dfuse_inode_entry *ie, double max_age, double *tim
return use;
}

/* Set a timer to mark cache entry as valid */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minor: "timer"-->"time"?

void
dfuse_dc_cache_set_time(struct dfuse_inode_entry *ie)
{
struct timespec now;

clock_gettime(CLOCK_MONOTONIC_COARSE, &now);
ie->ie_dc.last_update = now;
}

bool
dfuse_dc_cache_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout)
{
bool use = false;
struct timespec now;
struct timespec left;
double time_left;

D_ASSERT(max_age != -1);
D_ASSERT(max_age >= 0);
Comment on lines +1085 to +1086
Copy link
Contributor

@wiliamhuang wiliamhuang May 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Minor issue, if "max_age >= 0", then the first assertion always is true. Do we need the first assertion?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

True, we don't. I've used this check elsewhere in the code though, for some cache types (data) then -1 is a valid cache age so the reason for having two checks here is that if it does fail we know why, was the cache incorrectly initialised/loaded in which case it'll be -1 or was a specific age incorrectly calculated in which case it's be a different negative value.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree. Thank you for your clarification!


if (ie->ie_dc.last_update.tv_sec == 0)
return false;

clock_gettime(CLOCK_MONOTONIC_COARSE, &now);

left.tv_sec = now.tv_sec - ie->ie_dc.last_update.tv_sec;
left.tv_nsec = now.tv_nsec - ie->ie_dc.last_update.tv_nsec;
if (left.tv_nsec < 0) {
left.tv_sec--;
left.tv_nsec += 1000000000;
}
time_left = max_age - (left.tv_sec + ((double)left.tv_nsec / 1000000000));
if (time_left > 0) {
use = true;

DFUSE_TRA_DEBUG(ie, "Allowing cache use, time remaining: %.1lf", time_left);

if (timeout)
*timeout = time_left;
}

return use;
}

bool
dfuse_dentry_get_valid(struct dfuse_inode_entry *ie, double max_age, double *timeout)
{
Expand Down Expand Up @@ -1251,6 +1312,7 @@ dfuse_ie_init(struct dfuse_info *dfuse_info, struct dfuse_inode_entry *ie)
atomic_init(&ie->ie_open_count, 0);
atomic_init(&ie->ie_open_write_count, 0);
atomic_init(&ie->ie_il_count, 0);
sem_init(&ie->ie_dc.sem, 0, 0);
atomic_init(&ie->ie_linear_read, true);
atomic_fetch_add_relaxed(&dfuse_info->di_inode_count, 1);
D_INIT_LIST_HEAD(&ie->ie_evict_entry);
Expand Down
30 changes: 29 additions & 1 deletion src/client/dfuse/dfuse_fuseops.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2016-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand Down Expand Up @@ -54,6 +54,12 @@ dfuse_show_flags(void *handle, unsigned int cap, unsigned int want)
#ifdef FUSE_CAP_EXPLICIT_INVAL_DATA
SHOW_FLAG(handle, cap, want, FUSE_CAP_EXPLICIT_INVAL_DATA);
#endif
#ifdef FUSE_CAP_EXPIRE_ONLY
SHOW_FLAG(handle, cap, want, FUSE_CAP_EXPIRE_ONLY);
#endif
#ifdef FUSE_CAP_SETXATTR_EXT
SHOW_FLAG(handle, cap, want, FUSE_CAP_SETXATTR_EXT);
#endif

if (cap)
DFUSE_TRA_WARNING(handle, "Unknown capability flags %#x", cap);
Expand Down Expand Up @@ -90,6 +96,12 @@ dfuse_fuse_init(void *arg, struct fuse_conn_info *conn)
conn->want |= FUSE_CAP_READDIRPLUS;
conn->want |= FUSE_CAP_READDIRPLUS_AUTO;

#ifdef FUSE_CAP_EXPLICIT_INVAL_DATA
/* DAOS-15338 Do not let the kernel evict data on mtime changes */
conn->want |= FUSE_CAP_EXPLICIT_INVAL_DATA;
conn->want &= ~FUSE_CAP_AUTO_INVAL_DATA;
#endif

#ifdef FUSE_CAP_CACHE_SYMLINKS
conn->want |= FUSE_CAP_CACHE_SYMLINKS;
#endif
Expand Down Expand Up @@ -164,6 +176,22 @@ df_ll_getattr(fuse_req_t req, fuse_ino_t ino, struct fuse_file_info *fi)
DFUSE_IE_STAT_ADD(inode, DS_GETATTR);
}

/* Check for stat-after-write/close. On close a stat is performed so the first getattr
* call can use the result of that.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably dummy questions:

  • why only the first getattr() can use the result ?
  • If after the close(), an open() is done before getattr() is called. It seems, that it will be able to use the cache. Is this use case could lead to some inconstancies ?

*/
if (inode->ie_dfs->dfc_attr_timeout) {
double timeout;
dfuse_ie_cs_flush(inode);

if (dfuse_dc_cache_get_valid(inode, inode->ie_dfs->dfc_attr_timeout, &timeout)) {
if (inode->ie_dc.valid) {
DFUSE_IE_STAT_ADD(inode, DS_PRE_GETATTR);
inode->ie_dc.valid = false;
DFUSE_REPLY_ATTR_FORCE(inode, req, timeout);
return;
}
}
}
DFUSE_IE_WFLUSH(inode);

if (inode->ie_dfs->dfc_attr_timeout &&
Expand Down
8 changes: 4 additions & 4 deletions src/client/dfuse/inval.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ ival_update_inode(struct dfuse_inode_entry *inode, double timeout)
if (S_ISDIR(inode->ie_stat.st_mode))
timeout += INVAL_DIRECTORY_GRACE;
else
timeout += INVAL_FILE_GRACE;
timeout = inode->ie_dfs->dfc_dentry_inval_time;

clock_gettime(CLOCK_MONOTONIC_COARSE, &now);

Expand Down Expand Up @@ -444,7 +444,7 @@ ival_bucket_dec_value(double timeout)
}

/* Ensure the correct buckets exist for a attached container. Pools have a zero dentry timeout
* so skip zero values
* so skip zero values.
*/
int
ival_add_cont_buckets(struct dfuse_cont *dfc)
Expand All @@ -457,7 +457,7 @@ ival_add_cont_buckets(struct dfuse_cont *dfc)
if (rc != 0)
goto out;
if (dfc->dfc_dentry_timeout != 0) {
rc = ival_bucket_add_value(dfc->dfc_dentry_timeout + INVAL_FILE_GRACE);
rc = ival_bucket_add_value(dfc->dfc_dentry_inval_time);
if (rc != 0)
ival_bucket_dec_value(dfc->dfc_dentry_dir_timeout + INVAL_DIRECTORY_GRACE);
}
Expand All @@ -473,7 +473,7 @@ ival_dec_cont_buckets(struct dfuse_cont *dfc)
{
D_MUTEX_LOCK(&ival_lock);
if (dfc->dfc_dentry_timeout != 0)
ival_bucket_dec_value(dfc->dfc_dentry_timeout + INVAL_FILE_GRACE);
ival_bucket_dec_value(dfc->dfc_dentry_inval_time);
ival_bucket_dec_value(dfc->dfc_dentry_dir_timeout + INVAL_DIRECTORY_GRACE);
D_MUTEX_UNLOCK(&ival_lock);
}
Expand Down
22 changes: 18 additions & 4 deletions src/client/dfuse/ops/fgetattr.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* (C) Copyright 2016-2023 Intel Corporation.
* (C) Copyright 2016-2024 Intel Corporation.
*
* SPDX-License-Identifier: BSD-2-Clause-Patent
*/
Expand All @@ -10,14 +10,28 @@
static void
dfuse_cb_getattr_cb(struct dfuse_event *ev)
{
struct dfuse_inode_entry *ie = ev->de_ie;

if (ev->de_ev.ev_error != 0) {
DFUSE_REPLY_ERR_RAW(ev->de_ie, ev->de_req, ev->de_ev.ev_error);
DFUSE_REPLY_ERR_RAW(ie, ev->de_req, ev->de_ev.ev_error);
D_GOTO(release, 0);
}

ev->de_attr.st_ino = ev->de_ie->ie_stat.st_ino;
ev->de_attr.st_ino = ie->ie_stat.st_ino;

ie->ie_stat = ev->de_attr;

ev->de_ie->ie_stat = ev->de_attr;
if ((ie->ie_dfs->dfc_data_timeout != 0) &&
(dfuse_dcache_get_valid(ie, ie->ie_dfs->dfc_data_timeout))) {
/* This tries to match the code in fuse_change_attributes in fs/fuse/inode.c,
* if the mtime or the size has changed then drop the data cache.
*/
if ((ie->ie_stat.st_size != ie->ie_dc.stat.st_size) ||
(d_timediff_ns(&ie->ie_stat.st_mtim, &ie->ie_dc.stat.st_mtim) != 0)) {
DFUSE_TRA_DEBUG(ie, "Invalidating data cache");
dfuse_dcache_evict(ie);
}
}

DFUSE_REPLY_ATTR(ev->de_ie, ev->de_req, &ev->de_attr);
release:
Expand Down
Loading
Loading