Skip to content

Commit

Permalink
DAOS-15041 bio: handle normal device hot remove
Browse files Browse the repository at this point in the history
When a normal device is hot removed before being marked as faulty,
the faulty reaction won't be performed, so we need to close the
opened blobs when tearing down the device. Likewise, the closed
blobs need to be reopened on setup when the normal device is plugged
back in (in which case reint won't be triggered).

Signed-off-by: Niu Yawei <[email protected]>
  • Loading branch information
NiuYawei committed Jan 19, 2024
1 parent 91b93c8 commit 049a93f
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 4 deletions.
2 changes: 2 additions & 0 deletions src/bio/bio_context.c
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,7 @@ __bio_ioctxt_open(struct bio_io_context **pctxt, struct bio_xs_context *xs_ctxt,
D_INIT_LIST_HEAD(&ctxt->bic_link);
ctxt->bic_xs_ctxt = xs_ctxt;
uuid_copy(ctxt->bic_pool_id, uuid);
ctxt->bic_blob_id = SPDK_BLOBID_INVALID;

bxb = bio_xs_context2xs_blobstore(xs_ctxt, st);
D_ASSERT(bxb != NULL);
Expand Down Expand Up @@ -1005,6 +1006,7 @@ bio_blob_close(struct bio_io_context *ctxt, bool async)
ba->bca_inflights = 1;
bma->bma_ioc = ctxt;
bma->bma_async = async;
ctxt->bic_blob_id = spdk_blob_get_id(ctxt->bic_blob);
spdk_thread_send_msg(owner_thread(bbs), blob_msg_close, bma);

if (async)
Expand Down
2 changes: 2 additions & 0 deletions src/bio/bio_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ struct bio_xs_blobstore {
struct bio_blobstore *bxb_blobstore;
/* All I/O contexts for this xstream blobstore */
d_list_t bxb_io_ctxts;
bool bxb_ready;
};

/* Per-xstream NVMe context */
Expand All @@ -391,6 +392,7 @@ struct bio_xs_context {
struct bio_io_context {
d_list_t bic_link; /* link to bxb_io_ctxts */
struct spdk_blob *bic_blob;
spdk_blob_id bic_blob_id;
struct bio_xs_blobstore *bic_xs_blobstore;
struct bio_xs_context *bic_xs_ctxt;
uint32_t bic_inflight_dmas;
Expand Down
54 changes: 50 additions & 4 deletions src/bio/bio_recovery.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ on_faulty(struct bio_blobstore *bbs)
static void
teardown_xs_bs(void *arg)
{
struct bio_io_context *ioc;
int opened_blobs = 0;
struct bio_xs_blobstore *bxb = arg;

D_ASSERT(bxb != NULL);
Expand All @@ -72,8 +74,23 @@ teardown_xs_bs(void *arg)
if (bxb->bxb_io_channel == NULL)
return;

/* Blobs (VOS pools) should have been close on faulty reaction */
D_ASSERT(d_list_empty(&bxb->bxb_io_ctxts));
/* When a normal device is unplugged, the opened blobs need to be closed here */
d_list_for_each_entry(ioc, &bxb->bxb_io_ctxts, bic_link) {
if (ioc->bic_blob == NULL && ioc->bic_opening == 0)
continue;

opened_blobs++;
if (ioc->bic_closing || ioc->bic_opening)
continue;

bio_blob_close(ioc, true);
}

if (opened_blobs) {
D_DEBUG(DB_MGMT, "blobstore:%p has %d opened blobs\n",
bxb->bxb_blobstore, opened_blobs);
return;
}

/* Put the io channel */
if (bxb->bxb_io_channel != NULL) {
Expand Down Expand Up @@ -158,6 +175,7 @@ on_teardown(struct bio_blobstore *bbs)
continue;

D_ASSERT(xs_ctxt->bxc_thread != NULL);
bxb->bxb_ready = false;
spdk_thread_send_msg(xs_ctxt->bxc_thread, teardown_xs_bs, bxb);
rc += 1;
}
Expand Down Expand Up @@ -199,8 +217,10 @@ on_teardown(struct bio_blobstore *bbs)
static void
setup_xs_bs(void *arg)
{
struct bio_io_context *ioc;
struct bio_xs_blobstore *bxb = arg;
struct bio_blobstore *bbs;
int closed_blobs = 0;

D_ASSERT(bxb != NULL);
if (!is_server_started()) {
Expand All @@ -223,9 +243,35 @@ setup_xs_bs(void *arg)
D_ERROR("Failed to create io channel for %p\n", bbs);
return;
}
/* Blobs (VOS pools) will be opened in reint reaction */
}

/* If reint will be triggered later, blobs will be opened in reint reaction */
if (bbs->bb_dev->bb_trigger_reint) {
D_ASSERT(d_list_empty(&bxb->bxb_io_ctxts));
goto done;
}

/* Open all blobs when reint won't be triggered */
d_list_for_each_entry(ioc, &bxb->bxb_io_ctxts, bic_link) {
if (ioc->bic_blob != NULL && !ioc->bic_closing)
continue;

closed_blobs += 1;
if (ioc->bic_opening || ioc->bic_closing)
continue;

D_ASSERT(ioc->bic_blob_id != SPDK_BLOBID_INVALID);
/* device type and flags will be ignored in bio_blob_open() */
bio_blob_open(ioc, true, 0, SMD_DEV_TYPE_MAX, ioc->bic_blob_id);
}

if (closed_blobs) {
D_DEBUG(DB_MGMT, "blobstore:%p has %d closed blobs\n",
bbs, closed_blobs);
return;
}
done:
bxb->bxb_ready = true;
}

static void
Expand Down Expand Up @@ -325,7 +371,7 @@ on_setup(struct bio_blobstore *bbs)
D_ASSERT(bxb != NULL);

/* Setup for the per-xstream blobstore is done */
if (bxb->bxb_io_channel != NULL)
if (bxb->bxb_ready)
continue;

D_ASSERT(xs_ctxt->bxc_thread != NULL);
Expand Down

0 comments on commit 049a93f

Please sign in to comment.