From dd872387810766a79abdcab19d12e155c00ae7b0 Mon Sep 17 00:00:00 2001 From: Kirill Pleshivtsev Date: Mon, 10 Feb 2025 19:37:44 +0700 Subject: [PATCH] Write critical event when mismatch happened --- .../libs/diagnostics/critical_events.h | 3 +- cloud/blockstore/libs/nbd/server_handler.cpp | 1 + .../libs/service/aligned_device_handler.cpp | 8 +++-- .../libs/service/aligned_device_handler.h | 1 + .../libs/service/checksum_storage_wrapper.cpp | 18 ++++++++-- .../libs/service/checksum_storage_wrapper.h | 2 +- .../libs/service/device_handler.cpp | 3 ++ .../blockstore/libs/service/device_handler.h | 1 + .../libs/service/device_handler_ut.cpp | 34 ++++++++++++++++++- .../libs/service/unaligned_device_handler.cpp | 2 ++ .../libs/service/unaligned_device_handler.h | 1 + cloud/blockstore/libs/service/ut/ya.make | 4 +++ cloud/blockstore/libs/vhost/server.cpp | 1 + 13 files changed, 70 insertions(+), 9 deletions(-) diff --git a/cloud/blockstore/libs/diagnostics/critical_events.h b/cloud/blockstore/libs/diagnostics/critical_events.h index 8d1c6bd8bf7..3104cf784a7 100644 --- a/cloud/blockstore/libs/diagnostics/critical_events.h +++ b/cloud/blockstore/libs/diagnostics/critical_events.h @@ -33,6 +33,8 @@ namespace NCloud::NBlockStore { xxx(MirroredDiskDeviceReplacementRateLimitExceeded) \ xxx(MirroredDiskMinorityChecksumMismatch) \ xxx(MirroredDiskMajorityChecksumMismatch) \ + xxx(MirroredDiskChecksumMismatchUponRead) \ + xxx(MirroredDiskChecksumMismatchUponWrite) \ xxx(CounterUpdateRace) \ xxx(EndpointStartingError) \ xxx(ResyncFailed) \ @@ -64,7 +66,6 @@ namespace NCloud::NBlockStore { xxx(DiskRegistryPurgeHostError) \ xxx(DiskRegistryCleanupAgentConfigError) \ xxx(DiskRegistryOccupiedDeviceConfigurationHasChanged) \ - xxx(MirroredDiskChecksumMismatchUponRead) \ xxx(DiskRegistryWrongMigratedDeviceOwnership) \ xxx(DiskRegistryInitialAgentRejectionThresholdExceeded) \ // BLOCKSTORE_CRITICAL_EVENTS diff --git a/cloud/blockstore/libs/nbd/server_handler.cpp b/cloud/blockstore/libs/nbd/server_handler.cpp index 924578179e3..aac858177b7 100644 --- a/cloud/blockstore/libs/nbd/server_handler.cpp +++ b/cloud/blockstore/libs/nbd/server_handler.cpp @@ -951,6 +951,7 @@ IServerHandlerFactoryPtr CreateServerHandlerFactory( { auto deviceHandler = deviceHandlerFactory->CreateDeviceHandler( std::move(storage), + options.DiskId, options.ClientId, options.BlockSize, options.UnalignedRequestsDisabled, diff --git a/cloud/blockstore/libs/service/aligned_device_handler.cpp b/cloud/blockstore/libs/service/aligned_device_handler.cpp index acbf0aa6852..5a4ffa2ccc2 100644 --- a/cloud/blockstore/libs/service/aligned_device_handler.cpp +++ b/cloud/blockstore/libs/service/aligned_device_handler.cpp @@ -131,14 +131,16 @@ TBlocksInfo TBlocksInfo::MakeAligned() const TAlignedDeviceHandler::TAlignedDeviceHandler( IStoragePtr storage, + TString diskId, TString clientId, ui32 blockSize, ui32 maxSubRequestSize, bool checkBufferModificationDuringWriting) : Storage( - checkBufferModificationDuringWriting - ? CreateChecksumStorageWrapper(std::move(storage)) - : std::move(storage)) + checkBufferModificationDuringWriting ? CreateChecksumStorageWrapper( + std::move(storage), + std::move(diskId)) + : std::move(storage)) , ClientId(std::move(clientId)) , BlockSize(blockSize) , MaxBlockCount(maxSubRequestSize / BlockSize) diff --git a/cloud/blockstore/libs/service/aligned_device_handler.h b/cloud/blockstore/libs/service/aligned_device_handler.h index d0b8d8f3edb..8763b55f554 100644 --- a/cloud/blockstore/libs/service/aligned_device_handler.h +++ b/cloud/blockstore/libs/service/aligned_device_handler.h @@ -55,6 +55,7 @@ class TAlignedDeviceHandler final public: TAlignedDeviceHandler( IStoragePtr storage, + TString diskId, TString clientId, ui32 blockSize, ui32 maxSubRequestSize, diff --git a/cloud/blockstore/libs/service/checksum_storage_wrapper.cpp b/cloud/blockstore/libs/service/checksum_storage_wrapper.cpp index b0d9919d14f..029c6de6166 100644 --- a/cloud/blockstore/libs/service/checksum_storage_wrapper.cpp +++ b/cloud/blockstore/libs/service/checksum_storage_wrapper.cpp @@ -3,7 +3,9 @@ #include "storage.h" #include +#include #include +#include #include #include #include @@ -64,10 +66,12 @@ class TChecksumStorageWrapper final , public IStorage { const IStoragePtr Storage; + const TString DiskId; public: - explicit TChecksumStorageWrapper(IStoragePtr storage) + TChecksumStorageWrapper(IStoragePtr storage, TString diskId) : Storage(std::move(storage)) + , DiskId(std::move(diskId)) {} TFuture ZeroBlocks( @@ -165,6 +169,12 @@ TChecksumStorageWrapper::RetryWriteBlocksLocal( TCallContextPtr callContext, std::shared_ptr request) { + const auto range = TBlockRange64::WithLength( + request->GetStartIndex(), + request->BlocksCount); + ReportMirroredDiskChecksumMismatchUponWrite( + TStringBuilder() << "d:" << DiskId << ", r:" << range); + auto guard = request->Sglist.Acquire(); if (!guard) { return MakeFuture( @@ -190,9 +200,11 @@ TChecksumStorageWrapper::RetryWriteBlocksLocal( //////////////////////////////////////////////////////////////////////////////// -IStoragePtr CreateChecksumStorageWrapper(IStoragePtr storage) +IStoragePtr CreateChecksumStorageWrapper(IStoragePtr storage, TString diskId) { - return std::make_shared(std::move(storage)); + return std::make_shared( + std::move(storage), + std::move(diskId)); } } // namespace NCloud::NBlockStore diff --git a/cloud/blockstore/libs/service/checksum_storage_wrapper.h b/cloud/blockstore/libs/service/checksum_storage_wrapper.h index 778805158d9..3e9f79be476 100644 --- a/cloud/blockstore/libs/service/checksum_storage_wrapper.h +++ b/cloud/blockstore/libs/service/checksum_storage_wrapper.h @@ -6,7 +6,7 @@ namespace NCloud::NBlockStore { //////////////////////////////////////////////////////////////////////////////// -IStoragePtr CreateChecksumStorageWrapper(IStoragePtr storage); +IStoragePtr CreateChecksumStorageWrapper(IStoragePtr storage, TString diskId); //////////////////////////////////////////////////////////////////////////////// diff --git a/cloud/blockstore/libs/service/device_handler.cpp b/cloud/blockstore/libs/service/device_handler.cpp index 54e5963015d..4cbc8dab840 100644 --- a/cloud/blockstore/libs/service/device_handler.cpp +++ b/cloud/blockstore/libs/service/device_handler.cpp @@ -30,6 +30,7 @@ struct TDefaultDeviceHandlerFactory final IDeviceHandlerPtr CreateDeviceHandler( IStoragePtr storage, + TString diskId, TString clientId, ui32 blockSize, bool unalignedRequestsDisabled, @@ -38,6 +39,7 @@ struct TDefaultDeviceHandlerFactory final if (unalignedRequestsDisabled) { return std::make_shared( std::move(storage), + std::move(diskId), std::move(clientId), blockSize, MaxSubRequestSize, @@ -46,6 +48,7 @@ struct TDefaultDeviceHandlerFactory final return std::make_shared( std::move(storage), + std::move(diskId), std::move(clientId), blockSize, MaxSubRequestSize, diff --git a/cloud/blockstore/libs/service/device_handler.h b/cloud/blockstore/libs/service/device_handler.h index 47a495a709f..5df0b5caf20 100644 --- a/cloud/blockstore/libs/service/device_handler.h +++ b/cloud/blockstore/libs/service/device_handler.h @@ -43,6 +43,7 @@ struct IDeviceHandlerFactory virtual IDeviceHandlerPtr CreateDeviceHandler( IStoragePtr storage, + TString diskId, TString clientId, ui32 blockSize, bool unalignedRequestsDisabled, diff --git a/cloud/blockstore/libs/service/device_handler_ut.cpp b/cloud/blockstore/libs/service/device_handler_ut.cpp index f1f31a8030e..887acd11d6d 100644 --- a/cloud/blockstore/libs/service/device_handler_ut.cpp +++ b/cloud/blockstore/libs/service/device_handler_ut.cpp @@ -4,10 +4,12 @@ #include #include #include - #include #include +#include +#include +#include #include #include @@ -20,6 +22,15 @@ namespace { //////////////////////////////////////////////////////////////////////////////// +auto SetupCriticalEvents() +{ + NMonitoring::TDynamicCountersPtr counters = + new NMonitoring::TDynamicCounters(); + InitCriticalEventsCounter(counters); + return counters; +} + +//////////////////////////////////////////////////////////////////////////////// class TTestEnvironment { private: @@ -126,6 +137,7 @@ class TTestEnvironment auto factory = CreateDeviceHandlerFactory(maxBlockCount * BlockSize); DeviceHandler = factory->CreateDeviceHandler( std::move(testStorage), + "disk1", "testClientId", BlockSize, unalignedRequestsDisabled, // unalignedRequestsDisabled, @@ -330,6 +342,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) Y_UNIT_TEST(ShouldSliceHugeZeroRequest) { + const auto diskId = "disk1"; const auto clientId = "testClientId"; const ui32 blockSize = DefaultBlockSize; const ui64 deviceBlocksCount = 8*1024; @@ -340,6 +353,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) auto factory = CreateDeviceHandlerFactory(blocksCountLimit * blockSize); auto deviceHandler = factory->CreateDeviceHandler( storage, + diskId, clientId, blockSize, false, // unalignedRequestsDisabled, @@ -397,6 +411,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) Y_UNIT_TEST(ShouldHandleAlignedRequestsWhenUnalignedRequestsDisabled) { + const auto diskId = "disk1"; const auto clientId = "testClientId"; const ui32 blockSize = DefaultBlockSize; @@ -404,6 +419,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) auto device = CreateDefaultDeviceHandlerFactory()->CreateDeviceHandler( storage, + diskId, clientId, blockSize, true, // unalignedRequestsDisabled, @@ -485,6 +501,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) Y_UNIT_TEST(ShouldNotHandleUnalignedRequestsWhenUnalignedRequestsDisabled) { + const auto diskId = "disk1"; const auto clientId = "testClientId"; const ui32 blockSize = DefaultBlockSize; @@ -492,6 +509,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) auto device = CreateDefaultDeviceHandlerFactory()->CreateDeviceHandler( storage, + diskId, clientId, blockSize, true, // unalignedRequestsDisabled, @@ -594,6 +612,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) void DoShouldSliceHugeZeroRequest(bool requestUnaligned, bool unalignedRequestDisabled) { + const auto diskId = "disk1"; const auto clientId = "testClientId"; const ui32 blockSize = DefaultBlockSize; const ui64 deviceBlocksCount = 12; @@ -607,6 +626,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) auto factory = CreateDeviceHandlerFactory(blocksCountLimit * blockSize); auto deviceHandler = factory->CreateDeviceHandler( storage, + diskId, clientId, blockSize, unalignedRequestDisabled, // unalignedRequestsDisabled, @@ -716,6 +736,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) Y_UNIT_TEST(ShouldReturnErrorForHugeUnalignedReadWriteRequests) { + const auto diskId = "disk1"; const auto clientId = "testClientId"; const ui32 blockSize = DefaultBlockSize; @@ -724,6 +745,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) auto deviceHandler = CreateDefaultDeviceHandlerFactory()->CreateDeviceHandler( storage, + diskId, clientId, blockSize, false, // unalignedRequestsDisabled, @@ -782,6 +804,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) Y_UNIT_TEST(ShouldReturnErrorForInvalidBufferSize) { + const auto diskId = "disk1"; const auto clientId = "testClientId"; const ui32 blockSize = DefaultBlockSize; @@ -790,6 +813,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) auto deviceHandler = CreateDefaultDeviceHandlerFactory()->CreateDeviceHandler( storage, + diskId, clientId, blockSize, false, // unalignedRequestsDisabled, @@ -905,6 +929,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) Y_UNIT_TEST(ShouldCopyBufferWhenClientModifyBuffer) { + const auto diskId = "disk1"; const auto clientId = "testClientId"; const ui32 blockSize = DefaultBlockSize; const ui64 deviceBlocksCount = 8*1024; @@ -915,6 +940,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) auto factory = CreateDeviceHandlerFactory(blocksCountLimit * blockSize); auto deviceHandler = factory->CreateDeviceHandler( storage, + diskId, clientId, blockSize, false, // unalignedRequestsDisabled, @@ -945,6 +971,11 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) return MakeFuture(); }; + auto counters = SetupCriticalEvents(); + auto mirroredDiskChecksumMismatchUponWrite = counters->GetCounter( + "AppCriticalEvents/MirroredDiskChecksumMismatchUponWrite", + true); + auto future = deviceHandler->Write( MakeIntrusive(), 0, @@ -954,6 +985,7 @@ Y_UNIT_TEST_SUITE(TDeviceHandlerTest) const auto& response = future.GetValue(TDuration::Seconds(5)); UNIT_ASSERT(!HasError(response)); UNIT_ASSERT_VALUES_EQUAL(2, writeAttempts); + UNIT_ASSERT_VALUES_EQUAL(1, mirroredDiskChecksumMismatchUponWrite->Val()); } } diff --git a/cloud/blockstore/libs/service/unaligned_device_handler.cpp b/cloud/blockstore/libs/service/unaligned_device_handler.cpp index d67bca41211..ca2b29c8ff3 100644 --- a/cloud/blockstore/libs/service/unaligned_device_handler.cpp +++ b/cloud/blockstore/libs/service/unaligned_device_handler.cpp @@ -468,6 +468,7 @@ TZeroRequest::TResponseFuture TZeroRequest::ModifyAndWrite() TUnalignedDeviceHandler::TUnalignedDeviceHandler( IStoragePtr storage, + TString diskId, TString clientId, ui32 blockSize, ui32 maxSubRequestSize, @@ -475,6 +476,7 @@ TUnalignedDeviceHandler::TUnalignedDeviceHandler( bool checkBufferModificationDuringWriting) : Backend(std::make_shared( std::move(storage), + std::move(diskId), std::move(clientId), blockSize, maxSubRequestSize, diff --git a/cloud/blockstore/libs/service/unaligned_device_handler.h b/cloud/blockstore/libs/service/unaligned_device_handler.h index 7be97c1aae3..af476fa7fad 100644 --- a/cloud/blockstore/libs/service/unaligned_device_handler.h +++ b/cloud/blockstore/libs/service/unaligned_device_handler.h @@ -43,6 +43,7 @@ class TUnalignedDeviceHandler final public: TUnalignedDeviceHandler( IStoragePtr storage, + TString diskId, TString clientId, ui32 blockSize, ui32 maxSubRequestSize, diff --git a/cloud/blockstore/libs/service/ut/ya.make b/cloud/blockstore/libs/service/ut/ya.make index e3626454ea2..0a8a9af7447 100644 --- a/cloud/blockstore/libs/service/ut/ya.make +++ b/cloud/blockstore/libs/service/ut/ya.make @@ -8,4 +8,8 @@ SRCS( storage_ut.cpp ) +PEERDIR( + cloud/blockstore/libs/diagnostics +) + END() diff --git a/cloud/blockstore/libs/vhost/server.cpp b/cloud/blockstore/libs/vhost/server.cpp index 0f96dbe0547..8190badfcfd 100644 --- a/cloud/blockstore/libs/vhost/server.cpp +++ b/cloud/blockstore/libs/vhost/server.cpp @@ -419,6 +419,7 @@ class TExecutor final { auto deviceHandler = AppCtx.DeviceHandlerFactory->CreateDeviceHandler( std::move(storage), + options.DiskId, options.ClientId, options.BlockSize, options.UnalignedRequestsDisabled,