diff --git a/.github/actions/nebius_cli/action.yaml b/.github/actions/nebius_cli/action.yaml index 6eb1e3ecc70..47ecedf1103 100644 --- a/.github/actions/nebius_cli/action.yaml +++ b/.github/actions/nebius_cli/action.yaml @@ -23,7 +23,7 @@ runs: - name: install nebius cli shell: bash run: | - curl -sSL https://storage.ai.nebius.cloud/nebius/install.sh | bash + curl -sSL https://storage.eu-north1.nebius.cloud/cli/install.sh | bash - name: configure nebius cli shell: bash run: | diff --git a/.github/actions/nebius_threads_calculator/action.yaml b/.github/actions/nebius_threads_calculator/action.yaml index 904a7ea7db9..f736ddb5de2 100644 --- a/.github/actions/nebius_threads_calculator/action.yaml +++ b/.github/actions/nebius_threads_calculator/action.yaml @@ -34,19 +34,19 @@ runs: if [[ $tests_size == *"large"* ]]; then case "$vm_preset" in "80vcpu-320gb") - test_threads=20 + test_threads=16 build_threads=80 ;; "64vcpu-256gb") - test_threads=18 + test_threads=14 build_threads=64 ;; "48vcpu-192gb") - test_threads=16 + test_threads=12 build_threads=48 ;; "32vcpu-128gb") - test_threads=12 + test_threads=10 build_threads=32 ;; "16vcpu-64gb") @@ -73,11 +73,11 @@ runs: else case "$vm_preset" in "80vcpu-320gb") - test_threads=32 + test_threads=24 build_threads=80 ;; "64vcpu-256gb") - test_threads=24 + test_threads=20 build_threads=64 ;; "48vcpu-192gb") diff --git a/.github/actions/test/action.yaml b/.github/actions/test/action.yaml index ffdc94cd5d6..d9c7a5acb2f 100644 --- a/.github/actions/test/action.yaml +++ b/.github/actions/test/action.yaml @@ -56,6 +56,10 @@ inputs: required: false default: 'no' description: 'Use nebius' + truncate_enabled: + required: false + default: 'yes' + description: 'Truncate err files' runs: using: composite @@ -90,6 +94,9 @@ runs: echo "Cleaning ya dir" rm -rf /home/github/.ya/ fi + # checking /etc/hosts to see if vm was created correctly + cat /etc/hosts + - name: ya test shell: bash --noprofile --norc -eo pipefail -x {0} run: | @@ -280,8 
+287,21 @@ runs: find "$TESTS_DATA_DIR" -type f -print0 | xargs -0 -n 10 file -i | grep "application/x-executable" | awk -F: '{print $1}' | xargs rm echo "::endgroup::" echo "::group::remove-images-from-tests-data-dir" - find "$TESTS_DATA_DIR" -name generated_raw_image -o -name generated_vmdk_image -o -name invalid_qcow2_image -o -name qcow2_fuzzing_image - find "$TESTS_DATA_DIR" \( -name generated_raw_image -o -name generated_vmdk_image -o -name invalid_qcow2_image -o -name qcow2_fuzzing_image \) -delete + find "$TESTS_DATA_DIR" -name generated_raw_image -o -name generated_vmdk_image -o -name invalid_qcow2_image -o -name qcow2_fuzzing_image -o -name NVMENBS01 -o -name generated_other_big_raw_image + find "$TESTS_DATA_DIR" \( -name generated_raw_image -o -name generated_vmdk_image -o -name invalid_qcow2_image -o -name qcow2_fuzzing_image -o -name NVMENBS01 -o -name generated_other_big_raw_image \) -delete + echo "::endgroup::" + echo "::group::truncate-err-files" + find "$TESTS_DATA_DIR" -type f -name "*.err" -size +1G -print0 | while IFS= read -r -d '' file; do + orig_size=$(du -h "$file" | cut -f1) + echo "$file - $orig_size" + # shellcheck disable=SC2193 + if [ "${{ inputs.truncate_enabled }}" == "yes" ]; then + truncate -s 1G "$file" + echo "... truncated (original size was $orig_size) ..." 
>> "$file" + else + echo "not truncated" + fi + done echo "::endgroup::" echo "::group::s3-sync" if [ "$SYNC_TO_S3" = "true" ]; diff --git a/.github/config/muted_ya_nebius.txt b/.github/config/muted_ya_nebius.txt index e69de29bb2d..fd08150592c 100644 --- a/.github/config/muted_ya_nebius.txt +++ b/.github/config/muted_ya_nebius.txt @@ -0,0 +1,2 @@ +cloud/filestore/tests/fio_index/mount-kikimr-test * +cloud/filestore/tests/fio_index/mount-local-test * diff --git a/.github/scripts/github-runner.sh b/.github/scripts/github-runner.sh index 064e4f50581..2a31b1b3008 100644 --- a/.github/scripts/github-runner.sh +++ b/.github/scripts/github-runner.sh @@ -92,6 +92,9 @@ sudo usermod -a -G docker "${USER_TO_CREATE}" sudo echo "${USER_TO_CREATE} ALL=(ALL) NOPASSWD:ALL" | sudo tee "/etc/sudoers.d/99-${USER_TO_CREATE}" > /dev/null sudo chmod 0440 "/etc/sudoers.d/99-${USER_TO_CREATE}" +# increase the total number of aio requests to run more tests in parallel, default is 65536 +echo "fs.aio-max-nr=1048576" >> /etc/sysctl.conf + if [ -n "$GITHUB_TOKEN" ] && [ -n "$ORG" ] && [ -n "$TEAM" ]; then export LOGINS_FILE export KEYS_FILE diff --git a/.github/scripts/nebius-manage-vm.py b/.github/scripts/nebius-manage-vm.py index d5085507e60..8205d7d415a 100644 --- a/.github/scripts/nebius-manage-vm.py +++ b/.github/scripts/nebius-manage-vm.py @@ -374,7 +374,7 @@ def wrapper(sdk: SDK, args: argparse.Namespace) -> callable: logger.info("Next run will be at %s", time.ctime(next_run_time)) while ( time.time() < next_run_time - and time.time() - start_time < total_time_limit + and time.time() - start_time < total_time_limit # noqa: W503 ): time.sleep(1) except Exception as e: diff --git a/.github/scripts/shell-extractor.py b/.github/scripts/shell-extractor.py index b4ff1d1747c..1c061151f5d 100644 --- a/.github/scripts/shell-extractor.py +++ b/.github/scripts/shell-extractor.py @@ -106,6 +106,8 @@ def parse_command_blocks(run_content): def write_runs_to_files(runs, output_dir, prefix): """ + 
Write run commands to files. + Write each run command to a .sh file with template {action name}-{index inside of action file} in the given output_dir. diff --git a/.github/scripts/tests/generate-summary.py b/.github/scripts/tests/generate-summary.py index 10c43da3fb6..079c4ec2a09 100755 --- a/.github/scripts/tests/generate-summary.py +++ b/.github/scripts/tests/generate-summary.py @@ -2,7 +2,6 @@ import argparse import dataclasses import os -import re import json import sys from github import Github, Auth as GithubAuth @@ -360,20 +359,21 @@ def update_pr_comment( test_history_url: str, is_dry_run: bool, ): - header = f"" - header_re = re.compile(header.format(r"(\d+)")) + header = f"" body = None + comment = None for c in pr.get_issue_comments(): - if matches := header_re.match(c.body): - if int(matches[1]) == run_number: - body = [c.body, "", "---", ""] + if c.body.startswith(header): + print(f"Found comment with id={c.id}") + comment = c + body = [c.body] + break if body is None: - body = [ - header.format(run_number), - ] + body = [header] + if is_dry_run: body.extend( [ @@ -389,12 +389,19 @@ def update_pr_comment( "", ] ) + else: + body.extend(["", ""]) body.extend(get_comment_text(pr, summary, build_preset, test_history_url)) body = "\n".join(body) - pr.create_issue_comment(body) + if comment is None: + print("Creating new comment") + pr.create_issue_comment(body) + else: + print("Updating existing comment") + comment.edit(body) def main(): diff --git a/.github/workflows/build_and_test_on_demand.yaml b/.github/workflows/build_and_test_on_demand.yaml index 722b66ba7b6..ddb3ff92c5e 100644 --- a/.github/workflows/build_and_test_on_demand.yaml +++ b/.github/workflows/build_and_test_on_demand.yaml @@ -253,6 +253,7 @@ jobs: clean_ya_dir: ${{ github.event == 'workflow_dispatch' && 'no' || inputs.clean_ya_dir }} use_network_cache: ${{ github.event == 'workflow_dispatch' && 'yes'|| inputs.use_network_cache }} nebius: ${{ needs.provide-runner.outputs.nebius }} + 
truncate_enabled: ${{ contains(github.event.pull_request.labels.*.name, 'disable_truncate') && 'no' || 'yes' }} secrets: inherit sleep-if-needed: diff --git a/.github/workflows/build_and_test_ya.yaml b/.github/workflows/build_and_test_ya.yaml index 02d6cacee32..c037f1ab3d9 100644 --- a/.github/workflows/build_and_test_ya.yaml +++ b/.github/workflows/build_and_test_ya.yaml @@ -86,6 +86,10 @@ on: type: string default: "no" description: "Run on nebius runners" + truncate_enabled: + type: string + default: "yes" + description: "Truncate enabled" outputs: sleep_after_tests: description: "sleep_after_tests" @@ -166,7 +170,7 @@ jobs: clean_ya_dir: ${{ inputs.run_build && 'no' || inputs.clean_ya_dir }} use_network_cache: ${{ inputs.use_network_cache }} nebius: ${{ inputs.nebius }} - + truncate_enabled: ${{ inputs.truncate_enabled }} - id: failure name: set sleep_after_tests in case of failure if: failure() diff --git a/.github/workflows/create_and_delete_vm.yaml b/.github/workflows/create_and_delete_vm.yaml index f0010dabe8e..75358da17b1 100644 --- a/.github/workflows/create_and_delete_vm.yaml +++ b/.github/workflows/create_and_delete_vm.yaml @@ -179,6 +179,7 @@ jobs: clean_ya_dir: "no" use_network_cache: "yes" nebius: ${{ needs.provide-runner.outputs.nebius }} + truncate_enabled: ${{ contains(github.event.pull_request.labels.*.name, 'disable_truncate') && 'no' || 'yes' }} secrets: inherit release-runner: diff --git a/.github/workflows/nightly.yaml b/.github/workflows/nightly.yaml index ef27c53491c..198b11f97a8 100644 --- a/.github/workflows/nightly.yaml +++ b/.github/workflows/nightly.yaml @@ -105,6 +105,6 @@ jobs: test_threads: 32 disk_type: 'network-ssd-nonreplicated' use_network_cache: "yes" - upload_ya_dir: "yes" + upload_ya_dir: "no" clean_ya_dir: "yes" nebius: ${{ vars.GLOBAL_RUN_ON_NEBIUS }} diff --git a/.github/workflows/packer.yaml b/.github/workflows/packer.yaml index 73da7d20318..3837ad8d559 100644 --- a/.github/workflows/packer.yaml +++ 
b/.github/workflows/packer.yaml @@ -157,16 +157,16 @@ jobs: - name: Set up Nebius CLI if: inputs.nebius == 'yes' run: | - curl -sSL https://storage.ai.nebius.cloud/nebius/install.sh | bash + curl -sSL https://storage.eu-north1.nebius.cloud/cli/install.sh | bash cat < sa.json ${sa_json} EOF - jq -r '."subject-credentials"."private-key"' sa.json> private.pem + jq -r '."subject-credentials"."private-key"' sa.json > private.pem echo "::add-mask::$(jq -r '."subject-credentials"."kid"' sa.json)" public_key_id=$(jq -r '."subject-credentials"."kid"' sa.json) echo "::add-mask::$(jq -r '."subject-credentials"."iss"' sa.json)" - service_account_id=$(jq -r '."subject-credentials"."iss" sa.json') + service_account_id=$(jq -r '."subject-credentials"."iss"' sa.json) echo "::add-mask::tenant-e00en3r863f7me6wtd" nebius profile create --endpoint api.eu-north1.nebius.cloud \ --profile nbs-github-user-sa \ diff --git a/GITHUB.md b/GITHUB.md index a718159f5c7..681d23685ef 100644 --- a/GITHUB.md +++ b/GITHUB.md @@ -10,6 +10,7 @@ There is also a list of labels that slightly alters how and which tests are run: 4. `asan` or `tsan` to add address sanitizer or thread sanitizer builds on top of the regular build. 5. `recheck` trigger checks without commit. Removed automatically after launch. 6. `allow-downgrade` allows to downgrade VM preset dynamically in case of problems with resources +7. `disable_truncate`, by default, .err files are truncated to 1GiB, this disables that for this PR Also, you can launch [ya make](https://github.com/ydb-platform/nbs/actions/workflows/build_and_test_on_demand.yaml) or [cmake](https://github.com/ydb-platform/nbs/actions/workflows/build_and_test_on_demand_cmake.yaml) builds on your branch with any timeout you want (but please do not do more than 12 hours, VMs are expensive). You can find the IP of the VM inside of the jobs. The first occurrence happens in the `Prepare runner` job in the `Configure NCP` step, later IP is set in the header of jobs. 
You must use your GitHub key for it. User `github`. Feature not available for non-members. diff --git a/VSCODE.md b/VSCODE.md index 4de4ef95265..08032721aae 100644 --- a/VSCODE.md +++ b/VSCODE.md @@ -33,3 +33,16 @@ Enable git hooks for pre-commit checks ``` git config core.hooksPath .githooks ``` + +### Debugging + +If you want to use debugging in VS Code you should to enable the static linkage. + +Add section below to '~/.ya/ya.conf' +[[target_platform]] +platform_name = "default-linux-x86_64" +build_type = "relwithdebinfo" +#build_type = "release" + +[target_platform.flags] +FORCE_STATIC_LINKING="yes" diff --git a/cloud/blockstore/apps/server/ya.make b/cloud/blockstore/apps/server/ya.make index 9b0d0cca3b7..aceb01cf95d 100644 --- a/cloud/blockstore/apps/server/ya.make +++ b/cloud/blockstore/apps/server/ya.make @@ -24,7 +24,7 @@ PEERDIR( library/cpp/getopt ) -IF (BUILD_TYPE != "PROFILE" AND BUILD_TYPE != "DEBUG") +IF (BUILD_TYPE != "PROFILE" AND BUILD_TYPE != "DEBUG" AND BUILD_TYPE != "RELWITHDEBINFO") SPLIT_DWARF() ENDIF() diff --git a/cloud/blockstore/config/diagnostics.proto b/cloud/blockstore/config/diagnostics.proto index cee11ee8635..47a4d984941 100644 --- a/cloud/blockstore/config/diagnostics.proto +++ b/cloud/blockstore/config/diagnostics.proto @@ -4,6 +4,7 @@ package NCloud.NBlockStore.NProto; option go_package = "github.com/ydb-platform/nbs/cloud/blockstore/config"; +import "cloud/storage/core/protos/diagnostics.proto"; import "cloud/storage/core/protos/trace.proto"; //////////////////////////////////////////////////////////////////////////////// @@ -219,4 +220,7 @@ message TDiagnosticsConfig // Performance measurements coefficients for local HDD disks. 
optional TVolumePerfSettings LocalHDDPerfSettings = 51; + + // Type of fetching CPU stats + optional NCloud.NProto.EStatsFetcherType StatsFetcherType = 52; } diff --git a/cloud/blockstore/config/disk.proto b/cloud/blockstore/config/disk.proto index ed488ec2c3d..cc290743eae 100644 --- a/cloud/blockstore/config/disk.proto +++ b/cloud/blockstore/config/disk.proto @@ -150,6 +150,22 @@ message TStorageDiscoveryConfig //////////////////////////////////////////////////////////////////////////////// +message TDiskAgentThrottlingConfig +{ + // Host limits. + optional string InfraThrottlingConfigPath = 1; + optional uint32 DefaultNetworkMbitThroughput = 2; + + // Fraction of network throughput utilized for migrations and shadow + // disk fill. + optional double DirectCopyBandwidthFraction = 3; + + // Maximum bandwidth for one device in MiB/s. + optional uint64 MaxDeviceBandwidthMiB = 4; +} + +//////////////////////////////////////////////////////////////////////////////// + message TDiskAgentConfig { optional bool Enabled = 1; @@ -275,6 +291,13 @@ message TDiskAgentConfig } repeated TPathToSerialNumber PathToSerialNumberMapping = 37; + + // Settings for traffic shaping. + optional TDiskAgentThrottlingConfig ThrottlingConfig = 38; + + // If enabled, IOParserActor allocates a storage buffer and copies the + // request data into it. + optional bool IOParserActorAllocateStorageEnabled = 39; } //////////////////////////////////////////////////////////////////////////////// diff --git a/cloud/blockstore/config/storage.proto b/cloud/blockstore/config/storage.proto index 2fbe1424e4c..60be1fe6137 100644 --- a/cloud/blockstore/config/storage.proto +++ b/cloud/blockstore/config/storage.proto @@ -682,7 +682,8 @@ message TStorageServiceConfig // Number of ranges to process in a single Compaction run. optional uint32 CompactionRangeCountPerRun = 264; - // Specifies whether to use CompactionRangeCountPerRun. 
+ // Specifies whether to use CompactionRangeCountPerRun, + // GarbageCompactionRangeCountPerRun and ForcedCompactionRangeCountPerRun. optional bool BatchCompactionEnabled = 265; // Timeout before allowing infra to withdraw our unavailable agents. @@ -1089,7 +1090,27 @@ message TStorageServiceConfig // Enable buttons for device state changing, when they in error state. optional bool EnableToChangeErrorStatesFromDiskRegistryMonpage = 398; - + // Enabling UsedQuota calculation as UsedIopsQuota + UsedBandwidthQuota optional bool CalculateSplittedUsedQuotaMetric = 399; + + // Number of ranges to process in a single Compaction run + // in the garbage mode if batch compaction enabled. + optional uint32 GarbageCompactionRangeCountPerRun = 400; + + // Number of ranges to process in a single Compaction run + // in the range mode if batch compaction enabled. + optional uint32 ForcedCompactionRangeCountPerRun = 401; + + optional bool YdbViewerServiceEnabled = 402; + + // When enabled, tag "use-intermediate-write-buffer" will be added + // after scrubbing finds a mismatch + optional bool AutomaticallyEnableBufferCopyingAfterChecksumMismatch = 403; + + // Enabling direct sending AcquireDevices to disk agent. 
+ optional bool NonReplicatedVolumeDirectAcquireEnabled = 404; + + // Timeout for TDestroyVolumeActor (in milliseconds) + optional uint32 DestroyVolumeTimeout = 405; } diff --git a/cloud/blockstore/libs/client/session_ut.cpp b/cloud/blockstore/libs/client/session_ut.cpp index 8c15e5867dd..babb7f0719a 100644 --- a/cloud/blockstore/libs/client/session_ut.cpp +++ b/cloud/blockstore/libs/client/session_ut.cpp @@ -1757,6 +1757,67 @@ Y_UNIT_TEST_SUITE(TSessionTest) // Unfreeze request handlers promise.SetValue(); } + + Y_UNIT_TEST(ShouldPassErrorFlags) + { + auto client = std::make_shared(); + + size_t sessionNum = 0; + client->MountVolumeHandler = + [&](std::shared_ptr request) + { + NProto::TMountVolumeResponse response; + response.SetSessionId(ToString(++sessionNum)); + + auto& volume = *response.MutableVolume(); + volume.SetDiskId(request->GetDiskId()); + volume.SetBlockSize(4 * 1024); + volume.SetBlocksCount(1024); + + return MakeFuture(response); + }; + + client->UnmountVolumeHandler = + [](std::shared_ptr request) + { + Y_UNUSED(request); + return MakeFuture(); + }; + + client->WriteBlocksLocalHandler = + [](std::shared_ptr request) + { + Y_UNUSED(request); + + NProto::TError result = MakeError( + E_IO_SILENT, + "IO error", + NCloud::NProto::EF_HW_PROBLEMS_DETECTED); + return MakeFuture( + TErrorResponse(std::move(result))); + }; + + auto bootstrap = CreateBootstrap(client); + + auto session = bootstrap->GetSession(); + + bootstrap->Start(); + + { + auto res = session->MountVolume().GetValueSync(); + UNIT_ASSERT_C(!HasError(res), res.GetError().GetMessage()); + } + + { + auto res = WriteBlocks(session); + UNIT_ASSERT(res.GetError().GetCode() == E_IO_SILENT); + UNIT_ASSERT( + res.GetError().GetFlags() == + NCloud::NProto::EF_HW_PROBLEMS_DETECTED); + } + + bootstrap->Stop(); + } } } // namespace NCloud::NBlockStore::NClient diff --git a/cloud/blockstore/libs/daemon/common/bootstrap.cpp b/cloud/blockstore/libs/daemon/common/bootstrap.cpp index 
f06be81fef5..7f86a2ddaf1 100644 --- a/cloud/blockstore/libs/daemon/common/bootstrap.cpp +++ b/cloud/blockstore/libs/daemon/common/bootstrap.cpp @@ -86,7 +86,7 @@ #include #include #include -#include +#include #include #include #include @@ -861,7 +861,7 @@ void TBootstrapBase::Start() START_KIKIMR_COMPONENT(NotifyService); START_COMMON_COMPONENT(Monitoring); START_COMMON_COMPONENT(ProfileLog); - START_KIKIMR_COMPONENT(CgroupStatsFetcher); + START_KIKIMR_COMPONENT(StatsFetcher); START_COMMON_COMPONENT(DiscoveryService); START_COMMON_COMPONENT(TraceProcessor); START_KIKIMR_COMPONENT(TraceSerializer); @@ -967,7 +967,7 @@ void TBootstrapBase::Stop() STOP_KIKIMR_COMPONENT(TraceSerializer); STOP_COMMON_COMPONENT(TraceProcessor); STOP_COMMON_COMPONENT(DiscoveryService); - STOP_KIKIMR_COMPONENT(CgroupStatsFetcher); + STOP_KIKIMR_COMPONENT(StatsFetcher); STOP_COMMON_COMPONENT(ProfileLog); STOP_COMMON_COMPONENT(Monitoring); STOP_KIKIMR_COMPONENT(LogbrokerService); diff --git a/cloud/blockstore/libs/daemon/common/bootstrap.h b/cloud/blockstore/libs/daemon/common/bootstrap.h index 79bf75c2329..8df63dfe345 100644 --- a/cloud/blockstore/libs/daemon/common/bootstrap.h +++ b/cloud/blockstore/libs/daemon/common/bootstrap.h @@ -108,7 +108,7 @@ class TBootstrapBase virtual IStartable* GetTraceSerializer() = 0; virtual IStartable* GetLogbrokerService() = 0; virtual IStartable* GetNotifyService() = 0; - virtual IStartable* GetCgroupStatsFetcher() = 0; + virtual IStartable* GetStatsFetcher() = 0; virtual IStartable* GetIamTokenClient() = 0; virtual IStartable* GetComputeClient() = 0; virtual IStartable* GetKmsClient() = 0; diff --git a/cloud/blockstore/libs/daemon/common/config_initializer.cpp b/cloud/blockstore/libs/daemon/common/config_initializer.cpp index cde897ab583..db2b58769cd 100644 --- a/cloud/blockstore/libs/daemon/common/config_initializer.cpp +++ b/cloud/blockstore/libs/daemon/common/config_initializer.cpp @@ -84,8 +84,8 @@ void TConfigInitializerCommon::InitDiskAgentConfig() 
DiskAgentConfig = std::make_shared( std::move(diskAgentConfig), - Rack - ); + Rack, + HostPerformanceProfile.NetworkMbitThroughput); } void TConfigInitializerCommon::InitRdmaConfig() diff --git a/cloud/blockstore/libs/daemon/local/bootstrap.h b/cloud/blockstore/libs/daemon/local/bootstrap.h index 6c1c428a825..9a3e1ab4491 100644 --- a/cloud/blockstore/libs/daemon/local/bootstrap.h +++ b/cloud/blockstore/libs/daemon/local/bootstrap.h @@ -32,7 +32,7 @@ class TBootstrapLocal final IStartable* GetTraceSerializer() override { return nullptr; } IStartable* GetLogbrokerService() override { return nullptr; } IStartable* GetNotifyService() override { return nullptr; } - IStartable* GetCgroupStatsFetcher() override { return nullptr; } + IStartable* GetStatsFetcher() override { return nullptr; } IStartable* GetIamTokenClient() override { return nullptr; } IStartable* GetComputeClient() override { return nullptr; } IStartable* GetKmsClient() override { return nullptr; } diff --git a/cloud/blockstore/libs/daemon/ydb/bootstrap.cpp b/cloud/blockstore/libs/daemon/ydb/bootstrap.cpp index 941075b4573..5d1d7dabfc5 100644 --- a/cloud/blockstore/libs/daemon/ydb/bootstrap.cpp +++ b/cloud/blockstore/libs/daemon/ydb/bootstrap.cpp @@ -51,7 +51,7 @@ #include #include #include -#include +#include #include #include #include @@ -131,7 +131,7 @@ IStartable* TBootstrapYdb::GetYdbStorage() { return YdbStorage.get(); } IStartable* TBootstrapYdb::GetTraceSerializer() { return TraceSerializer.get(); } IStartable* TBootstrapYdb::GetLogbrokerService() { return LogbrokerService.get(); } IStartable* TBootstrapYdb::GetNotifyService() { return NotifyService.get(); } -IStartable* TBootstrapYdb::GetCgroupStatsFetcher() { return CgroupStatsFetcher.get(); } +IStartable* TBootstrapYdb::GetStatsFetcher() { return StatsFetcher.get(); } IStartable* TBootstrapYdb::GetIamTokenClient() { return IamTokenClient.get(); } IStartable* TBootstrapYdb::GetComputeClient() { return ComputeClient.get(); } IStartable* 
TBootstrapYdb::GetKmsClient() { return KmsClient.get(); } @@ -499,11 +499,11 @@ void TBootstrapYdb::InitKikimrService() STORAGE_INFO("ProfileLog initialized"); - CgroupStatsFetcher = BuildCgroupStatsFetcher( + StatsFetcher = NCloud::NStorage::BuildStatsFetcher( + Configs->DiagnosticsConfig->GetStatsFetcherType(), Configs->DiagnosticsConfig->GetCpuWaitFilename(), Log, - logging, - "BLOCKSTORE_CGROUPS"); + logging); if (Configs->StorageConfig->GetBlockDigestsEnabled()) { if (Configs->StorageConfig->GetUseTestBlockDigestGenerator()) { @@ -553,7 +553,7 @@ void TBootstrapYdb::InitKikimrService() args.LogbrokerService = LogbrokerService; args.NotifyService = NotifyService; args.VolumeStats = VolumeStats; - args.CgroupStatsFetcher = CgroupStatsFetcher; + args.StatsFetcher = StatsFetcher; args.RdmaServer = nullptr; args.RdmaClient = RdmaClient; args.Logging = logging; diff --git a/cloud/blockstore/libs/daemon/ydb/bootstrap.h b/cloud/blockstore/libs/daemon/ydb/bootstrap.h index b87ed13bbb6..74fa0c66efb 100644 --- a/cloud/blockstore/libs/daemon/ydb/bootstrap.h +++ b/cloud/blockstore/libs/daemon/ydb/bootstrap.h @@ -87,7 +87,7 @@ struct TBootstrapYdb final ITraceSerializerPtr TraceSerializer; NLogbroker::IServicePtr LogbrokerService; NNotify::IServicePtr NotifyService; - NCloud::NStorage::ICgroupStatsFetcherPtr CgroupStatsFetcher; + NCloud::NStorage::IStatsFetcherPtr StatsFetcher; NIamClient::IIamTokenClientPtr IamTokenClient; IComputeClientPtr ComputeClient; IKmsClientPtr KmsClient; @@ -115,7 +115,7 @@ struct TBootstrapYdb final IStartable* GetTraceSerializer() override; IStartable* GetLogbrokerService() override; IStartable* GetNotifyService() override; - IStartable* GetCgroupStatsFetcher() override; + IStartable* GetStatsFetcher() override; IStartable* GetIamTokenClient() override; IStartable* GetComputeClient() override; IStartable* GetKmsClient() override; diff --git a/cloud/blockstore/libs/daemon/ydb/config_initializer.cpp 
b/cloud/blockstore/libs/daemon/ydb/config_initializer.cpp index f6c0c40980b..b77125e2e31 100644 --- a/cloud/blockstore/libs/daemon/ydb/config_initializer.cpp +++ b/cloud/blockstore/libs/daemon/ydb/config_initializer.cpp @@ -326,7 +326,8 @@ void TConfigInitializerYdb::ApplyDiskAgentConfig(const TString& text) DiskAgentConfig = std::make_shared( std::move(config), - Rack); + Rack, + HostPerformanceProfile.NetworkMbitThroughput); } void TConfigInitializerYdb::ApplyDiskRegistryProxyConfig(const TString& text) diff --git a/cloud/blockstore/libs/diagnostics/config.cpp b/cloud/blockstore/libs/diagnostics/config.cpp index 27d7618ede6..70c0698e597 100644 --- a/cloud/blockstore/libs/diagnostics/config.cpp +++ b/cloud/blockstore/libs/diagnostics/config.cpp @@ -56,6 +56,7 @@ namespace { xxx(LocalHDDDowntimeThreshold, TDuration, TDuration::Seconds(15) )\ xxx(ReportHistogramAsMultipleCounters, bool, true )\ xxx(ReportHistogramAsSingleCounter, bool, false )\ + xxx(StatsFetcherType, NCloud::NProto::EStatsFetcherType, NCloud::NProto::EStatsFetcherType::CGROUP )\ // BLOCKSTORE_DIAGNOSTICS_CONFIG #define BLOCKSTORE_DIAGNOSTICS_DECLARE_CONFIG(name, type, value) \ @@ -287,3 +288,12 @@ void Out( { OutRequestThresholds(out, value); } + +template <> +void Out( + IOutputStream& out, + NCloud::NProto::EStatsFetcherType statsFetcherType) +{ + out << NCloud::NProto::EStatsFetcherType_Name( + statsFetcherType); +} diff --git a/cloud/blockstore/libs/diagnostics/config.h b/cloud/blockstore/libs/diagnostics/config.h index cefd0acaf70..47616ec0328 100644 --- a/cloud/blockstore/libs/diagnostics/config.h +++ b/cloud/blockstore/libs/diagnostics/config.h @@ -159,6 +159,8 @@ class TDiagnosticsConfig TRequestThresholds GetRequestThresholds() const; EHistogramCounterOptions GetHistogramCounterOptions() const; + NCloud::NProto::EStatsFetcherType GetStatsFetcherType() const; + void Dump(IOutputStream& out) const; void DumpHtml(IOutputStream& out) const; }; diff --git 
a/cloud/blockstore/libs/diagnostics/profile_log.cpp b/cloud/blockstore/libs/diagnostics/profile_log.cpp index 6448dee20c8..44c053c9490 100644 --- a/cloud/blockstore/libs/diagnostics/profile_log.cpp +++ b/cloud/blockstore/libs/diagnostics/profile_log.cpp @@ -114,6 +114,8 @@ class TProfileLog final { } + ~TProfileLog() override; + public: void Start() override; void Stop() override; @@ -126,6 +128,11 @@ class TProfileLog final void DoFlush(); }; +TProfileLog::~TProfileLog() +{ + DoFlush(); +} + void TProfileLog::Start() { ScheduleFlush(); diff --git a/cloud/blockstore/libs/diagnostics/profile_log_ut.cpp b/cloud/blockstore/libs/diagnostics/profile_log_ut.cpp index adbe544a85f..83d2a157ec6 100644 --- a/cloud/blockstore/libs/diagnostics/profile_log_ut.cpp +++ b/cloud/blockstore/libs/diagnostics/profile_log_ut.cpp @@ -113,9 +113,11 @@ struct TEnv ProfileLog->Stop(); } - void ProcessLog() + void ProcessLog(bool runScheduler = true) { - Scheduler->RunAllScheduledTasks(); + if (runScheduler) { + Scheduler->RunAllScheduledTasks(); + } EventProcessor.FlatMessages.clear(); const char* argv[] = {"foo", Settings.FilePath.c_str()}; @@ -388,6 +390,26 @@ Y_UNIT_TEST_SUITE(TProfileLogTest) env.EventProcessor.FlatMessages[0] ); } + + Y_UNIT_TEST(TestFlushOnDestruct) + { + TEnv env; + env.ProfileLog->Write( + {"disk2", + TInstant::Seconds(3), + IProfileLog::TReadWriteRequest{ + EBlockStoreRequest::WriteBlocks, + TDuration::MilliSeconds(300), + TDuration::MilliSeconds(42), + TBlockRange64::WithLength(10, 20), + }}); + env.ProfileLog = CreateProfileLogStub(); + env.ProcessLog(false); + UNIT_ASSERT_VALUES_EQUAL(1, env.EventProcessor.FlatMessages.size()); + UNIT_ASSERT_VALUES_EQUAL( + "disk2\t3000000\tR\t9\t300000\t42000\t10,20", + env.EventProcessor.FlatMessages[0]); + } } } // namespace NCloud::NBlockStore diff --git a/cloud/blockstore/libs/disk_agent/bootstrap.cpp b/cloud/blockstore/libs/disk_agent/bootstrap.cpp index d2b3ca7ad1e..5c2c61a38f9 100644 --- 
a/cloud/blockstore/libs/disk_agent/bootstrap.cpp +++ b/cloud/blockstore/libs/disk_agent/bootstrap.cpp @@ -346,6 +346,7 @@ void TBootstrap::InitRdmaServer(NRdma::TRdmaConfig& config) bool TBootstrap::InitKikimrService() { + Configs->Log = Log; Configs->InitKikimrConfig(); Configs->InitServerConfig(); Configs->InitFeaturesConfig(); diff --git a/cloud/blockstore/libs/disk_agent/config_initializer.cpp b/cloud/blockstore/libs/disk_agent/config_initializer.cpp index f50e1d29b63..b6dd8d07b2f 100644 --- a/cloud/blockstore/libs/disk_agent/config_initializer.cpp +++ b/cloud/blockstore/libs/disk_agent/config_initializer.cpp @@ -8,12 +8,12 @@ #include #include #include - #include #include #include #include +#include #include #include @@ -23,8 +23,54 @@ namespace NCloud::NBlockStore::NServer { +namespace { + //////////////////////////////////////////////////////////////////////////////// +std::optional ReadJsonFile( + TLog& Log, + const TString& filename) +{ + if (filename.empty()) { + return std::nullopt; + } + + try { + TFileInput in(filename); + return NJson::ReadJsonTree(&in, true); + } catch (...) { + STORAGE_ERROR( + "Failed to read file: " << filename.Quote() << " with error: " + << CurrentExceptionMessage().c_str()); + return std::nullopt; + } +} + +ui32 ReadNetworkMbitThroughput( + TLog& Log, + const NProto::TDiskAgentConfig& diskAgentConfig) +{ + const auto& config = diskAgentConfig.GetThrottlingConfig(); + ui32 networkThroughput = config.GetDefaultNetworkMbitThroughput(); + + if (auto json = ReadJsonFile(Log, config.GetInfraThrottlingConfigPath())) { + try { + if (auto* value = json->GetValueByPath("interfaces.[0].eth0.speed")) + { + networkThroughput = FromString(value->GetStringSafe()); + } + } catch (...) { + STORAGE_ERROR( + "Failed to read NetworkMbitThroughput. 
Error: " + << CurrentExceptionMessage().c_str()); + } + } + + return networkThroughput; +} + +} // namespace + void TConfigInitializer::ApplyCMSConfigs(NKikimrConfig::TAppConfig cmsConfig) { if (cmsConfig.HasBlobStorageConfig()) { @@ -167,10 +213,11 @@ void TConfigInitializer::InitDiskAgentConfig() SetupDiskAgentConfig(diskAgentConfig); ApplySpdkEnvConfig(diskAgentConfig.GetSpdkEnvConfig()); + const ui32 networkMbitThroughput = ReadNetworkMbitThroughput(Log, diskAgentConfig); DiskAgentConfig = std::make_shared( std::move(diskAgentConfig), - Rack - ); + Rack, + networkMbitThroughput); } void TConfigInitializer::InitDiskRegistryProxyConfig() @@ -428,9 +475,11 @@ void TConfigInitializer::ApplyDiskAgentConfig(const TString& text) DiskAgentConfig->GetStorageDiscoveryConfig()); } + const ui32 networkMbitThroughput = ReadNetworkMbitThroughput(Log, config); DiskAgentConfig = std::make_shared( std::move(config), - Rack); + Rack, + networkMbitThroughput); } void TConfigInitializer::ApplyDiskRegistryProxyConfig(const TString& text) diff --git a/cloud/blockstore/libs/disk_agent/config_initializer.h b/cloud/blockstore/libs/disk_agent/config_initializer.h index e5dba610314..ba71082f702 100644 --- a/cloud/blockstore/libs/disk_agent/config_initializer.h +++ b/cloud/blockstore/libs/disk_agent/config_initializer.h @@ -12,13 +12,15 @@ #include #include #include -#include #include +#include #include #include #include +#include + namespace NCloud::NBlockStore::NServer { //////////////////////////////////////////////////////////////////////////////// @@ -38,6 +40,7 @@ struct TConfigInitializer NRdma::TRdmaConfigPtr RdmaConfig; TString Rack; + TLog Log; TConfigInitializer(TOptionsPtr options) : Options(std::move(options)) diff --git a/cloud/blockstore/libs/endpoint_proxy/server/server.cpp b/cloud/blockstore/libs/endpoint_proxy/server/server.cpp index d38aa6dcf25..648b42cf4bf 100644 --- a/cloud/blockstore/libs/endpoint_proxy/server/server.cpp +++ 
b/cloud/blockstore/libs/endpoint_proxy/server/server.cpp @@ -851,7 +851,7 @@ struct TServer: IEndpointProxyServer void DoProcessRequest( const NProto::TResizeProxyDeviceRequest& request, TEndpoint& ep, - NProto::TResizeProxyDeviceResponse& response) const + NProto::TResizeProxyDeviceResponse& response) { if (ep.NbdDevice) { auto err = ep.NbdDevice->Resize(request.GetDeviceSizeInBytes()) @@ -874,6 +874,9 @@ struct TServer: IEndpointProxyServer ep.NbdOptions.BlocksCount = request.GetDeviceSizeInBytes() / ep.NbdOptions.BlockSize; + + StoreEndpointIfNeeded(ep); + STORAGE_INFO( request.ShortDebugString().Quote() << " - NBD device was resized"); diff --git a/cloud/blockstore/libs/service_local/ut/ya.make b/cloud/blockstore/libs/service_local/ut/ya.make index a711812d404..36ab932aabf 100644 --- a/cloud/blockstore/libs/service_local/ut/ya.make +++ b/cloud/blockstore/libs/service_local/ut/ya.make @@ -2,8 +2,6 @@ UNITTEST_FOR(cloud/blockstore/libs/service_local) INCLUDE(${ARCADIA_ROOT}/cloud/storage/core/tests/recipes/medium.inc) -TIMEOUT(180) - SRCS( compound_storage_ut.cpp file_io_service_provider_ut.cpp diff --git a/cloud/blockstore/libs/storage/api/disk_registry.h b/cloud/blockstore/libs/storage/api/disk_registry.h index 2ec01040517..7f7513b90d9 100644 --- a/cloud/blockstore/libs/storage/api/disk_registry.h +++ b/cloud/blockstore/libs/storage/api/disk_registry.h @@ -53,6 +53,7 @@ namespace NCloud::NBlockStore::NStorage { xxx(GetCheckpointDataState, __VA_ARGS__) \ xxx(SetCheckpointDataState, __VA_ARGS__) \ xxx(GetAgentNodeId, __VA_ARGS__) \ + xxx(AddLaggingDevices, __VA_ARGS__) \ // BLOCKSTORE_DISK_REGISTRY_REQUESTS_PROTO // requests forwarded from service to disk_registry @@ -211,6 +212,9 @@ struct TEvDiskRegistry EvQueryAgentsInfoRequest = EvBegin + 75, EvQueryAgentsInfoResponse = EvBegin + 76, + EvAddLaggingDevicesRequest = EvBegin + 77, + EvAddLaggingDevicesResponse = EvBegin + 78, + EvEnd }; diff --git a/cloud/blockstore/libs/storage/api/partition.h 
b/cloud/blockstore/libs/storage/api/partition.h index a53fbae21e3..d1a11c40452 100644 --- a/cloud/blockstore/libs/storage/api/partition.h +++ b/cloud/blockstore/libs/storage/api/partition.h @@ -15,7 +15,7 @@ namespace NCloud::NBlockStore::NStorage::NPartition { xxx(Drain, __VA_ARGS__) \ // BLOCKSTORE_PARTITION_REQUESTS -// requests forwarded from service to partion +// requests forwarded from service to partition #define BLOCKSTORE_PARTITION_REQUESTS_FWD_SERVICE(xxx, ...) \ xxx(ReadBlocks, __VA_ARGS__) \ xxx(WriteBlocks, __VA_ARGS__) \ @@ -94,6 +94,34 @@ struct TEvPartition {} }; + // + // AddLaggingAgent + // + + struct TAddLaggingAgentRequest + { + // 0 - for main devices; 1,2 - for mirror replicas + ui32 ReplicaIndex; + TString AgentId; + TAddLaggingAgentRequest(ui32 replicaIndex, TString agentId) + : ReplicaIndex(replicaIndex) + , AgentId(std::move(agentId)) + {} + }; + + // + // RemoveLaggingAgent + // + + struct TRemoveLaggingReplicaRequest + { + // 0 - for main devices; 1,2 - for mirror replicas + const ui32 ReplicaIndex; + explicit TRemoveLaggingReplicaRequest(ui32 replicaIndex) + : ReplicaIndex(replicaIndex) + {} + }; + // // Events declaration // @@ -115,6 +143,9 @@ struct TEvPartition EvGarbageCollectorCompleted = EvBegin + 8, + EvAddLaggingAgentRequest = EvBegin + 9, + EvRemoveLaggingReplicaRequest = EvBegin + 10, + EvEnd }; @@ -132,6 +163,16 @@ struct TEvPartition TGarbageCollectorCompleted, EvGarbageCollectorCompleted >; + + using TEvAddLaggingAgentRequest = TRequestEvent< + TAddLaggingAgentRequest, + EvAddLaggingAgentRequest + >; + + using TEvRemoveLaggingReplicaRequest = TRequestEvent< + TRemoveLaggingReplicaRequest, + EvRemoveLaggingReplicaRequest + >; }; } // namespace NCloud::NBlockStore::NStorage::NPartition diff --git a/cloud/blockstore/libs/storage/api/public.h b/cloud/blockstore/libs/storage/api/public.h index ea53c0cd95e..b54d57410b0 100644 --- a/cloud/blockstore/libs/storage/api/public.h +++ b/cloud/blockstore/libs/storage/api/public.h @@ 
-6,6 +6,11 @@ namespace NCloud::NBlockStore::NStorage { +//////////////////////////////////////////////////////////////////////////////// + +constexpr TStringBuf IntermediateWriteBufferTagName = + "use-intermediate-write-buffer"; + //////////////////////////////////////////////////////////////////////////////// // BackpressureReport event descriptor diff --git a/cloud/blockstore/libs/storage/api/service.h b/cloud/blockstore/libs/storage/api/service.h index c4557506bd2..f56a9f9a4b0 100644 --- a/cloud/blockstore/libs/storage/api/service.h +++ b/cloud/blockstore/libs/storage/api/service.h @@ -19,6 +19,7 @@ namespace NCloud::NBlockStore::NStorage { xxx(ChangeVolumeBinding, __VA_ARGS__) \ xxx(GetVolumeStats, __VA_ARGS__) \ xxx(RunVolumesLivenessCheck, __VA_ARGS__) \ + xxx(AddTags, __VA_ARGS__) \ // BLOCKSTORE_SERVICE_REQUESTS //////////////////////////////////////////////////////////////////////////////// @@ -160,6 +161,28 @@ struct TEvService {} }; + // + // AddTags + // + + struct TAddTagsRequest + { + const TString DiskId; + const TVector Tags; + + TAddTagsRequest() = default; + + TAddTagsRequest( + TString diskId, + TVector tags) + : DiskId(std::move(diskId)) + , Tags(std::move(tags)) + {} + }; + + struct TAddTagsResponse + {}; + // // VolumeMountStateChanged // @@ -317,6 +340,9 @@ struct TEvService EvQueryAgentsInfoRequest = EvBegin + 89, EvQueryAgentsInfoResponse = EvBegin + 90, + EvAddTagsRequest = EvBegin + 91, + EvAddTagsResponse = EvBegin + 92, + EvEnd }; diff --git a/cloud/blockstore/libs/storage/api/volume.h b/cloud/blockstore/libs/storage/api/volume.h index 1e2e18e6fd4..515bf915ae1 100644 --- a/cloud/blockstore/libs/storage/api/volume.h +++ b/cloud/blockstore/libs/storage/api/volume.h @@ -35,6 +35,7 @@ namespace NCloud::NBlockStore::NStorage { xxx(UpdateVolumeParams, __VA_ARGS__) \ xxx(ChangeStorageConfig, __VA_ARGS__) \ xxx(GetStorageConfig, __VA_ARGS__) \ + xxx(GracefulShutdown, __VA_ARGS__) \ // BLOCKSTORE_VOLUME_REQUESTS @@ -331,6 +332,9 @@ struct 
TEvVolume EvGetStorageConfigRequest = EvBegin + 58, EvGetStorageConfigResponse = EvBegin + 59, + EvGracefulShutdownRequest = EvBegin + 60, + EvGracefulShutdownResponse = EvBegin + 61, + EvEnd }; diff --git a/cloud/blockstore/libs/storage/core/compaction_map.cpp b/cloud/blockstore/libs/storage/core/compaction_map.cpp index 49c246dd01d..25fded3c9b9 100644 --- a/cloud/blockstore/libs/storage/core/compaction_map.cpp +++ b/cloud/blockstore/libs/storage/core/compaction_map.cpp @@ -524,6 +524,40 @@ TVector TCompactionMap::GetNonEmptyRanges() const return result; } +TVector TCompactionMap::GetNonEmptyRanges( + ui32 blockIndex, + ui32 rangesCount) const +{ + if (!rangesCount) { + return {}; + } + + const ui32 groupStart = GetGroupStart(blockIndex, Impl->RangeSize); + auto rangeIndex = (blockIndex - groupStart) / Impl->RangeSize; + + TVector result(Reserve(rangesCount)); + + for (auto groupIt = TImpl::TGroupByBlockIndexTree::TIterator( + Impl->GroupByBlockIndex.Find(groupStart)); + groupIt != Impl->GroupByBlockIndex.End(); + ++groupIt, rangeIndex = 0) + { + const auto& group = static_cast(*groupIt); + for (; rangeIndex < group.Stats.size(); ++rangeIndex) { + if (group.Stats[rangeIndex].BlobCount > 0) { + const auto groupRangeStart = + group.BlockIndex + (rangeIndex * Impl->RangeSize); + result.emplace_back(groupRangeStart, group.Stats[rangeIndex]); + if (result.size() == rangesCount) { + return result; + } + } + } + } + + return result; +} + ui32 TCompactionMap::GetNonEmptyRangeCount() const { return Impl->GetNonEmptyRangeCount(); diff --git a/cloud/blockstore/libs/storage/core/compaction_map.h b/cloud/blockstore/libs/storage/core/compaction_map.h index 394c06a8374..9a739e65727 100644 --- a/cloud/blockstore/libs/storage/core/compaction_map.h +++ b/cloud/blockstore/libs/storage/core/compaction_map.h @@ -79,6 +79,8 @@ class TCompactionMap TVector GetTop(size_t count) const; TVector GetTopByGarbageBlockCount(size_t count) const; TVector GetNonEmptyRanges() const; + // Returns 
non-empty ranges, starting from the blockIndex + TVector GetNonEmptyRanges(ui32 blockIndex, ui32 rangesCount) const; ui32 GetNonEmptyRangeCount() const; ui32 GetRangeStart(ui32 blockIndex) const; ui32 GetRangeIndex(ui32 blockIndex) const; diff --git a/cloud/blockstore/libs/storage/core/compaction_map_ut.cpp b/cloud/blockstore/libs/storage/core/compaction_map_ut.cpp index ca4cc89c5b2..fae13492518 100644 --- a/cloud/blockstore/libs/storage/core/compaction_map_ut.cpp +++ b/cloud/blockstore/libs/storage/core/compaction_map_ut.cpp @@ -244,6 +244,56 @@ Y_UNIT_TEST_SUITE(TCompactionMapTest) UNIT_ASSERT_VALUES_EQUAL(false, stat.Compacted); } } + + Y_UNIT_TEST(ShouldHaveNonEmptyRanges) + { + TCompactionMap map(RangeSize, BuildDefaultCompactionPolicy(5)); + const auto blockCount = 123; + const auto usedBlockCount = 23; + for (size_t i = 1; i <= 100; ++i) { + map.Update(GetGroupIndex(i), i, blockCount, usedBlockCount, false); + } + + { + const auto nonEmptyCount = map.GetNonEmptyRanges().size(); + const auto nonEmptyRanges = map.GetNonEmptyRanges(GetGroupIndex(1), 10); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyCount, map.GetNonEmptyRangeCount()); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyCount, 100); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyRanges.size(), 10); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyRanges[0].BlockIndex, GetGroupIndex(1)); + } + + // empty range must be skipped + map.Update(GetGroupIndex(46), 0, blockCount, usedBlockCount, false); + { + const auto nonEmptyCount = map.GetNonEmptyRanges().size(); + const auto nonEmptyRanges = map.GetNonEmptyRanges(GetGroupIndex(45), 10); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyCount, map.GetNonEmptyRangeCount()); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyCount, 99); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyRanges.size(), 10); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyRanges[1].BlockIndex, GetGroupIndex(47)); + } + + // if first range is empty need to start from first non empty after it + map.Update(GetGroupIndex(45), 0, blockCount, usedBlockCount, false); + { + const auto 
nonEmptyRanges = map.GetNonEmptyRanges(GetGroupIndex(45), 10); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyRanges.size(), 10); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyRanges[0].BlockIndex, GetGroupIndex(47)); + } + + { + const auto nonEmptyRanges = map.GetNonEmptyRanges(GetGroupIndex(95), 10); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyRanges.size(), 6); + } + + map.Update(GetGroupIndex(96), 0, blockCount, usedBlockCount, false); + map.Update(GetGroupIndex(99), 0, blockCount, usedBlockCount, false); + { + const auto nonEmptyRanges = map.GetNonEmptyRanges(GetGroupIndex(95), 10); + UNIT_ASSERT_VALUES_EQUAL(nonEmptyRanges.size(), 4); + } + } } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/core/config.cpp b/cloud/blockstore/libs/storage/core/config.cpp index 22056244bcd..bf5ebda9508 100644 --- a/cloud/blockstore/libs/storage/core/config.cpp +++ b/cloud/blockstore/libs/storage/core/config.cpp @@ -126,6 +126,7 @@ TDuration MSeconds(ui32 value) xxx(ConfigDispatcherSettings, \ NCloud::NProto::TConfigDispatcherSettings, \ {} )\ + xxx(YdbViewerServiceEnabled, bool, false )\ // BLOCKSTORE_STORAGE_CONFIG_RO #define BLOCKSTORE_STORAGE_CONFIG_RW(xxx) \ @@ -156,10 +157,12 @@ TDuration MSeconds(ui32 value) xxx(TargetCompactionBytesPerOp, ui64, 64_KB )\ xxx(MaxSkippedBlobsDuringCompaction, ui32, 3 )\ xxx(IncrementalCompactionEnabled, bool, false )\ - xxx(CompactionRangeCountPerRun, ui32, 3 )\ xxx(CompactionCountPerRunIncreasingThreshold, ui32, 0 )\ xxx(CompactionCountPerRunDecreasingThreshold, ui32, 0 )\ + xxx(CompactionRangeCountPerRun, ui32, 3 )\ xxx(MaxCompactionRangeCountPerRun, ui32, 8 )\ + xxx(GarbageCompactionRangeCountPerRun, ui32, 1 )\ + xxx(ForcedCompactionRangeCountPerRun, ui32, 1 )\ xxx(CompactionCountPerRunChangingPeriod, TDuration, Seconds(60) )\ xxx(BatchCompactionEnabled, bool, false )\ xxx(BlobPatchingEnabled, bool, false )\ @@ -491,6 +494,7 @@ TDuration MSeconds(ui32 value) xxx(VolumeProxyCacheRetryDuration, TDuration, Seconds(15) )\ \ 
xxx(UseDirectCopyRange, bool, false )\ + xxx(NonReplicatedVolumeDirectAcquireEnabled, bool, false )\ xxx(MaxShadowDiskFillBandwidth, ui32, 512 )\ xxx(MaxShadowDiskFillIoDepth, ui32, 1 )\ xxx(BackgroundOperationsTotalBandwidth, ui32, 1024 )\ @@ -509,6 +513,7 @@ TDuration MSeconds(ui32 value) xxx(ScrubbingBandwidth, ui64, 20 )\ xxx(MaxScrubbingBandwidth, ui64, 50 )\ xxx(MinScrubbingBandwidth, ui64, 5 )\ + xxx(AutomaticallyEnableBufferCopyingAfterChecksumMismatch, bool, false )\ \ xxx(OptimizeVoidBuffersTransferForReadsEnabled, bool, false )\ xxx(VolumeHistoryCleanupItemCount, ui32, 100'000 )\ @@ -524,6 +529,8 @@ TDuration MSeconds(ui32 value) xxx(EnableToChangeStatesFromDiskRegistryMonpage, bool, false )\ xxx(EnableToChangeErrorStatesFromDiskRegistryMonpage, bool, false )\ xxx(CalculateSplittedUsedQuotaMetric, bool, false )\ + \ + xxx(DestroyVolumeTimeout, TDuration, Seconds(30) )\ // BLOCKSTORE_STORAGE_CONFIG_RW #define BLOCKSTORE_STORAGE_CONFIG(xxx) \ diff --git a/cloud/blockstore/libs/storage/core/config.h b/cloud/blockstore/libs/storage/core/config.h index ce6b04e382c..bf48eab3566 100644 --- a/cloud/blockstore/libs/storage/core/config.h +++ b/cloud/blockstore/libs/storage/core/config.h @@ -95,6 +95,8 @@ class TStorageConfig ui32 GetCompactionCountPerRunDecreasingThreshold() const; ui32 GetCompactionRangeCountPerRun() const; ui32 GetMaxCompactionRangeCountPerRun() const; + ui32 GetGarbageCompactionRangeCountPerRun() const; + ui32 GetForcedCompactionRangeCountPerRun() const; TDuration GetCompactionCountPerRunChangingPeriod() const; bool GetBatchCompactionEnabled() const; bool GetBlobPatchingEnabled() const; @@ -627,6 +629,12 @@ class TStorageConfig GetEnableToChangeErrorStatesFromDiskRegistryMonpage() const; [[nodiscard]] bool GetCalculateSplittedUsedQuotaMetric() const; + + bool GetYdbViewerServiceEnabled() const; + + bool GetAutomaticallyEnableBufferCopyingAfterChecksumMismatch() const; + [[nodiscard]] bool GetNonReplicatedVolumeDirectAcquireEnabled() const; + 
[[nodiscard]] TDuration GetDestroyVolumeTimeout() const; }; ui64 GetAllocationUnit( diff --git a/cloud/blockstore/libs/storage/core/proto_helpers.cpp b/cloud/blockstore/libs/storage/core/proto_helpers.cpp index 4f1c472d4f7..c56b9c4e04a 100644 --- a/cloud/blockstore/libs/storage/core/proto_helpers.cpp +++ b/cloud/blockstore/libs/storage/core/proto_helpers.cpp @@ -413,16 +413,22 @@ TBlockRange64 BuildRequestBlockRange( TBlockRange64 BuildRequestBlockRange( const TEvDiskAgent::TEvWriteDeviceBlocksRequest& request) +{ + return BuildRequestBlockRange(request.Record); +} + +TBlockRange64 BuildRequestBlockRange( + const NProto::TWriteDeviceBlocksRequest& request) { ui64 totalSize = 0; - for (const auto& buffer: request.Record.GetBlocks().GetBuffers()) { + for (const auto& buffer: request.GetBlocks().GetBuffers()) { totalSize += buffer.length(); } - Y_ABORT_UNLESS(totalSize % request.Record.GetBlockSize() == 0); + Y_ABORT_UNLESS(totalSize % request.GetBlockSize() == 0); return TBlockRange64::WithLength( - request.Record.GetStartIndex(), - totalSize / request.Record.GetBlockSize()); + request.GetStartIndex(), + totalSize / request.GetBlockSize()); } TBlockRange64 BuildRequestBlockRange( @@ -436,7 +442,12 @@ TBlockRange64 BuildRequestBlockRange( ui64 GetVolumeRequestId( const TEvDiskAgent::TEvWriteDeviceBlocksRequest& request) { - return request.Record.GetVolumeRequestId(); + return GetVolumeRequestId(request.Record); +} + +ui64 GetVolumeRequestId(const NProto::TWriteDeviceBlocksRequest& request) +{ + return request.GetVolumeRequestId(); } ui64 GetVolumeRequestId(const TEvDiskAgent::TEvZeroDeviceBlocksRequest& request) @@ -444,4 +455,15 @@ ui64 GetVolumeRequestId(const TEvDiskAgent::TEvZeroDeviceBlocksRequest& request) return request.Record.GetVolumeRequestId(); } +TString LogDevices(const TVector& devices) +{ + TStringBuilder sb; + sb << "( "; + for (const auto& d: devices) { + sb << d.GetDeviceUUID() << "@" << d.GetAgentId() << " "; + } + sb << ")"; + return sb; +} + } // 
namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/core/proto_helpers.h b/cloud/blockstore/libs/storage/core/proto_helpers.h index 0fbecf7cbc8..86c4f5f5e58 100644 --- a/cloud/blockstore/libs/storage/core/proto_helpers.h +++ b/cloud/blockstore/libs/storage/core/proto_helpers.h @@ -217,10 +217,15 @@ TBlockRange64 BuildRequestBlockRange( TBlockRange64 BuildRequestBlockRange( const TEvDiskAgent::TEvWriteDeviceBlocksRequest& request); +TBlockRange64 BuildRequestBlockRange( + const NProto::TWriteDeviceBlocksRequest& request); + TBlockRange64 BuildRequestBlockRange( const TEvDiskAgent::TEvZeroDeviceBlocksRequest& request); ui64 GetVolumeRequestId(const TEvDiskAgent::TEvWriteDeviceBlocksRequest& request); +ui64 GetVolumeRequestId(const NProto::TWriteDeviceBlocksRequest& request); ui64 GetVolumeRequestId(const TEvDiskAgent::TEvZeroDeviceBlocksRequest& request); +TString LogDevices(const TVector& devices); } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/disk_agent/actors/direct_copy_actor.cpp b/cloud/blockstore/libs/storage/disk_agent/actors/direct_copy_actor.cpp index ec5261af0c7..05ae155e7a0 100644 --- a/cloud/blockstore/libs/storage/disk_agent/actors/direct_copy_actor.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/actors/direct_copy_actor.cpp @@ -31,10 +31,12 @@ auto PrepareRequest( TDirectCopyActor::TDirectCopyActor( const TActorId& source, TRequestInfoPtr requestInfo, - NProto::TDirectCopyBlocksRequest request) + NProto::TDirectCopyBlocksRequest request, + ui64 recommendedBandwidth) : Source(source) , RequestInfo(std::move(requestInfo)) , Request(std::move(request)) + , RecommendedBandwidth(recommendedBandwidth) {} void TDirectCopyActor::Bootstrap(const TActorContext& ctx) @@ -88,6 +90,7 @@ void TDirectCopyActor::Done(const NActors::TActorContext& ctx) response->Record.SetAllZeroes(AllZeroes); response->Record.SetReadDuration(readDuration.MicroSeconds()); 
response->Record.SetWriteDuration(writeDuration.MicroSeconds()); + response->Record.SetRecommendedBandwidth(RecommendedBandwidth); NCloud::Reply(ctx, *RequestInfo, std::move(response)); diff --git a/cloud/blockstore/libs/storage/disk_agent/actors/direct_copy_actor.h b/cloud/blockstore/libs/storage/disk_agent/actors/direct_copy_actor.h index 6ab322554d5..88a7f5ea0eb 100644 --- a/cloud/blockstore/libs/storage/disk_agent/actors/direct_copy_actor.h +++ b/cloud/blockstore/libs/storage/disk_agent/actors/direct_copy_actor.h @@ -20,6 +20,7 @@ class TDirectCopyActor final const NActors::TActorId Source; const TRequestInfoPtr RequestInfo; const NProto::TDirectCopyBlocksRequest Request; + const ui64 RecommendedBandwidth; bool AllZeroes = false; TInstant ReadStartAt; @@ -29,7 +30,8 @@ class TDirectCopyActor final TDirectCopyActor( const NActors::TActorId& source, TRequestInfoPtr requestInfo, - NProto::TDirectCopyBlocksRequest request); + NProto::TDirectCopyBlocksRequest request, + ui64 recommendedBandwidth); void Bootstrap(const NActors::TActorContext& ctx); diff --git a/cloud/blockstore/libs/storage/disk_agent/actors/io_request_parser.cpp b/cloud/blockstore/libs/storage/disk_agent/actors/io_request_parser.cpp index dba36fbf162..77e467187fa 100644 --- a/cloud/blockstore/libs/storage/disk_agent/actors/io_request_parser.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/actors/io_request_parser.cpp @@ -21,23 +21,25 @@ class TIORequestParserActor: public TActor { private: const TActorId Owner; + TStorageBufferAllocator Allocator; public: - explicit TIORequestParserActor(const TActorId& owner) + TIORequestParserActor( + const TActorId& owner, + TStorageBufferAllocator allocator) : TActor(&TIORequestParserActor::StateWork) , Owner(owner) + , Allocator(std::move(allocator)) {} private: STFUNC(StateWork) { switch (ev->GetTypeRewrite()) { - HFunc(NActors::TEvents::TEvPoisonPill, HandlePoisonPill); + HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); case 
TEvDiskAgent::EvWriteDeviceBlocksRequest: - HandleRequest( - ev, - TEvDiskAgentPrivate::EvParsedWriteDeviceBlocksRequest); + HandleWriteDeviceBlocks(ev); break; case TEvDiskAgent::EvReadDeviceBlocksRequest: @@ -69,6 +71,48 @@ class TIORequestParserActor: public TActor Die(ctx); } + void HandleWriteDeviceBlocks(TAutoPtr& ev) + { + auto request = std::make_unique< + TEvDiskAgentPrivate::TEvParsedWriteDeviceBlocksRequest>(); + + // parse protobuf + auto* msg = ev->Get(); + request->Record.Swap(&msg->Record); + + if (Allocator) { + const auto& buffers = request->Record.GetBlocks().GetBuffers(); + + ui64 bytesCount = 0; + for (const auto& buffer: buffers) { + bytesCount += buffer.size(); + } + + request->Storage = Allocator(bytesCount); + request->StorageSize = bytesCount; + + char* dst = request->Storage.get(); + for (const auto& buffer: buffers) { + std::memcpy(dst, buffer.data(), buffer.size()); + dst += buffer.size(); + } + request->Record.ClearBlocks(); + } + + auto newEv = std::make_unique( + ev->Recipient, + ev->Sender, + request.release(), + ev->Flags, + ev->Cookie, + nullptr, // forwardOnNondelivery + std::move(ev->TraceId)); + + newEv->Rewrite(newEv->Type, Owner); + + ActorContext().Send(std::move(newEv)); + } + template void HandleRequest(TAutoPtr& ev, ui32 typeRewrite) { @@ -85,9 +129,11 @@ class TIORequestParserActor: public TActor //////////////////////////////////////////////////////////////////////////////// -std::unique_ptr CreateIORequestParserActor(const TActorId& owner) +std::unique_ptr CreateIORequestParserActor( + const TActorId& owner, + TStorageBufferAllocator allocator) { - return std::make_unique(owner); + return std::make_unique(owner, std::move(allocator)); } } // namespace NCloud::NBlockStore::NStorage::NDiskAgent diff --git a/cloud/blockstore/libs/storage/disk_agent/actors/io_request_parser.h b/cloud/blockstore/libs/storage/disk_agent/actors/io_request_parser.h index 1f674e4a4a2..9485c0abd44 100644 --- 
a/cloud/blockstore/libs/storage/disk_agent/actors/io_request_parser.h +++ b/cloud/blockstore/libs/storage/disk_agent/actors/io_request_parser.h @@ -2,13 +2,18 @@ #include +#include #include namespace NCloud::NBlockStore::NStorage::NDiskAgent { //////////////////////////////////////////////////////////////////////////////// +using TStorageBufferAllocator = + std::function(ui64 bytesCount)>; + std::unique_ptr CreateIORequestParserActor( - const NActors::TActorId& owner); + const NActors::TActorId& owner, + TStorageBufferAllocator allocator); } // namespace NCloud::NBlockStore::NStorage::NDiskAgent diff --git a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor.cpp b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor.cpp index c10c5de69c3..9feac6ccbc8 100644 --- a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor.cpp @@ -372,13 +372,9 @@ STFUNC(TDiskAgentActor::StateWork) TEvDiskAgent::TEvDisableConcreteAgentRequest, HandleDisableConcreteAgent); - case TEvDiskAgentPrivate::EvParsedWriteDeviceBlocksRequest: - HandleWriteDeviceBlocks( - *reinterpret_cast< - typename TEvDiskAgent::TEvWriteDeviceBlocksRequest::TPtr*>( - &ev), - ActorContext()); - break; + HFunc( + TEvDiskAgentPrivate::TEvParsedWriteDeviceBlocksRequest, + HandleParsedWriteDeviceBlocks); case TEvDiskAgentPrivate::EvParsedReadDeviceBlocksRequest: HandleReadDeviceBlocks( diff --git a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor.h b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor.h index 53681323a1f..768c5abce1d 100644 --- a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor.h +++ b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -71,6 +72,8 @@ class TDiskAgentActor final // Pending WaitReady requests TDeque PendingRequests; + TBandwidthCalculator BandwidthCalculator {*AgentConfig}; + ERegistrationState 
RegistrationState = ERegistrationState::NotStarted; NActors::TActorId StatsActor; @@ -134,10 +137,10 @@ class TDiskAgentActor final void SendRegisterRequest(const NActors::TActorContext& ctx); - template + template void PerformIO( const NActors::TActorContext& ctx, - const typename TMethod::TRequest::TPtr& ev, + const TEv& ev, TOp operation); template @@ -222,6 +225,10 @@ class TDiskAgentActor final const TEvDiskAgentPrivate::TEvCancelSuspensionRequest::TPtr& ev, const NActors::TActorContext& ctx); + void HandleParsedWriteDeviceBlocks( + const TEvDiskAgentPrivate::TEvParsedWriteDeviceBlocksRequest::TPtr& ev, + const NActors::TActorContext& ctx); + bool HandleRequests(STFUNC_SIG); bool RejectRequests(STFUNC_SIG); diff --git a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_direct_copy.cpp b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_direct_copy.cpp index aa8ed22f154..f680bf5e2dc 100644 --- a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_direct_copy.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_direct_copy.cpp @@ -29,11 +29,16 @@ void TDiskAgentActor::HandleDirectCopyBlocks( record.GetBlockCount())) .c_str()); + ui64 recommendedBandwidth = BandwidthCalculator.RegisterRequest( + record.GetSourceDeviceUUID(), + ctx.Now()); + NCloud::Register( ctx, SelfId(), CreateRequestInfo(ev->Sender, ev->Cookie, msg->CallContext), - std::move(record)); + std::move(record), + recommendedBandwidth); } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_init.cpp b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_init.cpp index 1f30059eece..50963f521fb 100644 --- a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_init.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_init.cpp @@ -142,9 +142,25 @@ void TDiskAgentActor::HandleInitAgentCompleted( ctx, TBlockStoreComponents::DISK_AGENT, "Create " << count << " IORequestParserActor 
actors"); + + NDiskAgent::TStorageBufferAllocator allocator; + if (AgentConfig->GetIOParserActorAllocateStorageEnabled() && + AgentConfig->GetBackend() == NProto::DISK_AGENT_BACKEND_AIO) + { + allocator = [](ui64 byteCount) + { + return std::shared_ptr( + static_cast( + std::aligned_alloc(DefaultBlockSize, byteCount)), + std::free); + }; + } + IOParserActors.reserve(count); for (ui32 i = 0; i != count; ++i) { - auto actor = NDiskAgent::CreateIORequestParserActor(ctx.SelfID); + auto actor = + NDiskAgent::CreateIORequestParserActor(ctx.SelfID, allocator); + IOParserActors.push_back(ctx.Register( actor.release(), TMailboxType::TinyReadAsFilled, diff --git a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_io.cpp b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_io.cpp index d5f55290426..3b7f782d443 100644 --- a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_io.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_io.cpp @@ -17,6 +17,28 @@ namespace { //////////////////////////////////////////////////////////////////////////////// +ui64 GetVolumeRequestId( + const TEvDiskAgentPrivate::TParsedWriteDeviceBlocksRequest& request) +{ + return NStorage::GetVolumeRequestId(request.Record); +} + +TBlockRange64 BuildRequestBlockRange( + const TEvDiskAgentPrivate::TParsedWriteDeviceBlocksRequest& request) +{ + if (!request.StorageSize) { + return NStorage::BuildRequestBlockRange(request.Record); + } + + Y_ABORT_UNLESS(request.StorageSize % request.Record.GetBlockSize() == 0); + + return TBlockRange64::WithLength( + request.Record.GetStartIndex(), + request.StorageSize / request.Record.GetBlockSize()); +} + +//////////////////////////////////////////////////////////////////////////////// + template constexpr bool IsWriteDeviceMethod = std::is_same_v || @@ -113,10 +135,10 @@ std::pair HandleException( //////////////////////////////////////////////////////////////////////////////// -template +template void TDiskAgentActor::PerformIO( 
const TActorContext& ctx, - const typename TMethod::TRequest::TPtr& ev, + const TEv& ev, TOp operation) { auto* msg = ev->Get(); @@ -327,6 +349,58 @@ void TDiskAgentActor::HandleWriteDeviceBlocks( PerformIO(ctx, ev, &TDiskAgentState::Write); } +void TDiskAgentActor::HandleParsedWriteDeviceBlocks( + const TEvDiskAgentPrivate::TEvParsedWriteDeviceBlocksRequest::TPtr& ev, + const TActorContext& ctx) +{ + BLOCKSTORE_DISK_AGENT_COUNTER(WriteDeviceBlocks); + + using TMethod = TEvDiskAgent::TWriteDeviceBlocksMethod; + + if (CheckIntersection(ctx, ev)) { + return; + } + + auto* msg = ev->Get(); + + if (!msg->Storage) { + PerformIO(ctx, ev, &TDiskAgentState::Write); + return; + } + + // Attach storage to NProto::TWriteBlocksRequest + struct TWriteBlocksRequestWithStorage + : NProto::TWriteBlocksRequest + { + TStorageBuffer Storage; + }; + + PerformIO( + ctx, + ev, + [storage = std::move(msg->Storage), storageSize = msg->StorageSize]( + TDiskAgentState& self, + TInstant now, + NProto::TWriteDeviceBlocksRequest request) mutable + { + auto writeRequest = + std::make_shared(); + writeRequest->MutableHeaders()->Swap(request.MutableHeaders()); + writeRequest->MutableBlocks()->Swap(request.MutableBlocks()); + writeRequest->SetStartIndex(request.GetStartIndex()); + writeRequest->Storage = std::move(storage); + + TStringBuf buffer{writeRequest->Storage.get(), storageSize}; + + return self.WriteBlocks( + now, + request.GetDeviceUUID(), + std::move(writeRequest), + request.GetBlockSize(), + buffer); + }); +} + void TDiskAgentActor::HandleZeroDeviceBlocks( const TEvDiskAgent::TEvZeroDeviceBlocksRequest::TPtr& ev, const TActorContext& ctx) diff --git a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_ut.cpp b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_ut.cpp index 8eab7729019..10a2def21e4 100644 --- a/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_ut.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/disk_agent_actor_ut.cpp @@ -349,6 +349,10 @@ struct 
TCopyRangeFixture: public NUnitTest::TBaseFixture config.SetIOParserActorCount(0); config.SetBackend(NProto::DISK_AGENT_BACKEND_AIO); + auto* throttling = config.MutableThrottlingConfig(); + throttling->SetDirectCopyBandwidthFraction(0.5); + throttling->SetDefaultNetworkMbitThroughput(800); + for (const auto& memDevice: config.GetMemoryDevices()) { *config.AddMemoryDevices() = PrepareMemoryDevice( memDevice.GetDeviceId(), @@ -4843,6 +4847,7 @@ Y_UNIT_TEST_SUITE(TDiskAgentTest) auto config = DiskAgentConfig({deviceId}); config.SetIOParserActorCount(4); config.SetOffloadAllIORequestsParsingEnabled(true); + config.SetIOParserActorAllocateStorageEnabled(true); return config; }(); @@ -5665,6 +5670,60 @@ Y_UNIT_TEST_SUITE(TDiskAgentTest) .MilliSeconds(), 10); } + + Y_UNIT_TEST_F(ShouldPerformDirectCopyAndCalcBandwidth, TCopyRangeFixture) + { + { // The first disk agent has network bandwidth config. It should + // recommend the bandwidth. + auto request = + std::make_unique(); + request->Record.MutableHeaders()->SetClientId(SourceClientId); + + request->Record.SetSourceDeviceUUID("DA1-1"); + request->Record.SetSourceStartIndex(SourceStartIndex); + request->Record.SetBlockSize(BlockSize); + request->Record.SetBlockCount(BlockCount); + + request->Record.SetTargetNodeId(Runtime->GetNodeId(1)); + request->Record.SetTargetClientId(TargetClientId); + request->Record.SetTargetDeviceUUID("DA2-1"); + request->Record.SetTargetStartIndex(TargetStartIndex); + + DiskAgent1->SendRequest(std::move(request)); + auto response = + DiskAgent1 + ->RecvResponse(); + UNIT_ASSERT(!HasError(response->GetError())); + UNIT_ASSERT_VALUES_EQUAL( + 50_MB, + response->Record.GetRecommendedBandwidth()); + } + { // The second disk agent has no network bandwidth configuration. It + // shouldn't recommend the bandwidth. 
+ auto request = + std::make_unique(); + request->Record.MutableHeaders()->SetClientId(TargetClientId); + + request->Record.SetSourceDeviceUUID("DA2-2"); + request->Record.SetSourceStartIndex(SourceStartIndex); + request->Record.SetBlockSize(BlockSize); + request->Record.SetBlockCount(BlockCount); + + request->Record.SetTargetNodeId(Runtime->GetNodeId(0)); + request->Record.SetTargetClientId(ClientId); + request->Record.SetTargetDeviceUUID("DA1-1"); + request->Record.SetTargetStartIndex(TargetStartIndex); + + DiskAgent2->SendRequest(std::move(request)); + auto response = + DiskAgent2 + ->RecvResponse(); + UNIT_ASSERT(!HasError(response->GetError())); + UNIT_ASSERT_VALUES_EQUAL( + 0, + response->Record.GetRecommendedBandwidth()); + } + } } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/disk_agent/disk_agent_private.h b/cloud/blockstore/libs/storage/disk_agent/disk_agent_private.h index 94cfa9f17d0..034a110abfc 100644 --- a/cloud/blockstore/libs/storage/disk_agent/disk_agent_private.h +++ b/cloud/blockstore/libs/storage/disk_agent/disk_agent_private.h @@ -157,6 +157,17 @@ struct TEvDiskAgentPrivate struct TCancelSuspensionRequest {}; + // + // ParsedWriteDeviceBlocksRequest + // + + struct TParsedWriteDeviceBlocksRequest + { + NProto::TWriteDeviceBlocksRequest Record; + TStorageBuffer Storage; + ui64 StorageSize = 0; + }; + // // Events declaration // @@ -207,6 +218,10 @@ struct TEvDiskAgentPrivate TCancelSuspensionRequest, EvCancelSuspensionRequest>; + using TEvParsedWriteDeviceBlocksRequest = TRequestEvent< + TParsedWriteDeviceBlocksRequest, + EvParsedWriteDeviceBlocksRequest>; + BLOCKSTORE_DECLARE_EVENTS(UpdateSessionCache) }; diff --git a/cloud/blockstore/libs/storage/disk_agent/disk_agent_state.cpp b/cloud/blockstore/libs/storage/disk_agent/disk_agent_state.cpp index 0c955b57742..44d79e8be6d 100644 --- a/cloud/blockstore/libs/storage/disk_agent/disk_agent_state.cpp +++ 
b/cloud/blockstore/libs/storage/disk_agent/disk_agent_state.cpp @@ -645,35 +645,49 @@ TFuture TDiskAgentState::Write( TInstant now, NProto::TWriteDeviceBlocksRequest request) { - CheckIfDeviceIsDisabled( - request.GetDeviceUUID(), - request.GetHeaders().GetClientId()); - - const auto& device = GetDeviceState( - request.GetDeviceUUID(), - request.GetHeaders().GetClientId(), - NProto::VOLUME_ACCESS_READ_WRITE); - auto writeRequest = std::make_shared(); writeRequest->MutableHeaders()->CopyFrom(request.GetHeaders()); writeRequest->SetStartIndex(request.GetStartIndex()); writeRequest->MutableBlocks()->Swap(request.MutableBlocks()); - WriteProfileLog( + return WriteBlocks( now, request.GetDeviceUUID(), - *writeRequest, + std::move(writeRequest), request.GetBlockSize(), - ESysRequestType::WriteDeviceBlocks + {} // buffer ); +} + +TFuture TDiskAgentState::WriteBlocks( + TInstant now, + const TString& deviceUUID, + std::shared_ptr request, + ui32 blockSize, + TStringBuf buffer) +{ + CheckIfDeviceIsDisabled( + deviceUUID, + request->GetHeaders().GetClientId()); + + const auto& device = GetDeviceState( + deviceUUID, + request->GetHeaders().GetClientId(), + NProto::VOLUME_ACCESS_READ_WRITE); + + WriteProfileLog( + now, + deviceUUID, + *request, + blockSize, + ESysRequestType::WriteDeviceBlocks); auto result = device.StorageAdapter->WriteBlocks( now, MakeIntrusive(), - std::move(writeRequest), - request.GetBlockSize(), - {} // no data buffer - ); + std::move(request), + blockSize, + buffer); return result.Apply( [] (const auto& future) { diff --git a/cloud/blockstore/libs/storage/disk_agent/disk_agent_state.h b/cloud/blockstore/libs/storage/disk_agent/disk_agent_state.h index e36ce10810b..8937d5538ed 100644 --- a/cloud/blockstore/libs/storage/disk_agent/disk_agent_state.h +++ b/cloud/blockstore/libs/storage/disk_agent/disk_agent_state.h @@ -103,6 +103,13 @@ class TDiskAgentState TInstant now, NProto::TWriteDeviceBlocksRequest request); + NThreading::TFuture WriteBlocks( + 
TInstant now, + const TString& deviceUUID, + std::shared_ptr request, + ui32 blockSize, + TStringBuf buffer); + NThreading::TFuture WriteZeroes( TInstant now, NProto::TZeroDeviceBlocksRequest request); diff --git a/cloud/blockstore/libs/storage/disk_agent/disk_agent_state_ut.cpp b/cloud/blockstore/libs/storage/disk_agent/disk_agent_state_ut.cpp index 8d3f394294e..6d10fd691e5 100644 --- a/cloud/blockstore/libs/storage/disk_agent/disk_agent_state_ut.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/disk_agent_state_ut.cpp @@ -114,7 +114,7 @@ auto CreateSpdkConfig() device.SetDeviceId("uuid-" + ToString(i + 1)); } - return std::make_shared(std::move(config), "rack"); + return std::make_shared(std::move(config), "rack", 1000); } struct TNullConfigParams @@ -150,7 +150,7 @@ auto CreateNullConfig(TNullConfigParams params) config.SetCachedConfigPath(std::move(params.CachedConfigPath)); config.SetCachedSessionsPath(std::move(params.CachedSessionsPath)); - return std::make_shared(std::move(config), "rack"); + return std::make_shared(std::move(config), "rack", 1000); } TStorageConfigPtr CreateStorageConfig() @@ -736,7 +736,7 @@ Y_UNIT_TEST_SUITE(TDiskAgentStateTest) size_t errors, bool checkLockedDevices) { - auto config = std::make_shared(cfg, "rack"); + auto config = std::make_shared(cfg, "rack", 1000); TDiskAgentState state( CreateStorageConfig(), @@ -812,7 +812,7 @@ Y_UNIT_TEST_SUITE(TDiskAgentStateTest) TDiskAgentState state( CreateStorageConfig(), - std::make_shared(std::move(config), "rack"), + std::make_shared(std::move(config), "rack", 1000), nullptr, // spdk CreateTestAllocator(), NServer::CreateNullStorageProvider(), diff --git a/cloud/blockstore/libs/storage/disk_agent/hash_table_storage.cpp b/cloud/blockstore/libs/storage/disk_agent/hash_table_storage.cpp index 7cf9cc2e135..7500f974a9c 100644 --- a/cloud/blockstore/libs/storage/disk_agent/hash_table_storage.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/hash_table_storage.cpp @@ -109,7 +109,7 @@ struct 
THashTableStorage final return MakeFuture(std::move(response)); } - auto sglist = guard.Get(); + const auto& sglist = guard.Get(); auto b = request->GetStartIndex(); auto e = request->GetStartIndex() + request->BlocksCount; @@ -120,11 +120,15 @@ struct THashTableStorage final return MakeFuture(std::move(response)); } - while (b < e) { - Blocks[b] = sglist[b - request->GetStartIndex()].AsStringBuf(); + TSgList dst(request->BlocksCount); + while (b < e) { + auto& block = Blocks[b]; + block.resize(request->BlockSize); + dst[b - request->GetStartIndex()] = {block.data(), block.size()}; ++b; } + SgListCopy(sglist, dst); return MakeFuture(std::move(response)); } diff --git a/cloud/blockstore/libs/storage/disk_agent/model/bandwidth_calculator.cpp b/cloud/blockstore/libs/storage/disk_agent/model/bandwidth_calculator.cpp new file mode 100644 index 00000000000..ab8edff7e81 --- /dev/null +++ b/cloud/blockstore/libs/storage/disk_agent/model/bandwidth_calculator.cpp @@ -0,0 +1,63 @@ +#include "bandwidth_calculator.h" + +#include + +namespace NCloud::NBlockStore::NStorage { + +//////////////////////////////////////////////////////////////////////////////// + +namespace { + +constexpr auto WindowDuration = TDuration::Seconds(1); + +ui64 GetNetworkBandwidth(const TDiskAgentConfig& config) +{ + const ui64 networkMbitThroughput = + config.GetNetworkMbitThroughput() + ? 
config.GetNetworkMbitThroughput() + : config.GetThrottlerConfig().GetDefaultNetworkMbitThroughput(); + return (networkMbitThroughput * 1_MB / 8) * + config.GetThrottlerConfig().GetDirectCopyBandwidthFraction(); +} + +} // namespace + +TBandwidthCalculator::TBandwidthCalculator(const TDiskAgentConfig& config) + : MaxTotalBandwidth(GetNetworkBandwidth(config)) + , MaxDeviceBandwidth( + config.GetThrottlerConfig().GetMaxDeviceBandwidthMiB() * 1_MB) +{} + +TBandwidthCalculator::~TBandwidthCalculator() = default; + +ui64 TBandwidthCalculator::RegisterRequest( + const TString& deviceUUID, + TInstant now) +{ + DeviceLastRequest[deviceUUID] = now; + ClearHistory(now - WindowDuration); + return GetRecommendedBandwidth(); +} + +void TBandwidthCalculator::ClearHistory(TInstant deadline) +{ + for (auto it = DeviceLastRequest.begin(); it != DeviceLastRequest.end();) { + if (it->second < deadline) { + DeviceLastRequest.erase(it++); + } else { + ++it; + } + } +} + +ui64 TBandwidthCalculator::GetRecommendedBandwidth() const +{ + Y_ABORT_UNLESS(DeviceLastRequest.size()); + ui64 result = MaxTotalBandwidth / DeviceLastRequest.size(); + if (MaxDeviceBandwidth) { + result = Min(MaxDeviceBandwidth, result); + } + return result; +} + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/disk_agent/model/bandwidth_calculator.h b/cloud/blockstore/libs/storage/disk_agent/model/bandwidth_calculator.h new file mode 100644 index 00000000000..c2c084f48b4 --- /dev/null +++ b/cloud/blockstore/libs/storage/disk_agent/model/bandwidth_calculator.h @@ -0,0 +1,32 @@ +#pragma once + +#include + +#include +#include +#include + +namespace NCloud::NBlockStore::NStorage { + +//////////////////////////////////////////////////////////////////////////////// + +class TBandwidthCalculator +{ + const ui64 MaxTotalBandwidth; + const ui64 MaxDeviceBandwidth; + + THashMap DeviceLastRequest; + +public: + explicit TBandwidthCalculator(const TDiskAgentConfig& config); + 
~TBandwidthCalculator(); + + // Returns the recommended bandwidth for the next request. + ui64 RegisterRequest(const TString& deviceUUID, TInstant now); + +private: + void ClearHistory(TInstant deadline); + [[nodiscard]] ui64 GetRecommendedBandwidth() const; +}; + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/disk_agent/model/bandwidth_calculator_ut.cpp b/cloud/blockstore/libs/storage/disk_agent/model/bandwidth_calculator_ut.cpp new file mode 100644 index 00000000000..a34439e1899 --- /dev/null +++ b/cloud/blockstore/libs/storage/disk_agent/model/bandwidth_calculator_ut.cpp @@ -0,0 +1,76 @@ +#include "bandwidth_calculator.h" + +#include "public.h" + +#include +#include + +#include + +namespace NCloud::NBlockStore::NStorage { + +//////////////////////////////////////////////////////////////////////////////// + +Y_UNIT_TEST_SUITE(TBandwidthCalculatorTest) +{ + Y_UNIT_TEST(ShouldUseDefaultBandwidth) + { + NProto::TDiskAgentConfig proto; + auto* throttling = proto.MutableThrottlingConfig(); + throttling->SetDefaultNetworkMbitThroughput(384); // 48 MiB + throttling->SetDirectCopyBandwidthFraction(0.5); // 24 MiB + throttling->SetMaxDeviceBandwidthMiB(20); // 20 MiB + + TDiskAgentConfig config(std::move(proto), "rack", 0); + TBandwidthCalculator bandwidthCalculator(config); + + TInstant now = TInstant::Now(); + // One client is limited to the MaxDeviceBandwidth + ui64 bandwidth = bandwidthCalculator.RegisterRequest("device_1", now); + UNIT_ASSERT_VALUES_EQUAL(20_MB, bandwidth); + + // Two clients receive half of the bandwidth each + bandwidth = bandwidthCalculator.RegisterRequest("device_2", now); + UNIT_ASSERT_VALUES_EQUAL(12_MB, bandwidth); + bandwidth = bandwidthCalculator.RegisterRequest("device_1", now); + UNIT_ASSERT_VALUES_EQUAL(12_MB, bandwidth); + + // Three clients receive a third of the bandwidth each + bandwidth = bandwidthCalculator.RegisterRequest("device_3", now); + UNIT_ASSERT_VALUES_EQUAL(8_MB, bandwidth); + 
bandwidth = bandwidthCalculator.RegisterRequest("device_2", now); + UNIT_ASSERT_VALUES_EQUAL(8_MB, bandwidth); + bandwidth = bandwidthCalculator.RegisterRequest("device_1", now); + UNIT_ASSERT_VALUES_EQUAL(8_MB, bandwidth); + + // After a second, we assume that the clients have left + now = now + TDuration::MilliSeconds(1001); + bandwidth = bandwidthCalculator.RegisterRequest("device_1", now); + UNIT_ASSERT_VALUES_EQUAL(20_MB, bandwidth); + } + + Y_UNIT_TEST(ShouldUseInfraNetworkBandwidth) + { + NProto::TDiskAgentConfig proto; + auto* throttling = proto.MutableThrottlingConfig(); + throttling->SetDefaultNetworkMbitThroughput(100); // 12.5 MiB + throttling->SetDirectCopyBandwidthFraction(0.6); + + TDiskAgentConfig config( + std::move(proto), + "rack", + 800 // 100 MiB + ); + TBandwidthCalculator bandwidthCalculator(config); + + // One client got all the bandwidth + UNIT_ASSERT_VALUES_EQUAL( + 60_MB, + bandwidthCalculator.RegisterRequest("device_1", TInstant::Now())); + UNIT_ASSERT_VALUES_EQUAL( + 60_MB, + bandwidthCalculator.RegisterRequest("device_1", TInstant::Now())); + } +} + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/disk_agent/model/config.cpp b/cloud/blockstore/libs/storage/disk_agent/model/config.cpp index 270a9516cca..bf7c665bd1c 100644 --- a/cloud/blockstore/libs/storage/disk_agent/model/config.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/model/config.cpp @@ -49,6 +49,7 @@ namespace { xxx(MaxAIOContextEvents, ui32, 1024 )\ xxx(PathsPerFileIOService, ui32, 0 )\ xxx(DisableBrokenDevices, bool, 0 )\ + xxx(IOParserActorAllocateStorageEnabled, bool, 0 )\ // BLOCKSTORE_AGENT_CONFIG #define BLOCKSTORE_DECLARE_CONFIG(name, type, value) \ diff --git a/cloud/blockstore/libs/storage/disk_agent/model/config.h b/cloud/blockstore/libs/storage/disk_agent/model/config.h index f7b6b224ddf..01cd5c69b3e 100644 --- a/cloud/blockstore/libs/storage/disk_agent/model/config.h +++ 
b/cloud/blockstore/libs/storage/disk_agent/model/config.h @@ -19,15 +19,18 @@ class TDiskAgentConfig private: NProto::TDiskAgentConfig Config; TString Rack; + ui32 NetworkMbitThroughput = 0; public: TDiskAgentConfig() = default; TDiskAgentConfig( NProto::TDiskAgentConfig config, - TString rack) + TString rack, + ui32 networkMbitThroughput) : Config(std::move(config)) , Rack(std::move(rack)) + , NetworkMbitThroughput(networkMbitThroughput) {} bool GetEnabled() const; @@ -105,6 +108,7 @@ class TDiskAgentConfig ui32 GetIOParserActorCount() const; bool GetOffloadAllIORequestsParsingEnabled() const; + bool GetIOParserActorAllocateStorageEnabled() const; bool GetDisableNodeBrokerRegistrationOnDevicelessAgent() const; ui32 GetMaxAIOContextEvents() const; ui32 GetPathsPerFileIOService() const; @@ -120,6 +124,16 @@ class TDiskAgentConfig return Config.GetPathToSerialNumberMapping(); } + const NProto::TDiskAgentThrottlingConfig& GetThrottlerConfig() const + { + return Config.GetThrottlingConfig(); + } + + ui32 GetNetworkMbitThroughput() const + { + return NetworkMbitThroughput; + } + void Dump(IOutputStream& out) const; void DumpHtml(IOutputStream& out) const; }; diff --git a/cloud/blockstore/libs/storage/disk_agent/model/ut/ya.make b/cloud/blockstore/libs/storage/disk_agent/model/ut/ya.make index e49a8656d3e..6d2c35d6ee4 100644 --- a/cloud/blockstore/libs/storage/disk_agent/model/ut/ya.make +++ b/cloud/blockstore/libs/storage/disk_agent/model/ut/ya.make @@ -3,10 +3,11 @@ UNITTEST_FOR(cloud/blockstore/libs/storage/disk_agent/model) INCLUDE(${ARCADIA_ROOT}/cloud/storage/core/tests/recipes/small.inc) SRCS( + bandwidth_calculator_ut.cpp compare_configs_ut.cpp device_client_ut.cpp - device_guard_ut.cpp device_generator_ut.cpp + device_guard_ut.cpp device_scanner_ut.cpp ) diff --git a/cloud/blockstore/libs/storage/disk_agent/model/ya.make b/cloud/blockstore/libs/storage/disk_agent/model/ya.make index b5be88fbe6d..3eab4e02310 100644 --- 
a/cloud/blockstore/libs/storage/disk_agent/model/ya.make +++ b/cloud/blockstore/libs/storage/disk_agent/model/ya.make @@ -3,6 +3,7 @@ LIBRARY() INCLUDE(${ARCADIA_ROOT}/cloud/storage/deny_ydb_dependency.inc) SRCS( + bandwidth_calculator.cpp compare_configs.cpp config.cpp device_client.cpp diff --git a/cloud/blockstore/libs/storage/disk_agent/spdk_initializer_ut.cpp b/cloud/blockstore/libs/storage/disk_agent/spdk_initializer_ut.cpp index c561b6d6a57..7e9f38dc475 100644 --- a/cloud/blockstore/libs/storage/disk_agent/spdk_initializer_ut.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/spdk_initializer_ut.cpp @@ -122,9 +122,13 @@ TInitializeSpdkResult InitializeSpdkSync( NProto::TDiskAgentConfig config) { return InitializeSpdk( - std::make_shared(std::move(config), "rack"), - std::make_shared(), - ICachingAllocatorPtr()).GetValueSync(); + std::make_shared( + std::move(config), + "rack", + 25000), + std::make_shared(), + ICachingAllocatorPtr()) + .GetValueSync(); } } // namespace diff --git a/cloud/blockstore/libs/storage/disk_agent/storage_initializer_ut.cpp b/cloud/blockstore/libs/storage/disk_agent/storage_initializer_ut.cpp index 66c4b6df3e6..e8ae7b0dc2d 100644 --- a/cloud/blockstore/libs/storage/disk_agent/storage_initializer_ut.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/storage_initializer_ut.cpp @@ -166,7 +166,7 @@ Y_UNIT_TEST_SUITE(TInitializerTest) auto future = InitializeStorage( Logging->CreateLog("Test"), StorageConfig, - std::make_shared(DefaultConfig, "rack"), + std::make_shared(DefaultConfig, "rack", 1000), StorageProvider, std::make_shared(pathToSerial)); @@ -208,7 +208,7 @@ Y_UNIT_TEST_SUITE(TInitializerTest) auto future1 = InitializeStorage( Logging->CreateLog("Test"), StorageConfig, - std::make_shared(DefaultConfig, "rack"), + std::make_shared(DefaultConfig, "rack", 1000), StorageProvider, std::make_shared(pathToSerial)); @@ -229,7 +229,7 @@ Y_UNIT_TEST_SUITE(TInitializerTest) auto future2 = InitializeStorage( Logging->CreateLog("Test"), 
StorageConfig, - std::make_shared(DefaultConfig, "rack"), + std::make_shared(DefaultConfig, "rack", 1000), StorageProvider, std::make_shared(newPathToSerial)); @@ -260,7 +260,7 @@ Y_UNIT_TEST_SUITE(TInitializerTest) auto future3 = InitializeStorage( Logging->CreateLog("Test"), StorageConfig, - std::make_shared(DefaultConfig, "rack"), + std::make_shared(DefaultConfig, "rack", 1000), StorageProvider, std::make_shared(newPathToSerial)); @@ -305,7 +305,7 @@ Y_UNIT_TEST_SUITE(TInitializerTest) auto future1 = InitializeStorage( Logging->CreateLog("Test"), StorageConfig, - std::make_shared(DefaultConfig, "rack"), + std::make_shared(DefaultConfig, "rack", 1000), StorageProvider, std::make_shared(pathToSerial)); @@ -330,7 +330,7 @@ Y_UNIT_TEST_SUITE(TInitializerTest) auto future2 = InitializeStorage( Logging->CreateLog("Test"), StorageConfig, - std::make_shared(newConfig, "rack"), + std::make_shared(newConfig, "rack", 1000), StorageProvider, std::make_shared(newPathToSerial)); @@ -381,7 +381,7 @@ Y_UNIT_TEST_SUITE(TInitializerTest) auto future1 = InitializeStorage( Logging->CreateLog("Test"), StorageConfig, - std::make_shared(DefaultConfig, "rack"), + std::make_shared(DefaultConfig, "rack", 1000), StorageProvider, std::make_shared(pathToSerial)); @@ -402,7 +402,7 @@ Y_UNIT_TEST_SUITE(TInitializerTest) auto future2 = InitializeStorage( Logging->CreateLog("Test"), StorageConfig, - std::make_shared(newConfig, "rack"), + std::make_shared(newConfig, "rack", 1000), StorageProvider, std::make_shared(newPathToSerial)); @@ -468,7 +468,7 @@ Y_UNIT_TEST_SUITE(TInitializerTest) auto future = InitializeStorage( Logging->CreateLog("Test"), StorageConfig, - std::make_shared(staticConfig, "rack"), + std::make_shared(staticConfig, "rack", 1000), StorageProvider, std::make_shared(pathToSerial)); diff --git a/cloud/blockstore/libs/storage/disk_agent/testlib/test_env.cpp b/cloud/blockstore/libs/storage/disk_agent/testlib/test_env.cpp index 8024a2a7cc0..188c320a48e 100644 --- 
a/cloud/blockstore/libs/storage/disk_agent/testlib/test_env.cpp +++ b/cloud/blockstore/libs/storage/disk_agent/testlib/test_env.cpp @@ -352,8 +352,8 @@ TTestEnv TTestEnvBuilder::Build() ); auto agentConfig = std::make_shared( std::move(AgentConfigProto), - "the-rack" - ); + "the-rack", + 0); if (!Spdk && agentConfig->GetBackend() == NProto::DISK_AGENT_BACKEND_SPDK) { Spdk = NSpdk::CreateEnvStub(); @@ -402,7 +402,8 @@ TTestEnv TTestEnvBuilder::Build() config, std::make_shared( std::move(SecondAgentConfigProto), - "the-rack"), + "the-rack", + 0), nullptr, // rdmaConfig Spdk, allocator, @@ -720,6 +721,7 @@ NProto::TDiskAgentConfig CreateDefaultAgentConfig() config.SetIOParserActorCount(4); config.SetOffloadAllIORequestsParsingEnabled(true); + config.SetIOParserActorAllocateStorageEnabled(true); return config; } diff --git a/cloud/blockstore/libs/storage/disk_registry/actors/restore_validator_actor.cpp b/cloud/blockstore/libs/storage/disk_registry/actors/restore_validator_actor.cpp index eda96d8f559..df564becf8c 100644 --- a/cloud/blockstore/libs/storage/disk_registry/actors/restore_validator_actor.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/actors/restore_validator_actor.cpp @@ -233,7 +233,7 @@ TStringBuf NormalizeMirrorId(TStringBuf diskId) { bool CheckMirrorDiskId( const TSet& disksInSS, - const TVector disksInBackup, + const TVector& disksInBackup, const NProto::TDiskConfig& disk) { const TString& diskId = disk.GetDiskId(); @@ -532,12 +532,21 @@ void TRestoreValidationActor::HandleListVolumesResponse( ValidSnapshot.DisksToCleanup, NormalizeMirrorId(itr->GetDiskId()))) { - LOG_WARN_S( - ctx, - Component, - RESTORE_PREFIX - << " DiskID " << itr->GetDiskId().Quote() - << " is found in backup but not in SS"); + const bool isShadowDisk = + !itr->GetCheckpointReplica().GetCheckpointId().empty(); + if (isShadowDisk) { + LOG_WARN_S( + ctx, + Component, + RESTORE_PREFIX << " ShadowDisk " << itr->GetDiskId().Quote() + << " is found in backup"); + } else { + 
LOG_WARN_S( + ctx, + Component, + RESTORE_PREFIX << " DiskID " << itr->GetDiskId().Quote() + << " is found in backup, but not in SS"); + } SetErrorDevicesInBackup(itr->GetDeviceUUIDs(), ctx.Now()); DisksInBackup.erase(itr->GetDiskId()); itr = ValidSnapshot.Disks.erase(itr); diff --git a/cloud/blockstore/libs/storage/disk_registry/actors/restore_validator_actor_ut.cpp b/cloud/blockstore/libs/storage/disk_registry/actors/restore_validator_actor_ut.cpp index 4b8e58c9a40..02e0d5b96b9 100644 --- a/cloud/blockstore/libs/storage/disk_registry/actors/restore_validator_actor_ut.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/actors/restore_validator_actor_ut.cpp @@ -790,6 +790,86 @@ Y_UNIT_TEST_SUITE(TRestoreValidatorActorTest) UNIT_ASSERT_EQUAL( state.Disks[4].GetDiskId(), "Disk 4/1"); } + + Y_UNIT_TEST_F(CheckSkipRestoreShadowDisk, TSetupEnvironment) + { + TDiskRegistryStateSnapshot backup; + { // Source disk + auto& diskConfig = backup.Disks.emplace_back(); + diskConfig.SetDiskId("Disk 1"); + diskConfig.SetFolderId("Folder 1"); + diskConfig.SetCloudId("Cloud 1"); + diskConfig.SetBlockSize(41); + } + { // checkpoint disk + auto& diskConfig = backup.Disks.emplace_back(); + diskConfig.SetDiskId("Disk 1-cp1"); + diskConfig.SetFolderId("Folder 1"); + diskConfig.SetCloudId("Cloud 1"); + diskConfig.SetBlockSize(41); + auto* checkpoint = diskConfig.MutableCheckpointReplica(); + checkpoint->SetSourceDiskId("Disk 1"); + checkpoint->SetCheckpointId("cp1"); + } + + auto validatorId = ActorSystem.Register( + new TRestoreValidationActor(EdgeActor, {}, 0, backup)); + + ActorSystem.GrabEdgeEvent(); + + auto volumeListResponse = + std::make_unique(); + volumeListResponse->Record.AddVolumes("Disk 1"); + + ActorSystem.Send(new NActors::IEventHandle( + validatorId, + EdgeActor, + volumeListResponse.release())); + + ActorSystem.GrabEdgeEvent(); + { + NKikimrSchemeOp::TPathDescription description; + auto* mutableVolumeConfig = + description.MutableBlockStoreVolumeDescription() + 
->MutableVolumeConfig(); + mutableVolumeConfig->SetDiskId("Disk 1"); + mutableVolumeConfig->SetBlockSize(41); + mutableVolumeConfig->SetFolderId("Folder 1"); + mutableVolumeConfig->SetCloudId("Cloud 1"); + auto describeVolumeResponse = + std::make_unique( + "", + std::move(description)); + + ActorSystem.Send(new NActors::IEventHandle( + validatorId, + EdgeActor, + describeVolumeResponse.release())); + } + + { + UNIT_ASSERT_EQUAL( + ActorSystem.GrabEdgeEvent() + ->Record.GetDiskId(), + "Disk 1"); + + auto volumeInfoResponse = + std::make_unique(); + auto& volume = *volumeInfoResponse->Record.MutableVolume(); + volume.SetDiskId("Disk 1"); + + ActorSystem.Send(new NActors::IEventHandle( + validatorId, + EdgeActor, + volumeInfoResponse.release())); + } + + auto response = ActorSystem.GrabEdgeEvent< + TEvDiskRegistryPrivate::TEvRestoreDiskRegistryValidationResponse>(); + auto& state = response->LoadDBState; + UNIT_ASSERT_EQUAL(state.Disks.size(), 1); + UNIT_ASSERT_EQUAL(state.Disks[0].GetDiskId(), "Disk 1"); + } } } // namespace NDiskRegistry diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.cpp index 5f251d85cb0..f69c49e7fd4 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.cpp @@ -588,6 +588,18 @@ void TDiskRegistryActor::HandleOperationCompleted( Actors.erase(ev->Sender); } +void TDiskRegistryActor::HandleAddLaggingDevices( + const TEvDiskRegistry::TEvAddLaggingDevicesRequest::TPtr& ev, + const TActorContext& ctx) +{ + Y_UNUSED(ev); + Y_UNUSED(ctx); + + BLOCKSTORE_DISK_REGISTRY_COUNTER(AddLaggingDevices); + + // TODO(komarevtsev-d): Implement this. 
+} + //////////////////////////////////////////////////////////////////////////////// STFUNC(TDiskRegistryActor::StateBoot) @@ -879,19 +891,6 @@ bool ToLogicalBlocks(NProto::TDeviceConfig& device, ui32 logicalBlockSize) //////////////////////////////////////////////////////////////////////////////// -TString LogDevices(const TVector& devices) -{ - TStringBuilder sb; - sb << "( "; - for (const auto& d: devices) { - sb << d.GetDeviceUUID() << "@" << d.GetAgentId() << " "; - } - sb << ")"; - return sb; -} - -//////////////////////////////////////////////////////////////////////////////// - void TDiskRegistryActor::OnDiskAcquired( TVector sentAcquireRequests) { @@ -943,6 +942,9 @@ void TDiskRegistryActor::SendCachedAcquireRequestsToAgent( const TActorContext& ctx, const NProto::TAgentConfig& config) { + if (Config->GetNonReplicatedVolumeDirectAcquireEnabled()) { + return; + } auto& acquireCacheByAgentId = State->GetAcquireCacheByAgentId(); auto cacheIt = acquireCacheByAgentId.find(config.GetAgentId()); if (cacheIt == acquireCacheByAgentId.end()) { diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.h b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.h index 951be732909..cf04576b522 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.h +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor.h @@ -515,6 +515,4 @@ class TDiskRegistryActor final TDiskRegistryStateSnapshot MakeNewLoadState( NProto::TDiskRegistryStateBackup&& backup); bool ToLogicalBlocks(NProto::TDeviceConfig& device, ui32 logicalBlockSize); -TString LogDevices(const TVector& devices); - } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_acquire.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_acquire.cpp index f7d78f8ef35..6280e97df91 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_acquire.cpp +++ 
b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_acquire.cpp @@ -1,6 +1,7 @@ #include "disk_registry_actor.h" #include +#include #include diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_monitoring.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_monitoring.cpp index 1cb6486f6e8..4900810f621 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_monitoring.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_monitoring.cpp @@ -713,6 +713,10 @@ void TDiskRegistryActor::RenderDiskHtmlInfo( << "Search this volume" << ""; } + DIV() { + out << "Volume kind: " + << NProto::EStorageMediaKind_Name(info.MediaKind); + } DIV() { out << "State: "; DumpDiskState(out, info.State); } DIV() { out << "State timestamp: " << info.StateTs; } if (info.MigrationStartTs) { @@ -896,7 +900,7 @@ void TDiskRegistryActor::RenderDiskHtmlInfo( TABLEH() { out << "SeqNo"; } } - for (const auto& [uuid, seqNo]: info.FinishedMigrations) { + for (const auto& [uuid, seqNo, _]: info.FinishedMigrations) { TABLER() { TABLED() { DumpDeviceLink(out, TabletID(), uuid); } TABLED() { out << seqNo; } diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_release.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_release.cpp index 2cff5a6d1a6..2ed2c92a084 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_release.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_actor_release.cpp @@ -1,4 +1,5 @@ #include "disk_registry_actor.h" +#include #include diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_database.h b/cloud/blockstore/libs/storage/disk_registry/disk_registry_database.h index 33cd368cea5..7720bff258d 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_database.h +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_database.h @@ -30,6 +30,10 @@ struct 
TFinishedMigration { TString DeviceId; ui64 SeqNo = 0; + bool IsCanceled = // by default, this value is set to true, because + // we may not start migration if this field is set to + // false + true; }; //////////////////////////////////////////////////////////////////////////////// diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp index be4236d81e4..c862d68cc43 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.cpp @@ -2975,7 +2975,7 @@ auto TDiskRegistryState::DeallocateSimpleDisk( } } - for (const auto& [uuid, seqNo]: disk.FinishedMigrations) { + for (const auto& [uuid, seqNo, _]: disk.FinishedMigrations) { Y_UNUSED(seqNo); if (DeviceList.ReleaseDevice(uuid)) { @@ -3960,26 +3960,18 @@ NProto::TError TDiskRegistryState::UpdateAgentCounters( for (const auto& device: stats.GetDeviceStats()) { const auto& uuid = device.GetDeviceUUID(); - const bool deviceIsUnknown = std::ranges::any_of( - agent->GetUnknownDevices(), - [&uuid](const NProto::TDeviceConfig& device) - { return device.GetDeviceUUID() == uuid; }); - if (deviceIsUnknown) { - return MakeError( - S_FALSE, - TStringBuilder() << "Device: \"" << uuid << "\" is unknown"); + const NProto::TDeviceConfig* knownDevice = DeviceList.FindDevice(uuid); + if (!knownDevice) { + continue; } - const NProto::TDeviceConfig* knownDevice = DeviceList.FindDevice(uuid); - if (!knownDevice || stats.GetNodeId() != knownDevice->GetNodeId()) { + if (stats.GetNodeId() != knownDevice->GetNodeId()) { return MakeError( E_ARGUMENT, TStringBuilder() - << "Unexpected device. DeviceId: \"" << uuid - << "\" Sender node id: " << stats.GetNodeId() - << " Found node id: " - << (knownDevice ? ToString(knownDevice->GetNodeId()) - : "null")); + << "Unexpected device. 
DeviceId: " << uuid.Quote() + << " Sender node id: " << stats.GetNodeId() + << " Found node id: " << knownDevice->GetNodeId()); } } @@ -4882,7 +4874,7 @@ NProto::TDiskConfig TDiskRegistryState::BuildDiskConfig( config.SetStorageMediaKind(diskState.MediaKind); config.SetMigrationStartTs(diskState.MigrationStartTs.MicroSeconds()); - for (const auto& [uuid, seqNo]: diskState.FinishedMigrations) { + for (const auto& [uuid, seqNo, _]: diskState.FinishedMigrations) { Y_UNUSED(seqNo); auto& m = *config.AddFinishedMigrations(); m.SetDeviceId(uuid); @@ -5031,19 +5023,19 @@ void TDiskRegistryState::ApplyAgentStateChange( } if (agent.GetState() == NProto::AGENT_STATE_WARNING) { - if (disk.MigrationSource2Target.contains(deviceId)) { - // migration already started - continue; - } + if (MigrationCanBeStarted(disk, deviceId)) { + if (!FindPtr(disk.Devices, deviceId)) { + ReportDiskRegistryWrongMigratedDeviceOwnership(Sprintf( + "ApplyAgentStateChange: device[DeviceUUID = %s] not " + "found in disk[DiskId " + "= %s]", + deviceId.c_str(), + diskId.c_str())); + continue; + } - if (Find(disk.Devices, deviceId) == disk.Devices.end()) { - ReportDiskRegistryWrongMigratedDeviceOwnership( - TStringBuilder() << "ApplyAgentStateChange: device " - << deviceId << " not found"); - continue; + AddMigration(disk, diskId, deviceId); } - - AddMigration(disk, diskId, deviceId); } else { if (agent.GetState() == NProto::AGENT_STATE_UNAVAILABLE && disk.MasterDiskId) @@ -6027,7 +6019,7 @@ void TDiskRegistryState::ApplyDeviceStateChange( return; } - if (!disk->MigrationSource2Target.contains(uuid)) { + if (MigrationCanBeStarted(*disk, uuid)) { AddMigration(*disk, diskId, uuid); } } @@ -6100,10 +6092,8 @@ void TDiskRegistryState::CancelDeviceMigration( const ui64 seqNo = AddReallocateRequest(db, diskId); - disk.FinishedMigrations.push_back({ - .DeviceId = targetId, - .SeqNo = seqNo - }); + disk.FinishedMigrations.push_back( + {.DeviceId = targetId, .SeqNo = seqNo, .IsCanceled = true}); 
NProto::TDiskHistoryItem historyItem; historyItem.SetTimestamp(now.MicroSeconds()); @@ -6172,7 +6162,9 @@ NProto::TError TDiskRegistryState::FinishDeviceMigration( const ui64 seqNo = AddReallocateRequest(db, diskId); *devIt = targetId; - disk.FinishedMigrations.push_back({.DeviceId = sourceId, .SeqNo = seqNo}); + + disk.FinishedMigrations.push_back( + {.DeviceId = sourceId, .SeqNo = seqNo, .IsCanceled = false}); if (disk.MasterDiskId) { const bool replaced = @@ -7569,4 +7561,24 @@ std::optional TDiskRegistryState::GetDiskBlockCount( return diskInfo.GetBlocksCount(); } +// static +bool TDiskRegistryState::MigrationCanBeStarted( + const TDiskState& disk, + const TString& deviceUUID) +{ + if (disk.MigrationSource2Target.contains(deviceUUID)) { + // migration already started + return false; + } + + for (const auto& m: disk.FinishedMigrations) { + if (m.DeviceId == deviceUUID && !m.IsCanceled) { + // there is a finished migration for the device + return false; + } + } + + return true; +} + } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h index 249906cb29f..cdee55c8483 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state.h @@ -581,7 +581,7 @@ class TDiskRegistryState } TVector CollectBrokenDevices(const NProto::TAgentStats& stats) const; - NProto::TError UpdateAgentCounters(const NProto::TAgentStats& source); + NProto::TError UpdateAgentCounters(const NProto::TAgentStats& stats); void PublishCounters(TInstant now); void DeleteDiskStateChanges( @@ -1312,6 +1312,10 @@ class TDiskRegistryState void CleanupAgentConfig( TDiskRegistryDatabase& db, const NProto::TAgentConfig& agent); + + static bool MigrationCanBeStarted( + const TDiskState& disk, + const TString& deviceUUID); }; } // namespace NCloud::NBlockStore::NStorage diff --git 
a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_benchmark.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_benchmark.cpp index 18dad19c534..d7a1a163505 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_benchmark.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_benchmark.cpp @@ -19,6 +19,12 @@ namespace { const TString BackupFileEnv = "DISK_REGISTRY_BACKUP_PATH"; +enum ERegisterAgent +{ + ChangeNodeId, + KeepNodeId, +}; + TString BackupFilePath() { auto result = GetEnv(BackupFileEnv); @@ -103,13 +109,51 @@ static void PublishCounters_DisableFullGroups(benchmark::State& benchmarkState) } } +static void DoRegisterAgent(benchmark::State& benchmarkState, bool changeNodeId) +{ + auto state = Load(true); + auto agents = state.GetAgents(); + + TTestExecutor executor; + executor.WriteTx([&](TDiskRegistryDatabase db) { db.InitSchema(); }); + + TVector agentsToRegister; + for (size_t i = 0; i < agents.size(); ++i) { + auto& agent = agents[i]; + if (agent.GetNodeId() != 0) { + agentsToRegister.push_back(i); + if (changeNodeId) { + agent.SetNodeId(agentsToRegister.size()); + } + } + } + + size_t i = 0; + for (const auto _: benchmarkState) { + size_t agentIndex = agentsToRegister[(i++) % agentsToRegister.size()]; + auto& agent = agents[agentIndex]; + if (changeNodeId) { + agent.SetNodeId(agent.GetNodeId() + agentsToRegister.size()); + } + executor.WriteTx( + [&](TDiskRegistryDatabase db) + { + auto [r, error] = + state.RegisterAgent(db, agent, TInstant::Now()); + + if (HasError(error)) { + Cout << error.GetMessage() << Endl; + } + }); + } +} + void DoCreateDeleteDisk( benchmark::State& benchmarkState, const TDiskRegistryState::TAllocateDiskParams& diskParams) { TTestExecutor executor; - executor.WriteTx([&](TDiskRegistryDatabase db) mutable - { db.InitSchema(); }); + executor.WriteTx([&](TDiskRegistryDatabase db) { db.InitSchema(); }); auto state = Load(false); for (const auto _: 
benchmarkState) { @@ -166,6 +210,15 @@ void DoCreateDeleteDisk( } \ BENCHMARK(CreateDeleteMirrorDisk_##deviceCount); +#define REGISTER_AGENT(changeNodeId) \ + void RegisterAgent##changeNodeId(benchmark::State& benchmarkState) \ + { \ + DoRegisterAgent( \ + benchmarkState, \ + (changeNodeId) == ERegisterAgent::ChangeNodeId); \ + } \ + BENCHMARK(RegisterAgent##changeNodeId); + BENCHMARK(PublishCounters_All); BENCHMARK(PublishCounters_DisableFullGroups); CREATE_AND_DELETE_NRD_DISK(1); @@ -174,6 +227,8 @@ CREATE_AND_DELETE_NRD_DISK(100); CREATE_AND_DELETE_MIRROR_DISK(1); CREATE_AND_DELETE_MIRROR_DISK(10); CREATE_AND_DELETE_MIRROR_DISK(100); +REGISTER_AGENT(KeepNodeId); +REGISTER_AGENT(ChangeNodeId); /////////////////////////////////////////////////////////////////////////////// diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut.cpp index c6871eee036..d168c26ef87 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut.cpp @@ -1852,10 +1852,11 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateTest) }); auto config1 = AgentConfig(1000, { - Device("dev-1", "uuid-1", "rack-1"), + Device("dev-1", "uuid-1.1"), }); auto config2 = AgentConfig(1001, { - Device("dev-2", "uuid-2", "rack-2"), + Device("dev-1", "uuid-2.1"), + Device("dev-3", "uuid-2.3"), }); auto monitoring = CreateMonitoringServiceStub(); @@ -1865,9 +1866,16 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateTest) TDiskRegistryState state = TDiskRegistryStateBuilder() .With(diskRegistryGroup) - .WithConfig({ config1 }) + .WithConfig({ config1, config2 }) .Build(); + config2 = AgentConfig(1001, { + Device("dev-1", "uuid-2.1"), + // add an unknown device to #1001 + Device("dev-2", "uuid-2.2"), + Device("dev-3", "uuid-2.3"), + }); + executor.WriteTx([&] (TDiskRegistryDatabase db) { UNIT_ASSERT_SUCCESS(RegisterAgent(state, db, config1)); 
UNIT_ASSERT_SUCCESS(RegisterAgent(state, db, config2)); @@ -1877,9 +1885,9 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateTest) { NProto::TAgentStats stats; - stats.SetNodeId(1000); + stats.SetNodeId(1001); auto* d = stats.AddDeviceStats(); - d->SetDeviceUUID("garbage"); + d->SetDeviceUUID("uuid-1.1"); // uuid-1.1 belongs to agent-1000 auto error = state.UpdateAgentCounters(stats); UNIT_ASSERT_VALUES_EQUAL(E_ARGUMENT, error.GetCode()); @@ -1888,14 +1896,78 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateTest) { NProto::TAgentStats stats; stats.SetNodeId(1001); - auto* d = stats.AddDeviceStats(); - d->SetDeviceUUID("uuid-2"); + + { + auto* d = stats.AddDeviceStats(); + d->SetDeviceUUID("uuid-2.1"); + d->SetDeviceName("dev-1"); + d->SetBytesRead(4_KB); + d->SetNumReadOps(1); + } + + { + auto* d = stats.AddDeviceStats(); + d->SetDeviceUUID("uuid-2.2"); + d->SetDeviceName("dev-2"); + d->SetBytesRead(8_KB); + d->SetNumReadOps(2); + } + + { + auto* d = stats.AddDeviceStats(); + d->SetDeviceUUID("uuid-2.3"); + d->SetDeviceName("dev-3"); + d->SetBytesRead(12_KB); + d->SetNumReadOps(3); + } auto error = state.UpdateAgentCounters(stats); - UNIT_ASSERT_VALUES_EQUAL(S_FALSE, error.GetCode()); + UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); } state.PublishCounters(Now()); + + UNIT_ASSERT(diskRegistryGroup->FindSubgroup("agent", "agent-1000")); + + auto counters = diskRegistryGroup->FindSubgroup("agent", "agent-1001"); + UNIT_ASSERT(counters); + + auto totalReadCount = counters->FindCounter("ReadCount"); + UNIT_ASSERT(totalReadCount); + UNIT_ASSERT_VALUES_EQUAL(4, totalReadCount->Val()); + + auto totalReadBytes = counters->FindCounter("ReadBytes"); + UNIT_ASSERT(totalReadBytes); + UNIT_ASSERT_VALUES_EQUAL(16_KB, totalReadBytes->Val()); + + { + auto device = counters->FindSubgroup("device", "agent-1001:dev-1"); + UNIT_ASSERT(device); + + auto readCount = device->FindCounter("ReadCount"); + UNIT_ASSERT(readCount); + UNIT_ASSERT_VALUES_EQUAL(1, readCount->Val()); + + auto readBytes = 
device->FindCounter("ReadBytes"); + UNIT_ASSERT(readBytes); + UNIT_ASSERT_VALUES_EQUAL(4_KB, readBytes->Val()); + } + + // no metrics for the unknown device + UNIT_ASSERT(!counters->FindSubgroup("device", "agent-1001:dev-2")); + + { + auto device = counters->FindSubgroup("device", "agent-1001:dev-3"); + UNIT_ASSERT(device); + + auto readCount = device->FindCounter("ReadCount"); + UNIT_ASSERT(readCount); + UNIT_ASSERT_VALUES_EQUAL(3, readCount->Val()); + + auto readBytes = device->FindCounter("ReadBytes"); + UNIT_ASSERT(readBytes); + UNIT_ASSERT_VALUES_EQUAL(12_KB, readBytes->Val()); + } } Y_UNIT_TEST(ShouldRemoveAgentWithSameId) @@ -12160,9 +12232,8 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateTest) device.SetId(leakedSuspendedDevice); db.UpdateSuspendedDevice(device); db.AddAutomaticallyReplacedDevice( - TAutomaticallyReplacedDeviceInfo{ - leakedAutomaticallyReplacedDevice, - Now()}); + {.DeviceId = leakedAutomaticallyReplacedDevice, + .ReplacementTs = Now()}); }); // Register agent. @@ -12248,4 +12319,5 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateTest) }); } } + } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_migration.cpp b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_migration.cpp index 619b180e294..b8eca5915f9 100644 --- a/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_migration.cpp +++ b/cloud/blockstore/libs/storage/disk_registry/disk_registry_state_ut_migration.cpp @@ -39,6 +39,55 @@ auto ChangeAgentState( return affectedDisks; }; +TResultOrError AllocateDisk( + TDiskRegistryDatabase& db, + TDiskRegistryState& state, + TString diskId) +{ + TDiskRegistryState::TAllocateDiskResult result{}; + + auto error = state.AllocateDisk( + Now(), + db, + TDiskRegistryState::TAllocateDiskParams{ + .DiskId = std::move(diskId), + .BlockSize = DefaultLogicalBlockSize, + .BlocksCount = 20_GB / DefaultLogicalBlockSize}, + &result); + if (HasError(error)) { + return 
error; + } + + return result; +} + +TVector CreateSeveralAgents() +{ + return { + AgentConfig( + 1, + { + Device("dev-1", "uuid-1.1", "rack-1"), + Device("dev-2", "uuid-1.2", "rack-1"), + }), + AgentConfig( + 2, + { + Device("dev-1", "uuid-2.1", "rack-2"), + Device("dev-2", "uuid-2.2", "rack-2"), + })}; +} + +TDiskRegistryState CreateTestState(const TVector& agents) +{ + return TDiskRegistryStateBuilder() + .WithKnownAgents(agents) + .WithDisks({ + Disk("disk-1", {"uuid-1.1", "uuid-1.2"}), + }) + .Build(); +} + } //namespace //////////////////////////////////////////////////////////////////////////////// @@ -339,9 +388,7 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateMigrationTest) } UNIT_ASSERT(state.IsMigrationListEmpty()); - // Now bug is fixed, but, if it reproduce in future, we must report - // event. - UNIT_ASSERT_VALUES_EQUAL(1, configCounter->Val()); + UNIT_ASSERT_VALUES_EQUAL(0, configCounter->Val()); } Y_UNIT_TEST(ShouldEraseMigrationsForDeletedDisk) @@ -1115,6 +1162,508 @@ Y_UNIT_TEST_SUITE(TDiskRegistryStateMigrationTest) config.SetMaxNonReplicatedDeviceMigrationPercentageInProgress(34); DoTestShouldNotMigrateMoreThanNDevicesAtTheSameTime(std::move(config)); } + + Y_UNIT_TEST(ShouldNotStartAlreadyFinishedMigrationAgent) + { + TTestExecutor executor; + executor.WriteTx([&](TDiskRegistryDatabase db) { db.InitSchema(); }); + + const TVector agents = CreateSeveralAgents(); + + TDiskRegistryState state = CreateTestState(agents); + + UNIT_ASSERT_VALUES_EQUAL(0, state.BuildMigrationList().size()); + UNIT_ASSERT(state.IsMigrationListEmpty()); + + NMonitoring::TDynamicCountersPtr counters = + new NMonitoring::TDynamicCounters(); + InitCriticalEventsCounter(counters); + auto critCounter = counters->GetCounter( + "AppCriticalEvents/DiskRegistryWrongMigratedDeviceOwnership", + true); + UNIT_ASSERT_VALUES_EQUAL(0, critCounter->Val()); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) + { + auto [result, error] = AllocateDisk(db, state, "disk-1"); + 
UNIT_ASSERT_SUCCESS(error); + + UNIT_ASSERT_VALUES_EQUAL(2, result.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(0, result.Migrations.size()); + }); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + TVector affectedDisks; + TDuration timeout; + auto error = state.UpdateCmsHostState( + db, + agents[0].agentid(), + NProto::AGENT_STATE_WARNING, + Now(), + false, // dryRun + affectedDisks, + timeout); + + UNIT_ASSERT_VALUES_EQUAL(error.code(), E_TRY_AGAIN); + UNIT_ASSERT_VALUES_EQUAL(1, affectedDisks.size()); + UNIT_ASSERT(!state.IsMigrationListEmpty()); + }); + + const auto migrations = state.BuildMigrationList(); + UNIT_ASSERT_VALUES_EQUAL(2, migrations.size()); + + TVector targets; + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + for (const auto& [diskId, uuid]: migrations) { + auto [config, error] = + state.StartDeviceMigration(Now(), db, diskId, uuid); + UNIT_ASSERT_SUCCESS(error); + targets.push_back(config.GetDeviceUUID()); + } + }); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) + { + auto [result, error] = AllocateDisk(db, state, "disk-1"); + UNIT_ASSERT_SUCCESS(error); + + UNIT_ASSERT_VALUES_EQUAL(2, result.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(2, result.Migrations.size()); + }); + + { + TDiskInfo diskInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.Migrations.size()); + UNIT_ASSERT_VALUES_EQUAL(0, diskInfo.FinishedMigrations.size()); + } + + UNIT_ASSERT_VALUES_EQUAL(0, state.GetDirtyDevices().size()); + + // finish migrations + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + for (size_t i = 0; i < migrations.size(); ++i) { + const auto& diskId = migrations[i].DiskId; + const auto& uuid = migrations[i].SourceDeviceId; + const auto& target = targets[i]; + + bool updated = false; + auto error = state.FinishDeviceMigration( + db, + diskId, + uuid, + target, + TInstant::Now(), + &updated); 
+ + UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); + } + }); + + { + TDiskInfo diskInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(0, diskInfo.Migrations.size()); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.FinishedMigrations.size()); + } + + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + TVector affectedDisks; + TDuration timeout; + auto error = state.UpdateCmsHostState( + db, + agents[0].agentid(), + NProto::AGENT_STATE_WARNING, + Now(), + false, // dryRun + affectedDisks, + timeout); + + UNIT_ASSERT_VALUES_EQUAL(error.code(), E_TRY_AGAIN); + UNIT_ASSERT(state.IsMigrationListEmpty()); + }); + + auto migrationsAfterSecondRequest = state.BuildMigrationList(); + UNIT_ASSERT_VALUES_EQUAL(0, migrationsAfterSecondRequest.size()); + critCounter = counters->GetCounter( + "AppCriticalEvents/DiskRegistryWrongMigratedDeviceOwnership", + true); + UNIT_ASSERT_VALUES_EQUAL(0, critCounter->Val()); + } + + Y_UNIT_TEST(ShouldStartCanceledMigrationAgent) + { + TTestExecutor executor; + executor.WriteTx([&](TDiskRegistryDatabase db) { db.InitSchema(); }); + + const TVector agents = CreateSeveralAgents(); + + TDiskRegistryState state = CreateTestState(agents); + + UNIT_ASSERT_VALUES_EQUAL(0, state.BuildMigrationList().size()); + UNIT_ASSERT(state.IsMigrationListEmpty()); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) + { + auto [result, error] = AllocateDisk(db, state, "disk-1"); + UNIT_ASSERT_SUCCESS(error); + + UNIT_ASSERT_VALUES_EQUAL(2, result.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(0, result.Migrations.size()); + }); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + auto affectedDisks = UpdateAgentState( + state, + db, + agents[0], + NProto::AGENT_STATE_WARNING); + UNIT_ASSERT_VALUES_EQUAL(1, affectedDisks.size()); + UNIT_ASSERT(!state.IsMigrationListEmpty()); + }); + + const auto migrations = state.BuildMigrationList(); + 
UNIT_ASSERT_VALUES_EQUAL(2, migrations.size()); + + TVector targets; + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + for (const auto& [diskId, uuid]: migrations) { + auto [config, error] = + state.StartDeviceMigration(Now(), db, diskId, uuid); + UNIT_ASSERT_SUCCESS(error); + targets.push_back(config.GetDeviceUUID()); + } + }); + Sort(targets); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) + { + auto [result, error] = AllocateDisk(db, state, "disk-1"); + UNIT_ASSERT_SUCCESS(error); + + UNIT_ASSERT_VALUES_EQUAL(2, result.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(2, result.Migrations.size()); + }); + + { + TDiskInfo diskInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.Migrations.size()); + UNIT_ASSERT_VALUES_EQUAL(0, diskInfo.FinishedMigrations.size()); + } + + UNIT_ASSERT_VALUES_EQUAL(0, state.GetDirtyDevices().size()); + + // cancel migrations + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + TVector affectedDisks; + TDuration timeout; + auto error = state.UpdateCmsHostState( + db, + agents[0].agentid(), + NProto::AGENT_STATE_ONLINE, + Now(), + false, // dryRun + affectedDisks, + timeout); + UNIT_ASSERT_VALUES_EQUAL(1, affectedDisks.size()); + UNIT_ASSERT(state.IsMigrationListEmpty()); + }); + + UNIT_ASSERT_VALUES_EQUAL(0, state.GetDirtyDevices().size()); + + { + TDiskInfo diskInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(0, diskInfo.Migrations.size()); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.FinishedMigrations.size()); + } + + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + TVector affectedDisks; + TDuration timeout; + auto error = state.UpdateCmsHostState( + db, + agents[0].agentid(), + NProto::AGENT_STATE_WARNING, + Now(), + false, // dryRun + affectedDisks, + timeout); + + 
UNIT_ASSERT_VALUES_EQUAL(error.code(), E_TRY_AGAIN); + UNIT_ASSERT(!state.IsMigrationListEmpty()); + }); + + auto migrationsAfterSecondRequest = state.BuildMigrationList(); + UNIT_ASSERT_VALUES_EQUAL(2, migrationsAfterSecondRequest.size()); + } + + Y_UNIT_TEST(ShouldNotStartAlreadyFinishedMigrationDevice) + { + TTestExecutor executor; + executor.WriteTx([&](TDiskRegistryDatabase db) { db.InitSchema(); }); + + const TVector agents = CreateSeveralAgents(); + + TDiskRegistryState state = CreateTestState(agents); + + UNIT_ASSERT_VALUES_EQUAL(0, state.BuildMigrationList().size()); + UNIT_ASSERT(state.IsMigrationListEmpty()); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) + { + auto [result, error] = AllocateDisk(db, state, "disk-1"); + UNIT_ASSERT_SUCCESS(error); + + UNIT_ASSERT_VALUES_EQUAL(2, result.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(0, result.Migrations.size()); + }); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + auto result = state.UpdateCmsDeviceState( + db, + agents[0].agentid(), + agents[0].GetDevices()[0].GetDeviceName(), + NProto::DEVICE_STATE_WARNING, + Now(), + false, // shouldResumeDevice + false); // dryRun + + UNIT_ASSERT_VALUES_EQUAL(result.Error.code(), E_TRY_AGAIN); + UNIT_ASSERT_VALUES_EQUAL(1, result.AffectedDisks.size()); + UNIT_ASSERT(!state.IsMigrationListEmpty()); + }); + + const auto migrations = state.BuildMigrationList(); + UNIT_ASSERT_VALUES_EQUAL(1, migrations.size()); + const auto& migration = migrations[0]; + + TString target; + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + auto [config, error] = state.StartDeviceMigration( + Now(), + db, + migration.DiskId, + migration.SourceDeviceId); + UNIT_ASSERT_SUCCESS(error); + target = config.GetDeviceUUID(); + }); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) + { + auto [result, error] = AllocateDisk(db, state, "disk-1"); + UNIT_ASSERT_SUCCESS(error); + + UNIT_ASSERT_VALUES_EQUAL(2, result.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(1, 
result.Migrations.size()); + }); + + { + TDiskInfo diskInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(1, diskInfo.Migrations.size()); + UNIT_ASSERT_VALUES_EQUAL(0, diskInfo.FinishedMigrations.size()); + } + + UNIT_ASSERT_VALUES_EQUAL(0, state.GetDirtyDevices().size()); + + // finish migration + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + const auto& diskId = migration.DiskId; + const auto& uuid = migration.SourceDeviceId; + + bool updated = false; + auto error = state.FinishDeviceMigration( + db, + diskId, + uuid, + target, + TInstant::Now(), + &updated); + + UNIT_ASSERT_VALUES_EQUAL(S_OK, error.GetCode()); + }); + + { + TDiskInfo diskInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(0, diskInfo.Migrations.size()); + UNIT_ASSERT_VALUES_EQUAL(1, diskInfo.FinishedMigrations.size()); + } + + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + auto result = state.UpdateCmsDeviceState( + db, + agents[0].agentid(), + agents[0].GetDevices()[0].GetDeviceName(), + NProto::DEVICE_STATE_WARNING, + Now(), + false, // shouldResumeDevice + false); // dryRun + + UNIT_ASSERT_VALUES_EQUAL(result.Error.code(), E_TRY_AGAIN); + UNIT_ASSERT(state.IsMigrationListEmpty()); + }); + + auto migrationsAfterSecondRequest = state.BuildMigrationList(); + UNIT_ASSERT_VALUES_EQUAL(0, migrationsAfterSecondRequest.size()); + } + + Y_UNIT_TEST(ShouldStartCanceledMigrationDevice) + { + TTestExecutor executor; + executor.WriteTx([&](TDiskRegistryDatabase db) { db.InitSchema(); }); + + const TVector agents = CreateSeveralAgents(); + + TDiskRegistryState state = CreateTestState(agents); + + UNIT_ASSERT_VALUES_EQUAL(0, state.BuildMigrationList().size()); + UNIT_ASSERT(state.IsMigrationListEmpty()); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) + { + auto [result, 
error] = AllocateDisk(db, state, "disk-1"); + UNIT_ASSERT_SUCCESS(error); + + UNIT_ASSERT_VALUES_EQUAL(2, result.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(0, result.Migrations.size()); + }); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + auto result = state.UpdateCmsDeviceState( + db, + agents[0].agentid(), + agents[0].GetDevices()[0].GetDeviceName(), + NProto::DEVICE_STATE_WARNING, + Now(), + false, // shouldResumeDevice + false); // dryRun + + UNIT_ASSERT_VALUES_EQUAL(result.Error.code(), E_TRY_AGAIN); + UNIT_ASSERT_VALUES_EQUAL(1, result.AffectedDisks.size()); + UNIT_ASSERT(!state.IsMigrationListEmpty()); + }); + + const auto migrations = state.BuildMigrationList(); + UNIT_ASSERT_VALUES_EQUAL(1, migrations.size()); + const auto& migration = migrations[0]; + + TString target; + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + auto [config, error] = state.StartDeviceMigration( + Now(), + db, + migration.DiskId, + migration.SourceDeviceId); + UNIT_ASSERT_SUCCESS(error); + target = config.GetDeviceUUID(); + }); + + executor.WriteTx( + [&](TDiskRegistryDatabase db) + { + auto [result, error] = AllocateDisk(db, state, "disk-1"); + UNIT_ASSERT_SUCCESS(error); + + UNIT_ASSERT_VALUES_EQUAL(2, result.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(1, result.Migrations.size()); + }); + + { + TDiskInfo diskInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(1, diskInfo.Migrations.size()); + UNIT_ASSERT_VALUES_EQUAL(0, diskInfo.FinishedMigrations.size()); + } + + UNIT_ASSERT_VALUES_EQUAL(0, state.GetDirtyDevices().size()); + + // cancel migration + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + auto result = state.UpdateCmsDeviceState( + db, + agents[0].agentid(), + agents[0].GetDevices()[0].GetDeviceName(), + NProto::DEVICE_STATE_ONLINE, + Now(), + false, // shouldResumeDevice + false); // dryRun + 
UNIT_ASSERT(state.IsMigrationListEmpty()); + }); + + { + TDiskInfo diskInfo; + UNIT_ASSERT_SUCCESS(state.GetDiskInfo("disk-1", diskInfo)); + UNIT_ASSERT_VALUES_EQUAL(2, diskInfo.Devices.size()); + UNIT_ASSERT_VALUES_EQUAL(0, diskInfo.Migrations.size()); + UNIT_ASSERT_VALUES_EQUAL(1, diskInfo.FinishedMigrations.size()); + } + + executor.WriteTx( + [&](TDiskRegistryDatabase db) mutable + { + auto result = state.UpdateCmsDeviceState( + db, + agents[0].agentid(), + agents[0].GetDevices()[0].GetDeviceName(), + NProto::DEVICE_STATE_WARNING, + Now(), + false, // shouldResumeDevice + false); // dryRun + + UNIT_ASSERT_VALUES_EQUAL(result.Error.code(), E_TRY_AGAIN); + UNIT_ASSERT(!state.IsMigrationListEmpty()); + }); + + auto migrationsAfterSecondRequest = state.BuildMigrationList(); + UNIT_ASSERT_VALUES_EQUAL(1, migrationsAfterSecondRequest.size()); + } } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/init/disk_agent/actorsystem.cpp b/cloud/blockstore/libs/storage/init/disk_agent/actorsystem.cpp index 77a9a783586..92016b6ec2e 100644 --- a/cloud/blockstore/libs/storage/init/disk_agent/actorsystem.cpp +++ b/cloud/blockstore/libs/storage/init/disk_agent/actorsystem.cpp @@ -178,6 +178,8 @@ IActorSystemPtr CreateDiskAgentActorSystem(const TDiskAgentActorSystemArgs& daAr servicesMask.EnableLocalService = 0; // configured manually servicesMask.EnableConfigsDispatcher = daArgs.StorageConfig->GetConfigsDispatcherServiceEnabled(); + servicesMask.EnableViewerService = + daArgs.StorageConfig->GetYdbViewerServiceEnabled(); TActorSystemArgs args{ .ModuleFactories = daArgs.ModuleFactories, diff --git a/cloud/blockstore/libs/storage/init/server/actorsystem.cpp b/cloud/blockstore/libs/storage/init/server/actorsystem.cpp index 3b4adabaf95..92f636ade37 100644 --- a/cloud/blockstore/libs/storage/init/server/actorsystem.cpp +++ b/cloud/blockstore/libs/storage/init/server/actorsystem.cpp @@ -308,7 +308,7 @@ class TStorageServicesInitializer final auto 
volumeBalancerService = CreateVolumeBalancerActor( Args.StorageConfig, Args.VolumeStats, - Args.CgroupStatsFetcher, + Args.StatsFetcher, Args.VolumeBalancerSwitch, MakeStorageServiceId()); @@ -546,6 +546,8 @@ IActorSystemPtr CreateActorSystem(const TServerActorSystemArgs& sArgs) servicesMask.EnableSchemeBoardMonitoring = 1; servicesMask.EnableConfigsDispatcher = storageConfig->GetConfigsDispatcherServiceEnabled(); + servicesMask.EnableViewerService = + storageConfig->GetYdbViewerServiceEnabled(); auto nodeId = sArgs.NodeId; auto onStart = [=] (IActorSystem& actorSystem) { diff --git a/cloud/blockstore/libs/storage/init/server/actorsystem.h b/cloud/blockstore/libs/storage/init/server/actorsystem.h index 2c30e6edf1e..b8c8c8dff68 100644 --- a/cloud/blockstore/libs/storage/init/server/actorsystem.h +++ b/cloud/blockstore/libs/storage/init/server/actorsystem.h @@ -61,7 +61,7 @@ struct TServerActorSystemArgs IVolumeStatsPtr VolumeStats; NRdma::IServerPtr RdmaServer; NRdma::IClientPtr RdmaClient; - NCloud::NStorage::ICgroupStatsFetcherPtr CgroupStatsFetcher; + NCloud::NStorage::IStatsFetcherPtr StatsFetcher; TManuallyPreemptedVolumesPtr PreemptedVolumes; NNvme::INvmeManagerPtr NvmeManager; IVolumeBalancerSwitchPtr VolumeBalancerSwitch; diff --git a/cloud/blockstore/libs/storage/partition/part_actor_compaction.cpp b/cloud/blockstore/libs/storage/partition/part_actor_compaction.cpp index 70f75adf578..7b07df87456 100644 --- a/cloud/blockstore/libs/storage/partition/part_actor_compaction.cpp +++ b/cloud/blockstore/libs/storage/partition/part_actor_compaction.cpp @@ -1275,31 +1275,39 @@ void TPartitionActor::HandleCompaction( TVector tops; + const bool batchCompactionEnabledForCloud = + Config->IsBatchCompactionFeatureEnabled( + PartitionConfig.GetCloudId(), + PartitionConfig.GetFolderId(), + PartitionConfig.GetDiskId()); + const bool batchCompactionEnabled = + Config->GetBatchCompactionEnabled() || batchCompactionEnabledForCloud; + const auto& cm = State->GetCompactionMap(); 
if (msg->BlockIndex.Defined()) { - const auto startIndex = cm.GetRangeStart(*msg->BlockIndex); - tops.push_back({startIndex, cm.Get(startIndex)}); + if (batchCompactionEnabled) { + tops = cm.GetNonEmptyRanges( + *msg->BlockIndex, Config->GetForcedCompactionRangeCountPerRun()); + } else { + const auto startIndex = cm.GetRangeStart(*msg->BlockIndex); + tops.push_back({startIndex, cm.Get(startIndex)}); + } State->OnNewCompactionRange(); } else if (msg->Mode == TEvPartitionPrivate::GarbageCompaction) { - const auto& top = State->GetCompactionMap().GetTopByGarbageBlockCount(); - tops.push_back({top.BlockIndex, top.Stat}); - } else { - ui32 rangeCount = 1; - - const bool batchCompactionEnabledForCloud = - Config->IsBatchCompactionFeatureEnabled( - PartitionConfig.GetCloudId(), - PartitionConfig.GetFolderId(), - PartitionConfig.GetDiskId()); - const bool batchCompactionEnabled = - Config->GetBatchCompactionEnabled() || batchCompactionEnabledForCloud; - - if (batchCompactionEnabled) { - rangeCount = State->GetCompactionRangeCountPerRun(); + if (batchCompactionEnabled && + Config->GetGarbageCompactionRangeCountPerRun() > 1) + { + tops = cm.GetTopByGarbageBlockCount( + Config->GetGarbageCompactionRangeCountPerRun()); + } else { + const auto& top = cm.GetTopByGarbageBlockCount(); + tops.push_back({top.BlockIndex, top.Stat}); } - - tops = State->GetCompactionMap().GetTopsFromGroups(rangeCount); + } else { + tops = cm.GetTopsFromGroups( + batchCompactionEnabled ? 
State->GetCompactionRangeCountPerRun() + : 1); } if (tops.empty() || !tops.front().Stat.BlobCount) { diff --git a/cloud/blockstore/libs/storage/partition/part_actor_readblob.cpp b/cloud/blockstore/libs/storage/partition/part_actor_readblob.cpp index fd2bdbc7910..881c503b1eb 100644 --- a/cloud/blockstore/libs/storage/partition/part_actor_readblob.cpp +++ b/cloud/blockstore/libs/storage/partition/part_actor_readblob.cpp @@ -107,8 +107,10 @@ void TPartitionActor::HandleReadBlobCompleted( >= Config->GetMaxReadBlobErrorsBeforeSuicide()) { LOG_WARN(ctx, TBlockStoreComponents::PARTITION, - "[%lu] Stop tablet because of too many ReadBlob errors: %s", + "[%lu] Stop tablet because of too many ReadBlob errors (actor %s, group %u): %s", TabletID(), + ev->Sender.ToString().c_str(), + msg->GroupId, FormatError(msg->GetError()).data()); ReportTabletBSFailure(); diff --git a/cloud/blockstore/libs/storage/partition/part_actor_writeblob.cpp b/cloud/blockstore/libs/storage/partition/part_actor_writeblob.cpp index 40c1b6224b3..0ba70fb9524 100644 --- a/cloud/blockstore/libs/storage/partition/part_actor_writeblob.cpp +++ b/cloud/blockstore/libs/storage/partition/part_actor_writeblob.cpp @@ -429,8 +429,10 @@ void TPartitionActor::HandleWriteBlobCompleted( if (FAILED(msg->GetStatus())) { LOG_WARN(ctx, TBlockStoreComponents::PARTITION, - "[%lu] Stop tablet because of WriteBlob error: %s", + "[%lu] Stop tablet because of WriteBlob error (actor %s, group %u): %s", TabletID(), + ev->Sender.ToString().c_str(), + groupId, FormatError(msg->GetError()).data()); ReportTabletBSFailure(); diff --git a/cloud/blockstore/libs/storage/partition/part_ut.cpp b/cloud/blockstore/libs/storage/partition/part_ut.cpp index ee4a63c6aab..e2c97ce07b6 100644 --- a/cloud/blockstore/libs/storage/partition/part_ut.cpp +++ b/cloud/blockstore/libs/storage/partition/part_ut.cpp @@ -74,11 +74,11 @@ TString GetBlockContent(char fill = 0, size_t size = DefaultBlockSize) TString GetBlocksContent( char fill = 0, - ui32 
blocksCount = 1, + ui32 blockCount = 1, size_t blockSize = DefaultBlockSize) { TString result; - for (ui32 i = 0; i < blocksCount; ++i) { + for (ui32 i = 0; i < blockCount; ++i) { result += GetBlockContent(fill, blockSize); } return result; @@ -160,7 +160,7 @@ class TDummyActor final void InitTestActorRuntime( TTestActorRuntime& runtime, const NProto::TStorageServiceConfig& config, - ui32 blocksCount, + ui32 blockCount, ui32 channelCount, std::unique_ptr tabletInfo, TTestPartitionInfo partitionInfo = TTestPartitionInfo(), @@ -181,7 +181,7 @@ void InitTestActorRuntime( partConfig.SetStorageMediaKind(partitionInfo.MediaKind); partConfig.SetBlockSize(DefaultBlockSize); - partConfig.SetBlocksCount(blocksCount); + partConfig.SetBlocksCount(blockCount); auto* cps = partConfig.MutableExplicitChannelProfiles(); cps->Add()->SetDataKind(static_cast(EChannelDataKind::System)); @@ -274,7 +274,7 @@ void InitLogSettings(TTestActorRuntime& runtime) std::unique_ptr PrepareTestActorRuntime( NProto::TStorageServiceConfig config = DefaultConfig(), - ui32 blocksCount = 1024, + ui32 blockCount = 1024, TMaybe channelsCount = {}, const TTestPartitionInfo& testPartitionInfo = TTestPartitionInfo(), IActorPtr volumeProxy = {}, @@ -322,7 +322,7 @@ std::unique_ptr PrepareTestActorRuntime( InitTestActorRuntime( *runtime, config, - blocksCount, + blockCount, channelsCount ? 
*channelsCount : tabletInfo->Channels.size(), std::move(tabletInfo), testPartitionInfo, @@ -797,12 +797,12 @@ class TPartitionClient std::unique_ptr CreateDescribeBlocksRequest( ui32 startIndex, - ui32 blocksCount, + ui32 blockCount, const TString& checkpointId = "") { auto request = std::make_unique(); request->Record.SetStartIndex(startIndex); - request->Record.SetBlocksCount(blocksCount); + request->Record.SetBlocksCount(blockCount); request->Record.SetCheckpointId(checkpointId); return request; } @@ -840,11 +840,11 @@ class TPartitionClient std::unique_ptr CreateCompactRangeRequest( ui32 blockIndex, - ui32 blocksCount) + ui32 blockCount) { auto request = std::make_unique(); request->Record.SetStartIndex(blockIndex); - request->Record.SetBlocksCount(blocksCount); + request->Record.SetBlocksCount(blockCount); return request; } @@ -1042,20 +1042,20 @@ struct TBlob TBlob( ui32 number, ui8 offset, - ui8 blocksCount = 1, + ui8 blockCount = 1, ui32 channel = 0, ui32 generation = 0 ) : Number(number) , Offset(offset) - , BlocksCount(blocksCount) + , BlockCount(blockCount) , Channel(channel) , Generation(generation) {} ui32 Number; ui8 Offset; - ui8 BlocksCount; + ui8 BlockCount; ui32 Channel; ui32 Generation; }; @@ -1091,7 +1091,7 @@ class TTestVolumeProxyActor final TString BaseDiskId; TString BaseDiskCheckpointId; TPartitionContent BasePartitionContent; - ui32 BlocksCount; + ui32 BlockCount; ui32 BaseBlockSize; public: @@ -1100,7 +1100,7 @@ class TTestVolumeProxyActor final const TString& baseDiskId, const TString& baseDiskCheckpointId, const TPartitionContent& basePartitionContent, - ui32 blocksCount, + ui32 blockCount, ui32 baseBlockSize = DefaultBlockSize); void Bootstrap(const TActorContext& ctx); @@ -1128,13 +1128,13 @@ TTestVolumeProxyActor::TTestVolumeProxyActor( const TString& baseDiskId, const TString& baseDiskCheckpointId, const TPartitionContent& basePartitionContent, - ui32 blocksCount, + ui32 blockCount, ui32 baseBlockSize) : BaseTabletId(baseTabletId) , 
BaseDiskId(baseDiskId) , BaseDiskCheckpointId(baseDiskCheckpointId) , BasePartitionContent(std::move(basePartitionContent)) - , BlocksCount(blocksCount) + , BlockCount(blockCount) , BaseBlockSize(baseBlockSize) {} @@ -1178,8 +1178,8 @@ void TTestVolumeProxyActor::HandleDescribeBlocksRequest( auto* range = blobPiece.AddRanges(); range->SetBlobOffset(blob.Offset); range->SetBlockIndex(blockIndex); - range->SetBlocksCount(blob.BlocksCount); - blockIndex += blob.BlocksCount; + range->SetBlocksCount(blob.BlockCount); + blockIndex += blob.BlockCount; } else if (std::holds_alternative(descr)) { const auto& fresh = std::get(descr); auto& freshBlockRange = *response->Record.AddFreshBlockRanges(); @@ -1210,13 +1210,13 @@ void TTestVolumeProxyActor::HandleGetUsedBlocksRequest( auto response = std::make_unique(); ui64 blockIndex = 0; - TCompressedBitmap bitmap(BlocksCount); + TCompressedBitmap bitmap(BlockCount); for (const auto& descr: BasePartitionContent) { if (std::holds_alternative(descr)) { const auto& blob = std::get(descr); - bitmap.Set(blockIndex, blockIndex + blob.BlocksCount); - blockIndex += blob.BlocksCount; + bitmap.Set(blockIndex, blockIndex + blob.BlockCount); + blockIndex += blob.BlockCount; } else if (std::holds_alternative(descr)) { bitmap.Set(blockIndex, blockIndex + 1); ++blockIndex; @@ -1271,10 +1271,10 @@ void TTestVolumeProxyActor::HandleGetChangedBlocksRequest( for (const auto& descr: BasePartitionContent) { if (std::holds_alternative(descr)) { const auto& blob = std::get(descr); - for (ui64 block = blockIndex; block < blockIndex + blob.BlocksCount; block++) { + for (ui64 block = blockIndex; block < blockIndex + blob.BlockCount; block++) { fillBlock(block); } - blockIndex += blob.BlocksCount; + blockIndex += blob.BlockCount; } else if (std::holds_alternative(descr)) { fillBlock(blockIndex); ++blockIndex; @@ -1376,14 +1376,14 @@ TPartitionWithRuntime SetupOverlayPartition( const TPartitionContent& basePartitionContent = {}, TMaybe channelsCount = {}, 
ui32 blockSize = DefaultBlockSize, - ui32 blocksCount = 1024, + ui32 blockCount = 1024, const NProto::TStorageServiceConfig& config = DefaultConfig()) { TPartitionWithRuntime result; result.Runtime = PrepareTestActorRuntime( config, - blocksCount, + blockCount, channelsCount, { "overlay-disk", @@ -1398,7 +1398,7 @@ TPartitionWithRuntime SetupOverlayPartition( "base-disk", "checkpoint", basePartitionContent, - blocksCount, + blockCount, blockSize)); bool baseDiskIsMapped = false; @@ -1450,7 +1450,7 @@ TString GetBlocksContent( result += TString(blockSize, char(0)); } else if (std::holds_alternative(descr)) { const auto& blob = std::get(descr); - for (auto i = 0; i < blob.BlocksCount; ++i) { + for (auto i = 0; i < blob.BlockCount; ++i) { const auto blobOffset = blob.Offset + i; // Debugging is easier when block content is equal to blob offset. result += TString(blockSize, char(blobOffset)); @@ -1643,22 +1643,25 @@ Y_UNIT_TEST_SUITE(TPartitionTest) ); } - Y_UNIT_TEST(ShouldBatchSmallWrites) + Y_UNIT_TEST(ShouldBatchSmallWritesToMixedChannelIfThresholdExceeded) { NProto::TStorageServiceConfig config; config.SetWriteRequestBatchingEnabled(true); + config.SetWriteBlobThreshold(2_MB); auto runtime = PrepareTestActorRuntime(config); TPartitionClient partition(*runtime); partition.WaitReady(); - runtime->SetObserverFunc(PartitionBatchWriteCollector(*runtime, 1000)); + const ui32 blockCount = 1000; + runtime->SetObserverFunc( + PartitionBatchWriteCollector(*runtime, blockCount)); - for (ui32 i = 0; i < 1000; ++i) { + for (ui32 i = 0; i < blockCount; ++i) { partition.SendWriteBlocksRequest(i, i); } - for (ui32 i = 0; i < 1000; ++i) { + for (ui32 i = 0; i < blockCount; ++i) { auto response = partition.RecvWriteBlocksResponse(); UNIT_ASSERT(SUCCEEDED(response->GetStatus())); } @@ -1666,28 +1669,27 @@ Y_UNIT_TEST_SUITE(TPartitionTest) auto response = partition.StatPartition(); const auto& stats = response->Record.GetStats(); UNIT_ASSERT(stats.GetMixedBlobsCount()); - 
UNIT_ASSERT_VALUES_EQUAL(1000, stats.GetUsedBlocksCount()); + UNIT_ASSERT_VALUES_EQUAL(blockCount - 1, stats.GetMixedBlocksCount()); + UNIT_ASSERT_VALUES_EQUAL(0, stats.GetMergedBlocksCount()); + UNIT_ASSERT_VALUES_EQUAL(1, stats.GetFreshBlocksCount()); + UNIT_ASSERT_VALUES_EQUAL(blockCount, stats.GetUsedBlocksCount()); UNIT_ASSERT_VALUES_EQUAL( - 1000, - stats.GetUserWriteCounters().GetRequestsCount() - ); + blockCount, + stats.GetUserWriteCounters().GetRequestsCount()); const auto batchCount = stats.GetUserWriteCounters().GetBatchCount(); - UNIT_ASSERT(batchCount < 1000); + UNIT_ASSERT(batchCount < blockCount); UNIT_ASSERT(batchCount > 0); - for (ui32 i = 0; i < 1000; ++i) { + for (ui32 i = 0; i < blockCount; ++i) { UNIT_ASSERT_VALUES_EQUAL( GetBlockContent(i), - GetBlockContent(partition.ReadBlocks(i)) - ); + GetBlockContent(partition.ReadBlocks(i))); } UNIT_ASSERT(stats.GetUserWriteCounters().GetExecTime() != 0); // checking that drain-related counters are in a consistent state partition.Drain(); - - // TODO: explicitly test the case when mixed blobs are generated via batching } Y_UNIT_TEST(ShouldBatchIntersectingWrites) @@ -1705,8 +1707,7 @@ Y_UNIT_TEST_SUITE(TPartitionTest) for (ui32 j = 0; j < 100; ++j) { partition.SendWriteBlocksRequest( TBlockRange32::WithLength(i * 100, j + 1), - i + 1 - ); + i + 1); } } @@ -1718,11 +1719,13 @@ Y_UNIT_TEST_SUITE(TPartitionTest) auto response = partition.StatPartition(); const auto& stats = response->Record.GetStats(); UNIT_ASSERT(stats.GetMixedBlobsCount()); + UNIT_ASSERT_VALUES_EQUAL(999, stats.GetMixedBlocksCount()); + UNIT_ASSERT_VALUES_EQUAL(0, stats.GetMergedBlocksCount()); + UNIT_ASSERT_VALUES_EQUAL(100, stats.GetFreshBlocksCount()); UNIT_ASSERT_VALUES_EQUAL(1000, stats.GetUsedBlocksCount()); UNIT_ASSERT_VALUES_EQUAL( 1000, - stats.GetUserWriteCounters().GetRequestsCount() - ); + stats.GetUserWriteCounters().GetRequestsCount()); const auto batchCount = stats.GetUserWriteCounters().GetBatchCount(); 
UNIT_ASSERT(batchCount < 1000); UNIT_ASSERT(batchCount > 0); @@ -1731,8 +1734,7 @@ Y_UNIT_TEST_SUITE(TPartitionTest) for (ui32 j = 0; j < 100; ++j) { UNIT_ASSERT_VALUES_EQUAL( GetBlockContent(i + 1), - GetBlockContent(partition.ReadBlocks(i * 100 + j)) - ); + GetBlockContent(partition.ReadBlocks(i * 100 + j))); } } @@ -6203,11 +6205,11 @@ Y_UNIT_TEST_SUITE(TPartitionTest) Y_UNIT_TEST(ShouldCorrectlyCopyUsedBlocksCountForOverlayDisk) { - ui32 blocksCount = 1024 * 1024 * 1024; + ui32 blockCount = 1024 * 1024 * 1024; ui32 usedBlocksCount = 0; TPartitionContent baseContent; - for (size_t i = 0; i < blocksCount/4; i += 50) { + for (size_t i = 0; i < blockCount/4; i += 50) { baseContent.push_back(TBlob(i, 0, 49)); usedBlocksCount += 49; baseContent.push_back(TEmpty()); @@ -6219,7 +6221,7 @@ Y_UNIT_TEST_SUITE(TPartitionTest) baseContent, {}, DefaultBlockSize, - blocksCount, + blockCount, DefaultConfig()); auto& partition = *partitionWithRuntime.Partition; @@ -6236,7 +6238,7 @@ Y_UNIT_TEST_SUITE(TPartitionTest) Y_UNIT_TEST(ShouldCorrectlyCalculateLogicalUsedBlocksCountForOverlayDisk) { - ui32 blocksCount = 1024 * 128; + ui32 blockCount = 1024 * 128; TPartitionContent baseContent; for (size_t i = 0; i < 100; ++i) { baseContent.push_back(TEmpty()); @@ -6254,7 +6256,7 @@ Y_UNIT_TEST_SUITE(TPartitionTest) baseContent, {}, DefaultBlockSize, - blocksCount, + blockCount, DefaultConfig()); auto& partition = *partitionWithRuntime.Partition; @@ -10091,9 +10093,9 @@ Y_UNIT_TEST_SUITE(TPartitionTest) config.SetSSDMaxBlobsPerUnit(7); config.SetHDDMaxBlobsPerUnit(7); - ui32 blocksCount = 1024 * 1024; + ui32 blockCount = 1024 * 1024; - auto runtime = PrepareTestActorRuntime(config, blocksCount); + auto runtime = PrepareTestActorRuntime(config, blockCount); TPartitionClient partition(*runtime); partition.WaitReady(); @@ -11006,9 +11008,9 @@ Y_UNIT_TEST_SUITE(TPartitionTest) config.SetSSDMaxBlobsPerUnit(7); config.SetHDDMaxBlobsPerUnit(7); - ui32 blocksCount = 1024 * 1024; + ui32 
blockCount = 1024 * 1024; - auto runtime = PrepareTestActorRuntime(config, blocksCount); + auto runtime = PrepareTestActorRuntime(config, blockCount); TPartitionClient partition(*runtime); partition.WaitReady(); @@ -11109,9 +11111,9 @@ Y_UNIT_TEST_SUITE(TPartitionTest) config.SetSSDMaxBlobsPerUnit(999999999); config.SetHDDMaxBlobsPerUnit(999999999); - ui32 blocksCount = 10 * 1024; + ui32 blockCount = 10 * 1024; - auto runtime = PrepareTestActorRuntime(config, blocksCount); + auto runtime = PrepareTestActorRuntime(config, blockCount); TPartitionClient partition(*runtime); partition.WaitReady(); @@ -11332,6 +11334,213 @@ Y_UNIT_TEST_SUITE(TPartitionTest) auto response = partition.RecvCompactionResponse(); UNIT_ASSERT_VALUES_EQUAL(E_TRY_AGAIN, response->GetStatus()); } + + Y_UNIT_TEST(ShouldProcessMultipleRangesUponGarbageCompaction) + { + auto config = DefaultConfig(); + config.SetBatchCompactionEnabled(true); + config.SetGarbageCompactionRangeCountPerRun(3); + config.SetV1GarbageCompactionEnabled(true); + config.SetCompactionGarbageThreshold(20); + config.SetCompactionRangeGarbageThreshold(999999); + + auto runtime = PrepareTestActorRuntime(config, MaxPartitionBlocksCount); + + TPartitionClient partition(*runtime); + partition.WaitReady(); + + { + const auto response = partition.StatPartition(); + const auto& stats = response->Record.GetStats(); + UNIT_ASSERT_VALUES_EQUAL(0, stats.GetMergedBlobsCount()); + } + + TAutoPtr compactionRequest; + const auto interceptCompactionRequest = + [&compactionRequest](TAutoPtr& event) + { + if (event->GetTypeRewrite() == + TEvPartitionPrivate::EvCompactionRequest) + { + auto* msg = + event->Get(); + if (msg->Mode == TEvPartitionPrivate::GarbageCompaction) { + compactionRequest = event.Release(); + return TTestActorRuntimeBase::EEventAction::DROP; + } + } + return TTestActorRuntime::DefaultObserverFunc(event); + }; + runtime->SetObserverFunc(interceptCompactionRequest); + + const auto blockRange1 = TBlockRange32::WithLength(0, 
1024); + const auto blockRange2 = TBlockRange32::WithLength(1024 * 1024, 1024); + const auto blockRange3 = + TBlockRange32::WithLength(2 * 1024 * 1024, 1024); + + partition.WriteBlocks(blockRange1, 1); + partition.WriteBlocks(blockRange1, 2); + + partition.WriteBlocks(blockRange2, 3); + partition.WriteBlocks(blockRange2, 4); + partition.WriteBlocks(blockRange2, 5); + + partition.WriteBlocks(blockRange3, 6); + partition.WriteBlocks(blockRange3, 7); + partition.WriteBlocks(blockRange3, 8); + + { + const auto response = partition.StatPartition(); + const auto& stats = response->Record.GetStats(); + UNIT_ASSERT_VALUES_EQUAL(8, stats.GetMergedBlobsCount()); + } + + runtime->DispatchEvents(TDispatchOptions(), TDuration::Seconds(1)); + + UNIT_ASSERT(compactionRequest); + runtime->Send(compactionRequest.Release()); + + runtime->DispatchEvents(TDispatchOptions(), TDuration::Seconds(1)); + + partition.Cleanup(); + + // checking that data wasn't corrupted + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(2), + GetBlockContent(partition.ReadBlocks(blockRange1.Start))); + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(2), + GetBlockContent(partition.ReadBlocks(blockRange1.End))); + + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(5), + GetBlockContent(partition.ReadBlocks(blockRange2.Start))); + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(5), + GetBlockContent(partition.ReadBlocks(blockRange2.End))); + + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(8), + GetBlockContent(partition.ReadBlocks(blockRange3.Start))); + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(8), + GetBlockContent(partition.ReadBlocks(blockRange3.End))); + + // checking that we now have 1 blob in each of the ranges + { + const auto response = partition.StatPartition(); + const auto& stats = response->Record.GetStats(); + UNIT_ASSERT_VALUES_EQUAL(3, stats.GetMergedBlobsCount()); + } + } + + Y_UNIT_TEST(ShouldProcessMultipleRangesUponForceCompaction) + { + auto config = DefaultConfig(); + 
config.SetBatchCompactionEnabled(true); + config.SetForcedCompactionRangeCountPerRun(3); + config.SetV1GarbageCompactionEnabled(false); + + auto runtime = PrepareTestActorRuntime(config, MaxPartitionBlocksCount); + + TPartitionClient partition(*runtime); + partition.WaitReady(); + + { + const auto response = partition.StatPartition(); + const auto& stats = response->Record.GetStats(); + UNIT_ASSERT_VALUES_EQUAL(0, stats.GetMergedBlobsCount()); + } + + const auto blockRange1 = TBlockRange32::WithLength(0, 1024); + const auto blockRange2 = TBlockRange32::WithLength(1024 * 1024, 1024); + const auto blockRange3 = + TBlockRange32::WithLength(2 * 1024 * 1024, 1024); + + partition.WriteBlocks(blockRange1, 1); + partition.WriteBlocks(blockRange1, 2); + + partition.WriteBlocks(blockRange2, 3); + partition.WriteBlocks(blockRange2, 4); + partition.WriteBlocks(blockRange2, 5); + + partition.WriteBlocks(blockRange3, 6); + partition.WriteBlocks(blockRange3, 7); + partition.WriteBlocks(blockRange3, 8); + + { + const auto response = partition.StatPartition(); + const auto& stats = response->Record.GetStats(); + UNIT_ASSERT_VALUES_EQUAL(8, stats.GetMergedBlobsCount()); + } + + TCompactionOptions options; + options.set(ToBit(ECompactionOption::Forced)); + partition.Compaction(0, options); + partition.Cleanup(); + + // checking that data wasn't corrupted + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(2), + GetBlockContent(partition.ReadBlocks(blockRange1.Start))); + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(2), + GetBlockContent(partition.ReadBlocks(blockRange1.End))); + + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(5), + GetBlockContent(partition.ReadBlocks(blockRange2.Start))); + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(5), + GetBlockContent(partition.ReadBlocks(blockRange2.End))); + + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(8), + GetBlockContent(partition.ReadBlocks(blockRange3.Start))); + UNIT_ASSERT_VALUES_EQUAL( + GetBlockContent(8), + 
GetBlockContent(partition.ReadBlocks(blockRange3.End))); + + // checking that we now have 1 blob in each of the ranges + { + const auto response = partition.StatPartition(); + const auto& stats = response->Record.GetStats(); + UNIT_ASSERT_VALUES_EQUAL(3, stats.GetMergedBlobsCount()); + } + } + + Y_UNIT_TEST(ShouldBatchSmallWritesToFreshChannelIfThresholdNotExceeded) + { + NProto::TStorageServiceConfig config; + config.SetWriteRequestBatchingEnabled(true); + config.SetWriteBlobThreshold(2_MB); + auto runtime = PrepareTestActorRuntime(config); + + TPartitionClient partition(*runtime); + partition.WaitReady(); + + const ui32 blockCount = 500; + runtime->SetObserverFunc( + PartitionBatchWriteCollector(*runtime, blockCount)); + + for (ui32 i = 0; i < blockCount; ++i) { + partition.SendWriteBlocksRequest(i, i); + } + + for (ui32 i = 0; i < blockCount; ++i) { + auto response = partition.RecvWriteBlocksResponse(); + UNIT_ASSERT(SUCCEEDED(response->GetStatus())); + } + + auto response = partition.StatPartition(); + const auto& stats = response->Record.GetStats(); + UNIT_ASSERT_VALUES_EQUAL(0, stats.GetMixedBlocksCount()); + UNIT_ASSERT_VALUES_EQUAL(0, stats.GetMergedBlocksCount()); + UNIT_ASSERT_VALUES_EQUAL(blockCount, stats.GetFreshBlocksCount()); + + // checking that drain-related counters are in a consistent state + partition.Drain(); + } } } // namespace NCloud::NBlockStore::NStorage::NPartition diff --git a/cloud/blockstore/libs/storage/partition2/part2_actor_readblob.cpp b/cloud/blockstore/libs/storage/partition2/part2_actor_readblob.cpp index 4516d58748e..22d80305cc5 100644 --- a/cloud/blockstore/libs/storage/partition2/part2_actor_readblob.cpp +++ b/cloud/blockstore/libs/storage/partition2/part2_actor_readblob.cpp @@ -429,8 +429,10 @@ void TPartitionActor::HandleReadBlobCompleted( >= Config->GetMaxReadBlobErrorsBeforeSuicide()) { LOG_WARN(ctx, TBlockStoreComponents::PARTITION, - "[%lu] Stop tablet because of too many ReadBlob errors: %s", + "[%lu] Stop tablet 
because of too many ReadBlob errors (actor %s, group %u): %s", TabletID(), + ev->Sender.ToString().c_str(), + msg->GroupId, FormatError(msg->GetError()).data()); ReportTabletBSFailure(); diff --git a/cloud/blockstore/libs/storage/partition2/part2_actor_writeblob.cpp b/cloud/blockstore/libs/storage/partition2/part2_actor_writeblob.cpp index 27a71bec5d6..35fc05521e2 100644 --- a/cloud/blockstore/libs/storage/partition2/part2_actor_writeblob.cpp +++ b/cloud/blockstore/libs/storage/partition2/part2_actor_writeblob.cpp @@ -359,8 +359,10 @@ void TPartitionActor::HandleWriteBlobCompleted( if (FAILED(msg->GetStatus())) { LOG_WARN(ctx, TBlockStoreComponents::PARTITION, - "[%lu] Stop tablet because of WriteBlob error: %s", + "[%lu] Stop tablet because of WriteBlob error (actor %s, group %u): %s", TabletID(), + ev->Sender.ToString().c_str(), + group, FormatError(msg->GetError()).data()); ReportTabletBSFailure(); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/config.h b/cloud/blockstore/libs/storage/partition_nonrepl/config.h index 0aee46709a8..d12b337a517 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/config.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/config.h @@ -61,6 +61,8 @@ class TNonreplicatedPartitionConfig const NActors::TActorId ParentActorId; const bool MuteIOErrors; const THashSet FreshDeviceIds; + // List of devices that have outdated data. Can only appear on mirror disks. 
+ const THashSet LaggingDeviceIds; const TDuration MaxTimedOutDeviceStateDuration; const bool MaxTimedOutDeviceStateDurationOverridden; const bool UseSimpleMigrationBandwidthLimiter; @@ -77,6 +79,7 @@ class TNonreplicatedPartitionConfig NActors::TActorId parentActorId, bool muteIOErrors, THashSet freshDeviceIds, + THashSet laggingDeviceIds, TDuration maxTimedOutDeviceStateDuration, bool maxTimedOutDeviceStateDurationOverridden, bool useSimpleMigrationBandwidthLimiter) @@ -88,6 +91,7 @@ class TNonreplicatedPartitionConfig , ParentActorId(std::move(parentActorId)) , MuteIOErrors(muteIOErrors) , FreshDeviceIds(std::move(freshDeviceIds)) + , LaggingDeviceIds(std::move(laggingDeviceIds)) , MaxTimedOutDeviceStateDuration(maxTimedOutDeviceStateDuration) , MaxTimedOutDeviceStateDurationOverridden(maxTimedOutDeviceStateDurationOverridden) , UseSimpleMigrationBandwidthLimiter(useSimpleMigrationBandwidthLimiter) @@ -105,9 +109,15 @@ class TNonreplicatedPartitionConfig TNonreplicatedPartitionConfigPtr Fork(TDevices devices) const { THashSet freshDeviceIds; + THashSet laggingDeviceIds; for (const auto& device: devices) { - if (FreshDeviceIds.contains(device.GetDeviceUUID())) { - freshDeviceIds.insert(device.GetDeviceUUID()); + const auto& uuid = device.GetDeviceUUID(); + + if (FreshDeviceIds.contains(uuid)) { + freshDeviceIds.insert(uuid); + } + if (LaggingDeviceIds.contains(uuid)) { + laggingDeviceIds.insert(uuid); } } @@ -120,6 +130,7 @@ class TNonreplicatedPartitionConfig ParentActorId, MuteIOErrors, std::move(freshDeviceIds), + std::move(laggingDeviceIds), MaxTimedOutDeviceStateDuration, MaxTimedOutDeviceStateDurationOverridden, UseSimpleMigrationBandwidthLimiter @@ -176,6 +187,11 @@ class TNonreplicatedPartitionConfig return FreshDeviceIds; } + const THashSet& GetLaggingDeviceIds() const + { + return LaggingDeviceIds; + } + auto GetMaxTimedOutDeviceStateDuration() const { return MaxTimedOutDeviceStateDuration; @@ -229,7 +245,8 @@ class TNonreplicatedPartitionConfig 
Y_UNUSED(relativeRange); return !Devices[i].GetDeviceUUID() - || FreshDeviceIds.contains(Devices[i].GetDeviceUUID()); + || FreshDeviceIds.contains(Devices[i].GetDeviceUUID()) + || LaggingDeviceIds.contains(Devices[i].GetDeviceUUID()); }); } diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/copy_range.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/copy_range.cpp index 0b43a185365..f3f92b03ebe 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/copy_range.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/copy_range.cpp @@ -165,6 +165,7 @@ void TCopyRangeActor::Done(const TActorContext& ctx, NProto::TError error) WriteStartTs, WriteDuration, std::move(AffectedBlockInfos), + 0, // RecommendedBandwidth, AllZeroes, RequestInfo->GetExecCycles()); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/direct_copy_range.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/direct_copy_range.cpp index 9d2d4b654b0..9455fb9fe6b 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/direct_copy_range.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/direct_copy_range.cpp @@ -151,6 +151,7 @@ void TDirectCopyRangeActor::Done(const TActorContext& ctx, NProto::TError error) writeTs, WriteDuration, TVector(), + RecommendedBandwidth, AllZeroes, RequestInfo->GetExecCycles()); @@ -210,6 +211,7 @@ void TDirectCopyRangeActor::HandleDirectCopyBlocksResponse( ReadDuration = TDuration::MicroSeconds(msg->Record.GetReadDuration()); WriteDuration = TDuration::MicroSeconds(msg->Record.GetReadDuration()); AllZeroes = msg->Record.GetAllZeroes(); + RecommendedBandwidth = msg->Record.GetRecommendedBandwidth(); } Done(ctx, msg->GetError()); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/direct_copy_range.h b/cloud/blockstore/libs/storage/partition_nonrepl/direct_copy_range.h index 8cfed03aa9a..6094ee2bf23 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/direct_copy_range.h +++ 
b/cloud/blockstore/libs/storage/partition_nonrepl/direct_copy_range.h @@ -36,6 +36,7 @@ class TDirectCopyRangeActor final TInstant StartTs; TDuration ReadDuration; TDuration WriteDuration; + ui64 RecommendedBandwidth = 0; bool AllZeroes = false; TDeviceInfoResponse SourceInfo; diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator.cpp index dedd108e59a..0ca38fd8a8e 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator.cpp @@ -23,6 +23,12 @@ TMigrationTimeoutCalculator::TMigrationTimeoutCalculator( TDuration TMigrationTimeoutCalculator::CalculateTimeout( TBlockRange64 nextProcessingRange) const { + if (RecommendedBandwidth) { + auto rangesPerSecond = + static_cast(RecommendedBandwidth) / ProcessingRangeSize; + return TDuration::Seconds(1) / Max(rangesPerSecond, 1.0); + } + // migration range is 4_MB const double processingRangeSizeMiBs = static_cast(ProcessingRangeSize) / (1024 * 1024); @@ -80,4 +86,9 @@ void TMigrationTimeoutCalculator::HandleUpdateBandwidthLimit( LimitedBandwidthMiBs = msg->LimitedBandwidthMiBs; } +void TMigrationTimeoutCalculator::SetRecommendedBandwidth(ui64 bandwidth) +{ + RecommendedBandwidth = bandwidth; +} + } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator.h b/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator.h index bb1aaf983dd..06aef1a5170 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator.h @@ -19,6 +19,7 @@ class TMigrationTimeoutCalculator const ui32 ExpectedDiskAgentSize = 0; TNonreplicatedPartitionConfigPtr PartitionConfig; ui32 LimitedBandwidthMiBs = 0; + ui64 
RecommendedBandwidth = 0; public: TMigrationTimeoutCalculator( @@ -35,6 +36,8 @@ class TMigrationTimeoutCalculator const TEvStatsServicePrivate:: TEvRegisterTrafficSourceResponse::TPtr& ev, const NActors::TActorContext& ctx); + + void SetRecommendedBandwidth(ui64 bandwidth); }; } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator_ut.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator_ut.cpp index 17d0c7b5aa6..e69d3753460 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator_ut.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/migration_timeout_calculator_ut.cpp @@ -181,6 +181,7 @@ TNonreplicatedPartitionConfigPtr MakePartitionConfig( NActors::TActorId(), false, // muteIOErrors THashSet(), // freshDeviceIds + THashSet(), // laggingDeviceIds TDuration::Zero(), // maxTimedOutDeviceStateDuration false, // maxTimedOutDeviceStateDurationOverridden useSimpleMigrationBandwidthLimiter); @@ -255,6 +256,34 @@ Y_UNIT_TEST_SUITE(TMigrationCalculatorTest) TBlockRange64::WithLength(1024 * 3, 1024))); } + Y_UNIT_TEST(ShouldCalculateMigrationTimeoutWithRecommendedBandwidth) + { + TMigrationTimeoutCalculator timeoutCalculator( + 16, + 100500, + MakePartitionConfig(MakeDevices(), true)); + + // Old-fashion timeout calculation + UNIT_ASSERT_VALUES_EQUAL( + TDuration::Seconds(1) / 4, + timeoutCalculator.CalculateTimeout( + TBlockRange64::WithLength(1024 * 0, 1024))); + + // Calculate timeout with recommended bandwidth + timeoutCalculator.SetRecommendedBandwidth(40_MB); + UNIT_ASSERT_VALUES_EQUAL( + TDuration::Seconds(1) / 10, + timeoutCalculator.CalculateTimeout( + TBlockRange64::WithLength(1024 * 0, 1024))); + + // Reset recommendation and do old-fashion timeout calculation + timeoutCalculator.SetRecommendedBandwidth(0); + UNIT_ASSERT_VALUES_EQUAL( + TDuration::Seconds(1) / 4, + timeoutCalculator.CalculateTimeout( + 
TBlockRange64::WithLength(1024 * 0, 1024))); + } + Y_UNIT_TEST(ShouldRegisterTrafficSourceWithSimpleLimiter) { TMyTestEnv testEnv; diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp index f120d037ae4..1931a9da47f 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.cpp @@ -208,6 +208,11 @@ void TMirrorPartitionActor::CompareChecksums(const TActorContext& ctx) DiskId.c_str(), DescribeRange(GetScrubbingRange()).c_str()); + if (Config->GetAutomaticallyEnableBufferCopyingAfterChecksumMismatch()) + { + AddTagForBufferCopying(ctx); + } + for (size_t i = 0; i < checksums.size(); i++) { LOG_ERROR( ctx, @@ -275,6 +280,22 @@ void TMirrorPartitionActor::StartResyncRange( BlockDigestGenerator); } +void TMirrorPartitionActor::AddTagForBufferCopying( + const NActors::TActorContext& ctx) +{ + auto requestInfo = CreateRequestInfo( + SelfId(), + 0, // cookie + MakeIntrusive()); + + TVector tags({TString(IntermediateWriteBufferTagName)}); + auto request = std::make_unique( + DiskId, + std::move(tags)); + + ctx.Send(MakeStorageServiceId(), std::move(request)); +} + void TMirrorPartitionActor::ReplyAndDie(const TActorContext& ctx) { NCloud::Reply(ctx, *Poisoner, std::make_unique()); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h index 60106832fcd..8679a972908 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_actor.h @@ -116,6 +116,7 @@ class TMirrorPartitionActor final const NActors::TActorContext& ctx, ui64 scrubbingRangeId); void StartResyncRange(const NActors::TActorContext& ctx); + void AddTagForBufferCopying(const NActors::TActorContext& ctx); private: STFUNC(StateWork); diff 
--git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_ut.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_ut.cpp index 7c2e0d2561c..90e0780a0ea 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_ut.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_resync_ut.cpp @@ -267,11 +267,12 @@ struct TTestEnv // only SSD/HDD distinction matters NProto::STORAGE_MEDIA_SSD_MIRROR3}, VolumeActorId, - false, // muteIOErrors + false, // muteIOErrors std::move(freshDeviceIds), - TDuration::Zero(), // maxTimedOutDeviceStateDuration - false, // maxTimedOutDeviceStateDurationOverridden - true // useSimpleMigrationBandwidthLimiter + THashSet(), // laggingDeviceIds + TDuration::Zero(), // maxTimedOutDeviceStateDuration + false, // maxTimedOutDeviceStateDurationOverridden + true // useSimpleMigrationBandwidthLimiter ); for (auto& replica: Replicas) { diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_state_ut.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_state_ut.cpp index 574bd18e914..2f9610b70cb 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_state_ut.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_state_ut.cpp @@ -60,11 +60,12 @@ struct TEnv 4_KB, volumeInfo, NActors::TActorId(), - false, // muteIOErrors + false, // muteIOErrors FreshDeviceIds, - TDuration::Zero(), // maxTimedOutDeviceStateDuration - false, // maxTimedOutDeviceStateDurationOverridden - true // useSimpleMigrationBandwidthLimiter + THashSet(), // laggingDeviceIds + TDuration::Zero(), // maxTimedOutDeviceStateDuration + false, // maxTimedOutDeviceStateDurationOverridden + true // useSimpleMigrationBandwidthLimiter ); { diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_ut.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_ut.cpp index ad9e6c31f5f..1e965ebe875 100644 --- 
a/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_ut.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_mirror_ut.cpp @@ -201,11 +201,12 @@ struct TTestEnv // only SSD/HDD distinction matters NProto::STORAGE_MEDIA_SSD_MIRROR3}, VolumeActorId, - false, // muteIOErrors + false, // muteIOErrors std::move(freshDeviceIds), - TDuration::Zero(), // maxTimedOutDeviceStateDuration - false, // maxTimedOutDeviceStateDurationOverridden - true // useSimpleMigrationBandwidthLimiter + THashSet(), // laggingDeviceIds + TDuration::Zero(), // maxTimedOutDeviceStateDuration + false, // maxTimedOutDeviceStateDurationOverridden + true // useSimpleMigrationBandwidthLimiter ); for (auto& replica: replicas) { @@ -259,6 +260,10 @@ struct TTestEnv ) ); + Runtime.AddLocalService( + MakeStorageServiceId(), + TActorSetupCmd(new TStorageServiceMock(), TMailboxType::Simple, 0)); + NKikimr::SetupTabletServices(Runtime); } @@ -1179,7 +1184,25 @@ Y_UNIT_TEST_SUITE(TMirrorPartitionTest) TDynamicCountersPtr critEventsCounters = new TDynamicCounters(); InitCriticalEventsCounter(critEventsCounters); - TTestEnv env(runtime); + NProto::TStorageServiceConfig config; + config.SetAutomaticallyEnableBufferCopyingAfterChecksumMismatch(true); + TTestEnv env(runtime, config); + + bool tagEnabled = false; + runtime.SetEventFilter([&] (auto& runtime, auto& event) { + Y_UNUSED(runtime); + if (event->GetTypeRewrite() == TEvService::EvAddTagsRequest) + { + using TRequest = + TEvService::TEvAddTagsRequest; + const auto& tags = event->template Get()->Tags; + UNIT_ASSERT_VALUES_EQUAL(1, tags.size()); + UNIT_ASSERT_VALUES_EQUAL(IntermediateWriteBufferTagName, tags[0]); + tagEnabled = true; + } + + return false; + }); const auto range1 = TBlockRange64::WithLength(0, 2); env.WriteMirror(range1, 'A'); @@ -1200,6 +1223,7 @@ Y_UNIT_TEST_SUITE(TMirrorPartitionTest) UNIT_ASSERT_VALUES_EQUAL(2, mirroredDiskMinorityChecksumMismatch->Val()); UNIT_ASSERT_VALUES_EQUAL(2, 
counters.Simple.ChecksumMismatches.Value); + UNIT_ASSERT(tagEnabled); const auto range3 = TBlockRange64::WithLength(1025, 50); env.WriteMirror(range3, 'A'); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h index 96f4735807b..9d81eedb642 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_events_private.h @@ -51,6 +51,7 @@ struct TEvNonreplPartitionPrivate TDuration WriteDuration; TVector AffectedBlockInfos; bool AllZeroes; + ui64 RecommendedBandwidth; EExecutionSide ExecutionSide; ui64 ExecCycles; @@ -62,6 +63,7 @@ struct TEvNonreplPartitionPrivate TInstant writeStartTs, TDuration writeDuration, TVector affectedBlockInfos, + ui64 recommendedBandwidth, bool allZeroes, ui64 execCycles) : Range(range) @@ -71,6 +73,7 @@ struct TEvNonreplPartitionPrivate , WriteDuration(writeDuration) , AffectedBlockInfos(std::move(affectedBlockInfos)) , AllZeroes(allZeroes) + , RecommendedBandwidth(recommendedBandwidth) , ExecutionSide(executionSide) , ExecCycles(execCycles) {} diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_migration.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_migration.cpp index 0ed8b62cd4e..3504cd82600 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_migration.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_common_actor_migration.cpp @@ -230,6 +230,7 @@ void TNonreplicatedPartitionMigrationCommonActor::HandleRangeMigrated( if (!IsMigrationAllowed()) { return; } + STORAGE_CHECK_PRECONDITION(TimeoutCalculator); auto* msg = ev->Get(); @@ -307,13 +308,15 @@ void TNonreplicatedPartitionMigrationCommonActor::HandleRangeMigrated( LOG_DEBUG( ctx, 
TBlockStoreComponents::PARTITION, - "[%s] Range %s migrated", + "[%s] Range %s migrated. Recommended bandwidth: %.2f MiB", DiskId.c_str(), - DescribeRange(msg->Range).c_str()); + DescribeRange(msg->Range).c_str(), + static_cast(msg->RecommendedBandwidth) / 1_MB); if (msg->AllZeroes) { ChangedRangesMap.MarkNotChanged(msg->Range); } + TimeoutCalculator->SetRecommendedBandwidth(msg->RecommendedBandwidth); NotifyMigrationProgressIfNeeded(ctx, msg->Range); NotifyMigrationFinishedIfNeeded(ctx); ScheduleRangeMigration(ctx); diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_ut.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_ut.cpp index ea9a7f9d7cc..c38a8d29688 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_ut.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_migration_ut.cpp @@ -150,7 +150,8 @@ struct TTestEnv ctx, owner, CreateRequestInfo(ev->Sender, ev->Cookie, msg->CallContext), - std::move(record)); + std::move(record), + 1000); }; NProto::TStorageServiceConfig storageConfig; @@ -203,11 +204,12 @@ struct TTestEnv // only SSD/HDD distinction matters NProto::STORAGE_MEDIA_SSD_NONREPLICATED}, VolumeActorId, - false, // muteIOErrors - THashSet(), // freshDeviceIds - TDuration::Zero(), // maxTimedOutDeviceStateDuration - false, // maxTimedOutDeviceStateDurationOverridden - false + false, // muteIOErrors + THashSet(), // freshDeviceIds + THashSet(), // laggingDeviceIds + TDuration::Zero(), // maxTimedOutDeviceStateDuration + false, // maxTimedOutDeviceStateDurationOverridden + false // useSimpleMigrationBandwidthLimiter ); auto part = std::make_unique( @@ -1005,6 +1007,76 @@ Y_UNIT_TEST_SUITE(TNonreplicatedPartitionMigrationTest) (migratedRangeCount * ProcessingBlockCount) * DefaultBlockSize, counters.WriteBlocks.RequestBytes); } + + Y_UNIT_TEST(ShouldUseRecommendedBandwidth) + { + using TEvGetDeviceForRangeRequest = + 
TEvNonreplPartitionPrivate::TEvGetDeviceForRangeRequest; + using TEvGetDeviceForRangeResponse = + TEvNonreplPartitionPrivate::TEvGetDeviceForRangeResponse; + using EPurpose = TEvGetDeviceForRangeRequest::EPurpose; + + TTestBasicRuntime runtime; + + auto migrationState = std::make_shared(); + migrationState->IsMigrationAllowed = false; + + TTestEnv env( + runtime, + TTestEnv::DefaultDevices(runtime.GetNodeId(0)), + TTestEnv::DefaultMigrations(runtime.GetNodeId(0)), + NProto::VOLUME_IO_OK, + false, + migrationState, + true); + TPartitionClient client(runtime, env.ActorId); + + migrationState->IsMigrationAllowed = true; + + { // Request to first device + client.SendRequest( + env.ActorId, + std::make_unique( + EPurpose::ForReading, + TBlockRange64::WithLength(2040, 8))); + auto response = client.RecvResponse(); + UNIT_ASSERT_C( + SUCCEEDED(response->GetStatus()), + response->GetErrorReason()); + UNIT_ASSERT_VALUES_EQUAL("vasya", response->Device.GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL( + TBlockRange64::WithLength(2040, 8), + response->DeviceBlockRange); + } + + WaitForMigrations(runtime, 3); + + { + // Request to second device + client.SendRequest( + env.ActorId, + std::make_unique( + EPurpose::ForWriting, + TBlockRange64::WithLength(2048, 8))); + auto response = client.RecvResponse(); + UNIT_ASSERT_C( + SUCCEEDED(response->GetStatus()), + response->GetErrorReason()); + UNIT_ASSERT_VALUES_EQUAL("petya", response->Device.GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL( + TBlockRange64::WithLength(0, 8), + response->DeviceBlockRange); + } + { // Request on the border of two devices + client.SendRequest( + env.ActorId, + std::make_unique( + EPurpose::ForReading, + TBlockRange64::WithLength(2040, 16))); + auto response = client.RecvResponse(); + UNIT_ASSERT_VALUES_EQUAL(E_ABORTED, response->Error.GetCode()); + } + } } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_ut.cpp 
b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_ut.cpp index 12b1d3a8d9f..cda0d626f45 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_ut.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_rdma_ut.cpp @@ -130,11 +130,12 @@ struct TTestEnv // only SSD/HDD distinction matters NProto::STORAGE_MEDIA_SSD_NONREPLICATED}, VolumeActorId, - false, // muteIOErrors - THashSet(), // freshDeviceIds - TDuration::Zero(), // maxTimedOutDeviceStateDuration - false, // maxTimedOutDeviceStateDurationOverridden - false // useSimpleMigrationBandwidthLimiter + false, // muteIOErrors + THashSet(), // freshDeviceIds + THashSet(), // laggingDeviceIds + TDuration::Zero(), // maxTimedOutDeviceStateDuration + false, // maxTimedOutDeviceStateDurationOverridden + false // useSimpleMigrationBandwidthLimiter ); auto part = std::make_unique( diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_ut.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_ut.cpp index fe7af8eadba..963728f612e 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_ut.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/part_nonrepl_ut.cpp @@ -132,10 +132,11 @@ struct TTestEnv TNonreplicatedPartitionConfig::TVolumeInfo{Now(), params.MediaKind}, VolumeActorId, params.MuteIOErrors, - THashSet(), // freshDeviceIds - TDuration::Zero(), // maxTimedOutDeviceStateDuration - false, // maxTimedOutDeviceStateDurationOverridden - false // useSimpleMigrationBandwidthLimiter + THashSet(), // freshDeviceIds + THashSet(), // laggingDeviceIds + TDuration::Zero(), // maxTimedOutDeviceStateDuration + false, // maxTimedOutDeviceStateDurationOverridden + false // useSimpleMigrationBandwidthLimiter ); auto part = std::make_unique( diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/resync_range_ut.cpp b/cloud/blockstore/libs/storage/partition_nonrepl/resync_range_ut.cpp index 9b144ccfd21..2ff0bc7e8a9 100644 
--- a/cloud/blockstore/libs/storage/partition_nonrepl/resync_range_ut.cpp +++ b/cloud/blockstore/libs/storage/partition_nonrepl/resync_range_ut.cpp @@ -135,11 +135,12 @@ struct TTestEnv // only SSD/HDD distinction matters NProto::STORAGE_MEDIA_SSD_NONREPLICATED}, VolumeActorId, - false, // muteIOErrors - THashSet(), // freshDeviceIds - TDuration::Zero(), // maxTimedOutDeviceStateDuration - false, // maxTimedOutDeviceStateDurationOverridden - true // useSimpleMigrationBandwidthLimiter + false, // muteIOErrors + THashSet(), // freshDeviceIds + THashSet(), // laggingDeviceIds + TDuration::Zero(), // maxTimedOutDeviceStateDuration + false, // maxTimedOutDeviceStateDurationOverridden + true // useSimpleMigrationBandwidthLimiter ); auto part = std::make_unique( diff --git a/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h b/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h index 1994bf4e812..09b35fdbe05 100644 --- a/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h +++ b/cloud/blockstore/libs/storage/partition_nonrepl/ut_env.h @@ -236,6 +236,47 @@ class TDummyActor final //////////////////////////////////////////////////////////////////////////////// +class TStorageServiceMock final: public NActors::TActor +{ +public: + TStorageServiceMock() + : TActor(&TThis::StateWork) + { + } + +private: + STFUNC(StateWork) + { + switch (ev->GetTypeRewrite()) { + HFunc(NActors::TEvents::TEvPoisonPill, HandlePoisonPill); + + HFunc(TEvService::TEvAddTagsRequest, HandleAddTagsRequest); + + default: + Y_ABORT("Unexpected event %x", ev->GetTypeRewrite()); + } + } + + void HandlePoisonPill( + const NActors::TEvents::TEvPoisonPill::TPtr& ev, + const NActors::TActorContext& ctx) + { + Y_UNUSED(ev); + + Die(ctx); + } + + void HandleAddTagsRequest( + const TEvService::TEvAddTagsRequest::TPtr& ev, + const NActors::TActorContext& ctx) + { + Y_UNUSED(ev); + Y_UNUSED(ctx); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + class 
TPartitionClient { private: diff --git a/cloud/blockstore/libs/storage/protos/disk.proto b/cloud/blockstore/libs/storage/protos/disk.proto index 8bdc86bfcd8..c1e3ea91467 100644 --- a/cloud/blockstore/libs/storage/protos/disk.proto +++ b/cloud/blockstore/libs/storage/protos/disk.proto @@ -278,6 +278,9 @@ message TDiskConfig // A log of important events in the life of this disk. repeated TDiskHistoryItem History = 17; + + // A list of devices that are lagging behind on writes. + repeated TLaggingDevice LaggingDevices = 18; } //////////////////////////////////////////////////////////////////////////////// @@ -417,6 +420,41 @@ message TAgentStats //////////////////////////////////////////////////////////////////////////////// +message TLaggingDevice +{ + // UUID of the lagging device. + string DeviceUUID = 1; + + // Index of the lagging device in the replica. + uint32 RowIndex = 2; +} + +//////////////////////////////////////////////////////////////////////////////// + +message TLaggingAgent +{ + // Agent id. + string AgentId = 1; + + // Index of the mirror disk replica. + // 0 - main devices + // 1,2 - replica devices + uint32 ReplicaIndex = 2; + + // A list of devices that belong to the agent. + repeated TLaggingDevice Devices = 3; +} + +//////////////////////////////////////////////////////////////////////////////// + +message TLaggingAgentsInfo +{ + // A list of agents that lagging behind on writes. + repeated TLaggingAgent Agents = 1; +} + +//////////////////////////////////////////////////////////////////////////////// + message TDiskRegistryAgentListRequestParams { repeated string AgentIds = 1; @@ -635,6 +673,9 @@ message TAllocateDiskResponse // New devices used instead of recently replaced ones. repeated string DeviceReplacementUUIDs = 8; + + // Devices that had been lagging. 
+ repeated TLaggingDevice RemovedLaggingDevices = 9; } //////////////////////////////////////////////////////////////////////////////// @@ -983,6 +1024,9 @@ message TDirectCopyBlocksResponse // The time spent writing the data (in microseconds). uint64 WriteDuration = 4; + + // The disk agent's recommended bandwidth for copying blocks. + uint64 RecommendedBandwidth = 5; } //////////////////////////////////////////////////////////////////////////////// @@ -1662,6 +1706,27 @@ message TGetAgentNodeIdResponse bool Connected = 4; } +//////////////////////////////////////////////////////////////////////////////// +// Report that some of the devices were lagging. + +message TAddLaggingDevicesRequest +{ + // Optional request headers. + THeaders Headers = 1; + + // Disk identifier to perform operations on. + string DiskId = 2; + + // Devices that has been lagging. + repeated TLaggingDevice LaggingDevices = 3; +} + +message TAddLaggingDevicesResponse +{ + // Optional error, set only if error happened. + NCloud.NProto.TError Error = 1; +} + //////////////////////////////////////////////////////////////////////////////// // Get dependent disks diff --git a/cloud/blockstore/libs/storage/protos/volume.proto b/cloud/blockstore/libs/storage/protos/volume.proto index dfae6356a6a..5b98af3f8c1 100644 --- a/cloud/blockstore/libs/storage/protos/volume.proto +++ b/cloud/blockstore/libs/storage/protos/volume.proto @@ -652,3 +652,24 @@ message TGetStorageConfigResponse // Result Storage config. NProto.TStorageServiceConfig StorageConfig = 3; } + +//////////////////////////////////////////////////////////////////////////////// +// GracefulShutdown request/response. + +message TGracefulShutdownRequest +{ + // Optional request headers. + THeaders Headers = 1; + + // Label of volume to shutdown. + string DiskId = 2; +} + +message TGracefulShutdownResponse +{ + // Optional error, set only if error happened. + NCloud.NProto.TError Error = 1; + + // Request traces. 
+ NCloud.NProto.TTraceInfo Trace = 2; +} diff --git a/cloud/blockstore/libs/storage/protos_ydb/volume.proto b/cloud/blockstore/libs/storage/protos_ydb/volume.proto index 3d784a3006a..90df08528f3 100644 --- a/cloud/blockstore/libs/storage/protos_ydb/volume.proto +++ b/cloud/blockstore/libs/storage/protos_ydb/volume.proto @@ -67,6 +67,12 @@ message TVolumeMeta // We don't allow clients with old sequential number to mount disk for read/write // in order to prevent data corruption during disk filling. uint64 FillSeqNumber = 16; + + // A list of agents that lagging behind on writes. Used only for mirror + // disks. An agent can exit this state if it starts to respond to requests, + // or if the volume has been restarted/reallocated (i.e. the partition has + // restarted), or if the DR has replaced all lagging devices. + TLaggingAgentsInfo LaggingAgentsInfo = 17; } //////////////////////////////////////////////////////////////////////////////// diff --git a/cloud/blockstore/libs/storage/service/service_actor_actions_modify_tags.cpp b/cloud/blockstore/libs/storage/service/service_actor_actions_modify_tags.cpp index 242d38f162a..46f652a072c 100644 --- a/cloud/blockstore/libs/storage/service/service_actor_actions_modify_tags.cpp +++ b/cloud/blockstore/libs/storage/service/service_actor_actions_modify_tags.cpp @@ -352,6 +352,35 @@ STFUNC(TModifyTagsActionActor::StateWaitReady) //////////////////////////////////////////////////////////////////////////////// +void TServiceActor::HandleAddTags( + const TEvService::TEvAddTagsRequest::TPtr& ev, + const NActors::TActorContext& ctx) +{ + auto* msg = ev->Get(); + + auto requestInfo = CreateRequestInfo( + SelfId(), + 0, // cookie + MakeIntrusive()); + + NPrivateProto::TModifyTagsRequest modifyTagsRequest; + modifyTagsRequest.SetDiskId(msg->DiskId); + for (const auto& tag: msg->Tags) { + modifyTagsRequest.AddTagsToAdd(tag); + } + + TString input; + google::protobuf::util::MessageToJsonString(modifyTagsRequest, &input); + + 
NCloud::Register( + ctx, + std::make_unique( + std::move(requestInfo), + std::move(input))); +} + +//////////////////////////////////////////////////////////////////////////////// + TResultOrError TServiceActor::CreateModifyTagsActionActor( TRequestInfoPtr requestInfo, TString input) diff --git a/cloud/blockstore/libs/storage/service/service_actor_destroy.cpp b/cloud/blockstore/libs/storage/service/service_actor_destroy.cpp index b78674d6e2f..1244a527c03 100644 --- a/cloud/blockstore/libs/storage/service/service_actor_destroy.cpp +++ b/cloud/blockstore/libs/storage/service/service_actor_destroy.cpp @@ -34,6 +34,7 @@ class TDestroyVolumeActor final const bool DestroyIfBroken; const bool Sync; const ui64 FillGeneration; + const TDuration Timeout; bool IsDiskRegistryBased = false; bool VolumeNotFoundInSS = false; @@ -47,7 +48,8 @@ class TDestroyVolumeActor final TString diskId, bool destroyIfBroken, bool sync, - ui64 fillGeneration); + ui64 fillGeneration, + TDuration timeout); void Bootstrap(const TActorContext& ctx); @@ -57,6 +59,7 @@ class TDestroyVolumeActor final void NotifyDiskRegistry(const TActorContext& ctx); void StatVolume(const TActorContext& ctx); void DeallocateDisk(const TActorContext& ctx); + void GracefulShutdown(const TActorContext& ctx); NProto::TError CheckIfDestructionIsAllowed() const; void HandleModifyResponse( @@ -79,6 +82,15 @@ class TDestroyVolumeActor final const TEvDiskRegistry::TEvDeallocateDiskResponse::TPtr& ev, const TActorContext& ctx); + void HandleGracefulShutdownResponse( + const TEvVolume::TEvGracefulShutdownResponse::TPtr& + ev, + const TActorContext& ctx); + + void HandleTimeout( + const TEvents::TEvWakeup::TPtr& ev, + const TActorContext& ctx); + void ReplyAndDie(const TActorContext& ctx, NProto::TError error); private: @@ -95,7 +107,8 @@ TDestroyVolumeActor::TDestroyVolumeActor( TString diskId, bool destroyIfBroken, bool sync, - ui64 fillGeneration) + ui64 fillGeneration, + TDuration timeout) : Sender(sender) , Cookie(cookie) , 
AttachedDiskDestructionTimeout(attachedDiskDestructionTimeout) @@ -105,10 +118,12 @@ TDestroyVolumeActor::TDestroyVolumeActor( , DestroyIfBroken(destroyIfBroken) , Sync(sync) , FillGeneration(fillGeneration) + , Timeout(timeout) {} void TDestroyVolumeActor::Bootstrap(const TActorContext& ctx) { + ctx.Schedule(Timeout, new TEvents::TEvWakeup()); if (DestroyIfBroken) { WaitReady(ctx); } else { @@ -180,6 +195,13 @@ void TDestroyVolumeActor::DeallocateDisk(const TActorContext& ctx) NCloud::Send(ctx, MakeDiskRegistryProxyServiceId(), std::move(request)); } +void TDestroyVolumeActor::GracefulShutdown(const TActorContext& ctx) +{ + auto request = std::make_unique(); + request->Record.SetDiskId(DiskId); + NCloud::Send(ctx, MakeVolumeProxyServiceId(), std::move(request)); +} + NProto::TError TDestroyVolumeActor::CheckIfDestructionIsAllowed() const { const auto& prefixes = DestructionAllowedOnlyForDisksWithIdPrefixes; @@ -270,9 +292,16 @@ void TDestroyVolumeActor::HandleMarkDiskForCleanupResponse( // disk is broken and will be removed by DR at some point if (error.GetCode() == E_NOT_FOUND) { - LOG_INFO(ctx, TBlockStoreComponents::SERVICE, - "volume %s not found in registry", DiskId.Quote().data()); - } else if (HasError(error)) { + LOG_INFO( + ctx, + TBlockStoreComponents::SERVICE, + "volume %s not found in registry", + DiskId.Quote().data()); + DestroyVolume(ctx); + return; + } + + if (HasError(error)) { LOG_ERROR(ctx, TBlockStoreComponents::SERVICE, "Volume %s: unable to notify DR about disk destruction: %s", DiskId.Quote().data(), @@ -282,7 +311,7 @@ void TDestroyVolumeActor::HandleMarkDiskForCleanupResponse( return; } - DestroyVolume(ctx); + GracefulShutdown(ctx); } void TDestroyVolumeActor::HandleDeallocateDiskResponse( @@ -383,6 +412,45 @@ void TDestroyVolumeActor::HandleStatVolumeResponse( } } +void TDestroyVolumeActor::HandleGracefulShutdownResponse( + const TEvVolume::TEvGracefulShutdownResponse::TPtr& ev, + const TActorContext& ctx) +{ + const auto* msg = 
ev->Get(); + + if (auto error = msg->GetError(); HasError(error)) { + LOG_ERROR( + ctx, + TBlockStoreComponents::SERVICE, + "Volume %s: unable to gracefully stop volume: %s", + DiskId.Quote().data(), + FormatError(error).data()); + + ReplyAndDie(ctx, std::move(error)); + return; + } + + DestroyVolume(ctx); +} + +void TDestroyVolumeActor::HandleTimeout( + const TEvents::TEvWakeup::TPtr& ev, + const TActorContext& ctx) +{ + Y_UNUSED(ev); + + LOG_ERROR( + ctx, + TBlockStoreComponents::SERVICE, + "Timeout destroy volume request, diskId = %s, destroyIfBroken = %d, " + "sync = %d", + DiskId.c_str(), + DestroyIfBroken, + Sync); + + ReplyAndDie(ctx, MakeError(E_TIMEOUT, "Timeout")); +} + void TDestroyVolumeActor::ReplyAndDie( const TActorContext& ctx, NProto::TError error) @@ -412,6 +480,12 @@ STFUNC(TDestroyVolumeActor::StateWork) TEvService::TEvStatVolumeResponse, HandleStatVolumeResponse); + HFunc( + TEvVolume::TEvGracefulShutdownResponse, + HandleGracefulShutdownResponse); + + HFunc(TEvents::TEvWakeup, HandleTimeout); + default: HandleUnexpectedEvent(ev, TBlockStoreComponents::SERVICE); break; @@ -449,7 +523,8 @@ void TServiceActor::HandleDestroyVolume( diskId, destroyIfBroken, sync, - fillGeneration); + fillGeneration, + Config->GetDestroyVolumeTimeout()); } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/testlib/disk_agent_mock.h b/cloud/blockstore/libs/storage/testlib/disk_agent_mock.h index 35c4fd724cf..8b9ee4d1edd 100644 --- a/cloud/blockstore/libs/storage/testlib/disk_agent_mock.h +++ b/cloud/blockstore/libs/storage/testlib/disk_agent_mock.h @@ -100,6 +100,8 @@ class TDiskAgentMock final HFunc(TEvDiskAgent::TEvZeroDeviceBlocksRequest, HandleZeroDeviceBlocks); HFunc(TEvDiskAgent::TEvChecksumDeviceBlocksRequest, HandleChecksumDeviceBlocks); HFunc(TEvDiskAgent::TEvDirectCopyBlocksRequest, HandleDirectCopyBlocks); + HFunc(TEvDiskAgent::TEvAcquireDevicesRequest, HandleAcquireDevicesRequest); + 
HFunc(TEvDiskAgent::TEvReleaseDevicesRequest, HandleReleaseDevicesRequest); default: Y_ABORT("Unexpected event %x", ev->GetTypeRewrite()); @@ -315,6 +317,26 @@ class TDiskAgentMock final { State->CreateDirectCopyActorFunc(ev, ctx, SelfId()); } + + void HandleAcquireDevicesRequest( + TEvDiskAgent::TEvAcquireDevicesRequest::TPtr& ev, + const NActors::TActorContext& ctx) + { + auto response = + std::make_unique(); + + Reply(ctx, *ev, std::move(response)); + } + + void HandleReleaseDevicesRequest( + TEvDiskAgent::TEvReleaseDevicesRequest::TPtr& ev, + const NActors::TActorContext& ctx) + { + auto response = + std::make_unique(); + + Reply(ctx, *ev, std::move(response)); + } }; } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/testlib/disk_registry_proxy_mock.h b/cloud/blockstore/libs/storage/testlib/disk_registry_proxy_mock.h index 2e79fe56936..83cb59c62b3 100644 --- a/cloud/blockstore/libs/storage/testlib/disk_registry_proxy_mock.h +++ b/cloud/blockstore/libs/storage/testlib/disk_registry_proxy_mock.h @@ -34,10 +34,12 @@ class TDiskRegistryProxyMock final TDiskRegistryStatePtr State; public: - TDiskRegistryProxyMock(TDiskRegistryStatePtr state) + explicit TDiskRegistryProxyMock(TDiskRegistryStatePtr state) : TActor(&TThis::StateWork) , State(std::move(state)) - {} + { + State->DeviceIsAllocated.resize(State->Devices.size(), false); + } private: STFUNC(StateWork) @@ -136,6 +138,10 @@ class TDiskRegistryProxyMock final TEvDiskRegistry::TEvDeallocateCheckpointRequest, HandleDeallocateCheckpoint); + HFunc( + TEvDiskRegistry::TEvAddLaggingDevicesRequest, + HandleAddLaggingDevices); + HFunc( TEvService::TEvCmsActionRequest, HandleCmsAction); @@ -144,6 +150,7 @@ class TDiskRegistryProxyMock final TEvDiskRegistryProxy::TEvGetDrTabletInfoRequest, HandleGetDrTabletInfo); + IgnoreFunc(NKikimr::TEvLocal::TEvTabletMetrics); default: @@ -184,6 +191,26 @@ class TDiskRegistryProxyMock final } } + const NProto::TDeviceConfig* AllocateNextDevice(i32 
prevNodeId) + { + for (int i = 0; i < State->Devices.ysize(); i++) { + if (State->DeviceIsAllocated[i]) { + continue; + } + + if (State->AllocateDiskReplicasOnDifferentNodes && + static_cast(State->Devices[i].GetNodeId()) <= prevNodeId) + { + continue; + } + + State->DeviceIsAllocated[i] = true; + return &State->Devices[i]; + } + + return nullptr; + } + void HandleAllocateDisk( const TEvDiskRegistry::TEvAllocateDiskRequest::TPtr& ev, const NActors::TActorContext& ctx) @@ -217,6 +244,7 @@ class TDiskRegistryProxyMock final disk.PoolName = msg->Record.GetPoolName(); disk.MediaKind = msg->Record.GetStorageMediaKind(); + disk.Replicas.resize(State->ReplicaCount); disk.Migrations.clear(); ui64 bytes = (1 + State->ReplicaCount) * msg->Record.GetBlocksCount() @@ -225,49 +253,61 @@ class TDiskRegistryProxyMock final ui32 i = 0; while (bytes) { ui64 deviceBytes = 0; + i32 prevNodeId = -1; if (i < disk.Devices.size()) { - deviceBytes = Min(bytes, disk.Devices[i].GetBlocksCount() - * disk.Devices[i].GetBlockSize()); + deviceBytes = + Min(bytes, + disk.Devices[i].GetBlocksCount() * + disk.Devices[i].GetBlockSize()); } else { - if (State->NextDeviceIdx >= State->Devices.size()) { + const auto* device = AllocateNextDevice(prevNodeId); + if (!device) { break; } - disk.Devices.push_back( - State->Devices[State->NextDeviceIdx++]); - const auto& device = disk.Devices.back(); - deviceBytes = device.GetBlocksCount() * device.GetBlockSize(); + disk.Devices.push_back(*device); + deviceBytes = device->GetBlocksCount() * device->GetBlockSize(); + prevNodeId = static_cast(device->GetNodeId()); } - disk.Replicas.resize(State->ReplicaCount); - for (auto& replica: disk.Replicas) { if (i < replica.size()) { - deviceBytes += Min(bytes, replica[i].GetBlocksCount() - * replica[i].GetBlockSize()); + deviceBytes += + Min(bytes, + replica[i].GetBlocksCount() * + replica[i].GetBlockSize()); } else { - if (State->NextDeviceIdx >= State->Devices.size()) { + const auto* device = 
AllocateNextDevice(prevNodeId); + if (!device) { break; } - replica.push_back( - State->Devices[State->NextDeviceIdx++]); - const auto& device = replica.back(); + replica.push_back(*device); deviceBytes += - device.GetBlocksCount() * device.GetBlockSize(); + device->GetBlocksCount() * device->GetBlockSize(); + prevNodeId = static_cast(device->GetNodeId()); } } if (State->MigrationMode != EMigrationMode::Disabled) { - auto& device = disk.Devices[i]; - auto* mdevice = State->MigrationDevices.FindPtr(device.GetDeviceUUID()); - if (mdevice) { - if (State->MigrationMode == EMigrationMode::InProgress) { - disk.Migrations[device.GetDeviceUUID()] = *mdevice; - } else { - UNIT_ASSERT(State->MigrationMode == EMigrationMode::Finish); - device = *mdevice; + auto initMigration = [&](NProto::TDeviceConfig& device) + { + auto* mdevice = + State->MigrationDevices.FindPtr(device.GetDeviceUUID()); + if (mdevice) { + if (State->MigrationMode == EMigrationMode::InProgress) + { + disk.Migrations[device.GetDeviceUUID()] = *mdevice; + } else { + UNIT_ASSERT( + State->MigrationMode == EMigrationMode::Finish); + device = *mdevice; + } } + }; + initMigration(disk.Devices[i]); + for (auto& replica: disk.Replicas) { + initMigration(replica[i]); } } @@ -281,6 +321,11 @@ class TDiskRegistryProxyMock final *response->Record.AddDeviceReplacementUUIDs() = deviceId; } + for (const auto& laggingDevice: disk.LaggingDevices) { + *response->Record.AddRemovedLaggingDevices() = laggingDevice; + } + disk.LaggingDevices.clear(); + if (bytes) { response->Record.MutableError()->CopyFrom( MakeError(E_BS_OUT_OF_SPACE, "not enough available devices") @@ -964,6 +1009,32 @@ class TDiskRegistryProxyMock final TEvDiskRegistry::TEvDeallocateCheckpointResponse>()); } + void HandleAddLaggingDevices( + const TEvDiskRegistry::TEvAddLaggingDevicesRequest::TPtr& ev, + const NActors::TActorContext& ctx) + { + const auto* msg = ev->Get(); + const auto& diskId = msg->Record.GetDiskId(); + auto* diskState = 
State->Disks.FindPtr(diskId); + if (!diskState) { + NCloud::Reply( + ctx, + *ev, + std::make_unique( + MakeError(E_NOT_FOUND, "Disk not found"))); + return; + } + + for (const auto& laggingDevice: msg->Record.GetLaggingDevices()) { + diskState->LaggingDevices.push_back(laggingDevice); + } + + NCloud::Reply( + ctx, + *ev, + std::make_unique()); + } + void HandleCmsAction( const TEvService::TEvCmsActionRequest::TPtr& ev, const NActors::TActorContext& ctx) diff --git a/cloud/blockstore/libs/storage/testlib/test_env_state.h b/cloud/blockstore/libs/storage/testlib/test_env_state.h index 52b976859d1..250e453e62a 100644 --- a/cloud/blockstore/libs/storage/testlib/test_env_state.h +++ b/cloud/blockstore/libs/storage/testlib/test_env_state.h @@ -32,6 +32,7 @@ struct TDiskRegistryState: TAtomicRefCount TInstant IOModeTs; TMap Migrations; TVector> Replicas; + TVector LaggingDevices; bool MuteIOErrors = false; TString PoolName; NCloud::NProto::EStorageMediaKind MediaKind = {}; @@ -51,8 +52,8 @@ struct TDiskRegistryState: TAtomicRefCount TSet DisksMarkedForCleanup; TMap PlacementGroups; TVector Devices; + TVector DeviceIsAllocated; TMap MigrationDevices; - ui32 NextDeviceIdx = 0; ui32 CurrentErrorCode = S_OK; EMigrationMode MigrationMode = EMigrationMode::Disabled; ui32 ReplicaCount = 0; @@ -61,6 +62,7 @@ struct TDiskRegistryState: TAtomicRefCount TVector> AgentStates; + bool AllocateDiskReplicasOnDifferentNodes = false; bool WritableState = false; }; diff --git a/cloud/blockstore/libs/storage/volume/actors/shadow_disk_actor.cpp b/cloud/blockstore/libs/storage/volume/actors/shadow_disk_actor.cpp index c546e44e47f..5e151823c58 100644 --- a/cloud/blockstore/libs/storage/volume/actors/shadow_disk_actor.cpp +++ b/cloud/blockstore/libs/storage/volume/actors/shadow_disk_actor.cpp @@ -620,6 +620,8 @@ bool TShadowDiskActor::OnMessage( TEvService::TEvReadBlocksLocalRequest, HandleReadBlocks); + IgnoreFunc(TEvVolumePrivate::TEvDeviceTimeoutedRequest); + // Write/zero request. 
case TEvService::TEvWriteBlocksRequest::EventType: { return HandleWriteZeroBlocks( @@ -775,7 +777,9 @@ void TShadowDiskActor::HandleShadowDiskAcquired( } if (HasError(msg->Error)) { - if (acquireReason != EAcquireReason::PeriodicalReAcquire) { + if (msg->Error.GetCode() == E_NOT_FOUND || + acquireReason != EAcquireReason::PeriodicalReAcquire) + { SetErrorState(ctx); } return; @@ -804,6 +808,7 @@ void TShadowDiskActor::CreateShadowDiskConfig() SelfId(), // need to handle TEvRdmaUnavailable, TEvReacquireDisk true, // muteIOErrors THashSet(), // freshDeviceIds + THashSet(), // laggingDeviceIds TDuration(), // maxTimedOutDeviceStateDuration false, // maxTimedOutDeviceStateDurationOverridden true // useSimpleMigrationBandwidthLimiter diff --git a/cloud/blockstore/libs/storage/volume/model/helpers.cpp b/cloud/blockstore/libs/storage/volume/model/helpers.cpp new file mode 100644 index 00000000000..1070bf0bb59 --- /dev/null +++ b/cloud/blockstore/libs/storage/volume/model/helpers.cpp @@ -0,0 +1,279 @@ +#include "helpers.h" + +#include +#include + +#include +#include +#include +#include + +namespace NCloud::NBlockStore::NStorage { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +using google::protobuf::RepeatedPtrField; + +struct TLaggingDeviceIndexCmp +{ + bool operator()( + const NProto::TLaggingDevice& lhs, + const NProto::TLaggingDevice& rhs) const + { + return lhs.GetRowIndex() < rhs.GetRowIndex(); + } +}; + +struct TDeviceLocation +{ + ui32 RowIndex = 0; + ui32 ReplicaIndex = 0; + std::optional MigrationIndex; +}; + +const RepeatedPtrField& GetReplicaDevices( + const NProto::TVolumeMeta& meta, + ui32 index) +{ + if (index == 0) { + return meta.GetDevices(); + } + index--; + return meta.GetReplicas(index).GetDevices(); +} + +const NProto::TDeviceConfig& GetDeviceConfig( + const NProto::TVolumeMeta& meta, + TDeviceLocation deviceLocation) +{ + if (!deviceLocation.MigrationIndex) { + return GetReplicaDevices( + meta, + 
deviceLocation.ReplicaIndex)[deviceLocation.RowIndex]; + } + return meta.GetMigrations(*deviceLocation.MigrationIndex).GetTargetDevice(); +} + +std::optional FindDeviceLocation( + const NProto::TVolumeMeta& meta, + TStringBuf deviceUUID) +{ + auto deviceMatcher = [&deviceUUID](const auto& device) + { + return device.GetDeviceUUID() == deviceUUID; + }; + + for (size_t i = 0; i <= meta.ReplicasSize(); i++) { + const auto& devices = GetReplicaDevices(meta, i); + const auto index = FindIndexIf(devices, deviceMatcher); + if (index != NPOS) { + return TDeviceLocation{ + .RowIndex = static_cast(index), + .ReplicaIndex = static_cast(i)}; + } + } + + for (size_t i = 0; i < meta.MigrationsSize(); i++) { + const auto& migration = meta.GetMigrations(i); + if (!deviceMatcher(migration.GetTargetDevice())) { + continue; + } + auto sourceLocation = + FindDeviceLocation(meta, migration.GetSourceDeviceId()); + if (!sourceLocation) { + ReportDiskAllocationFailure( + TStringBuilder() + << "Migration source device " << migration.GetSourceDeviceId() + << " doesn't belong to the disk " + << meta.GetConfig().GetDiskId() << ". 
Target device: " + << migration.GetTargetDevice().GetDeviceUUID()); + continue; + } + sourceLocation->MigrationIndex = i; + return sourceLocation; + } + + return std::nullopt; +} + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +const NProto::TDeviceConfig* FindDeviceConfig( + const NProto::TVolumeMeta& meta, + TStringBuf deviceUUID) +{ + auto deviceLocation = FindDeviceLocation(meta, deviceUUID); + if (!deviceLocation) { + return nullptr; + } + return &GetDeviceConfig(meta, *deviceLocation); +} + +std::optional FindReplicaIndexByAgentId( + const NProto::TVolumeMeta& meta, + TStringBuf agentId) +{ + const auto deviceMatcher = [agentId](const NProto::TDeviceConfig& device) + { + return device.GetAgentId() == agentId; + }; + + for (size_t i = 0; i <= meta.ReplicasSize(); i++) { + const auto& devices = GetReplicaDevices(meta, i); + if (AnyOf(devices, deviceMatcher)) { + return i; + } + } + + for (const auto& migration: meta.GetMigrations()) { + if (deviceMatcher(migration.GetTargetDevice())) { + auto deviceLocation = + FindDeviceLocation(meta, migration.GetSourceDeviceId()); + if (deviceLocation) { + return deviceLocation->ReplicaIndex; + } + } + } + + return std::nullopt; +} + +TVector CollectLaggingDevices( + const NProto::TVolumeMeta& meta, + ui32 replicaIndex, + TStringBuf agentId) +{ + const auto deviceMatcher = [agentId](const NProto::TDeviceConfig& device) + { + return device.GetAgentId() == agentId; + }; + + TVector result; + const auto replicaDevices = GetReplicaDevices(meta, replicaIndex); + for (int i = 0; i < replicaDevices.size(); i++) { + const auto& device = replicaDevices[i]; + if (deviceMatcher(device)) { + auto& laggingDevice = result.emplace_back(); + laggingDevice.SetRowIndex(i); + laggingDevice.SetDeviceUUID(device.GetDeviceUUID()); + } + } + + for (const auto& migration: meta.GetMigrations()) { + const auto& targetDevice = migration.GetTargetDevice(); + if (deviceMatcher(targetDevice)) { + 
auto deviceLocation = + FindDeviceLocation(meta, migration.GetSourceDeviceId()); + if (!deviceLocation) { + ReportDiskAllocationFailure( + TStringBuilder() + << "Migration source device " + << migration.GetSourceDeviceId() + << " doesn't belong to the disk " + << meta.GetConfig().GetDiskId() + << ". Target device: " << targetDevice.GetDeviceUUID()); + continue; + } + auto& laggingDevice = result.emplace_back(); + laggingDevice.SetRowIndex(deviceLocation->RowIndex); + laggingDevice.SetDeviceUUID(targetDevice.GetDeviceUUID()); + } + } + + Sort(result, TLaggingDeviceIndexCmp()); + return result; +} + +bool HaveCommonRows( + const TVector& laggingCandidates, + const RepeatedPtrField& alreadyLagging) +{ + TLaggingDeviceIndexCmp cmp; + + Y_ABORT_UNLESS( + IsSorted(laggingCandidates.begin(), laggingCandidates.end(), cmp)); + Y_ABORT_UNLESS(IsSorted(alreadyLagging.begin(), alreadyLagging.end(), cmp)); + + for (int i = 0, j = 0; + i < laggingCandidates.ysize() && j < alreadyLagging.size();) + { + if (cmp(laggingCandidates[i], alreadyLagging[j])) { + i++; + } else if (cmp(alreadyLagging[j], laggingCandidates[i])) { + j++; + } else { + return true; + } + } + + return false; +} + +bool RowHasFreshDevices( + const NProto::TVolumeMeta& meta, + ui32 rowIndex, + ui32 timeoutedDeviceReplicaIndex) +{ + for (size_t i = 0; i <= meta.ReplicasSize(); i++) { + if (i == timeoutedDeviceReplicaIndex) { + continue; + } + const auto& devices = GetReplicaDevices(meta, i); + if (FindPtr( + meta.GetFreshDeviceIds(), + devices[rowIndex].GetDeviceUUID())) + { + return true; + } + } + return false; +} + +void UpdateLaggingDevicesAfterMetaUpdate( + NProto::TVolumeMeta& meta, + const TVector& removedLaggingDeviceIds) +{ + auto& laggingAgents = *meta.MutableLaggingAgentsInfo()->MutableAgents(); + for (auto& agent: laggingAgents) { + const bool laggingDevicesWereRemoved = AllOf( + agent.GetDevices(), + [&removedLaggingDeviceIds]( + const NProto::TLaggingDevice& laggingDevice) + { + return !!FindPtr( + 
removedLaggingDeviceIds, + laggingDevice.GetDeviceUUID()); + }); + + agent.ClearDevices(); + if (laggingDevicesWereRemoved) { + continue; + } + + auto replicaIndex = FindReplicaIndexByAgentId(meta, agent.GetAgentId()); + if (!replicaIndex) { + continue; + } + TVector updatedLaggingDevices = + CollectLaggingDevices(meta, *replicaIndex, agent.GetAgentId()); + + Y_ABORT_UNLESS(*replicaIndex == agent.GetReplicaIndex()); + Y_DEBUG_ABORT_UNLESS(!updatedLaggingDevices.empty()); + for (auto& laggingDevice: updatedLaggingDevices) { + *agent.AddDevices() = std::move(laggingDevice); + } + } + + EraseIf( + laggingAgents, + [](const NProto::TLaggingAgent& laggingAgent) + { return laggingAgent.GetDevices().empty(); }); + if (laggingAgents.empty()) { + meta.MutableLaggingAgentsInfo()->Clear(); + } +} + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/model/helpers.h b/cloud/blockstore/libs/storage/volume/model/helpers.h new file mode 100644 index 00000000000..76e44879fdc --- /dev/null +++ b/cloud/blockstore/libs/storage/volume/model/helpers.h @@ -0,0 +1,37 @@ +#pragma once + +#include +#include + +namespace NCloud::NBlockStore::NStorage { + +//////////////////////////////////////////////////////////////////////////////// + +[[nodiscard]] const NProto::TDeviceConfig* FindDeviceConfig( + const NProto::TVolumeMeta& meta, + TStringBuf deviceUUID); + +[[nodiscard]] std::optional FindReplicaIndexByAgentId( + const NProto::TVolumeMeta& meta, + TStringBuf agentId); + +[[nodiscard]] TVector CollectLaggingDevices( + const NProto::TVolumeMeta& meta, + ui32 replicaIndex, + TStringBuf agentId); + +[[nodiscard]] bool RowHasFreshDevices( + const NProto::TVolumeMeta& meta, + ui32 rowIndex, + ui32 timeoutedDeviceReplicaIndex); + +[[nodiscard]] bool HaveCommonRows( + const TVector& laggingCandidates, + const google::protobuf::RepeatedPtrField& + alreadyLagging); + +void UpdateLaggingDevicesAfterMetaUpdate( + NProto::TVolumeMeta& meta, + const TVector& 
removedLaggingDeviceIds); + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/model/ya.make b/cloud/blockstore/libs/storage/volume/model/ya.make index 05a558420e2..2931369f866 100644 --- a/cloud/blockstore/libs/storage/volume/model/ya.make +++ b/cloud/blockstore/libs/storage/volume/model/ya.make @@ -8,6 +8,7 @@ SRCS( checkpoint.cpp checkpoint_light.cpp client_state.cpp + helpers.cpp merge.cpp meta.cpp requests_inflight.cpp diff --git a/cloud/blockstore/libs/storage/volume/testlib/test_env.cpp b/cloud/blockstore/libs/storage/volume/testlib/test_env.cpp index 87f58a1facc..3f567fc62a7 100644 --- a/cloud/blockstore/libs/storage/volume/testlib/test_env.cpp +++ b/cloud/blockstore/libs/storage/volume/testlib/test_env.cpp @@ -533,6 +533,16 @@ std::unique_ptr TVolumeClient::CreateGetS return request; } +std::unique_ptr +TVolumeClient::CreateDeviceTimeoutedRequest( + TString deviceUUID) +{ + auto request = + std::make_unique( + std::move(deviceUUID)); + return request; +} + std::unique_ptr TVolumeClient::CreateUpdateShadowDiskStateRequest( TString checkpointId, @@ -551,6 +561,12 @@ TVolumeClient::CreateReadMetaHistoryRequest() return std::make_unique(); } +std::unique_ptr +TVolumeClient::CreateGracefulShutdownRequest() +{ + return std::make_unique(); +} + void TVolumeClient::SendRemoteHttpInfo( const TString& params, HTTP_METHOD method) @@ -606,9 +622,10 @@ std::unique_ptr PrepareTestActorRuntime( TDiskRegistryStatePtr diskRegistryState, NProto::TFeaturesConfig featuresConfig, NRdma::IClientPtr rdmaClient, - TDiskAgentStatePtr diskAgentState) + TVector diskAgentStates) { - auto runtime = std::make_unique(1); + const ui32 agentCount = Max(diskAgentStates.size(), 1); + auto runtime = std::make_unique(agentCount); runtime->AppendToLogSettings( TBlockStoreComponents::START, @@ -620,6 +637,13 @@ std::unique_ptr PrepareTestActorRuntime( } // runtime->SetLogPriority(NLog::InvalidComponent, NLog::PRI_DEBUG); + 
runtime->SetRegistrationObserverFunc( + [] (auto& runtime, const auto& parentId, const auto& actorId) + { + Y_UNUSED(parentId); + runtime.EnableScheduleForActor(actorId); + }); + runtime->AddLocalService( MakeHiveProxyServiceId(), TActorSetupCmd(new TFakeHiveProxy(), TMailboxType::Simple, 0)); @@ -647,37 +671,38 @@ std::unique_ptr PrepareTestActorRuntime( } if (diskRegistryState->Devices.empty()) { - google::protobuf::RepeatedPtrField devices; + TVector devices; - *devices.Add() = MakeDevice("uuid0", "dev0", "transport0"); - *devices.Add() = MakeDevice("uuid1", "dev1", "transport1"); - *devices.Add() = MakeDevice("uuid2", "dev2", "transport2"); + devices.push_back(MakeDevice("uuid0", "dev0", "transport0")); + devices.push_back(MakeDevice("uuid1", "dev1", "transport1")); + devices.push_back(MakeDevice("uuid2", "dev2", "transport2")); auto dev0m = MakeDevice("uuid0_migration", "dev0_migration", "transport0_migration"); auto dev2m = MakeDevice("uuid2_migration", "dev2_migration", "transport2_migration"); - *devices.Add() = dev0m; - *devices.Add() = dev2m; - - diskRegistryState->Devices = TVector( - devices.begin(), - devices.end() - ); + devices.push_back(dev0m); + devices.push_back(dev2m); + diskRegistryState->Devices = std::move(devices); diskRegistryState->MigrationDevices["uuid0"] = dev0m; diskRegistryState->MigrationDevices["uuid2"] = dev2m; } for (auto& d: diskRegistryState->Devices) { - d.SetNodeId(runtime->GetNodeId()); + d.SetNodeId(runtime->GetNodeId(d.GetNodeId())); } - for (auto& [id, d]: diskRegistryState->MigrationDevices) { - d.SetNodeId(runtime->GetNodeId()); + for (auto& [_, d]: diskRegistryState->MigrationDevices) { + d.SetNodeId(runtime->GetNodeId(d.GetNodeId())); } + Sort( + diskRegistryState->Devices, + [](const NProto::TDeviceConfig& lhs, const NProto::TDeviceConfig& rhs) + { return lhs.GetNodeId() < rhs.GetNodeId(); }); + runtime->AddLocalService( MakeDiskRegistryProxyServiceId(), TActorSetupCmd( @@ -688,19 +713,43 @@ std::unique_ptr 
PrepareTestActorRuntime( ); runtime->EnableScheduleForActor(MakeDiskRegistryProxyServiceId()); - runtime->AddLocalService( - MakeDiskAgentServiceId(runtime->GetNodeId()), - TActorSetupCmd( + SetupTabletServices(*runtime); + + for (ui32 i = 0; i < agentCount; i++) { + struct TByNodeId + { + auto operator()(const NProto::TDeviceConfig& device) const + { + return device.GetNodeId(); + } + }; + const ui32 nodeId = runtime->GetNodeId(i); + auto begin = LowerBoundBy( + diskRegistryState->Devices.begin(), + diskRegistryState->Devices.end(), + nodeId, + TByNodeId()); + auto end = UpperBoundBy( + diskRegistryState->Devices.begin(), + diskRegistryState->Devices.end(), + nodeId, + TByNodeId()); + + auto state = diskAgentStates.size() > i ? std::move(diskAgentStates[i]) + : TDiskAgentStatePtr(); + const auto actorId = runtime->Register( new TDiskAgentMock( { - diskRegistryState->Devices.begin(), - diskRegistryState->Devices.end(), + begin, + end, }, - diskAgentState), - TMailboxType::Simple, - 0)); - - SetupTabletServices(*runtime); + std::move(state)), + i); + runtime->RegisterService( + MakeDiskAgentServiceId(nodeId), + actorId, + i); + } auto config = CreateTestStorageConfig( std::move(storageServiceConfig), @@ -729,13 +778,6 @@ std::unique_ptr PrepareTestActorRuntime( TestTabletId, TTabletTypes::BlockStoreVolume); - runtime->SetRegistrationObserverFunc( - [] (auto& runtime, const auto& parentId, const auto& actorId) - { - Y_UNUSED(parentId); - runtime.EnableScheduleForActor(actorId); - }); - CreateTestBootstrapper(*runtime, info.Get(), createFunc); return runtime; diff --git a/cloud/blockstore/libs/storage/volume/testlib/test_env.h b/cloud/blockstore/libs/storage/volume/testlib/test_env.h index 6aaadd6f87e..6f9d10f643b 100644 --- a/cloud/blockstore/libs/storage/volume/testlib/test_env.h +++ b/cloud/blockstore/libs/storage/volume/testlib/test_env.h @@ -452,6 +452,9 @@ class TVolumeClient std::unique_ptr CreateGetStorageConfigRequest(); + std::unique_ptr + 
CreateDeviceTimeoutedRequest(TString deviceUUID); + std::unique_ptr CreateUpdateShadowDiskStateRequest( TString checkpointId, TEvVolumePrivate::TEvUpdateShadowDiskStateRequest::EReason reason, @@ -460,6 +463,9 @@ class TVolumeClient std::unique_ptr CreateReadMetaHistoryRequest(); + std::unique_ptr + CreateGracefulShutdownRequest(); + void SendRemoteHttpInfo( const TString& params, HTTP_METHOD method); @@ -526,7 +532,7 @@ inline NProto::TDeviceConfig MakeDevice( const TString& transportId) { NProto::TDeviceConfig device; - device.SetAgentId("Mulder"); + device.SetAgentId("agent-1"); device.SetNodeId(0); device.SetBlocksCount(DefaultDeviceBlockCount); device.SetDeviceUUID(uuid); @@ -543,7 +549,7 @@ std::unique_ptr PrepareTestActorRuntime( TDiskRegistryStatePtr diskRegistryState = {}, NProto::TFeaturesConfig featuresConfig = {}, NRdma::IClientPtr rdmaClient = {}, - TDiskAgentStatePtr diskAgentState = {}); + TVector diskAgentStates = {}); struct TTestRuntimeBuilder { diff --git a/cloud/blockstore/libs/storage/volume/ut/ya.make b/cloud/blockstore/libs/storage/volume/ut/ya.make index ca5b81b744c..a7072574071 100644 --- a/cloud/blockstore/libs/storage/volume/ut/ya.make +++ b/cloud/blockstore/libs/storage/volume/ut/ya.make @@ -4,9 +4,11 @@ INCLUDE(${ARCADIA_ROOT}/cloud/storage/core/tests/recipes/medium.inc) SRCS( volume_database_ut.cpp + volume_lagging_agent_ut.cpp volume_state_ut.cpp volume_ut.cpp volume_ut_checkpoint.cpp + volume_ut_session.cpp volume_ut_stats.cpp ) diff --git a/cloud/blockstore/libs/storage/volume/volume_actor.cpp b/cloud/blockstore/libs/storage/volume/volume_actor.cpp index 2f5901fbd29..58acb7c1847 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_actor.cpp @@ -1,7 +1,6 @@ #include "volume_actor.h" #include "volume_database.h" -#include "volume_tx.h" #include #include @@ -1052,6 +1051,9 @@ STFUNC(TVolumeActor::StateWork) HFunc( TEvDiskRegistry::TEvAcquireDiskResponse, 
HandleAcquireDiskResponse); + HFunc( + TEvVolumePrivate::TEvDevicesAcquireFinished, + HandleDevicesAcquireFinished); HFunc( TEvVolumePrivate::TEvAcquireDiskIfNeeded, HandleAcquireDiskIfNeeded); @@ -1060,6 +1062,9 @@ STFUNC(TVolumeActor::StateWork) HFunc( TEvDiskRegistry::TEvReleaseDiskResponse, HandleReleaseDiskResponse); + HFunc( + TEvVolumePrivate::TEvDevicesReleaseFinished, + HandleDevicesReleasedFinished); HFunc( TEvDiskRegistry::TEvAllocateDiskResponse, HandleAllocateDiskResponse); @@ -1081,6 +1086,22 @@ STFUNC(TVolumeActor::StateWork) HFunc(TEvVolume::TEvUpdateResyncState, HandleUpdateResyncState); HFunc(TEvVolume::TEvResyncFinished, HandleResyncFinished); + HFunc( + TEvDiskRegistry::TEvAddLaggingDevicesResponse, + HandleAddLaggingDevicesResponse); + HFunc( + TEvVolumePrivate::TEvReportLaggingDevicesToDR, + HandleReportLaggingDevicesToDR); + HFunc( + TEvVolumePrivate::TEvDeviceTimeoutedRequest, + HandleDeviceTimeouted); + HFunc( + TEvVolumePrivate::TEvUpdateSmartMigrationState, + HandleUpdateSmartMigrationState); + HFunc( + TEvVolumePrivate::TEvSmartMigrationFinished, + HandleSmartMigrationFinished); + HFunc( TEvPartitionCommonPrivate::TEvLongRunningOperation, HandleLongRunningBlobOperation); @@ -1114,13 +1135,18 @@ STFUNC(TVolumeActor::StateZombie) IgnoreFunc(TEvVolumePrivate::TEvUpdateThrottlerState); IgnoreFunc(TEvVolumePrivate::TEvUpdateReadWriteClientInfo); IgnoreFunc(TEvVolumePrivate::TEvRemoveExpiredVolumeParams); + IgnoreFunc(TEvVolumePrivate::TEvReportLaggingDevicesToDR); + IgnoreFunc(TEvVolumePrivate::TEvDeviceTimeoutedRequest); + IgnoreFunc(TEvVolumePrivate::TEvUpdateSmartMigrationState); + IgnoreFunc(TEvVolumePrivate::TEvSmartMigrationFinished); IgnoreFunc(TEvStatsService::TEvVolumePartCounters); IgnoreFunc(TEvPartition::TEvWaitReadyResponse); - IgnoreFunc(TEvents::TEvPoisonPill); - IgnoreFunc(TEvents::TEvPoisonTaken); + HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); + HFunc(TEvents::TEvPoisonTaken, HandlePoisonTaken); + 
HFunc(TEvTablet::TEvTabletStop, HandleTabletStop); IgnoreFunc(TEvLocal::TEvTabletMetrics); @@ -1132,6 +1158,8 @@ STFUNC(TVolumeActor::StateZombie) IgnoreFunc(TEvDiskRegistryProxy::TEvGetDrTabletInfoResponse); + IgnoreFunc(TEvDiskRegistry::TEvAddLaggingDevicesResponse); + default: if (!RejectRequests(ev)) { HandleUnexpectedEvent(ev, TBlockStoreComponents::VOLUME); diff --git a/cloud/blockstore/libs/storage/volume/volume_actor.h b/cloud/blockstore/libs/storage/volume/volume_actor.h index c44e9873e88..2c461382958 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor.h +++ b/cloud/blockstore/libs/storage/volume/volume_actor.h @@ -236,6 +236,7 @@ class TVolumeActor final TMigrations Migrations; TVector Replicas; TVector FreshDeviceIds; + TVector RemovedLaggingDeviceIds; void Clear() { @@ -474,6 +475,8 @@ class TVolumeActor final void SetupDiskRegistryBasedPartitions(const NActors::TActorContext& ctx); + void ReportLaggingDevicesToDR(const NActors::TActorContext& ctx); + void DumpUsageStats( const NActors::TActorContext& ctx, TVolumeActor::EStatus status); @@ -657,6 +660,10 @@ class TVolumeActor final const TEvVolumePrivate::TEvWriteOrZeroCompleted::TPtr& ev, const NActors::TActorContext& ctx); + void HandleReportLaggingDevicesToDR( + const TEvVolumePrivate::TEvReportLaggingDevicesToDR::TPtr& ev, + const NActors::TActorContext& ctx); + template bool ReplyToOriginalRequest( const NActors::TActorContext& ctx, @@ -716,12 +723,26 @@ class TVolumeActor final const TEvDiskRegistry::TEvAcquireDiskResponse::TPtr& ev, const NActors::TActorContext& ctx); + void HandleDevicesAcquireFinishedImpl( + const NProto::TError& error, + const NActors::TActorContext& ctx); + void AcquireDisk( const NActors::TActorContext& ctx, TString clientId, NProto::EVolumeAccessMode accessMode, ui64 mountSeqNumber); + void SendAcquireDevicesToAgents( + TString clientId, + NProto::EVolumeAccessMode accessMode, + ui64 mountSeqNumber, + const NActors::TActorContext& ctx); + + void 
HandleDevicesAcquireFinished( + const TEvVolumePrivate::TEvDevicesAcquireFinished::TPtr& ev, + const NActors::TActorContext& ctx); + void AcquireDiskIfNeeded(const NActors::TActorContext& ctx); void ScheduleAcquireDiskIfNeeded(const NActors::TActorContext& ctx); @@ -742,12 +763,28 @@ class TVolumeActor final const TEvDiskRegistry::TEvReleaseDiskResponse::TPtr& ev, const NActors::TActorContext& ctx); + void HandleDevicesReleasedFinishedImpl( + const NProto::TError& error, + const NActors::TActorContext& ctx); + void ReleaseDisk(const NActors::TActorContext& ctx, const TString& clientId); + void SendReleaseDevicesToAgents( + const TString& clientId, + const NActors::TActorContext& ctx); + + void HandleDevicesReleasedFinished( + const TEvVolumePrivate::TEvDevicesReleaseFinished::TPtr& ev, + const NActors::TActorContext& ctx); + void HandleAllocateDiskResponse( const TEvDiskRegistry::TEvAllocateDiskResponse::TPtr& ev, const NActors::TActorContext& ctx); + void HandleAddLaggingDevicesResponse( + const TEvDiskRegistry::TEvAddLaggingDevicesResponse::TPtr& ev, + const NActors::TActorContext& ctx); + void ScheduleAllocateDiskIfNeeded(const NActors::TActorContext& ctx); NProto::TAllocateDiskRequest MakeAllocateDiskRequest() const; @@ -968,6 +1005,14 @@ class TVolumeActor final const TEvService::TEvReadBlocksLocalResponse::TPtr& ev, const NActors::TActorContext& ctx); + void HandleSmartMigrationFinished( + const TEvVolumePrivate::TEvSmartMigrationFinished::TPtr& ev, + const NActors::TActorContext& ctx); + + void HandleUpdateSmartMigrationState( + const TEvVolumePrivate::TEvUpdateSmartMigrationState::TPtr& ev, + const NActors::TActorContext& ctx); + void CreateCheckpointLightRequest( const NActors::TActorContext& ctx, ui64 requestId, diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_acquire.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_acquire.cpp new file mode 100644 index 00000000000..5dba7995a46 --- /dev/null +++ 
b/cloud/blockstore/libs/storage/volume/volume_actor_acquire.cpp @@ -0,0 +1,383 @@ +#include "volume_actor.h" + +#include + +#include +#include + +#include +#include + +namespace NCloud::NBlockStore::NStorage { + +using namespace NActors; + +using namespace NKikimr::NTabletFlatExecutor; + +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +class TAcquireDevicesActor final + : public TActorBootstrapped +{ +private: + const TActorId Owner; + TVector Devices; + const TString DiskId; + const TString ClientId; + const NProto::EVolumeAccessMode AccessMode; + const ui64 MountSeqNumber; + const ui32 VolumeGeneration; + const TDuration RequestTimeout; + const bool MuteIOErrors; + + int PendingRequests = 0; + +public: + TAcquireDevicesActor( + const TActorId& owner, + TVector devices, + TString diskId, + TString clientId, + NProto::EVolumeAccessMode accessMode, + ui64 mountSeqNumber, + ui32 volumeGeneration, + TDuration requestTimeout, + bool muteIOErrors); + + void Bootstrap(const TActorContext& ctx); + +private: + void PrepareRequest(NProto::TAcquireDevicesRequest& request) const; + void PrepareRequest(NProto::TReleaseDevicesRequest& request) const; + + void ReplyAndDie(const TActorContext& ctx, NProto::TError error); + + void OnAcquireResponse( + const TActorContext& ctx, + ui32 nodeId, + NProto::TError error); + + template + struct TSentRequest + { + TString AgentId; + ui32 NodeId = 0; + decltype(TRequest::Record) Record; + }; + + template + TVector> CreateRequests() const; + + template + void SendRequests( + const TActorContext& ctx, + const TVector>& requests); + +private: + STFUNC(StateAcquire); + + void HandlePoisonPill( + const TEvents::TEvPoisonPill::TPtr& ev, + const TActorContext& ctx); + + void HandleAcquireDevicesResponse( + const TEvDiskAgent::TEvAcquireDevicesResponse::TPtr& ev, + const TActorContext& ctx); + + void HandleAcquireDevicesUndelivery( + const TEvDiskAgent::TEvAcquireDevicesRequest::TPtr& ev, + const 
TActorContext& ctx); + + void HandleWakeup( + const TEvents::TEvWakeup::TPtr& ev, + const TActorContext& ctx); + + TString LogTargets() const; +}; + +//////////////////////////////////////////////////////////////////////////////// + +TAcquireDevicesActor::TAcquireDevicesActor( + const TActorId& owner, + TVector devices, + TString diskId, + TString clientId, + NProto::EVolumeAccessMode accessMode, + ui64 mountSeqNumber, + ui32 volumeGeneration, + TDuration requestTimeout, + bool muteIOErrors) + : Owner(owner) + , Devices(std::move(devices)) + , DiskId(std::move(diskId)) + , ClientId(std::move(clientId)) + , AccessMode(accessMode) + , MountSeqNumber(mountSeqNumber) + , VolumeGeneration(volumeGeneration) + , RequestTimeout(requestTimeout) + , MuteIOErrors(muteIOErrors) +{ + SortBy(Devices, [](auto& d) { return d.GetNodeId(); }); +} + +void TAcquireDevicesActor::Bootstrap(const TActorContext& ctx) +{ + Become(&TThis::StateAcquire); + + if (Devices.empty()) { + ReplyAndDie(ctx, {}); + return; + } + + ctx.Schedule(RequestTimeout, new TEvents::TEvWakeup()); + + LOG_DEBUG( + ctx, + TBlockStoreComponents::VOLUME, + "[%s] Sending acquire devices requests for disk %s, targets %s", + ClientId.c_str(), + DiskId.c_str(), + LogTargets().c_str()); + + SendRequests(ctx, CreateRequests()); +} + +void TAcquireDevicesActor::ReplyAndDie( + const TActorContext& ctx, + NProto::TError error) +{ + using TType = TEvVolumePrivate::TEvDevicesAcquireFinished; + + if (HasError(error)) { + LOG_ERROR( + ctx, + TBlockStoreComponents::VOLUME, + "[%s] AcquireDevices %s targets %s error: %s", + ClientId.c_str(), + DiskId.c_str(), + LogTargets().c_str(), + FormatError(error).c_str()); + } + + NCloud::Send(ctx, Owner, std::make_unique(std::move(error))); + + Die(ctx); +} + +void TAcquireDevicesActor::PrepareRequest( + NProto::TAcquireDevicesRequest& request) const +{ + request.MutableHeaders()->SetClientId(ClientId); + request.SetAccessMode(AccessMode); + request.SetMountSeqNumber(MountSeqNumber); + 
request.SetDiskId(DiskId); + request.SetVolumeGeneration(VolumeGeneration); +} + +void TAcquireDevicesActor::PrepareRequest( + NProto::TReleaseDevicesRequest& request) const +{ + request.MutableHeaders()->SetClientId(ClientId); +} + +template +auto TAcquireDevicesActor::CreateRequests() const + -> TVector> +{ + auto it = Devices.begin(); + TVector> requests; + while (it != Devices.end()) { + const ui32 nodeId = it->GetNodeId(); + + auto& request = requests.emplace_back(); + request.AgentId = it->GetAgentId(); + request.NodeId = nodeId; + PrepareRequest(request.Record); + + for (; it != Devices.end() && it->GetNodeId() == nodeId; ++it) { + *request.Record.AddDeviceUUIDs() = it->GetDeviceUUID(); + } + } + return requests; +} + +template +void TAcquireDevicesActor::SendRequests( + const TActorContext& ctx, + const TVector>& requests) +{ + PendingRequests = 0; + + for (const auto& r: requests) { + auto request = std::make_unique(TCallContextPtr{}, r.Record); + + LOG_DEBUG( + ctx, + TBlockStoreComponents::VOLUME, + "[%s] Send an acquire request to node #%d. 
Devices: %s", + ClientId.c_str(), + r.NodeId, + JoinSeq(", ", request->Record.GetDeviceUUIDs()).c_str()); + + auto event = std::make_unique( + MakeDiskAgentServiceId(r.NodeId), + ctx.SelfID, + request.release(), + IEventHandle::FlagForwardOnNondelivery, + r.NodeId, + &ctx.SelfID // forwardOnNondelivery + ); + + ctx.Send(event.release()); + + ++PendingRequests; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +void TAcquireDevicesActor::HandlePoisonPill( + const TEvents::TEvPoisonPill::TPtr& ev, + const TActorContext& ctx) +{ + Y_UNUSED(ev); + + ReplyAndDie(ctx, MakeError(E_REJECTED, "Tablet is dead")); +} + +void TAcquireDevicesActor::OnAcquireResponse( + const TActorContext& ctx, + ui32 nodeId, + NProto::TError error) +{ + Y_ABORT_UNLESS(PendingRequests > 0); + + if (HasError(error) && !MuteIOErrors) { + LOG_ERROR( + ctx, + TBlockStoreComponents::VOLUME, + "[%s] AcquireDevices on the node #%d %s error: %s", + ClientId.c_str(), + nodeId, + LogTargets().c_str(), + FormatError(error).c_str()); + + if (GetErrorKind(error) != EErrorKind::ErrorRetriable) { + LOG_DEBUG( + ctx, + TBlockStoreComponents::VOLUME, + "[%s] Canceling acquire operation for disk %s, targets %s", + ClientId.c_str(), + DiskId.c_str(), + LogTargets().c_str()); + + SendRequests( + ctx, + CreateRequests()); + } + + ReplyAndDie(ctx, std::move(error)); + + return; + } + + if (--PendingRequests == 0) { + ReplyAndDie(ctx, {}); + } +} + +void TAcquireDevicesActor::HandleAcquireDevicesResponse( + const TEvDiskAgent::TEvAcquireDevicesResponse::TPtr& ev, + const TActorContext& ctx) +{ + OnAcquireResponse( + ctx, + SafeIntegerCast(ev->Cookie), + ev->Get()->GetError()); +} + +void TAcquireDevicesActor::HandleAcquireDevicesUndelivery( + const TEvDiskAgent::TEvAcquireDevicesRequest::TPtr& ev, + const TActorContext& ctx) +{ + OnAcquireResponse( + ctx, + SafeIntegerCast(ev->Cookie), + MakeError(E_REJECTED, "not delivered")); +} + +void TAcquireDevicesActor::HandleWakeup( 
+ const TEvents::TEvWakeup::TPtr& ev, + const TActorContext& ctx) +{ + OnAcquireResponse( + ctx, + SafeIntegerCast(ev->Cookie), + MakeError(E_REJECTED, "timeout")); +} + +//////////////////////////////////////////////////////////////////////////////// + +TString TAcquireDevicesActor::LogTargets() const +{ + return LogDevices(Devices); +} + +//////////////////////////////////////////////////////////////////////////////// + +STFUNC(TAcquireDevicesActor::StateAcquire) +{ + switch (ev->GetTypeRewrite()) { + HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); + + HFunc( + TEvDiskAgent::TEvAcquireDevicesResponse, + HandleAcquireDevicesResponse); + HFunc( + TEvDiskAgent::TEvAcquireDevicesRequest, + HandleAcquireDevicesUndelivery); + + HFunc(TEvents::TEvWakeup, HandleWakeup); + + default: + HandleUnexpectedEvent(ev, TBlockStoreComponents::VOLUME); + break; + } +} + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +void TVolumeActor::SendAcquireDevicesToAgents( + TString clientId, + NProto::EVolumeAccessMode accessMode, + ui64 mountSeqNumber, + const TActorContext& ctx) +{ + auto devices = State->GetAllDevicesForAcquireRelease(); + + auto actor = NCloud::Register( + ctx, + ctx.SelfID, + std::move(devices), + State->GetDiskId(), + std::move(clientId), + accessMode, + mountSeqNumber, + Executor()->Generation(), + Config->GetAgentRequestTimeout(), + State->GetMeta().GetMuteIOErrors()); + Actors.insert(actor); +} + +void TVolumeActor::HandleDevicesAcquireFinished( + const TEvVolumePrivate::TEvDevicesAcquireFinished::TPtr& ev, + const TActorContext& ctx) +{ + HandleDevicesAcquireFinishedImpl(ev->Get()->GetError(), ctx); +} + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_addclient.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_addclient.cpp index dff49881dea..99d44e17495 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor_addclient.cpp +++ 
b/cloud/blockstore/libs/storage/volume/volume_actor_addclient.cpp @@ -40,6 +40,15 @@ void TVolumeActor::AcquireDisk( "Acquiring disk " << State->GetDiskId() ); + if (Config->GetNonReplicatedVolumeDirectAcquireEnabled()) { + SendAcquireDevicesToAgents( + std::move(clientId), + accessMode, + mountSeqNumber, + ctx); + return; + } + auto request = std::make_unique(); request->Record.SetDiskId(State->GetDiskId()); @@ -178,6 +187,13 @@ void TVolumeActor::HandleAcquireDiskResponse( // agents auto& record = msg->Record; + HandleDevicesAcquireFinishedImpl(record.GetError(), ctx); +} + +void TVolumeActor::HandleDevicesAcquireFinishedImpl( + const NProto::TError& error, + const NActors::TActorContext& ctx) +{ ScheduleAcquireDiskIfNeeded(ctx); if (AcquireReleaseDiskRequests.empty()) { @@ -193,7 +209,7 @@ void TVolumeActor::HandleAcquireDiskResponse( auto& request = AcquireReleaseDiskRequests.front(); auto& cr = request.ClientRequest; - if (HasError(record.GetError())) { + if (HasError(error)) { LOG_DEBUG_S( ctx, TBlockStoreComponents::VOLUME, @@ -201,8 +217,8 @@ void TVolumeActor::HandleAcquireDiskResponse( ); if (cr) { - auto response = std::make_unique( - record.GetError()); + auto response = + std::make_unique(error); response->Record.MutableVolume()->SetDiskId(cr->DiskId); response->Record.SetClientId(cr->GetClientId()); response->Record.SetTabletId(TabletID()); diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_allocatedisk.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_allocatedisk.cpp index 8647082e9f2..4aecb3a227a 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor_allocatedisk.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_actor_allocatedisk.cpp @@ -4,7 +4,7 @@ #include #include - +#include #include #include @@ -34,7 +34,6 @@ ui64 GetSize(const TDevices& devs) ui64 GetBlocks(const NKikimrBlockStore::TVolumeConfig& config) { - // XXX Y_ABORT_UNLESS(config.PartitionsSize() == 1); return config.GetPartitions(0).GetBlockCount(); } @@ 
-160,6 +159,7 @@ NProto::TVolumeMeta CreateNewMeta( newMeta.SetIOMode(args.IOMode); newMeta.SetIOModeTs(args.IOModeTs.MicroSeconds()); newMeta.SetMuteIOErrors(args.MuteIOErrors); + UpdateLaggingDevicesAfterMetaUpdate(newMeta, args.RemovedLaggingDeviceIds); return newMeta; } @@ -351,12 +351,19 @@ void TVolumeActor::HandleAllocateDiskResponse( auto& migrations = *msg->Record.MutableMigrations(); TVector replicas; TVector freshDeviceIds; + TVector removedLaggingDevices; for (auto& msgReplica: *msg->Record.MutableReplicas()) { replicas.push_back(std::move(*msgReplica.MutableDevices())); } for (auto& freshDeviceId: *msg->Record.MutableDeviceReplacementUUIDs()) { freshDeviceIds.push_back(std::move(freshDeviceId)); } + for (auto& removedLaggingDevice: + *msg->Record.MutableRemovedLaggingDevices()) + { + removedLaggingDevices.push_back( + std::move(*removedLaggingDevice.MutableDeviceUUID())); + } if (!CheckAllocationResult(ctx, devices, replicas)) { return; @@ -367,6 +374,8 @@ void TVolumeActor::HandleAllocateDiskResponse( UnfinishedUpdateVolumeConfig.Migrations = std::move(migrations); UnfinishedUpdateVolumeConfig.Replicas = std::move(replicas); UnfinishedUpdateVolumeConfig.FreshDeviceIds = std::move(freshDeviceIds); + UnfinishedUpdateVolumeConfig.RemovedLaggingDeviceIds = + std::move(removedLaggingDevices); } else { ExecuteTx( ctx, @@ -374,6 +383,7 @@ void TVolumeActor::HandleAllocateDiskResponse( std::move(migrations), std::move(replicas), std::move(freshDeviceIds), + std::move(removedLaggingDevices), msg->Record.GetIOMode(), TInstant::MicroSeconds(msg->Record.GetIOModeTs()), msg->Record.GetMuteIOErrors() @@ -418,6 +428,7 @@ void TVolumeActor::HandleUpdateDevices( std::move(msg->Migrations), std::move(msg->Replicas), std::move(msg->FreshDeviceIds), + std::move(msg->RemovedLaggingDevices), msg->IOMode, msg->IOModeTs, msg->MuteIOErrors); diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_lagging_agents.cpp 
b/cloud/blockstore/libs/storage/volume/volume_actor_lagging_agents.cpp new file mode 100644 index 00000000000..b75119860d4 --- /dev/null +++ b/cloud/blockstore/libs/storage/volume/volume_actor_lagging_agents.cpp @@ -0,0 +1,403 @@ +#include "volume_actor.h" + +#include "volume_tx.h" + +#include +#include +#include + +namespace NCloud::NBlockStore::NStorage { + +using namespace NActors; +using namespace NKikimr; +using namespace NCloud::NBlockStore::NStorage::NPartition; + +LWTRACE_USING(BLOCKSTORE_STORAGE_PROVIDER); + +//////////////////////////////////////////////////////////////////////////////// + +void TVolumeActor::HandleReportLaggingDevicesToDR( + const TEvVolumePrivate::TEvReportLaggingDevicesToDR::TPtr& ev, + const NActors::TActorContext& ctx) +{ + Y_UNUSED(ev); + ReportLaggingDevicesToDR(ctx); +} + +void TVolumeActor::ReportLaggingDevicesToDR(const NActors::TActorContext& ctx) +{ + if (!State || State->GetMeta().GetLaggingAgentsInfo().GetAgents().empty()) { + return; + } + + auto request = + std::make_unique(); + *request->Record.MutableDiskId() = State->GetDiskId(); + for (const auto& laggingAgent: + State->GetMeta().GetLaggingAgentsInfo().GetAgents()) + { + for (const auto& laggingDevice: laggingAgent.GetDevices()) { + *request->Record.AddLaggingDevices() = laggingDevice; + } + } + NCloud::Send( + ctx, + MakeDiskRegistryProxyServiceId(), + std::move(request), + 0 // cookie + ); +} + +void TVolumeActor::HandleAddLaggingDevicesResponse( + const TEvDiskRegistry::TEvAddLaggingDevicesResponse::TPtr& ev, + const NActors::TActorContext& ctx) +{ + Y_DEBUG_ABORT_UNLESS(State); + if (State->GetMeta().GetLaggingAgentsInfo().GetAgents().empty()) { + return; + } + + const auto* msg = ev->Get(); + if (HasError(msg->GetError())) { + LOG_WARN( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] Couldn't add lagging devices to the DR. 
Error: %s", + TabletID(), + FormatError(msg->GetError()).c_str()); + + ctx.Schedule( + TDuration::Seconds(1), + new TEvVolumePrivate::TEvReportLaggingDevicesToDR()); + return; + } +} + +void TVolumeActor::HandleDeviceTimeouted( + const TEvVolumePrivate::TEvDeviceTimeoutedRequest::TPtr& ev, + const TActorContext& ctx) +{ + const auto* msg = ev->Get(); + + LOG_INFO( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] Device \"%s\" timeouted", + TabletID(), + msg->DeviceUUID.c_str()); + + const auto& meta = State->GetMeta(); + if (!IsReliableDiskRegistryMediaKind( + State->GetConfig().GetStorageMediaKind())) + { + NCloud::Reply( + ctx, + *ev, + std::make_unique( + MakeError( + E_PRECONDITION_FAILED, + "Only DR mirror disks can have lagging devices"))); + return; + } + + if (UpdateVolumeConfigInProgress) { + NCloud::Reply( + ctx, + *ev, + std::make_unique( + MakeError(E_REJECTED, "Volume config update in progress"))); + return; + } + + const NProto::TDeviceConfig* timeoutedDeviceConfig = + FindDeviceConfig(meta, msg->DeviceUUID); + if (!timeoutedDeviceConfig) { + LOG_WARN( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] Could not find config with device %s", + TabletID(), + msg->DeviceUUID.c_str()); + + auto response = + std::make_unique( + MakeError( + E_NOT_FOUND, + TStringBuilder() << "Could not find config with device " + << msg->DeviceUUID)); + NCloud::Reply(ctx, *ev, std::move(response)); + return; + } + + const auto timeoutedDeviceReplicaIndex = + FindReplicaIndexByAgentId(meta, timeoutedDeviceConfig->GetAgentId()); + Y_DEBUG_ABORT_UNLESS(timeoutedDeviceReplicaIndex); + + TVector timeoutedAgentDevices = + CollectLaggingDevices( + meta, + *timeoutedDeviceReplicaIndex, + timeoutedDeviceConfig->GetAgentId()); + Y_DEBUG_ABORT_UNLESS(!timeoutedAgentDevices.empty()); + + for (const auto& laggingAgent: meta.GetLaggingAgentsInfo().GetAgents()) { + // Whether the agent is lagging already. 
+ if (laggingAgent.GetAgentId() == timeoutedDeviceConfig->GetAgentId()) { + LOG_WARN( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] Agent %s is already lagging", + TabletID(), + laggingAgent.GetAgentId().c_str()); + + STORAGE_CHECK_PRECONDITION( + laggingAgent.DevicesSize() == timeoutedAgentDevices.size()); + NCloud::Send( + ctx, + State->GetDiskRegistryBasedPartitionActor(), + std::make_unique( + *timeoutedDeviceReplicaIndex, + timeoutedDeviceConfig->GetAgentId())); + + auto response = + std::make_unique( + MakeError(S_ALREADY, "Device is already lagging")); + NCloud::Reply(ctx, *ev, std::move(response)); + return; + } + + // Intersect row indexes of known lagging devices and a new one. We only + // allow one lagging device per row. + const bool intersects = + HaveCommonRows(timeoutedAgentDevices, laggingAgent.GetDevices()); + if (intersects) { + // TODO(komarevtsev-d): Allow source and target of the migration to + // lag at the same time. + LOG_WARN( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] Will not add a lagging agent %s. Agent's " + "devices intersect with already lagging %s", + TabletID(), + timeoutedDeviceConfig->GetAgentId().c_str(), + laggingAgent.GetAgentId().c_str()); + + auto response = + std::make_unique( + MakeError( + E_INVALID_STATE, + TStringBuilder() + << "There are other lagging devices on agent " + << laggingAgent.GetAgentId())); + NCloud::Reply(ctx, *ev, std::move(response)); + return; + } + } + + // Check for fresh devices in the same row. 
+ for (const auto& laggingDevice: timeoutedAgentDevices) { + const bool rowHasFreshDevice = RowHasFreshDevices( + meta, + laggingDevice.GetRowIndex(), + *timeoutedDeviceReplicaIndex); + if (rowHasFreshDevice) { + LOG_WARN( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] There are other fresh devices on the same row with " + "device %s", + TabletID(), + laggingDevice.GetDeviceUUID().c_str()); + + auto response = + std::make_unique( + MakeError( + E_INVALID_STATE, + TStringBuilder() << "There are other fresh devices on " + "the same row with device " + << laggingDevice.GetDeviceUUID())); + NCloud::Reply(ctx, *ev, std::move(response)); + return; + } + } + + NProto::TLaggingAgent unavailableAgent; + unavailableAgent.SetAgentId(timeoutedDeviceConfig->GetAgentId()); + unavailableAgent.SetReplicaIndex(*timeoutedDeviceReplicaIndex); + unavailableAgent.MutableDevices()->Assign( + std::make_move_iterator(timeoutedAgentDevices.begin()), + std::make_move_iterator(timeoutedAgentDevices.end())); + ExecuteTx( + ctx, + CreateRequestInfo(ev->Sender, ev->Cookie, msg->CallContext), + std::move(unavailableAgent)); +} + +void TVolumeActor::HandleUpdateSmartMigrationState( + const TEvVolumePrivate::TEvUpdateSmartMigrationState::TPtr& ev, + const TActorContext& ctx) +{ + LOG_INFO( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] UpdateSmartMigrationState %s", + TabletID(), + ev->Get()->AgentId.c_str()); + + // TODO(komarevtsev-d): Show the progress on the mon page. +} + +void TVolumeActor::HandleSmartMigrationFinished( + const TEvVolumePrivate::TEvSmartMigrationFinished::TPtr& ev, + const TActorContext& ctx) +{ + const auto* msg = ev->Get(); + LOG_INFO( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] Smart migration finished for agent %s", + TabletID(), + msg->AgentId.c_str()); + + if (UpdateVolumeConfigInProgress) { + // When the volume configuration update is in progress, we don't know at + // which stage it is. 
By removing the lagging agent from the meta, we + // have either done it before new meta were created, so our change will + // take effect. Or we're too late and, upon partition restart, the + // volume will send all the lagging agents to the DiskRegistry, which + // will make them fresh and reallocate the volume. + LOG_WARN( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] Lagging agent %s removal may fail because the volume config " + "update is in progress", + TabletID(), + msg->AgentId.c_str()); + State->RemoveLaggingAgent(msg->AgentId); + return; + } + + ExecuteTx( + ctx, + CreateRequestInfo(ev->Sender, ev->Cookie, msg->CallContext), + msg->AgentId); +} + +//////////////////////////////////////////////////////////////////////////////// + +bool TVolumeActor::PrepareAddLaggingAgent( + const TActorContext& ctx, + ITransactionBase::TTransactionContext& tx, + TTxVolume::TAddLaggingAgent& args) +{ + Y_UNUSED(ctx); + Y_UNUSED(tx); + Y_UNUSED(args); + + return true; +} + +void TVolumeActor::ExecuteAddLaggingAgent( + const TActorContext& ctx, + ITransactionBase::TTransactionContext& tx, + TTxVolume::TAddLaggingAgent& args) +{ + Y_DEBUG_ABORT_UNLESS(!args.Agent.GetDevices().empty()); + LOG_INFO( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] Add lagging agent: %s, replicaIndex: %u, devices: ( %s )", + TabletID(), + args.Agent.GetAgentId().c_str(), + args.Agent.GetReplicaIndex(), + [&laggingDevices = args.Agent.GetDevices()]() + { + TStringBuilder ss; + for (const auto& device: laggingDevices) { + ss << "[" << device.GetDeviceUUID() << "; " + << device.GetRowIndex() << "], "; + } + ss.erase(ss.size() - 2); + return ss; + }() + .c_str()); + + TVolumeDatabase db(tx.DB); + State->AddLaggingAgent(args.Agent); + db.WriteMeta(State->GetMeta()); +} + +void TVolumeActor::CompleteAddLaggingAgent( + const TActorContext& ctx, + TTxVolume::TAddLaggingAgent& args) +{ + const auto& partActorId = State->GetDiskRegistryBasedPartitionActor(); + Y_DEBUG_ABORT_UNLESS(partActorId); + 
NCloud::Send( + ctx, + partActorId, + std::make_unique( + args.Agent.GetReplicaIndex(), + args.Agent.GetAgentId())); + + auto response = + std::make_unique(); + NCloud::Reply(ctx, *args.RequestInfo, std::move(response)); +} + +//////////////////////////////////////////////////////////////////////////////// + +bool TVolumeActor::PrepareRemoveLaggingAgent( + const TActorContext& ctx, + ITransactionBase::TTransactionContext& tx, + TTxVolume::TRemoveLaggingAgent& args) +{ + Y_UNUSED(ctx); + Y_UNUSED(tx); + Y_UNUSED(args); + + return true; +} + +void TVolumeActor::ExecuteRemoveLaggingAgent( + const TActorContext& ctx, + ITransactionBase::TTransactionContext& tx, + TTxVolume::TRemoveLaggingAgent& args) +{ + auto laggingAgent = State->RemoveLaggingAgent(args.AgentId); + if (!laggingAgent.has_value()) { + LOG_WARN( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] Could not find an agent %s in lagging agents list.", + TabletID(), + args.AgentId.c_str()); + return; + } + + TVolumeDatabase db(tx.DB); + db.WriteMeta(State->GetMeta()); + args.RemovedLaggingAgent = std::move(*laggingAgent); +} + +void TVolumeActor::CompleteRemoveLaggingAgent( + const TActorContext& ctx, + TTxVolume::TRemoveLaggingAgent& args) +{ + if (args.RemovedLaggingAgent.GetAgentId().empty()) { + return; + } + + if (State->HasLaggingInReplica(args.RemovedLaggingAgent.GetReplicaIndex())) + { + return; + } + + NCloud::Send( + ctx, + State->GetDiskRegistryBasedPartitionActor(), + std::make_unique( + args.RemovedLaggingAgent.GetReplicaIndex())); +} + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_reallocatedisk.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_reallocatedisk.cpp index 696433bf8d7..41703ae960f 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor_reallocatedisk.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_actor_reallocatedisk.cpp @@ -120,11 +120,20 @@ void TReallocateActor::HandleAllocateDiskResponse( 
freshDeviceIds.push_back(std::move(freshDeviceId)); } + TVector removedLaggingDevices; + for (auto& removedLaggingDevice: + *msg->Record.MutableRemovedLaggingDevices()) + { + removedLaggingDevices.push_back( + std::move(*removedLaggingDevice.MutableDeviceUUID())); + } + auto request = std::make_unique( std::move(*msg->Record.MutableDevices()), std::move(*msg->Record.MutableMigrations()), std::move(replicas), std::move(freshDeviceIds), + std::move(removedLaggingDevices), msg->Record.GetIOMode(), TInstant::MicroSeconds(msg->Record.GetIOModeTs()), msg->Record.GetMuteIOErrors()); diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_release.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_release.cpp new file mode 100644 index 00000000000..6e3debf3422 --- /dev/null +++ b/cloud/blockstore/libs/storage/volume/volume_actor_release.cpp @@ -0,0 +1,284 @@ +#include "volume_actor.h" +#include + +#include +#include + +namespace NCloud::NBlockStore::NStorage { + +using namespace NActors; + +using namespace NKikimr::NTabletFlatExecutor; + +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +class TReleaseDevicesActor final + : public TActorBootstrapped +{ +private: + const TActorId Owner; + const TString DiskId; + const TString ClientId; + const ui32 VolumeGeneration; + const TDuration RequestTimeout; + const bool MuteIOErrors; + + TVector Devices; + int PendingRequests = 0; + +public: + TReleaseDevicesActor( + const TActorId& owner, + TString diskId, + TString clientId, + ui32 volumeGeneration, + TDuration requestTimeout, + TVector devices, + bool muteIOErrors); + + void Bootstrap(const TActorContext& ctx); + +private: + void PrepareRequest(NProto::TReleaseDevicesRequest& request); + void ReplyAndDie(const TActorContext& ctx, NProto::TError error); + + void OnReleaseResponse( + const TActorContext& ctx, + ui64 cookie, + NProto::TError error); + +private: + STFUNC(StateWork); + + void HandlePoisonPill( + const 
TEvents::TEvPoisonPill::TPtr& ev, + const TActorContext& ctx); + + void HandleReleaseDevicesResponse( + const TEvDiskAgent::TEvReleaseDevicesResponse::TPtr& ev, + const TActorContext& ctx); + + void HandleReleaseDevicesUndelivery( + const TEvDiskAgent::TEvReleaseDevicesRequest::TPtr& ev, + const TActorContext& ctx); + + void HandleTimeout( + const TEvents::TEvWakeup::TPtr& ev, + const TActorContext& ctx); + + TString LogTargets() const; +}; + +//////////////////////////////////////////////////////////////////////////////// + +TReleaseDevicesActor::TReleaseDevicesActor( + const TActorId& owner, + TString diskId, + TString clientId, + ui32 volumeGeneration, + TDuration requestTimeout, + TVector devices, + bool muteIOErrors) + : Owner(owner) + , DiskId(std::move(diskId)) + , ClientId(std::move(clientId)) + , VolumeGeneration(volumeGeneration) + , RequestTimeout(requestTimeout) + , MuteIOErrors(muteIOErrors) + , Devices(std::move(devices)) +{} + +void TReleaseDevicesActor::PrepareRequest(NProto::TReleaseDevicesRequest& request) +{ + request.MutableHeaders()->SetClientId(ClientId); + request.SetDiskId(DiskId); + request.SetVolumeGeneration(VolumeGeneration); +} + +void TReleaseDevicesActor::Bootstrap(const TActorContext& ctx) +{ + Become(&TThis::StateWork); + + SortBy(Devices, [](auto& d) { return d.GetNodeId(); }); + + auto it = Devices.begin(); + while (it != Devices.end()) { + auto request = + std::make_unique(); + PrepareRequest(request->Record); + + const ui32 nodeId = it->GetNodeId(); + + for (; it != Devices.end() && it->GetNodeId() == nodeId; ++it) { + *request->Record.AddDeviceUUIDs() = it->GetDeviceUUID(); + } + + ++PendingRequests; + NCloud::Send( + ctx, + MakeDiskAgentServiceId(nodeId), + std::move(request), + nodeId); + } + + ctx.Schedule(RequestTimeout, new TEvents::TEvWakeup()); +} + +void TReleaseDevicesActor::ReplyAndDie( + const TActorContext& ctx, + NProto::TError error) +{ + NCloud::Send( + ctx, + Owner, + std::make_unique( + std::move(error))); + + 
Die(ctx); +} + +void TReleaseDevicesActor::OnReleaseResponse( + const TActorContext& ctx, + ui64 cookie, + NProto::TError error) +{ + Y_ABORT_UNLESS(PendingRequests > 0); + + if (HasError(error)) { + LOG_LOG( + ctx, + MuteIOErrors ? NLog::PRI_WARN : NLog::PRI_ERROR, + TBlockStoreComponents::VOLUME, + "ReleaseDevices %s error: %s, %llu", + LogTargets().c_str(), + FormatError(error).c_str(), + cookie); + } + + if (--PendingRequests == 0) { + ReplyAndDie(ctx, {}); + } +} + +void TReleaseDevicesActor::HandleReleaseDevicesResponse( + const TEvDiskAgent::TEvReleaseDevicesResponse::TPtr& ev, + const TActorContext& ctx) +{ + OnReleaseResponse(ctx, ev->Cookie, ev->Get()->GetError()); +} + +void TReleaseDevicesActor::HandleReleaseDevicesUndelivery( + const TEvDiskAgent::TEvReleaseDevicesRequest::TPtr& ev, + const TActorContext& ctx) +{ + OnReleaseResponse(ctx, ev->Cookie, MakeError(E_REJECTED, "not delivered")); +} + +void TReleaseDevicesActor::HandlePoisonPill( + const TEvents::TEvPoisonPill::TPtr& ev, + const TActorContext& ctx) +{ + Y_UNUSED(ev); + + ReplyAndDie(ctx, MakeError(E_REJECTED, "Tablet is dead")); +} + +void TReleaseDevicesActor::HandleTimeout( + const TEvents::TEvWakeup::TPtr& ev, + const TActorContext& ctx) +{ + Y_UNUSED(ev); + + const auto err = TStringBuilder() + << "TReleaseDiskActor timeout." 
<< " DiskId: " << DiskId + << " ClientId: " << ClientId + << " Targets: " << LogTargets() + << " VolumeGeneration: " << VolumeGeneration + << " PendingRequests: " << PendingRequests; + + LOG_WARN(ctx, TBlockStoreComponents::VOLUME, err); + + ReplyAndDie(ctx, MakeError(E_TIMEOUT, err)); +} + +STFUNC(TReleaseDevicesActor::StateWork) +{ + switch (ev->GetTypeRewrite()) { + HFunc(TEvents::TEvPoisonPill, HandlePoisonPill); + HFunc(TEvents::TEvWakeup, HandleTimeout); + + HFunc( + TEvDiskAgent::TEvReleaseDevicesResponse, + HandleReleaseDevicesResponse); + HFunc( + TEvDiskAgent::TEvReleaseDevicesRequest, + HandleReleaseDevicesUndelivery); + + default: + HandleUnexpectedEvent(ev, TBlockStoreComponents::VOLUME); + break; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +TString TReleaseDevicesActor::LogTargets() const +{ + return LogDevices(Devices); +} + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +void TVolumeActor::SendReleaseDevicesToAgents( + const TString& clientId, + const TActorContext& ctx) +{ + auto replyWithError = [&](auto error) + { + NCloud::Send( + ctx, + SelfId(), + std::make_unique( + std::move(error))); + }; + + TString diskId = State->GetDiskId(); + ui32 volumeGeneration = Executor()->Generation(); + + if (!clientId) { + replyWithError(MakeError(E_ARGUMENT, "empty client id")); + return; + } + + if (!diskId) { + replyWithError(MakeError(E_ARGUMENT, "empty disk id")); + return; + } + + auto devices = State->GetAllDevicesForAcquireRelease(); + + auto actor = NCloud::Register( + ctx, + ctx.SelfID, + std::move(diskId), + clientId, + volumeGeneration, + Config->GetAgentRequestTimeout(), + std::move(devices), + State->GetMeta().GetMuteIOErrors()); + + Actors.insert(actor); +} + +void TVolumeActor::HandleDevicesReleasedFinished( + const TEvVolumePrivate::TEvDevicesReleaseFinished::TPtr& ev, + const NActors::TActorContext& ctx) +{ + 
HandleDevicesReleasedFinishedImpl(ev->Get()->GetError(), ctx); +} + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_removeclient.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_removeclient.cpp index 27c26445aea..a1b72d50aaa 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor_removeclient.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_actor_removeclient.cpp @@ -28,7 +28,10 @@ void TVolumeActor::ReleaseDisk(const TActorContext& ctx, const TString& clientId request->Record.SetDiskId(State->GetDiskId()); request->Record.MutableHeaders()->SetClientId(clientId); request->Record.SetVolumeGeneration(Executor()->Generation()); - + if (Config->GetNonReplicatedVolumeDirectAcquireEnabled()) { + SendReleaseDevicesToAgents(clientId, ctx); + return; + } NCloud::Send( ctx, MakeDiskRegistryProxyServiceId(), @@ -42,6 +45,13 @@ void TVolumeActor::HandleReleaseDiskResponse( auto* msg = ev->Get(); auto& record = msg->Record; + HandleDevicesReleasedFinishedImpl(record.GetError(), ctx); +} + +void TVolumeActor::HandleDevicesReleasedFinishedImpl( + const NProto::TError& error, + const NActors::TActorContext& ctx) +{ if (AcquireReleaseDiskRequests.empty()) { LOG_DEBUG_S( ctx, @@ -54,7 +64,6 @@ void TVolumeActor::HandleReleaseDiskResponse( auto& request = AcquireReleaseDiskRequests.front(); auto& cr = request.ClientRequest; - const auto& error = record.GetError(); if (HasError(error) && (error.GetCode() != E_NOT_FOUND)) { LOG_DEBUG_S( diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_startstop.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_startstop.cpp index 002fe1e9033..093aa8e4b5a 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor_startstop.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_actor_startstop.cpp @@ -178,6 +178,7 @@ void TVolumeActor::SetupDiskRegistryBasedPartitions(const TActorContext& ctx) SelfId(), State->GetMeta().GetMuteIOErrors(), 
State->GetFilteredFreshDevices(), + State->GetLaggingDevices(), maxTimedOutDeviceStateDuration, maxTimedOutDeviceStateDurationOverridden, useSimpleMigrationBandwidthLimiter); @@ -259,6 +260,7 @@ void TVolumeActor::SetupDiskRegistryBasedPartitions(const TActorContext& ctx) State->SetDiskRegistryBasedPartitionActor( WrapNonreplActorIfNeeded(ctx, nonreplicatedActorId, nonreplicatedConfig), nonreplicatedConfig); + ReportLaggingDevicesToDR(ctx); } NActors::TActorId TVolumeActor::WrapNonreplActorIfNeeded( @@ -346,6 +348,51 @@ void TVolumeActor::StartPartitionsForGc(const TActorContext& ctx) PartitionsStartedReason = EPartitionsStartedReason::STARTED_FOR_GC; } +void TVolumeActor::HandleGracefulShutdown( + const TEvVolume::TEvGracefulShutdownRequest::TPtr& ev, + const TActorContext& ctx) +{ + if (!State->GetDiskRegistryBasedPartitionActor()) { + LOG_ERROR( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] GracefulShutdown request was sent to " + "non-DR based disk", + TabletID()); + + NCloud::Reply( + ctx, + *ev, + std::make_unique( + MakeError(E_NOT_IMPLEMENTED, "request is not supported"))); + return; + } + + LOG_INFO( + ctx, + TBlockStoreComponents::VOLUME, + "[%lu] Stop Partition before volume destruction", + TabletID()); + + auto reqInfo = + CreateRequestInfo(ev->Sender, ev->Cookie, ev->Get()->CallContext); + StopPartitions( + ctx, + [reqInfo = std::move(reqInfo)](const auto& ctx) + { + NCloud::Reply( + ctx, + *reqInfo, + std::make_unique()); + }); + + TerminateTransactions(ctx); + KillActors(ctx); + CancelRequests(ctx); + + BecomeAux(ctx, STATE_ZOMBIE); +} + void TVolumeActor::StopPartitions( const TActorContext& ctx, TDiskRegistryBasedPartitionStoppedCallback onPartitionStopped) diff --git a/cloud/blockstore/libs/storage/volume/volume_actor_updateconfig.cpp b/cloud/blockstore/libs/storage/volume/volume_actor_updateconfig.cpp index 88be65b7538..04a4bf18ba3 100644 --- a/cloud/blockstore/libs/storage/volume/volume_actor_updateconfig.cpp +++ 
b/cloud/blockstore/libs/storage/volume/volume_actor_updateconfig.cpp @@ -4,7 +4,7 @@ #include #include - +#include #include #include @@ -243,11 +243,19 @@ void TVolumeActor::FinishUpdateVolumeConfig(const TActorContext& ctx) for (auto& freshDeviceId: UnfinishedUpdateVolumeConfig.FreshDeviceIds) { *newMeta.AddFreshDeviceIds() = std::move(freshDeviceId); } + if (State) { + newMeta.MutableLaggingAgentsInfo()->CopyFrom( + State->GetMeta().GetLaggingAgentsInfo()); + UpdateLaggingDevicesAfterMetaUpdate( + newMeta, + UnfinishedUpdateVolumeConfig.RemovedLaggingDeviceIds); + } UnfinishedUpdateVolumeConfig.Devices = {}; UnfinishedUpdateVolumeConfig.Migrations = {}; UnfinishedUpdateVolumeConfig.Replicas = {}; UnfinishedUpdateVolumeConfig.FreshDeviceIds = {}; + UnfinishedUpdateVolumeConfig.RemovedLaggingDeviceIds = {}; LOG_DEBUG(ctx, TBlockStoreComponents::VOLUME, "[%lu] Updating volume config to version %u", diff --git a/cloud/blockstore/libs/storage/volume/volume_events_private.h b/cloud/blockstore/libs/storage/volume/volume_events_private.h index 923fdb8ba96..16bbcbff1a2 100644 --- a/cloud/blockstore/libs/storage/volume/volume_events_private.h +++ b/cloud/blockstore/libs/storage/volume/volume_events_private.h @@ -25,6 +25,7 @@ namespace NCloud::NBlockStore::NStorage { xxx(UpdateCheckpointRequest, __VA_ARGS__) \ xxx(UpdateShadowDiskState, __VA_ARGS__) \ xxx(ReadMetaHistory, __VA_ARGS__) \ + xxx(DeviceTimeouted, __VA_ARGS__) \ // BLOCKSTORE_VOLUME_REQUESTS_PRIVATE //////////////////////////////////////////////////////////////////////////////// @@ -132,6 +133,56 @@ struct TEvVolumePrivate TVector MetaHistory; }; + // + // DeviceTimeouted + // + + struct TDeviceTimeoutedRequest + { + const TString DeviceUUID; + + explicit TDeviceTimeoutedRequest(TString deviceUUID) + : DeviceUUID(std::move(deviceUUID)) + {} + }; + + struct TDeviceTimeoutedResponse + { + }; + + // + // UpdateSmartMigrationState + // + + struct TUpdateSmartMigrationState + { + TString AgentId; + ui64 
ProcessedBlockCount; + ui64 BlockCountNeedToBeProcessed; + + TUpdateSmartMigrationState( + TString agentId, + ui64 processedBlockCount, + ui64 blockCountNeedToBeProcessed) + : AgentId(std::move(agentId)) + , ProcessedBlockCount(processedBlockCount) + , BlockCountNeedToBeProcessed(blockCountNeedToBeProcessed) + {} + }; + + // + // SmartMigrationFinished + // + + struct TSmartMigrationFinished + { + const TString AgentId; + + explicit TSmartMigrationFinished(TString agentId) + : AgentId(std::move(agentId)) + {} + }; + // // UpdateDevices // @@ -142,6 +193,7 @@ struct TEvVolumePrivate TMigrations Migrations; TVector Replicas; TVector FreshDeviceIds; + TVector RemovedLaggingDevices; NProto::EVolumeIOMode IOMode; TInstant IOModeTs; bool MuteIOErrors; @@ -151,6 +203,7 @@ struct TEvVolumePrivate TMigrations migrations, TVector replicas, TVector freshDeviceIds, + TVector removedLaggingDevices, NProto::EVolumeIOMode ioMode, TInstant ioModeTs, bool muteIOErrors) @@ -158,6 +211,7 @@ struct TEvVolumePrivate , Migrations(std::move(migrations)) , Replicas(std::move(replicas)) , FreshDeviceIds(std::move(freshDeviceIds)) + , RemovedLaggingDevices(std::move(removedLaggingDevices)) , IOMode(ioMode) , IOModeTs(ioModeTs) , MuteIOErrors(muteIOErrors) @@ -243,6 +297,14 @@ struct TEvVolumePrivate { }; + // + // ReportLaggingDevicesToDR + // + + struct TReportLaggingDevicesToDR + { + }; + // // ShadowDiskAcquired // @@ -308,6 +370,22 @@ struct TEvVolumePrivate TExternalDrainDone() = default; }; + // + // DevicesAcquireFinished + // + + struct TDevicesAcquireFinished + { + }; + + // + // DevicesReleaseFinished + // + + struct TDevicesReleaseFinished + { + }; + // // Events declaration // @@ -330,6 +408,11 @@ struct TEvVolumePrivate EvRemoveExpiredVolumeParams, EvShadowDiskAcquired, EvExternalDrainDone, + EvDevicesAcquireFinished, + EvDevicesReleaseFinished, + EvReportLaggingDevicesToDR, + EvUpdateSmartMigrationState, + EvSmartMigrationFinished, EvEnd }; @@ -378,6 +461,21 @@ struct 
TEvVolumePrivate EvUpdateReadWriteClientInfo >; + using TEvUpdateSmartMigrationState = TRequestEvent< + TUpdateSmartMigrationState, + EvUpdateSmartMigrationState + >; + + using TEvSmartMigrationFinished = TRequestEvent< + TSmartMigrationFinished, + EvSmartMigrationFinished + >; + + using TEvReportLaggingDevicesToDR = TRequestEvent< + TReportLaggingDevicesToDR, + EvReportLaggingDevicesToDR + >; + using TEvRemoveExpiredVolumeParams = TRequestEvent< TRemoveExpiredVolumeParams, EvRemoveExpiredVolumeParams @@ -392,6 +490,12 @@ struct TEvVolumePrivate TExternalDrainDone, EvExternalDrainDone >; + + using TEvDevicesAcquireFinished = + TResponseEvent; + + using TEvDevicesReleaseFinished = + TResponseEvent; }; } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/volume_lagging_agent_ut.cpp b/cloud/blockstore/libs/storage/volume/volume_lagging_agent_ut.cpp new file mode 100644 index 00000000000..1d9983becbb --- /dev/null +++ b/cloud/blockstore/libs/storage/volume/volume_lagging_agent_ut.cpp @@ -0,0 +1,603 @@ +#include +#include +#include +#include +#include + +#include + +namespace NCloud::NBlockStore::NStorage { + +using namespace NActors; +using namespace NKikimr; +using namespace NCloud::NBlockStore::NStorage::NPartition; +using namespace NCloud::NStorage; +using namespace NTestVolume; + +namespace { + +TVector MakeDeviceList(ui32 agentCount, ui32 deviceCount) +{ + TVector result; + for (ui32 i = 1; i <= agentCount; i++) { + for (ui32 j = 0; j < deviceCount; j++) { + auto device = MakeDevice( + Sprintf("uuid-%u.%u", i, j), + Sprintf("dev%u", j), + Sprintf("transport%u-%u", i, j)); + device.SetNodeId(i - 1); + device.SetAgentId(Sprintf("agent-%u", i)); + result.push_back(std::move(device)); + } + } + return result; +} + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +Y_UNIT_TEST_SUITE(TLaggingAgentVolumeTest) +{ + Y_UNIT_TEST(ShouldHandleDeviceTimeouted) + { + constexpr ui32 
AgentCount = 3; + auto diskRegistryState = MakeIntrusive(); + diskRegistryState->Devices = MakeDeviceList(AgentCount, 3); + diskRegistryState->AllocateDiskReplicasOnDifferentNodes = true; + diskRegistryState->ReplicaCount = 2; + TVector agentStates; + for (ui32 i = 0; i < AgentCount; i++) { + agentStates.push_back(TDiskAgentStatePtr{}); + } + auto runtime = PrepareTestActorRuntime( + {}, + diskRegistryState, + {}, + {}, + std::move(agentStates)); + + // Create mirror-3 volume with a size of 1 device. + TVolumeClient volume(*runtime); + const ui64 blockCount = + DefaultDeviceBlockCount * DefaultDeviceBlockSize / DefaultBlockSize; + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, // version + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + blockCount); + + volume.WaitReady(); + + auto stat = volume.StatVolume(); + const auto& devices = stat->Record.GetVolume().GetDevices(); + const auto& replicas = stat->Record.GetVolume().GetReplicas(); + UNIT_ASSERT_VALUES_EQUAL(1, devices.size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-1.0", devices[0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-1", devices[0].GetAgentId()); + + UNIT_ASSERT_VALUES_EQUAL(2, replicas.size()); + UNIT_ASSERT_VALUES_EQUAL(1, replicas[0].DevicesSize()); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-2.0", + replicas[0].GetDevices(0).GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL( + "agent-2", + replicas[0].GetDevices(0).GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL(1, replicas[1].DevicesSize()); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-3.0", + replicas[1].GetDevices(0).GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL( + "agent-3", + replicas[1].GetDevices(0).GetAgentId()); + + std::optional + addLaggingAgentRequest; + runtime->SetEventFilter( + [&](TTestActorRuntimeBase&, TAutoPtr& event) + { + switch (event->GetTypeRewrite()) { + case TEvPartition::EvAddLaggingAgentRequest: { + auto* msg = event->Get< + TEvPartition::TEvAddLaggingAgentRequest>(); + UNIT_ASSERT(!addLaggingAgentRequest.has_value()); + 
addLaggingAgentRequest = *msg; + return true; + } + case TEvPartition::EvRemoveLaggingReplicaRequest: { + auto* msg = event->Get< + TEvPartition::TEvRemoveLaggingReplicaRequest>(); + UNIT_ASSERT(addLaggingAgentRequest.has_value()); + UNIT_ASSERT_VALUES_EQUAL( + msg->ReplicaIndex, + addLaggingAgentRequest->ReplicaIndex); + addLaggingAgentRequest.reset(); + return true; + } + } + return false; + }); + + // Device in the first replica is timeouted. + volume.DeviceTimeouted("uuid-2.0"); + + UNIT_ASSERT(addLaggingAgentRequest.has_value()); + UNIT_ASSERT_VALUES_EQUAL( + replicas[0].GetDevices(0).GetAgentId(), + addLaggingAgentRequest->AgentId); + UNIT_ASSERT_VALUES_EQUAL(1, addLaggingAgentRequest->ReplicaIndex); + + // Can't add more lagging devices in the same row. + volume.SendDeviceTimeoutedRequest("uuid-3.0"); + auto response = volume.RecvDeviceTimeoutedResponse(); + UNIT_ASSERT_VALUES_EQUAL( + E_INVALID_STATE, + response->GetError().GetCode()); + + // Agent devices are now up-to-date. + volume.SendToPipe( + std::make_unique( + "agent-2")); + runtime->DispatchEvents({}, TDuration::Seconds(1)); + UNIT_ASSERT(!addLaggingAgentRequest.has_value()); + + // Now the zeroth replica can lag. 
+ volume.DeviceTimeouted("uuid-1.0"); + UNIT_ASSERT(addLaggingAgentRequest.has_value()); + UNIT_ASSERT_VALUES_EQUAL( + devices[0].GetAgentId(), + addLaggingAgentRequest->AgentId); + UNIT_ASSERT_VALUES_EQUAL(0, addLaggingAgentRequest->ReplicaIndex); + } + + Y_UNIT_TEST(ShouldHandleTabletReboot) + { + constexpr ui32 AgentCount = 6; + constexpr ui32 DevicePerAgentCount = 2; + auto diskRegistryState = MakeIntrusive(); + diskRegistryState->Devices = + MakeDeviceList(AgentCount, DevicePerAgentCount); + diskRegistryState->AllocateDiskReplicasOnDifferentNodes = true; + diskRegistryState->ReplicaCount = 2; + TVector agentStates; + for (ui32 i = 0; i < AgentCount; i++) { + agentStates.push_back(TDiskAgentStatePtr{}); + } + auto runtime = PrepareTestActorRuntime( + {}, + diskRegistryState, + {}, + {}, + std::move(agentStates)); + + TVolumeClient volume(*runtime); + const ui64 blockCount = DefaultDeviceBlockCount * + DefaultDeviceBlockSize / DefaultBlockSize * 3; + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, // version + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + blockCount); + volume.WaitReady(); + + auto stat = volume.StatVolume(); + const auto& devices = stat->Record.GetVolume().GetDevices(); + UNIT_ASSERT_VALUES_EQUAL(3, devices.size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-1.0", devices[0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-1", devices[0].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL("uuid-1.1", devices[1].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-1", devices[1].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL("uuid-4.0", devices[2].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-4", devices[2].GetAgentId()); + + const auto& replicas = stat->Record.GetVolume().GetReplicas(); + UNIT_ASSERT_VALUES_EQUAL(2, replicas.size()); + const auto& replica1Devices = replicas[0].GetDevices(); + UNIT_ASSERT_VALUES_EQUAL(3, replica1Devices.size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-2.0", replica1Devices[0].GetDeviceUUID()); + 
UNIT_ASSERT_VALUES_EQUAL("agent-2", replica1Devices[0].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL("uuid-2.1", replica1Devices[1].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-2", replica1Devices[1].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL("uuid-5.0", replica1Devices[2].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-5", replica1Devices[2].GetAgentId()); + + const auto& replica2Devices = replicas[1].GetDevices(); + UNIT_ASSERT_VALUES_EQUAL(3, replica2Devices.size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-3.0", replica2Devices[0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-3", replica2Devices[0].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL("uuid-3.1", replica2Devices[1].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-3", replica2Devices[1].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL("uuid-6.0", replica2Devices[2].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-6", replica2Devices[2].GetAgentId()); + + std::optional + addLaggingAgentRequest; + std::optional + addLaggingDevicesRequest; + runtime->SetEventFilter( + [&](TTestActorRuntimeBase&, TAutoPtr& event) + { + switch (event->GetTypeRewrite()) { + case TEvPartition::EvAddLaggingAgentRequest: { + auto* msg = event->Get< + TEvPartition::TEvAddLaggingAgentRequest>(); + UNIT_ASSERT(!addLaggingAgentRequest.has_value()); + addLaggingAgentRequest = *msg; + return true; + } + case TEvDiskRegistry::EvAddLaggingDevicesRequest: { + auto* msg = event->Get< + TEvDiskRegistry::TEvAddLaggingDevicesRequest>(); + addLaggingDevicesRequest = msg->Record; + break; + } + } + return false; + }); + + // Device in the zeroth replica is timeouted. + volume.DeviceTimeouted("uuid-1.1"); + + UNIT_ASSERT(addLaggingAgentRequest.has_value()); + UNIT_ASSERT_VALUES_EQUAL( + devices[1].GetAgentId(), + addLaggingAgentRequest->AgentId); + UNIT_ASSERT_VALUES_EQUAL(0, addLaggingAgentRequest->ReplicaIndex); + + { + addLaggingAgentRequest.reset(); + // The first agent is already lagging. 
+ volume.SendDeviceTimeoutedRequest("uuid-1.0"); + auto response = volume.RecvDeviceTimeoutedResponse(); + UNIT_ASSERT_VALUES_EQUAL(S_ALREADY, response->GetError().GetCode()); + UNIT_ASSERT(addLaggingAgentRequest.has_value()); + UNIT_ASSERT_VALUES_EQUAL( + devices[0].GetAgentId(), + addLaggingAgentRequest->AgentId); + } + + { + // 0 and 1st rows already lagging. Can't add more lagging devices on + // these rows. + volume.SendDeviceTimeoutedRequest("uuid-2.1"); + auto response = volume.RecvDeviceTimeoutedResponse(); + UNIT_ASSERT_VALUES_EQUAL( + E_INVALID_STATE, + response->GetError().GetCode()); + } + + // Adding the second row to lagging. + addLaggingAgentRequest.reset(); + volume.DeviceTimeouted("uuid-6.0"); + UNIT_ASSERT(addLaggingAgentRequest.has_value()); + UNIT_ASSERT_VALUES_EQUAL( + replica2Devices[2].GetAgentId(), + addLaggingAgentRequest->AgentId); + + // Rebooting the volume tablet should report lagging devices to the DR. + UNIT_ASSERT(!addLaggingDevicesRequest.has_value()); + volume.RebootTablet(); + runtime->DispatchEvents({}, TDuration::Seconds(1)); + UNIT_ASSERT(addLaggingDevicesRequest.has_value()); + + UNIT_ASSERT_VALUES_EQUAL("vol0", addLaggingDevicesRequest->GetDiskId()); + UNIT_ASSERT_VALUES_EQUAL( + 3, + addLaggingDevicesRequest->GetLaggingDevices().size()); + UNIT_ASSERT_VALUES_EQUAL( + "DeviceUUID: \"uuid-1.0\"\n", + addLaggingDevicesRequest->GetLaggingDevices(0).DebugString()); + UNIT_ASSERT_VALUES_EQUAL( + "DeviceUUID: \"uuid-1.1\"\nRowIndex: 1\n", + addLaggingDevicesRequest->GetLaggingDevices(1).DebugString()); + UNIT_ASSERT_VALUES_EQUAL( + "DeviceUUID: \"uuid-6.0\"\nRowIndex: 2\n", + addLaggingDevicesRequest->GetLaggingDevices(2).DebugString()); + + // Disk Registry will remove lagging devices on reallocation. 
+ volume.ReallocateDisk(); + auto metaHistoryResponse = volume.ReadMetaHistory(); + UNIT_ASSERT(!metaHistoryResponse->MetaHistory.empty()); + UNIT_ASSERT_VALUES_EQUAL( + 0, + metaHistoryResponse->MetaHistory.back() + .Meta.GetLaggingAgentsInfo() + .AgentsSize()); + } + + Y_UNIT_TEST(ShouldHandleUpdateVolumeConfig) + { + constexpr ui32 AgentCount = 6; + constexpr ui32 DevicePerAgentCount = 2; + auto diskRegistryState = MakeIntrusive(); + diskRegistryState->Devices = + MakeDeviceList(AgentCount, DevicePerAgentCount); + diskRegistryState->AllocateDiskReplicasOnDifferentNodes = true; + diskRegistryState->ReplicaCount = 2; + TVector agentStates; + for (ui32 i = 0; i < AgentCount; i++) { + agentStates.push_back(TDiskAgentStatePtr{}); + } + auto runtime = PrepareTestActorRuntime( + {}, + diskRegistryState, + {}, + {}, + std::move(agentStates)); + + TVolumeClient volume(*runtime); + const ui64 blockCount = DefaultDeviceBlockCount * + DefaultDeviceBlockSize / DefaultBlockSize * 3; + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, // version + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + blockCount); + volume.WaitReady(); + + auto stat = volume.StatVolume(); + const auto& devices = stat->Record.GetVolume().GetDevices(); + UNIT_ASSERT_VALUES_EQUAL(3, devices.size()); + + std::optional + addLaggingDevicesRequest; + runtime->SetEventFilter( + [&](TTestActorRuntimeBase&, TAutoPtr& event) + { + switch (event->GetTypeRewrite()) { + case TEvPartition::EvAddLaggingAgentRequest: { + return true; + } + case TEvDiskRegistry::EvAddLaggingDevicesRequest: { + auto* msg = event->Get< + TEvDiskRegistry::TEvAddLaggingDevicesRequest>(); + addLaggingDevicesRequest = msg->Record; + break; + } + } + return false; + }); + + // Device in the zeroth replica is timeouted. + volume.DeviceTimeouted("uuid-1.1"); + + UNIT_ASSERT(!addLaggingDevicesRequest.has_value()); + // Update volume config. 
+ volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 2, // version + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + blockCount); + volume.WaitReady(); + UNIT_ASSERT(addLaggingDevicesRequest.has_value()); + + auto metaHistoryResponse = volume.ReadMetaHistory(); + UNIT_ASSERT(!metaHistoryResponse->MetaHistory.empty()); + + // Make sure that lagging devices are still there. + auto historyItem = metaHistoryResponse->MetaHistory.back(); + UNIT_ASSERT_VALUES_EQUAL( + 1, + historyItem.Meta.GetLaggingAgentsInfo().AgentsSize()); + UNIT_ASSERT_VALUES_EQUAL( + "agent-1", + historyItem.Meta.GetLaggingAgentsInfo() + .GetAgents()[0] + .GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL( + 2, + historyItem.Meta.GetLaggingAgentsInfo() + .GetAgents()[0] + .GetDevices() + .size()); + } + + Y_UNIT_TEST(ShouldHandleMigratingDevice) + { + constexpr ui32 AgentCount = 8; + constexpr ui32 DevicePerAgentCount = 2; + auto diskRegistryState = MakeIntrusive(); + diskRegistryState->Devices = + MakeDeviceList(AgentCount - 2, DevicePerAgentCount); + diskRegistryState->AllocateDiskReplicasOnDifferentNodes = true; + diskRegistryState->ReplicaCount = 2; + diskRegistryState->MigrationMode = EMigrationMode::InProgress; + + // Add migration devices. 
+ { + auto device = MakeDevice( + "uuid-migration-1", + "dev-migration-1", + "transport-migration-1"); + device.SetNodeId(AgentCount - 2); + device.SetAgentId(Sprintf("agent-%u", AgentCount - 1)); + diskRegistryState->MigrationDevices["uuid-1.0"] = device; + diskRegistryState->Devices.push_back(device); + } + { + auto device = MakeDevice( + "uuid-migration-2", + "dev-migration-2", + "transport-migration-2"); + device.SetNodeId(AgentCount - 1); + device.SetAgentId(Sprintf("agent-%u", AgentCount)); + diskRegistryState->MigrationDevices["uuid-6.0"] = device; + diskRegistryState->Devices.push_back(device); + } + + TVector agentStates; + for (ui32 i = 0; i < AgentCount; i++) { + agentStates.push_back(TDiskAgentStatePtr{}); + } + auto runtime = PrepareTestActorRuntime( + {}, + diskRegistryState, + {}, + {}, + std::move(agentStates)); + + TVolumeClient volume(*runtime); + const ui64 blockCount = DefaultDeviceBlockCount * + DefaultDeviceBlockSize / DefaultBlockSize * 3; + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, // version + NCloud::NProto::STORAGE_MEDIA_SSD_MIRROR3, + blockCount); + volume.WaitReady(); + + auto stat = volume.StatVolume(); + const auto& devices = stat->Record.GetVolume().GetDevices(); + UNIT_ASSERT_VALUES_EQUAL(3, devices.size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-1.0", devices[0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-1", devices[0].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL("uuid-1.1", devices[1].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-1", devices[1].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL("uuid-4.0", devices[2].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-4", devices[2].GetAgentId()); + + const auto& replicas = stat->Record.GetVolume().GetReplicas(); + UNIT_ASSERT_VALUES_EQUAL(2, replicas.size()); + const auto& replica1Devices = replicas[0].GetDevices(); + UNIT_ASSERT_VALUES_EQUAL(3, replica1Devices.size()); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-2.0", + replica1Devices[0].GetDeviceUUID()); + 
UNIT_ASSERT_VALUES_EQUAL("agent-2", replica1Devices[0].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-2.1", + replica1Devices[1].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-2", replica1Devices[1].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-5.0", + replica1Devices[2].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-5", replica1Devices[2].GetAgentId()); + + const auto& replica2Devices = replicas[1].GetDevices(); + UNIT_ASSERT_VALUES_EQUAL(3, replica2Devices.size()); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-3.0", + replica2Devices[0].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-3", replica2Devices[0].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-3.1", + replica2Devices[1].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-3", replica2Devices[1].GetAgentId()); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-6.0", + replica2Devices[2].GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("agent-6", replica2Devices[2].GetAgentId()); + + const auto& migrations = stat->Record.GetVolume().GetMigrations(); + UNIT_ASSERT_VALUES_EQUAL(2, migrations.size()); + UNIT_ASSERT_VALUES_EQUAL("uuid-1.0", migrations[0].GetSourceDeviceId()); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-migration-1", + migrations[0].GetTargetDevice().GetDeviceUUID()); + UNIT_ASSERT_VALUES_EQUAL("uuid-6.0", migrations[1].GetSourceDeviceId()); + UNIT_ASSERT_VALUES_EQUAL( + "uuid-migration-2", + migrations[1].GetTargetDevice().GetDeviceUUID()); + + std::optional + addLaggingAgentRequest; + std::optional + addLaggingDevicesRequest; + runtime->SetEventFilter( + [&](TTestActorRuntimeBase&, TAutoPtr& event) + { + switch (event->GetTypeRewrite()) { + case TEvPartition::EvAddLaggingAgentRequest: { + auto* msg = event->Get< + TEvPartition::TEvAddLaggingAgentRequest>(); + UNIT_ASSERT(!addLaggingAgentRequest.has_value()); + addLaggingAgentRequest = *msg; + return true; + } + case TEvDiskRegistry::EvAddLaggingDevicesRequest: { + auto* msg = event->Get< + TEvDiskRegistry::TEvAddLaggingDevicesRequest>(); + 
addLaggingDevicesRequest = msg->Record; + break; + } + } + return false; + }); + + // Device in the zeroth replica is timeouted. + volume.DeviceTimeouted("uuid-migration-1"); + + UNIT_ASSERT(addLaggingAgentRequest.has_value()); + UNIT_ASSERT_VALUES_EQUAL("agent-7", addLaggingAgentRequest->AgentId); + UNIT_ASSERT_VALUES_EQUAL(0, addLaggingAgentRequest->ReplicaIndex); + addLaggingAgentRequest.reset(); + + // Device in the second replica is timeouted. + volume.DeviceTimeouted("uuid-migration-2"); + + UNIT_ASSERT(addLaggingAgentRequest.has_value()); + UNIT_ASSERT_VALUES_EQUAL("agent-8", addLaggingAgentRequest->AgentId); + UNIT_ASSERT_VALUES_EQUAL(2, addLaggingAgentRequest->ReplicaIndex); + + { + addLaggingAgentRequest.reset(); + // The zeroth row is already lagging. + volume.SendDeviceTimeoutedRequest("uuid-1.0"); + auto response = volume.RecvDeviceTimeoutedResponse(); + UNIT_ASSERT_VALUES_EQUAL( + E_INVALID_STATE, + response->GetError().GetCode()); + UNIT_ASSERT(!addLaggingAgentRequest.has_value()); + } + + // Rebooting the volume tablet should report lagging devices to the DR. + UNIT_ASSERT(!addLaggingDevicesRequest.has_value()); + volume.RebootTablet(); + runtime->DispatchEvents({}, TDuration::Seconds(1)); + UNIT_ASSERT(addLaggingDevicesRequest.has_value()); + + UNIT_ASSERT_VALUES_EQUAL("vol0", addLaggingDevicesRequest->GetDiskId()); + UNIT_ASSERT_VALUES_EQUAL( + 2, + addLaggingDevicesRequest->GetLaggingDevices().size()); + UNIT_ASSERT_VALUES_EQUAL( + "DeviceUUID: \"uuid-migration-1\"\n", + addLaggingDevicesRequest->GetLaggingDevices(0).DebugString()); + UNIT_ASSERT_VALUES_EQUAL( + "DeviceUUID: \"uuid-migration-2\"\nRowIndex: 2\n", + addLaggingDevicesRequest->GetLaggingDevices(1).DebugString()); + + // Disk Registry will remove lagging devices on reallocation. 
+ volume.ReallocateDisk(); + auto metaHistoryResponse = volume.ReadMetaHistory(); + UNIT_ASSERT(!metaHistoryResponse->MetaHistory.empty()); + UNIT_ASSERT_VALUES_EQUAL( + 0, + metaHistoryResponse->MetaHistory.back() + .Meta.GetLaggingAgentsInfo() + .AgentsSize()); + } +} + +} // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/volume_state.cpp b/cloud/blockstore/libs/storage/volume/volume_state.cpp index 2a8fbd0d8ec..e0d86184872 100644 --- a/cloud/blockstore/libs/storage/volume/volume_state.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_state.cpp @@ -197,6 +197,51 @@ TRuntimeVolumeParams& TVolumeState::GetVolumeParams() return VolumeParams; } +void TVolumeState::AddLaggingAgent(NProto::TLaggingAgent agent) +{ + Meta.MutableLaggingAgentsInfo()->MutableAgents()->Add(std::move(agent)); +} + +std::optional TVolumeState::RemoveLaggingAgent( + const TString& agentId) +{ + auto agentIdPredicate = [&agentId](const auto& info) + { + return info.GetAgentId() == agentId; + }; + + auto& laggingAgents = *Meta.MutableLaggingAgentsInfo()->MutableAgents(); + Y_DEBUG_ABORT_UNLESS(CountIf(laggingAgents, agentIdPredicate) <= 1); + auto it = FindIf(laggingAgents, agentIdPredicate); + if (it != laggingAgents.end()) { + NProto::TLaggingAgent laggingAgent = std::move(*it); + laggingAgents.erase(it); + return laggingAgent; + } + return std::nullopt; +} + +bool TVolumeState::HasLaggingInReplica(ui32 replicaIndex) const +{ + for (const auto& agent: Meta.GetLaggingAgentsInfo().GetAgents()) { + if (agent.GetReplicaIndex() == replicaIndex) { + return true; + } + } + return false; +} + +THashSet TVolumeState::GetLaggingDevices() const +{ + THashSet laggingDevices; + for (const auto& agent: Meta.GetLaggingAgentsInfo().GetAgents()) { + for (const auto& device: agent.GetDevices()) { + laggingDevices.insert(device.GetDeviceUUID()); + } + } + return laggingDevices; +} + void TVolumeState::ResetMeta(NProto::TVolumeMeta meta) { Meta = std::move(meta); @@ 
-310,7 +355,7 @@ void TVolumeState::Reset() TDuration::TryParse(value, MaxTimedOutDeviceStateDuration); } else if (tag == "use-fastpath") { UseFastPath = true; - } else if (tag == "use-intermediate-write-buffer") { + } else if (tag == IntermediateWriteBufferTagName) { UseIntermediateWriteBuffer = true; } } @@ -811,6 +856,31 @@ const THashMultiMap& TVolumeState::GetPipeServerId2ClientId() return ClientIdsByPipeServerId; } +TVector +TVolumeState::GetAllDevicesForAcquireRelease() const +{ + const size_t allDevicesCount = + ((Meta.ReplicasSize() + 1) * Meta.DevicesSize()) + + GetMeta().MigrationsSize(); + + TVector resultDevices; + resultDevices.reserve(allDevicesCount); + + for (const auto& device: Meta.GetDevices()) { + resultDevices.emplace_back(device); + } + for (const auto& replica: Meta.GetReplicas()) { + for (const auto& device: replica.GetDevices()) { + resultDevices.emplace_back(device); + } + } + for (const auto& migration: Meta.GetMigrations()) { + resultDevices.emplace_back(migration.GetTargetDevice()); + } + + return resultDevices; +} + bool TVolumeState::CanPreemptClient( const TString& oldClientId, TInstant referenceTimestamp, diff --git a/cloud/blockstore/libs/storage/volume/volume_state.h b/cloud/blockstore/libs/storage/volume/volume_state.h index 7b92e555392..6754d2e1649 100644 --- a/cloud/blockstore/libs/storage/volume/volume_state.h +++ b/cloud/blockstore/libs/storage/volume/volume_state.h @@ -240,7 +240,6 @@ class TVolumeState // The number of blocks that need to be migrated to complete the migration. 
std::optional BlockCountToMigrate; - public: TVolumeState( TStorageConfigPtr storageConfig, @@ -309,6 +308,12 @@ class TVolumeState Meta.SetFillSeqNumber(fillSeqNumber); } + void AddLaggingAgent(NProto::TLaggingAgent agent); + std::optional RemoveLaggingAgent( + const TString& agentId); + [[nodiscard]] bool HasLaggingInReplica(ui32 replicaIndex) const; + [[nodiscard]] THashSet GetLaggingDevices() const; + void SetStartPartitionsNeeded(bool startPartitionsNeeded) { StartPartitionsNeeded = startPartitionsNeeded; @@ -725,6 +730,8 @@ class TVolumeState return Meta.GetResyncNeeded(); } + TVector GetAllDevicesForAcquireRelease() const; + private: bool CanPreemptClient( const TString& oldClientId, diff --git a/cloud/blockstore/libs/storage/volume/volume_state_ut.cpp b/cloud/blockstore/libs/storage/volume/volume_state_ut.cpp index 5c3e28333b2..e5ade7417a9 100644 --- a/cloud/blockstore/libs/storage/volume/volume_state_ut.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_state_ut.cpp @@ -107,11 +107,11 @@ TVolumeState CreateVolumeState( MakeConfig(inactiveClientsTimeout, {}), CreateDiagnosticsConfig(), CreateVolumeMeta(pp), - {{TInstant::Seconds(100), CreateVolumeMeta(pp)}}, // metaHistory + {{TInstant::Seconds(100), CreateVolumeMeta(pp)}}, // metaHistory {}, CreateThrottlerConfig(), std::move(clientInfos), - {}, + TCachedVolumeMountHistory{VolumeHistoryCacheSize, {}}, std::move(checkpointRequests), false); } @@ -1067,12 +1067,13 @@ Y_UNIT_TEST_SUITE(TVolumeStateTest) info1, MakeError(S_OK), {}, {}); const auto& history = volumeState.GetMountHistory().GetItems(); - if (i) { + if (history.size() >= 2) { UNIT_ASSERT( res.Key.Timestamp != history[1].Key.Timestamp || res.Key.SeqNo != history[1].Key.SeqNo); } UNIT_ASSERT(history.size() <= VolumeHistoryCacheSize); + UNIT_ASSERT(history.size() > 0); } } @@ -1938,6 +1939,40 @@ Y_UNIT_TEST_SUITE(TVolumeStateTest) UNIT_ASSERT(!state.GetTrackUsedBlocks()); } } + + Y_UNIT_TEST(AcquireDisk) + { + auto volumeState = 
CreateVolumeState(); + auto meta = volumeState.GetMeta(); + const TInstant oldDate = TInstant::ParseIso8601("2023-08-30"); + meta.MutableVolumeConfig()->SetCreationTs(oldDate.MicroSeconds()); + meta.AddDevices()->SetDeviceUUID("d1"); + meta.AddDevices()->SetDeviceUUID("d2"); + auto& r1 = *meta.AddReplicas(); + r1.AddDevices()->SetDeviceUUID("d3"); + r1.AddDevices()->SetDeviceUUID("d4"); + auto& r2 = *meta.AddReplicas(); + r2.AddDevices()->SetDeviceUUID("d5"); + r2.AddDevices()->SetDeviceUUID("d6"); + + NProto::TDeviceMigration deviceMigration; + deviceMigration.SetSourceDeviceId("d1"); + deviceMigration.MutableTargetDevice()->SetDeviceUUID("d7"); + + meta.MutableMigrations()->Add(std::move(deviceMigration)); + volumeState.ResetMeta(meta); + + const THashSet + deviceUUIDSExpected{"d1", "d2", "d3", "d4", "d5", "d6", "d7"}; + + auto devices = volumeState.GetAllDevicesForAcquireRelease(); + THashSet devicesUUIDSActual; + for (const auto& d: volumeState.GetAllDevicesForAcquireRelease()) { + devicesUUIDSActual.insert(d.GetDeviceUUID()); + } + + UNIT_ASSERT_EQUAL(deviceUUIDSExpected, devicesUUIDSActual); + } } } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/volume_tx.h b/cloud/blockstore/libs/storage/volume/volume_tx.h index e878d8ffb62..d909b7014e0 100644 --- a/cloud/blockstore/libs/storage/volume/volume_tx.h +++ b/cloud/blockstore/libs/storage/volume/volume_tx.h @@ -41,6 +41,8 @@ namespace NCloud::NBlockStore::NStorage { xxx(DeleteVolumeParams, __VA_ARGS__) \ xxx(ChangeStorageConfig, __VA_ARGS__) \ xxx(ReadMetaHistory, __VA_ARGS__) \ + xxx(AddLaggingAgent, __VA_ARGS__) \ + xxx(RemoveLaggingAgent, __VA_ARGS__) \ // BLOCKSTORE_VOLUME_TRANSACTIONS //////////////////////////////////////////////////////////////////////////////// @@ -148,6 +150,7 @@ struct TTxVolume TMigrations Migrations; TVector Replicas; TVector FreshDeviceIds; + TVector RemovedLaggingDeviceIds; NProto::EVolumeIOMode IOMode; TInstant IOModeTs; bool 
MuteIOErrors; @@ -159,6 +162,7 @@ struct TTxVolume TMigrations migrations, TVector replicas, TVector freshDeviceIds, + TVector removedLaggingDeviceIds, NProto::EVolumeIOMode ioMode, TInstant ioModeTs, bool muteIOErrors) @@ -168,6 +172,7 @@ struct TTxVolume std::move(migrations), std::move(replicas), std::move(freshDeviceIds), + std::move(removedLaggingDeviceIds), ioMode, ioModeTs, muteIOErrors @@ -180,6 +185,7 @@ struct TTxVolume TMigrations migrations, TVector replicas, TVector freshDeviceIds, + TVector removedLaggingDeviceIds, NProto::EVolumeIOMode ioMode, TInstant ioModeTs, bool muteIOErrors) @@ -188,6 +194,7 @@ struct TTxVolume , Migrations(std::move(migrations)) , Replicas(std::move(replicas)) , FreshDeviceIds(std::move(freshDeviceIds)) + , RemovedLaggingDeviceIds(std::move(removedLaggingDeviceIds)) , IOMode(ioMode) , IOModeTs(ioModeTs) , MuteIOErrors(muteIOErrors) @@ -695,6 +702,48 @@ struct TTxVolume ResultStorageConfig.Clear(); } }; + + // + // AddLaggingAgent + // + + struct TAddLaggingAgent + { + const TRequestInfoPtr RequestInfo; + const NProto::TLaggingAgent Agent; + + TAddLaggingAgent( + TRequestInfoPtr requestInfo, + NProto::TLaggingAgent agent) + : RequestInfo(std::move(requestInfo)) + , Agent(std::move(agent)) + {} + + void Clear() + {} + }; + + // + // RemoveLaggingAgent + // + + struct TRemoveLaggingAgent + { + const TRequestInfoPtr RequestInfo; + const TString AgentId; + + NProto::TLaggingAgent RemovedLaggingAgent; + + TRemoveLaggingAgent(TRequestInfoPtr requestInfo, TString agentId) + : RequestInfo(std::move(requestInfo)) + , AgentId(std::move(agentId)) + {} + + void Clear() + { + RemovedLaggingAgent.Clear(); + } + }; }; } // namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/volume_ut.cpp b/cloud/blockstore/libs/storage/volume/volume_ut.cpp index f1c9f2287c5..74db76b2d20 100644 --- a/cloud/blockstore/libs/storage/volume/volume_ut.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_ut.cpp @@ -8148,6 
+8148,57 @@ Y_UNIT_TEST_SUITE(TVolumeTest) UNIT_ASSERT(partitionsStopped); } + Y_UNIT_TEST(ShouldGracefulyShutdownVolume) + { + auto runtime = PrepareTestActorRuntime(); + TVolumeClient volume(*runtime); + + bool partitionsStopped = false; + runtime->SetEventFilter( + [&](TTestActorRuntimeBase&, TAutoPtr& event) + { + switch (event->GetTypeRewrite()) { + // Poison pill send to DR based partition actor. + case TEvents::TEvPoisonPill::EventType: { + partitionsStopped = true; + break; + } + } + return false; + }); + + volume.UpdateVolumeConfig( + // default arguments + 0, + 0, + 0, + 0, + false, + 1, + NCloud::NProto::STORAGE_MEDIA_SSD_NONREPLICATED, + 1024, + "vol0", + "cloud", + "folder", + 1 // partitions count + ); + volume.RebootTablet(); + + auto clientInfo = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + NProto::VOLUME_MOUNT_LOCAL, + false); + + volume.GracefulShutdown(); + UNIT_ASSERT(partitionsStopped); + + // Check that volume after TEvGracefulShutdownRequest + // in zombie state and rejects requsts. 
+ volume.SendGetVolumeInfoRequest(); + auto response = volume.RecvGetVolumeInfoResponse(); + UNIT_ASSERT_VALUES_EQUAL(response->GetStatus(), E_REJECTED); + } + Y_UNIT_TEST(ShouldReturnClientsAndHostnameInStatVolumeResponse) { auto runtime = PrepareTestActorRuntime(); diff --git a/cloud/blockstore/libs/storage/volume/volume_ut_checkpoint.cpp b/cloud/blockstore/libs/storage/volume/volume_ut_checkpoint.cpp index e261d1de8ea..8d464a99444 100644 --- a/cloud/blockstore/libs/storage/volume/volume_ut_checkpoint.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_ut_checkpoint.cpp @@ -3733,6 +3733,60 @@ Y_UNIT_TEST_SUITE(TVolumeCheckpointTest) } } + Y_UNIT_TEST(ShouldStopAcquiringAfterENotFound) + { + NProto::TStorageServiceConfig config; + config.SetUseShadowDisksForNonreplDiskCheckpoints(true); + config.SetMaxAcquireShadowDiskTotalTimeoutWhenNonBlocked(2000); + + auto runtime = PrepareTestActorRuntime(config); + + auto describeDiskRequestsFilter = [&](TAutoPtr& event) + { + if (event->GetTypeRewrite() == + TEvDiskRegistry::EvDescribeDiskResponse) + { // Simulate response with E_NOT_FOUND error from DiskRegistry. + auto* msg = + event->Get(); + msg->Record.MutableError()->SetCode(E_NOT_FOUND); + } + return TTestActorRuntime::DefaultObserverFunc(event); + }; + runtime->SetObserverFunc(describeDiskRequestsFilter); + + // Create volume. + TVolumeClient volume(*runtime); + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, + NCloud::NProto::STORAGE_MEDIA_SSD_NONREPLICATED, + 32768); + + volume.WaitReady(); + + auto clientInfo = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + NProto::VOLUME_MOUNT_LOCAL, + 0); + volume.AddClient(clientInfo); + + // Create checkpoint. + volume.CreateCheckpoint("c1"); + + // Reconnect pipe since partition has restarted. + volume.ReconnectPipe(); + + // Shadow disk entered the error state. 
+ auto status = + volume.GetCheckpointStatus("c1")->Record.GetCheckpointStatus(); + + UNIT_ASSERT_EQUAL(NProto::ECheckpointStatus::ERROR, status); + } + Y_UNIT_TEST(ShouldBlockWritesWhenReAcquire) { NProto::TStorageServiceConfig config; diff --git a/cloud/blockstore/libs/storage/volume/volume_ut_session.cpp b/cloud/blockstore/libs/storage/volume/volume_ut_session.cpp new file mode 100644 index 00000000000..40ab75f4574 --- /dev/null +++ b/cloud/blockstore/libs/storage/volume/volume_ut_session.cpp @@ -0,0 +1,462 @@ +#include "volume_ut.h" + +#include +#include +#include +#include + +#include + +namespace NCloud::NBlockStore::NStorage { + +using namespace std::chrono_literals; + +using namespace NActors; + +using namespace NKikimr; + +using namespace NCloud::NBlockStore::NStorage::NPartition; + +using namespace NCloud::NStorage; + +using namespace NTestVolume; + +using namespace NTestVolumeHelpers; + +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +struct TFixture: public NUnitTest::TBaseFixture +{ + std::unique_ptr Runtime; + TIntrusivePtr State; + + void SetupTest(TDuration agentRequestTimeout = 1s) + { + NProto::TStorageServiceConfig config; + config.SetAcquireNonReplicatedDevices(true); + config.SetNonReplicatedVolumeDirectAcquireEnabled(true); + config.SetAgentRequestTimeout(agentRequestTimeout.MilliSeconds()); + config.SetClientRemountPeriod(2000); + State = MakeIntrusive(); + Runtime = PrepareTestActorRuntime(config, State); + auto volume = GetVolumeClient(); + + volume.UpdateVolumeConfig( + 0, + 0, + 0, + 0, + false, + 1, + NCloud::NProto::STORAGE_MEDIA_SSD_NONREPLICATED, + 1024); + + volume.WaitReady(); + } + + TVolumeClient GetVolumeClient() const + { + return {*Runtime}; + } +}; + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +Y_UNIT_TEST_SUITE(TVolumeSessionTest) +{ + Y_UNIT_TEST_F(ShouldPassAllParamsInAcquireDevicesRequest, TFixture) + { + 
SetupTest(); + + auto volume = GetVolumeClient(); + + auto response = volume.GetVolumeInfo(); + auto diskInfo = response->Record.GetVolume(); + THashSet devices; + for (const auto& d: diskInfo.GetDevices()) { + devices.emplace(d.GetDeviceUUID()); + } + for (const auto& m: diskInfo.GetMigrations()) { + devices.emplace(m.GetTargetDevice().GetDeviceUUID()); + } + + auto statVolumeResponse = volume.StatVolume(); + + bool requestSended = false; + + Runtime->SetObserverFunc( + [&](TAutoPtr& event) + { + if (event->GetTypeRewrite() == + TEvDiskAgent::EvAcquireDevicesRequest) + { + requestSended = true; + auto* acquireReq = + event->Get(); + Y_UNUSED(acquireReq); + + auto& record = acquireReq->Record; + + UNIT_ASSERT_EQUAL( + record.GetAccessMode(), + NProto::VOLUME_ACCESS_READ_WRITE); + + UNIT_ASSERT_EQUAL(record.GetDiskId(), diskInfo.GetDiskId()); + + const auto& deviceUUIDS = record.GetDeviceUUIDs(); + UNIT_ASSERT_EQUAL( + static_cast(deviceUUIDS.size()), + devices.size()); + for (const auto& deviceUUID: deviceUUIDS) { + UNIT_ASSERT(devices.contains(deviceUUID)); + } + + UNIT_ASSERT_EQUAL( + statVolumeResponse->Record.GetVolumeGeneration(), + record.GetVolumeGeneration()); + + UNIT_ASSERT_EQUAL( + statVolumeResponse->Record.GetMountSeqNumber(), + record.GetMountSeqNumber()); + } + + return TTestActorRuntime::DefaultObserverFunc(event); + }); + + auto writer = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + NProto::VOLUME_MOUNT_LOCAL, + 0); + volume.AddClient(writer); + + UNIT_ASSERT(requestSended); + } + + Y_UNIT_TEST_F(ShouldPassAllParamsInReleaseDevicesRequest, TFixture) + { + SetupTest(); + + auto volume = GetVolumeClient(); + + auto writer = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + NProto::VOLUME_MOUNT_LOCAL, + 0); + volume.AddClient(writer); + + auto response = volume.GetVolumeInfo(); + auto diskInfo = response->Record.GetVolume(); + THashSet devices; + for (const auto& d: diskInfo.GetDevices()) { + 
devices.emplace(d.GetDeviceUUID()); + } + for (const auto& m: diskInfo.GetMigrations()) { + devices.emplace(m.GetTargetDevice().GetDeviceUUID()); + } + + auto statVolumeResponse = volume.StatVolume(); + + bool requestSended = false; + + Runtime->SetObserverFunc( + [&](TAutoPtr& event) + { + if (event->GetTypeRewrite() == + TEvDiskAgent::EvReleaseDevicesRequest) + { + requestSended = true; + auto* acquireReq = + event->Get(); + Y_UNUSED(acquireReq); + + auto& record = acquireReq->Record; + + UNIT_ASSERT_EQUAL(record.GetDiskId(), diskInfo.GetDiskId()); + + const auto& deviceUUIDS = record.GetDeviceUUIDs(); + UNIT_ASSERT_EQUAL( + static_cast(deviceUUIDS.size()), + devices.size()); + for (const auto& deviceUUID: deviceUUIDS) { + UNIT_ASSERT(devices.contains(deviceUUID)); + } + + UNIT_ASSERT_EQUAL( + statVolumeResponse->Record.GetVolumeGeneration(), + record.GetVolumeGeneration()); + } + + return TTestActorRuntime::DefaultObserverFunc(event); + }); + + volume.RemoveClient(writer.GetClientId()); + + UNIT_ASSERT(requestSended); + } + + Y_UNIT_TEST_F(ShouldSendAcquireReleaseRequestsDirectlyToDiskAgent, TFixture) + { + SetupTest(); + + TVolumeClient writerClient = GetVolumeClient(); + auto readerClient1 = GetVolumeClient(); + auto readerClient2 = GetVolumeClient(); + + ui32 acquireRequestsToDiskRegistry = 0; + ui32 releaseRequestsToDiskRegistry = 0; + ui32 readerAcquireRequests = 0; + ui32 writerAcquireRequests = 0; + ui32 releaseRequests = 0; + + Runtime->SetObserverFunc( + [&](TAutoPtr& event) + { + switch (event->GetTypeRewrite()) { + case TEvDiskRegistry::EvAcquireDiskRequest: + ++acquireRequestsToDiskRegistry; + break; + case TEvDiskRegistry::EvReleaseDiskRequest: + ++releaseRequestsToDiskRegistry; + break; + case TEvDiskAgent::EvAcquireDevicesRequest: { + auto* msg = + event + ->Get(); + if (msg->Record.GetAccessMode() == + NProto::VOLUME_ACCESS_READ_ONLY) + { + ++readerAcquireRequests; + } else { + ++writerAcquireRequests; + } + break; + } + case 
TEvDiskAgent::EvReleaseDevicesRequest: + ++releaseRequests; + break; + default: + break; + } + + return TTestActorRuntime::DefaultObserverFunc(event); + }); + + Runtime->AdvanceCurrentTime(2s); + Runtime->DispatchEvents({}, 1ms); + + UNIT_ASSERT_VALUES_EQUAL(acquireRequestsToDiskRegistry, 0); + UNIT_ASSERT_VALUES_EQUAL(writerAcquireRequests, 0); + UNIT_ASSERT_VALUES_EQUAL(readerAcquireRequests, 0); + + auto writer = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + NProto::VOLUME_MOUNT_LOCAL, + 0); + writerClient.AddClient(writer); + + UNIT_ASSERT_VALUES_EQUAL(acquireRequestsToDiskRegistry, 0); + UNIT_ASSERT_VALUES_EQUAL(writerAcquireRequests, 1); + UNIT_ASSERT_VALUES_EQUAL(readerAcquireRequests, 0); + + auto reader1 = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_ONLY, + NProto::VOLUME_MOUNT_REMOTE, + 0); + readerClient1.AddClient(reader1); + + auto reader2 = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_ONLY, + NProto::VOLUME_MOUNT_REMOTE, + 0); + readerClient2.AddClient(reader2); + + UNIT_ASSERT_VALUES_EQUAL(acquireRequestsToDiskRegistry, 0); + UNIT_ASSERT_VALUES_EQUAL(writerAcquireRequests, 1); + UNIT_ASSERT_VALUES_EQUAL(readerAcquireRequests, 2); + + Runtime->AdvanceCurrentTime(2s); + Runtime->DispatchEvents({}, 1ms); + + UNIT_ASSERT_VALUES_EQUAL(acquireRequestsToDiskRegistry, 0); + UNIT_ASSERT_VALUES_EQUAL(writerAcquireRequests, 2); + UNIT_ASSERT_VALUES_EQUAL(readerAcquireRequests, 4); + + UNIT_ASSERT_VALUES_EQUAL(releaseRequests, 0); + UNIT_ASSERT_VALUES_EQUAL(releaseRequestsToDiskRegistry, 0); + + readerClient1.RemoveClient(reader1.GetClientId()); + + Runtime->AdvanceCurrentTime(2s); + Runtime->DispatchEvents({}, 1ms); + + UNIT_ASSERT_VALUES_EQUAL(acquireRequestsToDiskRegistry, 0); + UNIT_ASSERT_VALUES_EQUAL(writerAcquireRequests, 3); + UNIT_ASSERT_VALUES_EQUAL(readerAcquireRequests, 5); + + UNIT_ASSERT_VALUES_EQUAL(releaseRequests, 1); + UNIT_ASSERT_VALUES_EQUAL(releaseRequestsToDiskRegistry, 0); + + 
writerClient.RemoveClient(writer.GetClientId()); + readerClient2.RemoveClient(reader2.GetClientId()); + + Runtime->AdvanceCurrentTime(2s); + Runtime->DispatchEvents({}, 1ms); + + UNIT_ASSERT_VALUES_EQUAL(acquireRequestsToDiskRegistry, 0); + UNIT_ASSERT_VALUES_EQUAL(writerAcquireRequests, 3); + UNIT_ASSERT_VALUES_EQUAL(readerAcquireRequests, 5); + + UNIT_ASSERT_VALUES_EQUAL(releaseRequests, 3); + UNIT_ASSERT_VALUES_EQUAL(releaseRequestsToDiskRegistry, 0); + } + + Y_UNIT_TEST_F(ShouldRejectTimedoutAcquireRequests, TFixture) + { + SetupTest(100ms); + + auto writerClient = GetVolumeClient(); + + std::unique_ptr stollenResponse; + Runtime->SetObserverFunc( + [&](TAutoPtr& event) + { + if (event->GetTypeRewrite() == + TEvDiskAgent::EvAcquireDevicesResponse) + { + stollenResponse.reset(event.Release()); + return TTestActorRuntimeBase::EEventAction::DROP; + } + + return TTestActorRuntime::DefaultObserverFunc(event); + }); + + auto writer = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + NProto::VOLUME_MOUNT_LOCAL, + 0); + + writerClient.SendAddClientRequest(writer); + auto response = writerClient.RecvAddClientResponse(); + UNIT_ASSERT_EQUAL(response->GetError().GetCode(), E_REJECTED); + UNIT_ASSERT_VALUES_EQUAL(response->GetError().GetMessage(), "timeout"); + } + + Y_UNIT_TEST_F(ShouldPassErrorsFromDiskAgent, TFixture) + { + SetupTest(); + + auto writerClient = GetVolumeClient(); + + Runtime->SetObserverFunc( + [&](TAutoPtr& event) + { + if (event->GetTypeRewrite() == + TEvDiskAgent::EvAcquireDevicesResponse) + { + auto response = std::make_unique< + TEvDiskAgent::TEvAcquireDevicesResponse>( + MakeError(E_TRY_AGAIN)); + + Runtime->Send(new IEventHandle( + event->Recipient, + event->Sender, + response.release(), + 0, // flags + event->Cookie)); + + return TTestActorRuntime::EEventAction::DROP; + } + + return TTestActorRuntime::DefaultObserverFunc(event); + }); + + auto writer = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + 
NProto::VOLUME_MOUNT_LOCAL, + 0); + + writerClient.SendAddClientRequest(writer); + auto response = writerClient.RecvAddClientResponse(); + + UNIT_ASSERT_EQUAL(response->GetError().GetCode(), E_TRY_AGAIN); + } + + Y_UNIT_TEST_F(ShouldMuteErrorsWithMuteIoErrors, TFixture) + { + SetupTest(); + + auto writerClient = GetVolumeClient(); + + Runtime->SetObserverFunc( + [&](TAutoPtr& event) + { + if (event->GetTypeRewrite() == + TEvDiskAgent::EvAcquireDevicesResponse) + { + auto response = std::make_unique< + TEvDiskAgent::TEvAcquireDevicesResponse>( + MakeError(E_TRY_AGAIN)); + + Runtime->Send(new IEventHandle( + event->Recipient, + event->Sender, + response.release(), + 0, // flags + event->Cookie)); + + return TTestActorRuntime::EEventAction::DROP; + } + + return TTestActorRuntime::DefaultObserverFunc(event); + }); + + auto writer = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + NProto::VOLUME_MOUNT_LOCAL, + 0); + + auto& disk = State->Disks.at("vol0"); + disk.IOMode = NProto::VOLUME_IO_ERROR_READ_ONLY; + disk.IOModeTs = Runtime->GetCurrentTime(); + disk.MuteIOErrors = true; + + auto volume = GetVolumeClient(); + volume.ReallocateDisk(); + // reallocate disk will trigger pipes reset, so reestablish connection + volume.ReconnectPipe(); + + writerClient.AddClient(writer); + } + + Y_UNIT_TEST_F(ShouldHandleRequestsUndelivery, TFixture) + { + SetupTest(); + + auto writerClient = GetVolumeClient(); + + auto writer = CreateVolumeClientInfo( + NProto::VOLUME_ACCESS_READ_WRITE, + NProto::VOLUME_MOUNT_LOCAL, + 0); + + auto agentNodeId = MakeDiskAgentServiceId( + State->Disks.at("vol0").Devices[0].GetNodeId()); + + Runtime->Send(new IEventHandle( + agentNodeId, + TActorId(), + new TEvents::TEvPoisonPill)); + + writerClient.SendAddClientRequest(writer); + + auto response = writerClient.RecvAddClientResponse(); + + UNIT_ASSERT_EQUAL(response->GetError().GetCode(), E_REJECTED); + UNIT_ASSERT_EQUAL(response->GetError().GetMessage(), "not delivered"); + } +} +} // 
namespace NCloud::NBlockStore::NStorage diff --git a/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp b/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp index 03575694686..231c810d5d2 100644 --- a/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp +++ b/cloud/blockstore/libs/storage/volume/volume_ut_stats.cpp @@ -534,11 +534,12 @@ Y_UNIT_TEST_SUITE(TVolumeStatsTest) ctx, owner, CreateRequestInfo(ev->Sender, ev->Cookie, msg->CallContext), - std::move(record)); + std::move(record), + 1000); }; auto runtime = - PrepareTestActorRuntime(config, {}, {}, {}, diskAgentState); + PrepareTestActorRuntime(config, {}, {}, {}, {diskAgentState}); struct TReadAndWriteByteCount { diff --git a/cloud/blockstore/libs/storage/volume/ya.make b/cloud/blockstore/libs/storage/volume/ya.make index 77f72624e71..1a85121181b 100644 --- a/cloud/blockstore/libs/storage/volume/ya.make +++ b/cloud/blockstore/libs/storage/volume/ya.make @@ -7,6 +7,7 @@ SRCS( volume.cpp volume_actor_addclient.cpp + volume_actor_acquire.cpp volume_actor_allocatedisk.cpp volume_actor_change_storage_config.cpp volume_actor_checkpoint.cpp @@ -14,6 +15,7 @@ SRCS( volume_actor_forward.cpp volume_actor_forward_trackused.cpp volume_actor_initschema.cpp + volume_actor_lagging_agents.cpp volume_actor_loadstate.cpp volume_actor_migration.cpp volume_actor_monitoring_checkpoint.cpp @@ -22,6 +24,7 @@ SRCS( volume_actor_read_history.cpp volume_actor_read_meta_history.cpp volume_actor_reallocatedisk.cpp + volume_actor_release.cpp volume_actor_removeclient.cpp volume_actor_reset_seqnumber.cpp volume_actor_resync.cpp diff --git a/cloud/blockstore/libs/storage/volume_balancer/volume_balancer.cpp b/cloud/blockstore/libs/storage/volume_balancer/volume_balancer.cpp index 68f688cdae6..d4a7cf2d845 100644 --- a/cloud/blockstore/libs/storage/volume_balancer/volume_balancer.cpp +++ b/cloud/blockstore/libs/storage/volume_balancer/volume_balancer.cpp @@ -11,14 +11,14 @@ using namespace NActors; IActorPtr 
CreateVolumeBalancerActor( TStorageConfigPtr storageConfig, IVolumeStatsPtr volumeStats, - NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatFetcher, + NCloud::NStorage::IStatsFetcherPtr statFetcher, IVolumeBalancerSwitchPtr volumeBalancerSwitch, NActors::TActorId serviceActorId) { return std::make_unique( std::move(storageConfig), std::move(volumeStats), - std::move(cgroupStatFetcher), + std::move(statFetcher), std::move(volumeBalancerSwitch), serviceActorId); } diff --git a/cloud/blockstore/libs/storage/volume_balancer/volume_balancer.h b/cloud/blockstore/libs/storage/volume_balancer/volume_balancer.h index 539448faf4a..289b4e76734 100644 --- a/cloud/blockstore/libs/storage/volume_balancer/volume_balancer.h +++ b/cloud/blockstore/libs/storage/volume_balancer/volume_balancer.h @@ -20,7 +20,7 @@ namespace NCloud::NBlockStore::NStorage { NActors::IActorPtr CreateVolumeBalancerActor( TStorageConfigPtr storageConfig, IVolumeStatsPtr volumeStats, - NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatFetcher, + NCloud::NStorage::IStatsFetcherPtr cgroupStatFetcher, IVolumeBalancerSwitchPtr volumeBalancerSwitch, NActors::TActorId serviceActorId); diff --git a/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_actor.cpp b/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_actor.cpp index fb118beadac..a94d01792c9 100644 --- a/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_actor.cpp +++ b/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_actor.cpp @@ -12,7 +12,7 @@ #include #include -#include +#include #include @@ -140,12 +140,12 @@ STFUNC(TRemoteVolumeStatActor::StateWork) TVolumeBalancerActor::TVolumeBalancerActor( TStorageConfigPtr storageConfig, IVolumeStatsPtr volumeStats, - NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatsFetcher, + NCloud::NStorage::IStatsFetcherPtr statsFetcher, IVolumeBalancerSwitchPtr volumeBalancerSwitch, TActorId serviceActorId) : StorageConfig(std::move(storageConfig)) , 
VolumeStats(std::move(volumeStats)) - , CgroupStatsFetcher(std::move(cgroupStatsFetcher)) + , StatsFetcher(std::move(statsFetcher)) , VolumeBalancerSwitch(std::move(volumeBalancerSwitch)) , ServiceActorId(serviceActorId) , State(std::make_unique(StorageConfig)) @@ -246,7 +246,7 @@ void TVolumeBalancerActor::HandleGetVolumeStatsResponse( auto now = ctx.Now(); auto interval = (now - LastCpuWaitQuery).MicroSeconds(); - auto [cpuWait, error] = CgroupStatsFetcher->GetCpuWait(); + auto [cpuWait, error] = StatsFetcher->GetCpuWait(); if (HasError(error)) { *CpuWaitFailure = 1; LOG_TRACE_S( diff --git a/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_actor.h b/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_actor.h index 6f965004ad3..b18de7ec099 100644 --- a/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_actor.h +++ b/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_actor.h @@ -28,7 +28,7 @@ class TVolumeBalancerActor final private: const TStorageConfigPtr StorageConfig; const IVolumeStatsPtr VolumeStats; - const NCloud::NStorage::ICgroupStatsFetcherPtr CgroupStatsFetcher; + const NCloud::NStorage::IStatsFetcherPtr StatsFetcher; const IVolumeBalancerSwitchPtr VolumeBalancerSwitch; const NActors::TActorId ServiceActorId; @@ -50,7 +50,7 @@ class TVolumeBalancerActor final TVolumeBalancerActor( TStorageConfigPtr storageConfig, IVolumeStatsPtr volumeStats, - NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatsFetcher, + NCloud::NStorage::IStatsFetcherPtr statsFetcher, IVolumeBalancerSwitchPtr volumeBalancerSwitch, NActors::TActorId serviceActorId); diff --git a/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_ut.cpp b/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_ut.cpp index a856c4dbd41..efac9add304 100644 --- a/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_ut.cpp +++ b/cloud/blockstore/libs/storage/volume_balancer/volume_balancer_ut.cpp @@ -9,7 +9,7 @@ #include #include 
-#include +#include #include #include @@ -194,7 +194,7 @@ struct TVolumeStatsTestMock final //////////////////////////////////////////////////////////////////////////////// -struct TCgroupStatsFetcherMock: public NCloud::NStorage::ICgroupStatsFetcher +struct TStatsFetcherMock: public NCloud::NStorage::IStatsFetcher { TResultOrError Value = TDuration::Zero(); @@ -230,14 +230,14 @@ class TVolumeBalancerTestEnv public: std::shared_ptr VolumeStats; - std::shared_ptr Fetcher; + std::shared_ptr Fetcher; public: TVolumeBalancerTestEnv() { Sender = TestEnv.GetRuntime().AllocateEdgeActor(); VolumeStats = std::make_shared(); - Fetcher = std::make_shared(); + Fetcher = std::make_shared(); } TActorId GetEdgeActor() const @@ -411,7 +411,7 @@ NFeatures::TFeaturesConfigPtr CreateFeatureConfig( IActorPtr CreateVolumeBalancerActor( TVolumeBalancerConfigBuilder& config, IVolumeStatsPtr volumeStats, - NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatsFetcher, + NCloud::NStorage::IStatsFetcherPtr statsFetcher, TActorId serviceActorId) { NProto::TStorageServiceConfig storageConfig = config.Build(); @@ -425,7 +425,7 @@ IActorPtr CreateVolumeBalancerActor( CreateFeatureConfig("Balancer", {}) ), std::move(volumeStats), - std::move(cgroupStatsFetcher), + std::move(statsFetcher), std::move(volumeBalancerSwitch), std::move(serviceActorId)); } diff --git a/cloud/blockstore/tests/csi_driver/e2e_tests_part2/test.py b/cloud/blockstore/tests/csi_driver/e2e_tests_part2/test.py index 6e67be5d57b..efa6a362d34 100644 --- a/cloud/blockstore/tests/csi_driver/e2e_tests_part2/test.py +++ b/cloud/blockstore/tests/csi_driver/e2e_tests_part2/test.py @@ -1,5 +1,6 @@ import pytest import subprocess +import os from pathlib import Path @@ -63,14 +64,21 @@ def test_readonly_volume(mount_path, access_type, vm_mode, gid): def test_mount_volume_group(): # Scenario - # 1. create volume and publish volume without mount volume group - # 2. create directory and file - # 3. unpublish volume + # 1. 
create volume and stage it + # 2. create directory and file in the staging directory # 4. create new group with specified GID # 5. publish volume with mount volume group GID # 6. check that mounted dir and existing files have specified GID # 7. create new directory and file # 8. check that new directory and file have specified GID + # 9. unpublish volume + # 10. create new file in staging directory and change ownership + # 11. publish volume with mount volume group GID + # 12. Verify that the new file doesn't have the specified GID. + # The change won't take effect because the GID of the mount directory + # matches the GID of the volume group. + + stage_path = Path("/var/lib/kubelet/plugins/kubernetes.io/csi/nbs.csi.nebius.ai/a/globalmount") env, run = csi.init() try: volume_name = "example-disk" @@ -81,6 +89,11 @@ def test_mount_volume_group(): env.csi.create_volume(name=volume_name, size=volume_size) env.csi.stage_volume(volume_name, access_type) + stage_test_dir1 = stage_path / "testdir1" + stage_test_dir1.mkdir() + stage_test_file1 = stage_test_dir1 / "testfile1" + stage_test_file1.touch() + gid = 1013 result = subprocess.run( ["groupadd", "-g", str(gid), "test_group_" + str(gid)], @@ -92,23 +105,13 @@ def test_mount_volume_group(): pod_id, volume_name, pod_name, - access_type + access_type, + volume_mount_group=str(gid) ) mount_path = Path("/var/lib/kubelet/pods") / pod_id / "volumes/kubernetes.io~csi" / volume_name / "mount" test_dir1 = mount_path / "testdir1" - test_dir1.mkdir() test_file1 = test_dir1 / "testfile1" - test_file1.touch() - - env.csi.unpublish_volume(pod_id, volume_name, access_type) - env.csi.publish_volume( - pod_id, - volume_name, - pod_name, - access_type, - volume_mount_group=str(gid) - ) assert gid == mount_path.stat().st_gid assert gid == test_dir1.stat().st_gid @@ -122,6 +125,24 @@ def test_mount_volume_group(): test_dir2.mkdir() assert gid == test_dir2.stat().st_gid + env.csi.unpublish_volume(pod_id, volume_name, access_type) + + 
stage_test_file3 = stage_test_dir1 / "testfile3" + stage_test_file3.touch() + os.chown(stage_test_file3, os.getuid(), os.getgid()) + assert gid != stage_test_file3.stat().st_gid + + env.csi.publish_volume( + pod_id, + volume_name, + pod_name, + access_type, + volume_mount_group=str(gid) + ) + + test_file3 = test_dir1 / "testfile3" + assert gid != test_file3.stat().st_gid + except subprocess.CalledProcessError as e: csi.log_called_process_error(e) raise @@ -168,3 +189,36 @@ def test_node_volume_expand_vm_mode(): raise finally: csi.cleanup_after_test(env, volume_name, access_type, [pod_id]) + + +def test_publish_volume_must_fail_after_fs_error(): + env, run = csi.init() + try: + volume_name = "example-disk" + volume_size = 1024 ** 3 + pod_name = "example-pod" + pod_id = "deadbeef1" + access_type = "mount" + env.csi.create_volume(name=volume_name, size=volume_size) + env.csi.stage_volume(volume_name, access_type) + env.csi.publish_volume(pod_id, volume_name, pod_name, access_type) + + with open('/sys/fs/ext4/nbd0/trigger_fs_error', 'w') as f: + f.write("test error") + + env.csi.unpublish_volume(pod_id, volume_name, access_type) + + stage_path = "/var/lib/kubelet/plugins/kubernetes.io/csi/nbs.csi.nebius.ai/a/globalmount" + assert "ro" == get_access_mode(stage_path) + + try: + env.csi.publish_volume(pod_id, volume_name, pod_name, access_type) + assert False + except subprocess.CalledProcessError: + pass + + except subprocess.CalledProcessError as e: + csi.log_called_process_error(e) + raise + finally: + csi.cleanup_after_test(env, volume_name, access_type, [pod_id]) diff --git a/cloud/blockstore/tests/e2e-tests/test.py b/cloud/blockstore/tests/e2e-tests/test.py index 69a58435103..92187465379 100644 --- a/cloud/blockstore/tests/e2e-tests/test.py +++ b/cloud/blockstore/tests/e2e-tests/test.py @@ -4,6 +4,7 @@ import shutil import subprocess import tempfile +import yaml from pathlib import Path @@ -29,7 +30,11 @@ 
"cloud/blockstore/apps/endpoint_proxy/blockstore-endpoint-proxy") -def init(with_netlink=True, with_endpoint_proxy=True): +def init( + with_netlink=True, + with_endpoint_proxy=True, + stored_endpoints_path=None +): server_config_patch = TServerConfig() server_config_patch.NbdEnabled = True if with_endpoint_proxy: @@ -57,13 +62,16 @@ def init(with_netlink=True, with_endpoint_proxy=True): server.ServerConfig.StrictContractValidation = True server.KikimrServiceConfig.CopyFrom(TKikimrServiceConfig()) subprocess.check_call(["modprobe", "nbd"], timeout=20) + if stored_endpoints_path: + stored_endpoints_path.mkdir(exist_ok=True) env = LocalLoadTest( endpoint="", server_app_config=server, storage_config_patches=None, use_in_memory_pdisks=True, with_endpoint_proxy=with_endpoint_proxy, - with_netlink=with_netlink) + with_netlink=with_netlink, + stored_endpoints_path=stored_endpoints_path) client_config_path = Path(yatest_common.output_path()) / "client-config.txt" client_config = TClientAppConfig() @@ -118,13 +126,16 @@ def log_called_process_error(exc): @pytest.mark.parametrize('with_netlink,with_endpoint_proxy', [(True, False), (True, True), (False, False), (False, True)]) def test_resize_device(with_netlink, with_endpoint_proxy): - env, run = init(with_netlink, with_endpoint_proxy) + stored_endpoints_path = Path(common.output_path()) / "stored_endpoints" + env, run = init(with_netlink, with_endpoint_proxy, stored_endpoints_path) + volume_name = "example-disk" block_size = 4096 blocks_count = 10000 volume_size = blocks_count * block_size nbd_device = "/dev/nbd0" socket_path = "/tmp/nbd.sock" + stored_endpoint_path = stored_endpoints_path / socket_path.replace("/", "_") try: result = run( "createvolume", @@ -174,6 +185,11 @@ def test_resize_device(with_netlink, with_endpoint_proxy): stderr=subprocess.STDOUT) assert result.returncode == 0 + if with_endpoint_proxy: + with open(stored_endpoint_path) as stream: + stored_endpoint = yaml.safe_load(stream) + assert 
stored_endpoint["BlocksCount"] == volume_size / block_size + new_volume_size = 2 * volume_size result = run( "resizevolume", @@ -206,6 +222,11 @@ def test_resize_device(with_netlink, with_endpoint_proxy): stderr=subprocess.STDOUT) assert result.returncode == 0 + if with_endpoint_proxy: + with open(stored_endpoint_path) as stream: + stored_endpoint = yaml.safe_load(stream) + assert stored_endpoint["BlocksCount"] == new_volume_size / block_size + except subprocess.CalledProcessError as e: log_called_process_error(e) raise diff --git a/cloud/blockstore/tests/loadtest/local-newfeatures/test.py b/cloud/blockstore/tests/loadtest/local-newfeatures/test.py index c2a149f55aa..9a5b510d9ce 100644 --- a/cloud/blockstore/tests/loadtest/local-newfeatures/test.py +++ b/cloud/blockstore/tests/loadtest/local-newfeatures/test.py @@ -43,6 +43,19 @@ def storage_config_with_incremental_batch_compaction(): return storage +def storage_config_with_garbage_batch_compaction(): + storage = default_storage_config() + storage.BatchCompactionEnabled = True + storage.GarbageCompactionRangeCountPerRun = 20 + storage.V1GarbageCompactionEnabled = True + storage.CompactionGarbageThreshold = 20 + storage.CompactionRangeGarbageThreshold = 999999 + storage.SSDMaxBlobsPerRange = 5 + storage.HDDMaxBlobsPerRange = 5 + + return storage + + def storage_config_with_incremental_compaction_and_patching(): storage = storage_config_with_incremental_compaction() storage.BlobPatchingEnabled = True diff --git a/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-1-throttled.txt b/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-1-throttled.txt index ed7bbdad31d..34c3f3b6448 100644 --- a/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-1-throttled.txt +++ b/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-1-throttled.txt @@ -1,6 +1,7 @@ Vertices { Test { CreateVolumeRequest { + StorageMediaKind: STORAGE_MEDIA_HDD BlocksCount: 20000000 
BlockSize: 4096 PerformanceProfile { @@ -28,7 +29,7 @@ Vertices { } TestDuration: 60 SuccessOnError: 2147811330 - SuccessOnError: 2147483650 + SuccessOnError: 2147483662 Name: "Test for localhost tablet version 1 with throttling" } } diff --git a/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-1.txt b/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-1.txt index 9914fd0583a..45999667cdd 100644 --- a/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-1.txt +++ b/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-1.txt @@ -3,6 +3,7 @@ Vertices { Name: "create_and_shoot_volume" CreateVolumeRequest { DiskId: "@volume" + StorageMediaKind: STORAGE_MEDIA_HDD BlocksCount: 20000000 BlockSize: 4096 PerformanceProfile { @@ -24,7 +25,7 @@ Vertices { } } SuccessOnError: 2147811330 - SuccessOnError: 2147483650 + SuccessOnError: 2147483662 } } diff --git a/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-2.txt b/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-2.txt index af44e0cb566..747caf4bec7 100644 --- a/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-2.txt +++ b/cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-2.txt @@ -3,6 +3,7 @@ Vertices { Name: "create_and_shoot_volume" CreateVolumeRequest { DiskId: "@volume" + StorageMediaKind: STORAGE_MEDIA_HDD BlocksCount: 20000000 BlockSize: 4096 TabletVersion: 2 @@ -25,7 +26,7 @@ Vertices { } } SuccessOnError: 2147811330 - SuccessOnError: 2147483650 + SuccessOnError: 2147483662 } } diff --git a/cloud/blockstore/tests/loadtest/local-overflow/test.py b/cloud/blockstore/tests/loadtest/local-overflow/test.py index a9bd0a9132f..b02cf849418 100644 --- a/cloud/blockstore/tests/loadtest/local-overflow/test.py +++ b/cloud/blockstore/tests/loadtest/local-overflow/test.py @@ -16,9 +16,9 @@ def __init__(self, name, config_path, stat_filter=None, dynamic_disk_count=1): TESTS = [ - # NOTE: 
E_BS_OUT_OF_SPACE(2147811330) and E_REJECTED(2147483650) are - # considered as a success. E_REJECTED might happen before E_BS_OUT_OF_SPACE - # because of BS group disintegration. + # NOTE: BS group disintegration may happen before + # E_BS_OUT_OF_SPACE(2147483662), in that case retries are timed out and + # resulting E_RETRY_TIMEOUT(2147483662) error is considered as a success. # At the end of the test we read some data from disk to ensure that tablet # is available. TestCase( @@ -31,9 +31,9 @@ def __init__(self, name, config_path, stat_filter=None, dynamic_disk_count=1): "cloud/blockstore/tests/loadtest/local-overflow/local-tablet-version-1-throttled.txt", ["ThrottlerRejected", "ReassignTablet"], ), - # NOTE: E_BS_OUT_OF_SPACE(2147811330) and E_REJECTED(2147483650) are - # considered as a success. E_REJECTED might happen before E_BS_OUT_OF_SPACE - # because of BS group disintegration. + # NOTE: BS group disintegration may happen before + # E_BS_OUT_OF_SPACE(2147483662), in that case retries are timed out and + # resulting E_RETRY_TIMEOUT(2147483662) error is considered as a success. # At the end of the test we read some data from disk to ensure that tablet # is available. 
TestCase( @@ -75,7 +75,7 @@ def __run_test(test_case): env = LocalLoadTest( "", storage_config_patches=[storage], - dynamic_pdisks=[dict(user_kind=1, disk_size=1024 * 1024 * 1024) + dynamic_pdisks=[dict(user_kind=1, disk_size=10 * 1024 * 1024 * 1024) for x in range(test_case.dynamic_disk_count)], dynamic_storage_pools=[ dict(name="dynamic_storage_pool:1", diff --git a/cloud/blockstore/tests/python/lib/config.py b/cloud/blockstore/tests/python/lib/config.py index 6ace6561900..25e9f9d89fc 100644 --- a/cloud/blockstore/tests/python/lib/config.py +++ b/cloud/blockstore/tests/python/lib/config.py @@ -316,6 +316,7 @@ def generate_disk_agent_txt( config.ShutdownTimeout = 0 config.IOParserActorCount = 4 config.OffloadAllIORequestsParsingEnabled = True + config.IOParserActorAllocateStorageEnabled = True config.PathsPerFileIOService = 1 if device_erase_method is not None: diff --git a/cloud/blockstore/tests/python/lib/endpoint_proxy.py b/cloud/blockstore/tests/python/lib/endpoint_proxy.py index 8b1d4b2e353..cc5b7f5d562 100644 --- a/cloud/blockstore/tests/python/lib/endpoint_proxy.py +++ b/cloud/blockstore/tests/python/lib/endpoint_proxy.py @@ -4,12 +4,20 @@ class EndpointProxy(Daemon): - def __init__(self, working_dir, unix_socket_path, with_netlink): + def __init__( + self, + working_dir, + unix_socket_path, + with_netlink, + stored_endpoints_path + ): command = [yatest_common.binary_path( "cloud/blockstore/apps/endpoint_proxy/blockstore-endpoint-proxy")] command += [ "--unix-socket-path", unix_socket_path, "--verbose" ] + if stored_endpoints_path: + command += ["--stored-endpoints-path", stored_endpoints_path] if with_netlink: command += ["--netlink"] diff --git a/cloud/blockstore/tests/python/lib/loadtest_env.py b/cloud/blockstore/tests/python/lib/loadtest_env.py index a66e5aedd5c..1a7d7293d67 100644 --- a/cloud/blockstore/tests/python/lib/loadtest_env.py +++ b/cloud/blockstore/tests/python/lib/loadtest_env.py @@ -55,6 +55,7 @@ def __init__( with_netlink=False, 
access_service_type=AccessService, load_configs_from_cms=True, + stored_endpoints_path=None, ): self.__endpoint = endpoint @@ -125,7 +126,8 @@ def __init__( self.endpoint_proxy = EndpointProxy( working_dir=self.nbs.cwd, unix_socket_path=server_app_config.ServerConfig.EndpointProxySocketPath, - with_netlink=with_netlink) + with_netlink=with_netlink, + stored_endpoints_path=stored_endpoints_path) if run_kikimr: self.nbs.setup_cms(self.kikimr_cluster.client) diff --git a/cloud/blockstore/tests/python/lib/nonreplicated_setup.py b/cloud/blockstore/tests/python/lib/nonreplicated_setup.py index 4c55db1eb6d..681516042fa 100644 --- a/cloud/blockstore/tests/python/lib/nonreplicated_setup.py +++ b/cloud/blockstore/tests/python/lib/nonreplicated_setup.py @@ -222,6 +222,7 @@ def setup_disk_agent_config( config.ShutdownTimeout = get_shutdown_agent_interval() config.IOParserActorCount = 4 config.OffloadAllIORequestsParsingEnabled = True + config.IOParserActorAllocateStorageEnabled = True config.PathsPerFileIOService = 2 if cached_sessions_path is not None: diff --git a/cloud/blockstore/tests/recipes/local-kikimr/__main__.py b/cloud/blockstore/tests/recipes/local-kikimr/__main__.py index ef85199b255..bced43085d1 100644 --- a/cloud/blockstore/tests/recipes/local-kikimr/__main__.py +++ b/cloud/blockstore/tests/recipes/local-kikimr/__main__.py @@ -30,7 +30,7 @@ def start(argv): kikimr_binary_path = yatest_common.binary_path("contrib/ydb/apps/ydbd/ydbd") if args.kikimr_package_path is not None: kikimr_binary_path = yatest_common.build_path( - "{}/Berkanavt/kikimr/bin/kikimr".format(args.kikimr_package_path) + "{}/ydbd".format(args.kikimr_package_path) ) configurator = KikimrConfigGenerator( diff --git a/cloud/blockstore/tools/csi_driver/internal/driver/node.go b/cloud/blockstore/tools/csi_driver/internal/driver/node.go index 44c0572104b..0961c6f9b6a 100644 --- a/cloud/blockstore/tools/csi_driver/internal/driver/node.go +++ b/cloud/blockstore/tools/csi_driver/internal/driver/node.go @@ 
-11,9 +11,9 @@ import ( "log" "math" "os" - "os/exec" "path/filepath" "regexp" + "strconv" "strings" "sync" @@ -21,6 +21,7 @@ import ( nbsapi "github.com/ydb-platform/nbs/cloud/blockstore/public/api/protos" nbsclient "github.com/ydb-platform/nbs/cloud/blockstore/public/sdk/go/client" "github.com/ydb-platform/nbs/cloud/blockstore/tools/csi_driver/internal/mounter" + "github.com/ydb-platform/nbs/cloud/blockstore/tools/csi_driver/internal/volume" nfsapi "github.com/ydb-platform/nbs/cloud/filestore/public/api/protos" nfsclient "github.com/ydb-platform/nbs/cloud/filestore/public/sdk/go/client" "golang.org/x/sys/unix" @@ -623,6 +624,13 @@ func (s *nodeService) nodePublishDiskAsFilesystem( "Staging target path is not mounted: %w", req.VolumeId) } + readOnly, _ := s.mounter.IsFilesystemRemountedAsReadonly(req.StagingTargetPath) + if readOnly { + return s.statusErrorf( + codes.Internal, + "Filesystem was remounted as readonly") + } + mounted, _ = s.mounter.IsMountPoint(req.TargetPath) if !mounted { targetPerm := os.FileMode(0775) @@ -658,11 +666,15 @@ func (s *nodeService) nodePublishDiskAsFilesystem( return err } - if mnt != nil && mnt.VolumeMountGroup != "" && !req.Readonly { - cmd := exec.Command("chown", "-R", ":"+mnt.VolumeMountGroup, req.TargetPath) - if out, err := cmd.CombinedOutput(); err != nil { - return fmt.Errorf("failed to chown %s to %q: %w, output %q", - mnt.VolumeMountGroup, req.TargetPath, err, out) + if mnt != nil && mnt.VolumeMountGroup != "" { + fsGroup, err := strconv.ParseInt(mnt.VolumeMountGroup, 10, 64) + if err != nil { + return fmt.Errorf("failed to parse volume mount group: %w", err) + } + + err = volume.SetVolumeOwnership(req.TargetPath, &fsGroup, req.Readonly) + if err != nil { + return fmt.Errorf("failed to set volume ownership: %w", err) } } @@ -741,14 +753,6 @@ func (s *nodeService) nodeStageDiskAsFilesystem( return fmt.Errorf("failed to format or mount filesystem: %w", err) } - if mnt != nil && mnt.VolumeMountGroup != "" { - cmd := 
exec.Command("chown", "-R", ":"+mnt.VolumeMountGroup, req.StagingTargetPath) - if out, err := cmd.CombinedOutput(); err != nil { - return fmt.Errorf("failed to chown %s to %q: %w, output %q", - mnt.VolumeMountGroup, req.StagingTargetPath, err, out) - } - } - if err := os.Chmod(req.StagingTargetPath, targetPerm); err != nil { return fmt.Errorf("failed to chmod target path: %w", err) } diff --git a/cloud/blockstore/tools/csi_driver/internal/driver/node_test.go b/cloud/blockstore/tools/csi_driver/internal/driver/node_test.go index 2e30f952911..7402d2e715f 100644 --- a/cloud/blockstore/tools/csi_driver/internal/driver/node_test.go +++ b/cloud/blockstore/tools/csi_driver/internal/driver/node_test.go @@ -545,6 +545,7 @@ func TestPublishUnpublishDiskForInfrakuber(t *testing.T) { mounter.On("IsMountPoint", stagingTargetPath).Return(true, nil) mounter.On("IsMountPoint", targetPath).Return(false, nil) + mounter.On("IsFilesystemRemountedAsReadonly", stagingTargetPath).Return(false, nil) mounter.On("Mount", stagingTargetPath, targetPath, "", []string{"bind"}).Return(nil) diff --git a/cloud/blockstore/tools/csi_driver/internal/mounter/iface.go b/cloud/blockstore/tools/csi_driver/internal/mounter/iface.go index f1e3ead296b..18d17f9fdfb 100644 --- a/cloud/blockstore/tools/csi_driver/internal/mounter/iface.go +++ b/cloud/blockstore/tools/csi_driver/internal/mounter/iface.go @@ -12,6 +12,7 @@ type Interface interface { HasBlockDevice(device string) (bool, error) IsFilesystemExisted(device string) (bool, error) + IsFilesystemRemountedAsReadonly(mountPoint string) (bool, error) MakeFilesystem(device string, fsType string) ([]byte, error) NeedResize(devicePath string, deviceMountPath string) (bool, error) diff --git a/cloud/blockstore/tools/csi_driver/internal/mounter/mock.go b/cloud/blockstore/tools/csi_driver/internal/mounter/mock.go index 10245237e4d..a319d5017f7 100644 --- a/cloud/blockstore/tools/csi_driver/internal/mounter/mock.go +++ 
b/cloud/blockstore/tools/csi_driver/internal/mounter/mock.go @@ -40,6 +40,11 @@ func (c *Mock) IsFilesystemExisted(device string) (bool, error) { return args.Get(0).(bool), args.Error(1) } +func (c *Mock) IsFilesystemRemountedAsReadonly(mountPoint string) (bool, error) { + args := c.Called(mountPoint) + return args.Get(0).(bool), args.Error(1) +} + func (c *Mock) MakeFilesystem(device string, fsType string) ([]byte, error) { args := c.Called(device, fsType) return args.Get(0).([]byte), args.Error(1) diff --git a/cloud/blockstore/tools/csi_driver/internal/mounter/mounter.go b/cloud/blockstore/tools/csi_driver/internal/mounter/mounter.go index 6c2d9bbedc6..440870f853f 100644 --- a/cloud/blockstore/tools/csi_driver/internal/mounter/mounter.go +++ b/cloud/blockstore/tools/csi_driver/internal/mounter/mounter.go @@ -78,6 +78,39 @@ func (m *mounter) IsFilesystemExisted(device string) (bool, error) { return err == nil && string(out) != "", nil } +func (m *mounter) IsFilesystemRemountedAsReadonly(mountPoint string) (bool, error) { + mountInfoList, err := mount.ParseMountInfo("/proc/self/mountinfo") + if err != nil { + return false, err + } + + for _, mountInfo := range mountInfoList { + if mountInfo.MountPoint == mountPoint { + // The filesystem was remounted as read-only + // if the mount options included a read-write option, while + // the superblock options specified a read-only option. 
+ var readWriteFs = false + for _, mountOption := range mountInfo.MountOptions { + if mountOption == "rw" { + readWriteFs = true + break + } + } + + if !readWriteFs { + return false, nil + } + + for _, superOption := range mountInfo.SuperOptions { + if superOption == "ro" { + return true, nil + } + } + } + } + return false, nil +} + func (m *mounter) MakeFilesystem(device string, fsType string) ([]byte, error) { options := []string{"-t", fsType} if fsType == "ext4" { diff --git a/cloud/blockstore/tools/csi_driver/internal/volume/volume_linux.go b/cloud/blockstore/tools/csi_driver/internal/volume/volume_linux.go new file mode 100644 index 00000000000..f8d17f2ba27 --- /dev/null +++ b/cloud/blockstore/tools/csi_driver/internal/volume/volume_linux.go @@ -0,0 +1,200 @@ +//go:build linux +// +build linux + +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +/* This file was edited for NBS. +* Original is from kubernetes. + */ + +package volume + +import ( + "path/filepath" + "syscall" + + "os" + "time" + + "k8s.io/klog/v2" +) + +const ( + rwMask = os.FileMode(0660) + roMask = os.FileMode(0440) + execMask = os.FileMode(0110) +) + +// SetVolumeOwnership modifies the given volume to be owned by +// fsGroup, and sets SetGid so that newly created files are owned by +// fsGroup. If fsGroup is nil nothing is done. 
+func SetVolumeOwnership(dir string, fsGroup *int64, readonly bool) error { + if fsGroup == nil { + return nil + } + + timer := time.AfterFunc(30*time.Second, func() { + klog.Warningf("Setting volume ownership for %s and fsGroup set. If the volume has a lot of files then setting volume ownership could be slow, see https://github.com/kubernetes/kubernetes/issues/69699", dir) + }) + defer timer.Stop() + + if !requiresPermissionChange(dir, fsGroup, readonly) { + klog.V(3).InfoS("Skipping permission and ownership change for volume", "path", dir) + return nil + } + + err := walkDeep(dir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + return changeFilePermission(path, fsGroup, readonly, info) + }) + + return err +} + +func changeFilePermission(filename string, fsGroup *int64, readonly bool, info os.FileInfo) error { + err := os.Lchown(filename, -1, int(*fsGroup)) + if err != nil { + klog.ErrorS(err, "Lchown failed", "path", filename) + } + + // chmod passes through to the underlying file for symlinks. + // Symlinks have a mode of 777 but this really doesn't mean anything. + // The permissions of the underlying file are what matter. + // However, if one reads the mode of a symlink then chmods the symlink + // with that mode, it changes the mode of the underlying file, overridden + // the defaultMode and permissions initialized by the volume plugin, which + // is not what we want; thus, we skip chmod for symlinks. 
+ if info.Mode()&os.ModeSymlink != 0 { + return nil + } + + mask := rwMask + if readonly { + mask = roMask + } + + if info.IsDir() { + mask |= os.ModeSetgid + mask |= execMask + } + + err = os.Chmod(filename, info.Mode()|mask) + if err != nil { + klog.ErrorS(err, "chmod failed", "path", filename) + } + + return nil +} + +func requiresPermissionChange(rootDir string, fsGroup *int64, readonly bool) bool { + fsInfo, err := os.Stat(rootDir) + if err != nil { + klog.ErrorS(err, "Performing recursive ownership change on rootDir because reading permissions of root volume failed", "path", rootDir) + return true + } + stat, ok := fsInfo.Sys().(*syscall.Stat_t) + if !ok || stat == nil { + klog.ErrorS(nil, "Performing recursive ownership change on rootDir because reading permissions of root volume failed", "path", rootDir) + return true + } + + if int(stat.Gid) != int(*fsGroup) { + klog.V(4).InfoS("Expected group ownership of volume did not match with Gid", "path", rootDir, "GID", stat.Gid) + return true + } + unixPerms := rwMask + + if readonly { + unixPerms = roMask + } + + // if rootDir is not a directory then we should apply permission change anyways + if !fsInfo.IsDir() { + return true + } + unixPerms |= execMask + filePerm := fsInfo.Mode().Perm() + + // We need to check if actual permissions of root directory is a superset of permissions required by unixPerms. + // This is done by checking if permission bits expected in unixPerms is set in actual permissions of the directory. + // We use bitwise AND operation to check set bits. For example: + // unixPerms: 770, filePerms: 775 : 770&775 = 770 (perms on directory is a superset) + // unixPerms: 770, filePerms: 770 : 770&770 = 770 (perms on directory is a superset) + // unixPerms: 770, filePerms: 750 : 770&750 = 750 (perms on directory is NOT a superset) + // We also need to check if setgid bits are set in permissions of the directory. 
+ if (unixPerms&filePerm != unixPerms) || (fsInfo.Mode()&os.ModeSetgid == 0) { + klog.V(4).InfoS("Performing recursive ownership change on rootDir because of mismatching mode", "path", rootDir) + return true + } + return false +} + +// readDirNames reads the directory named by dirname and returns +// a list of directory entries. +// We are not using filepath.readDirNames because we do not want to sort files found in a directory before changing +// permissions for performance reasons. +func readDirNames(dirname string) ([]string, error) { + f, err := os.Open(dirname) + if err != nil { + return nil, err + } + names, err := f.Readdirnames(-1) + f.Close() + if err != nil { + return nil, err + } + return names, nil +} + +// walkDeep can be used to traverse directories and has two minor differences +// from filepath.Walk: +// - List of files/dirs is not sorted for performance reasons +// - callback walkFunc is invoked on root directory after visiting children dirs and files +func walkDeep(root string, walkFunc filepath.WalkFunc) error { + info, err := os.Lstat(root) + if err != nil { + return walkFunc(root, nil, err) + } + return walk(root, info, walkFunc) +} + +func walk(path string, info os.FileInfo, walkFunc filepath.WalkFunc) error { + if !info.IsDir() { + return walkFunc(path, info, nil) + } + names, err := readDirNames(path) + if err != nil { + return err + } + for _, name := range names { + filename := filepath.Join(path, name) + fileInfo, err := os.Lstat(filename) + if err != nil { + if err := walkFunc(filename, fileInfo, err); err != nil { + return err + } + } else { + err = walk(filename, fileInfo, walkFunc) + if err != nil { + return err + } + } + } + return walkFunc(path, info, nil) +} diff --git a/cloud/blockstore/tools/csi_driver/internal/volume/ya.make b/cloud/blockstore/tools/csi_driver/internal/volume/ya.make new file mode 100644 index 00000000000..f2df5539b7e --- /dev/null +++ b/cloud/blockstore/tools/csi_driver/internal/volume/ya.make @@ -0,0 +1,7 @@ 
+GO_LIBRARY() + +SRCS( + volume_linux.go +) + +END() diff --git a/cloud/blockstore/tools/csi_driver/internal/ya.make b/cloud/blockstore/tools/csi_driver/internal/ya.make index 5c9c1c7aaf1..6ba42a1a0f9 100644 --- a/cloud/blockstore/tools/csi_driver/internal/ya.make +++ b/cloud/blockstore/tools/csi_driver/internal/ya.make @@ -1,4 +1,5 @@ RECURSE( driver mounter + volume ) diff --git a/cloud/blockstore/tools/testing/disk-registry-state-generator/main.cpp b/cloud/blockstore/tools/testing/disk-registry-state-generator/main.cpp index 44d873e7736..e9ee91df99e 100644 --- a/cloud/blockstore/tools/testing/disk-registry-state-generator/main.cpp +++ b/cloud/blockstore/tools/testing/disk-registry-state-generator/main.cpp @@ -164,7 +164,7 @@ auto GenerateAll(ui64 seed) auto rack = racks[rand.GenRand() % racks.size()]; NProto::TAgentConfig agent; - agent.SetNodeId(i); + agent.SetNodeId(i + 1); agent.SetAgentId(TStringBuilder() << "Agent_" << i); for (size_t j = 0; j < deviceCount; ++j) { auto* device = agent.MutableDevices()->Add(); diff --git a/cloud/blockstore/tools/testing/eternal_tests/eternal-load/lib/ut/ya.make b/cloud/blockstore/tools/testing/eternal_tests/eternal-load/lib/ut/ya.make index 3b64593aa91..3c077fa7734 100644 --- a/cloud/blockstore/tools/testing/eternal_tests/eternal-load/lib/ut/ya.make +++ b/cloud/blockstore/tools/testing/eternal_tests/eternal-load/lib/ut/ya.make @@ -1,6 +1,6 @@ UNITTEST_FOR(cloud/blockstore/tools/testing/eternal_tests/eternal-load/lib) -INCLUDE(${ARCADIA_ROOT}/cloud/storage/core/tests/recipes/small.inc) +INCLUDE(${ARCADIA_ROOT}/cloud/storage/core/tests/recipes/medium.inc) SRCS( config_ut.cpp diff --git a/cloud/blockstore/vhost-server/server_ut.cpp b/cloud/blockstore/vhost-server/server_ut.cpp index 71623b1a635..b741f495504 100644 --- a/cloud/blockstore/vhost-server/server_ut.cpp +++ b/cloud/blockstore/vhost-server/server_ut.cpp @@ -290,7 +290,21 @@ class TServerTest TSimpleStats prevStats; TCompleteStats stats; for (int i = 0; i != 5; ++i) { 
+ // Save critical events from previous attempt. + auto critEvents = std::move(stats.CriticalEvents); + stats = Server->GetStats(prevStats); + + // Combine critical events from previous and current attempt. + if (critEvents) { + for (auto& critEvent: stats.CriticalEvents) { + critEvents.push_back(std::move(critEvent)); + } + stats.CriticalEvents = std::move(critEvents); + } + + // Check that the current attempt to get statistics has brought + // everything we need. if (func(stats)) { break; } diff --git a/cloud/contrib/vhost b/cloud/contrib/vhost index 5e464ce321d..c7576a70930 160000 --- a/cloud/contrib/vhost +++ b/cloud/contrib/vhost @@ -1 +1 @@ -Subproject commit 5e464ce321d545f065bca9b87cf68c3cd16c9653 +Subproject commit c7576a70930f585d0629fa75daf1f90e2f4df587 diff --git a/cloud/disk_manager/internal/pkg/clients/nbs/factory.go b/cloud/disk_manager/internal/pkg/clients/nbs/factory.go index a034cf94bed..81a581df0f4 100644 --- a/cloud/disk_manager/internal/pkg/clients/nbs/factory.go +++ b/cloud/disk_manager/internal/pkg/clients/nbs/factory.go @@ -278,6 +278,23 @@ func (f *factory) initMultiZoneClients() { } } +func (f *factory) getClient( + ctx context.Context, + zoneID string, +) (*client, error) { + + client, ok := f.clients[zoneID] + if !ok { + return nil, errors.NewNonRetriableErrorf( + "unknown zone %q, available zones: %q", + zoneID, + f.GetZones(), + ) + } + + return &client, nil +} + func (f *factory) GetZones() []string { return maps.Keys(f.clients) } @@ -292,16 +309,7 @@ func (f *factory) GetClient( zoneID string, ) (Client, error) { - client, ok := f.clients[zoneID] - if !ok { - return nil, errors.NewNonRetriableErrorf( - "unknown zone %q, available zones: %q", - zoneID, - f.GetZones(), - ) - } - - return &client, nil + return f.getClient(ctx, zoneID) } func (f *factory) GetClientFromDefaultZone( @@ -348,13 +356,13 @@ func (f *factory) GetMultiZoneClient( //////////////////////////////////////////////////////////////////////////////// -func 
NewFactoryWithCreds( +func newFactoryWithCreds( ctx context.Context, config *nbs_config.ClientConfig, creds auth.Credentials, clientMetricsRegistry metrics.Registry, sessionMetricsRegistry metrics.Registry, -) (Factory, error) { +) (*factory, error) { if config.GetDisableAuthentication() { creds = nil @@ -374,6 +382,23 @@ func NewFactoryWithCreds( return f, nil } +func NewFactoryWithCreds( + ctx context.Context, + config *nbs_config.ClientConfig, + creds auth.Credentials, + clientMetricsRegistry metrics.Registry, + sessionMetricsRegistry metrics.Registry, +) (Factory, error) { + + return newFactoryWithCreds( + ctx, + config, + creds, + clientMetricsRegistry, + sessionMetricsRegistry, + ) +} + func NewFactory( ctx context.Context, config *nbs_config.ClientConfig, diff --git a/cloud/disk_manager/internal/pkg/clients/nbs/interface.go b/cloud/disk_manager/internal/pkg/clients/nbs/interface.go index 700b9d0d32f..891f25bcb94 100644 --- a/cloud/disk_manager/internal/pkg/clients/nbs/interface.go +++ b/cloud/disk_manager/internal/pkg/clients/nbs/interface.go @@ -312,15 +312,53 @@ type Client interface { diskID string, fillGeneration uint64, ) error +} + +//////////////////////////////////////////////////////////////////////////////// + +type MultiZoneClient interface { + // Clones volume and deletes its old version with outdated FillGeneration + // (if it exists). + Clone( + ctx context.Context, + diskID string, + dstPlacementGroupID string, + dstPlacementPartitionIndex uint32, + fillGeneration uint64, + baseDiskID string, + ) error +} + +//////////////////////////////////////////////////////////////////////////////// + +type Factory interface { + GetZones() []string + + HasClient(zoneID string) bool + + GetClient(ctx context.Context, zoneID string) (Client, error) + + // Returns client from default zone. Use it carefully. 
+ GetClientFromDefaultZone(ctx context.Context) (Client, error) + + GetMultiZoneClient( + srcZoneID string, + dstZoneID string, + ) (MultiZoneClient, error) +} + +//////////////////////////////////////////////////////////////////////////////// + +// Used in tests. +type TestingClient interface { + Client - // Used in tests. FillDisk( ctx context.Context, diskID string, contentSize uint64, ) (DiskContentInfo, error) - // Used in tests. FillEncryptedDisk( ctx context.Context, diskID string, @@ -328,20 +366,17 @@ type Client interface { encryption *types.EncryptionDesc, ) (DiskContentInfo, error) - // Used in tests. GoWriteRandomBlocksToNbsDisk( ctx context.Context, diskID string, ) (func() error, error) - // Used in tests. ValidateCrc32( ctx context.Context, diskID string, expectedDiskContentInfo DiskContentInfo, ) error - // Used in tests. ValidateCrc32WithEncryption( ctx context.Context, diskID string, @@ -349,57 +384,19 @@ type Client interface { encryption *types.EncryptionDesc, ) error - // Used in tests. CalculateCrc32(diskID string, contentSize uint64) (DiskContentInfo, error) - // Used in tests. CalculateCrc32WithEncryption( diskID string, contentSize uint64, encryption *types.EncryptionDesc, ) (DiskContentInfo, error) - // Used in tests. MountForReadWrite(diskID string) (func(), error) - // Used in tests. Write(diskID string, startIndex int, bytes []byte) error - // Used in tests. GetCheckpoints(ctx context.Context, diskID string) ([]string, error) - // Used in tests. List(ctx context.Context) ([]string, error) } - -//////////////////////////////////////////////////////////////////////////////// - -type MultiZoneClient interface { - // Clones volume and deletes its old version with outdated FillGeneration (if it exists). 
- Clone( - ctx context.Context, - diskID string, - dstPlacementGroupID string, - dstPlacementPartitionIndex uint32, - fillGeneration uint64, - baseDiskID string, - ) error -} - -//////////////////////////////////////////////////////////////////////////////// - -type Factory interface { - GetZones() []string - - HasClient(zoneID string) bool - - GetClient(ctx context.Context, zoneID string) (Client, error) - - // Returns client from default zone. Use it carefully. - GetClientFromDefaultZone(ctx context.Context) (Client, error) - - GetMultiZoneClient( - srcZoneID string, - dstZoneID string, - ) (MultiZoneClient, error) -} diff --git a/cloud/disk_manager/internal/pkg/clients/nbs/mocks/client_mock.go b/cloud/disk_manager/internal/pkg/clients/nbs/mocks/client_mock.go index a174e056727..802780571d1 100644 --- a/cloud/disk_manager/internal/pkg/clients/nbs/mocks/client_mock.go +++ b/cloud/disk_manager/internal/pkg/clients/nbs/mocks/client_mock.go @@ -342,110 +342,6 @@ func (c *ClientMock) Stat( //////////////////////////////////////////////////////////////////////////////// -func (c *ClientMock) FillDisk( - ctx context.Context, - diskID string, - contentSize uint64, -) (nbs.DiskContentInfo, error) { - - return c.FillEncryptedDisk(ctx, diskID, contentSize, nil) -} - -func (c *ClientMock) FillEncryptedDisk( - ctx context.Context, - diskID string, - contentSize uint64, - encryption *types.EncryptionDesc, -) (nbs.DiskContentInfo, error) { - - args := c.Called(ctx, diskID, contentSize, encryption) - return args.Get(0).(nbs.DiskContentInfo), args.Error(1) -} - -func (c *ClientMock) GoWriteRandomBlocksToNbsDisk( - ctx context.Context, - diskID string, -) (func() error, error) { - - args := c.Called(ctx, diskID) - return args.Get(0).(func() error), args.Error(1) -} - -func (c *ClientMock) ValidateCrc32( - ctx context.Context, - diskID string, - expectedDiskContentInfo nbs.DiskContentInfo, -) error { - - return c.ValidateCrc32WithEncryption( - ctx, - diskID, - 
expectedDiskContentInfo, - nil, - ) -} - -func (c *ClientMock) ValidateCrc32WithEncryption( - ctx context.Context, - diskID string, - expectedDiskContentInfo nbs.DiskContentInfo, - encryption *types.EncryptionDesc, -) error { - - args := c.Called(ctx, diskID, expectedDiskContentInfo, encryption) - return args.Error(0) -} - -func (c *ClientMock) CalculateCrc32( - diskID string, - contentSize uint64, -) (nbs.DiskContentInfo, error) { - - return c.CalculateCrc32WithEncryption(diskID, contentSize, nil) -} - -func (c *ClientMock) CalculateCrc32WithEncryption( - diskID string, - contentSize uint64, - encryption *types.EncryptionDesc, -) (nbs.DiskContentInfo, error) { - - args := c.Called(diskID, contentSize, encryption) - return args.Get(0).(nbs.DiskContentInfo), args.Error(1) -} - -func (c *ClientMock) MountForReadWrite( - diskID string, -) (func(), error) { - - args := c.Called(diskID) - return args.Get(0).(func()), args.Error(1) -} - -func (c *ClientMock) Write( - diskID string, - startIndex int, - bytes []byte, -) error { - - args := c.Called(diskID, startIndex, bytes) - return args.Error(0) -} - -func (c *ClientMock) GetCheckpoints( - ctx context.Context, - diskID string, -) ([]string, error) { - - args := c.Called(ctx, diskID) - return args.Get(0).([]string), args.Error(1) -} - -func (c *ClientMock) List(ctx context.Context) ([]string, error) { - args := c.Called(ctx) - return args.Get(0).([]string), args.Error(1) -} - func (c *ClientMock) Freeze( ctx context.Context, saveState func() error, diff --git a/cloud/disk_manager/internal/pkg/clients/nbs/testing_client.go b/cloud/disk_manager/internal/pkg/clients/nbs/testing_client.go index 0854e6ef462..07266f1d95c 100644 --- a/cloud/disk_manager/internal/pkg/clients/nbs/testing_client.go +++ b/cloud/disk_manager/internal/pkg/clients/nbs/testing_client.go @@ -10,6 +10,8 @@ import ( "github.com/ydb-platform/nbs/cloud/blockstore/public/api/protos" nbs_client 
"github.com/ydb-platform/nbs/cloud/blockstore/public/sdk/go/client" + nbs_config "github.com/ydb-platform/nbs/cloud/disk_manager/internal/pkg/clients/nbs/config" + "github.com/ydb-platform/nbs/cloud/disk_manager/internal/pkg/monitoring/metrics" "github.com/ydb-platform/nbs/cloud/disk_manager/internal/pkg/types" "github.com/ydb-platform/nbs/cloud/tasks/logging" "golang.org/x/sync/errgroup" @@ -17,7 +19,46 @@ import ( //////////////////////////////////////////////////////////////////////////////// -func (c *client) FillDisk( +type testingClient struct { + client +} + +func newFactory( + ctx context.Context, + client_config *nbs_config.ClientConfig, +) (*factory, error) { + + return newFactoryWithCreds( + ctx, + client_config, + nil, // creds + metrics.NewEmptyRegistry(), + metrics.NewEmptyRegistry(), + ) +} + +func NewTestingClient( + ctx context.Context, + zoneID string, + client_config *nbs_config.ClientConfig, +) (TestingClient, error) { + + factory, err := newFactory(ctx, client_config) + if err != nil { + return nil, err + } + + c, err := factory.getClient(ctx, zoneID) + if err != nil { + return nil, err + } + + return &testingClient{client: *c}, nil +} + +//////////////////////////////////////////////////////////////////////////////// + +func (c *testingClient) FillDisk( ctx context.Context, diskID string, contentSize uint64, @@ -26,7 +67,7 @@ func (c *client) FillDisk( return c.FillEncryptedDisk(ctx, diskID, contentSize, nil) } -func (c *client) FillEncryptedDisk( +func (c *testingClient) FillEncryptedDisk( ctx context.Context, diskID string, contentSize uint64, @@ -113,7 +154,7 @@ func (c *client) FillEncryptedDisk( }, nil } -func (c *client) GoWriteRandomBlocksToNbsDisk( +func (c *testingClient) GoWriteRandomBlocksToNbsDisk( ctx context.Context, diskID string, ) (func() error, error) { @@ -186,7 +227,7 @@ func (c *client) GoWriteRandomBlocksToNbsDisk( return errGroup.Wait, nil } -func (c *client) CalculateCrc32( +func (c *testingClient) CalculateCrc32( 
diskID string, contentSize uint64, ) (DiskContentInfo, error) { @@ -194,7 +235,7 @@ func (c *client) CalculateCrc32( return c.CalculateCrc32WithEncryption(diskID, contentSize, nil) } -func (c *client) CalculateCrc32WithEncryption( +func (c *testingClient) CalculateCrc32WithEncryption( diskID string, contentSize uint64, encryption *types.EncryptionDesc, @@ -202,7 +243,7 @@ func (c *client) CalculateCrc32WithEncryption( ctx := setupStderrLogger(context.Background()) - nbsClient, _, err := c.nbs.DiscoverInstance(ctx) + nbsClient, _, err := c.client.nbs.DiscoverInstance(ctx) if err != nil { return DiskContentInfo{}, err } @@ -339,7 +380,7 @@ func (c *client) CalculateCrc32WithEncryption( }, nil } -func (c *client) ValidateCrc32( +func (c *testingClient) ValidateCrc32( ctx context.Context, diskID string, expectedDiskContentInfo DiskContentInfo, @@ -353,7 +394,7 @@ func (c *client) ValidateCrc32( ) } -func (c *client) ValidateCrc32WithEncryption( +func (c *testingClient) ValidateCrc32WithEncryption( ctx context.Context, diskID string, expectedDiskContentInfo DiskContentInfo, @@ -409,13 +450,13 @@ func (c *client) ValidateCrc32WithEncryption( return nil } -func (c *client) MountForReadWrite( +func (c *testingClient) MountForReadWrite( diskID string, ) (func(), error) { ctx := setupStderrLogger(context.Background()) - nbsClient, _, err := c.nbs.DiscoverInstance(ctx) + nbsClient, _, err := c.client.nbs.DiscoverInstance(ctx) if err != nil { return func() {}, err } @@ -447,7 +488,7 @@ func (c *client) MountForReadWrite( return unmountFunc, nil } -func (c *client) Write( +func (c *testingClient) Write( diskID string, startIndex int, bytes []byte, @@ -455,7 +496,7 @@ func (c *client) Write( ctx := setupStderrLogger(context.Background()) - nbsClient, _, err := c.nbs.DiscoverInstance(ctx) + nbsClient, _, err := c.client.nbs.DiscoverInstance(ctx) if err != nil { return err } @@ -513,25 +554,14 @@ func (c *client) Write( return nil } 
-//////////////////////////////////////////////////////////////////////////////// - -type checkpoint struct { - CheckpointID string `json:"CheckpointId"` - // We don't need other checkpoint fields. -} - -type partitionInfo struct { - Checkpoints []checkpoint `json:"Checkpoints"` - // We don't need other partitionInfo fields. -} - -func (c *client) GetCheckpoints( +func (c *testingClient) GetCheckpoints( ctx context.Context, diskID string, ) ([]string, error) { - return c.nbs.GetCheckpoints(ctx, diskID) + + return c.client.nbs.GetCheckpoints(ctx, diskID) } -func (c *client) List(ctx context.Context) ([]string, error) { - return c.nbs.ListVolumes(ctx) +func (c *testingClient) List(ctx context.Context) ([]string, error) { + return c.client.nbs.ListVolumes(ctx) } diff --git a/cloud/disk_manager/internal/pkg/clients/nbs/tests/client_test.go b/cloud/disk_manager/internal/pkg/clients/nbs/tests/client_test.go index 2d990768693..ea28b6c1423 100644 --- a/cloud/disk_manager/internal/pkg/clients/nbs/tests/client_test.go +++ b/cloud/disk_manager/internal/pkg/clients/nbs/tests/client_test.go @@ -24,6 +24,15 @@ import ( //////////////////////////////////////////////////////////////////////////////// +const ( + zoneID = "zone-a" + otherZoneID = "zone-b" + defaultSessionRediscoverPeriodMin = "10s" + defaultSessionRediscoverPeriodMax = "20s" +) + +//////////////////////////////////////////////////////////////////////////////// + func newContext() context.Context { return logging.SetLogger( context.Background(), @@ -47,6 +56,28 @@ func getOtherZoneEndpoint() string { ) } +func newClientConfig( + sessionRediscoverPeriodMin string, + sessionRediscoverPeriodMax string, +) *config.ClientConfig { + + rootCertsFile := os.Getenv("DISK_MANAGER_RECIPE_ROOT_CERTS_FILE") + + return &config.ClientConfig{ + Zones: map[string]*config.Zone{ + zoneID: { + Endpoints: []string{getEndpoint(), getEndpoint()}, + }, + otherZoneID: { + Endpoints: []string{getOtherZoneEndpoint(), getOtherZoneEndpoint()}, + 
}, + }, + RootCertsFile: &rootCertsFile, + SessionRediscoverPeriodMin: &sessionRediscoverPeriodMin, + SessionRediscoverPeriodMax: &sessionRediscoverPeriodMax, + } +} + func newFactory( t *testing.T, ctx context.Context, @@ -55,23 +86,14 @@ func newFactory( sessionRediscoverPeriodMax string, ) nbs.Factory { - rootCertsFile := os.Getenv("DISK_MANAGER_RECIPE_ROOT_CERTS_FILE") + clientConfig := newClientConfig( + sessionRediscoverPeriodMin, + sessionRediscoverPeriodMax, + ) factory, err := nbs.NewFactoryWithCreds( ctx, - &config.ClientConfig{ - Zones: map[string]*config.Zone{ - "zone": { - Endpoints: []string{getEndpoint(), getEndpoint()}, - }, - "other": { - Endpoints: []string{getOtherZoneEndpoint(), getOtherZoneEndpoint()}, - }, - }, - RootCertsFile: &rootCertsFile, - SessionRediscoverPeriodMin: &sessionRediscoverPeriodMin, - SessionRediscoverPeriodMax: &sessionRediscoverPeriodMax, - }, + clientConfig, creds, metrics.NewEmptyRegistry(), metrics.NewEmptyRegistry(), @@ -105,16 +127,36 @@ func newClientFull( } func newClient(t *testing.T, ctx context.Context) nbs.Client { - return newClientFull(t, ctx, "zone", nil, "10s", "20s") + return newClientFull( + t, + ctx, + zoneID, + nil, + defaultSessionRediscoverPeriodMin, + defaultSessionRediscoverPeriodMax, + ) +} + +func newTestingClient(t *testing.T, ctx context.Context) nbs.TestingClient { + client, err := nbs.NewTestingClient( + ctx, + zoneID, + newClientConfig( + defaultSessionRediscoverPeriodMin, + defaultSessionRediscoverPeriodMax, + ), + ) + require.NoError(t, err) + return client } func newOtherZoneClient(t *testing.T, ctx context.Context) nbs.Client { - return newClientFull(t, ctx, "other", nil, "10s", "20s") + return newClientFull(t, ctx, otherZoneID, nil, "10s", "20s") } func newMultiZoneClient(t *testing.T, ctx context.Context) nbs.MultiZoneClient { factory := newFactory(t, ctx, nil, "10s", "20s") - client, err := factory.GetMultiZoneClient("zone", "other") + client, err := factory.GetMultiZoneClient(zoneID, 
otherZoneID) require.NoError(t, err) return client } @@ -666,7 +708,7 @@ func TestUnassignDeletedDisk(t *testing.T) { func TestTokenErrorsShouldBeRetriable(t *testing.T) { ctx := newContext() mockTokenProvider := &mockTokenProvider{} - client := newClientFull(t, ctx, "zone", mockTokenProvider, "10s", "20s") + client := newClientFull(t, ctx, zoneID, mockTokenProvider, "10s", "20s") mockTokenProvider.On("Token", mock.Anything).Return("", assert.AnError).Times(10) mockTokenProvider.On("Token", mock.Anything).Return("", nil) @@ -813,7 +855,7 @@ func TestMountRWDoesNotConflictWithBackgroundRediscover(t *testing.T) { client := newClientFull( t, ctx, - "zone", + zoneID, nil, "500ms", fmt.Sprintf("%vs", sessionRediscoverPeriodMaxSeconds), @@ -1449,7 +1491,7 @@ func TestGetChangedBlocksForLightCheckpoints(t *testing.T) { func TestReadFromProxyOverlayDisk(t *testing.T) { ctx := newContext() - client := newClient(t, ctx) + client := newTestingClient(t, ctx) diskID := t.Name() diskSize := int64(1024 * 4096) @@ -1493,7 +1535,7 @@ func TestReadFromProxyOverlayDisk(t *testing.T) { func TestReadFromProxyOverlayDiskWithMultipartitionBaseDisk(t *testing.T) { ctx := newContext() - client := newClient(t, ctx) + client := newTestingClient(t, ctx) diskID := t.Name() diskSize := int64(1024 * 4096) diff --git a/cloud/disk_manager/internal/pkg/facade/disk_service_test/disk_relocation_test.go b/cloud/disk_manager/internal/pkg/facade/disk_service_test/disk_relocation_test.go index d9d405f0383..c13914bf0c6 100644 --- a/cloud/disk_manager/internal/pkg/facade/disk_service_test/disk_relocation_test.go +++ b/cloud/disk_manager/internal/pkg/facade/disk_service_test/disk_relocation_test.go @@ -60,7 +60,7 @@ func setupMigrationTest( require.NoError(t, err) if params.FillDisk { - nbsClient := testcommon.NewNbsClient(t, ctx, params.SrcZoneID) + nbsClient := testcommon.NewNbsTestingClient(t, ctx, params.SrcZoneID) _, err = nbsClient.FillDisk( ctx, params.DiskID, @@ -79,7 +79,7 @@ func 
successfullyMigrateDisk( params migrationTestParams, ) { - srcZoneNBSClient := testcommon.NewNbsClient(t, ctx, params.SrcZoneID) + srcZoneNBSClient := testcommon.NewNbsTestingClient(t, ctx, params.SrcZoneID) // Writing some additional data to disk in parallel with migration. waitForWrite, err := srcZoneNBSClient.GoWriteRandomBlocksToNbsDisk( @@ -145,7 +145,7 @@ func successfullyMigrateDisk( require.Error(t, err) require.ErrorContains(t, err, "Path not found") - dstZoneNBSClient := testcommon.NewNbsClient(t, ctx, params.DstZoneID) + dstZoneNBSClient := testcommon.NewNbsTestingClient(t, ctx, params.DstZoneID) err = dstZoneNBSClient.ValidateCrc32( ctx, @@ -241,7 +241,7 @@ func setupMigrateEmptyOverlayDiskTest( expectedStorageSize = 0 } - srcZoneNBSClient := testcommon.NewNbsClient(t, ctx, params.SrcZoneID) + srcZoneNBSClient := testcommon.NewNbsTestingClient(t, ctx, params.SrcZoneID) changedBytes, err := srcZoneNBSClient.GetChangedBytes( ctx, params.DiskID, @@ -270,7 +270,7 @@ func successfullyMigrateEmptyOverlayDisk( params, ) - dstZoneNBSClient := testcommon.NewNbsClient(t, ctx, params.DstZoneID) + dstZoneNBSClient := testcommon.NewNbsTestingClient(t, ctx, params.DstZoneID) changedBytes, err := dstZoneNBSClient.GetChangedBytes( ctx, params.DiskID, @@ -291,7 +291,7 @@ func migrateDiskInParallel( migrateWithDifferentDstZoneIDs bool, ) { - srcZoneNBSClient := testcommon.NewNbsClient(t, ctx, params.SrcZoneID) + srcZoneNBSClient := testcommon.NewNbsTestingClient(t, ctx, params.SrcZoneID) // Writing some additional data to disk in parallel with migrations. 
waitForWrite, err := srcZoneNBSClient.GoWriteRandomBlocksToNbsDisk( @@ -386,7 +386,7 @@ func migrateDiskInParallel( require.Error(t, err) require.ErrorContains(t, err, "Path not found") - dstZoneNBSClient := testcommon.NewNbsClient(t, ctx, dstZoneID) + dstZoneNBSClient := testcommon.NewNbsTestingClient(t, ctx, dstZoneID) err := dstZoneNBSClient.ValidateCrc32( ctx, params.DiskID, @@ -411,7 +411,7 @@ func successfullyMigrateEmptyDisk( params migrationTestParams, ) { - srcZoneNBSClient := testcommon.NewNbsClient(t, ctx, params.SrcZoneID) + srcZoneNBSClient := testcommon.NewNbsTestingClient(t, ctx, params.SrcZoneID) reqCtx := testcommon.GetRequestContext(t, ctx) operation, err := client.MigrateDisk(reqCtx, &disk_manager.MigrateDiskRequest{ @@ -457,7 +457,7 @@ func successfullyMigrateEmptyDisk( require.Error(t, err) require.ErrorContains(t, err, "Path not found") - dstZoneNBSClient := testcommon.NewNbsClient(t, ctx, params.DstZoneID) + dstZoneNBSClient := testcommon.NewNbsTestingClient(t, ctx, params.DstZoneID) err = dstZoneNBSClient.ValidateCrc32( ctx, params.DiskID, @@ -622,12 +622,12 @@ func TestDiskServiceMigrateDisk(t *testing.T) { testcommon.DeleteDisk(t, ctx, client, diskID) // Check that disk is deleted. 
- srcZoneNBSClient := testcommon.NewNbsClient(t, ctx, params.SrcZoneID) + srcZoneNBSClient := testcommon.NewNbsTestingClient(t, ctx, params.SrcZoneID) _, err := srcZoneNBSClient.Describe(ctx, params.DiskID) require.Error(t, err) require.ErrorContains(t, err, "Path not found") - dstZoneNBSClient := testcommon.NewNbsClient(t, ctx, params.DstZoneID) + dstZoneNBSClient := testcommon.NewNbsTestingClient(t, ctx, params.DstZoneID) _, err = dstZoneNBSClient.Describe(ctx, params.DiskID) require.Error(t, err) require.ErrorContains(t, err, "Path not found") diff --git a/cloud/disk_manager/internal/pkg/facade/disk_service_test/disk_service_test.go b/cloud/disk_manager/internal/pkg/facade/disk_service_test/disk_service_test.go index 3b625ab2a7b..671bea6435d 100644 --- a/cloud/disk_manager/internal/pkg/facade/disk_service_test/disk_service_test.go +++ b/cloud/disk_manager/internal/pkg/facade/disk_service_test/disk_service_test.go @@ -189,7 +189,7 @@ func TestDiskServiceCreateDiskFromImageWithForceNotLayered(t *testing.T) { err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") err = nbsClient.ValidateCrc32( ctx, diskID, @@ -353,7 +353,7 @@ func TestDiskServiceCreateDisksFromImageWithConfiguredPool(t *testing.T) { operations = append(operations, operation) } - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") for i, operation := range operations { err := internal_client.WaitOperation(ctx, client, operation.Id) @@ -435,7 +435,7 @@ func testCreateDiskFromIncrementalSnapshot( err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") _, err = nbsClient.FillDisk(ctx, diskID1, diskSize) require.NoError(t, err) @@ -551,7 
+551,7 @@ func TestDiskServiceCreateDiskFromSnapshot(t *testing.T) { err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") diskContentInfo, err := nbsClient.FillDisk(ctx, diskID1, diskSize) require.NoError(t, err) @@ -656,7 +656,7 @@ func testCreateDiskFromImage( err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") err = nbsClient.ValidateCrc32(ctx, diskID, diskContentInfo) require.NoError(t, err) @@ -734,7 +734,7 @@ func TestDiskServiceCreateDiskFromSnapshotOfOverlayDisk(t *testing.T) { err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") diskContentInfo, err := nbsClient.FillDisk(ctx, diskID1, imageSize) require.NoError(t, err) @@ -1117,7 +1117,7 @@ func TestDiskServiceCreateEncryptedDiskFromSnapshot(t *testing.T) { err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") diskParams1, err := nbsClient.Describe(ctx, diskID1) require.NoError(t, err) diff --git a/cloud/disk_manager/internal/pkg/facade/image_service_test/image_service_test.go b/cloud/disk_manager/internal/pkg/facade/image_service_test/image_service_test.go index b1e28b94c58..eb6dbe432a1 100644 --- a/cloud/disk_manager/internal/pkg/facade/image_service_test/image_service_test.go +++ b/cloud/disk_manager/internal/pkg/facade/image_service_test/image_service_test.go @@ -65,7 +65,7 @@ func checkEncryptedSource( err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - 
nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") diskParams, err := nbsClient.Describe(ctx, diskID1) require.NoError(t, err) @@ -133,7 +133,7 @@ func checkUnencryptedImage( err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") diskParams, err := nbsClient.Describe(ctx, diskID1) require.NoError(t, err) @@ -207,7 +207,7 @@ func testImageServiceCreateImageFromDiskWithKind( err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") diskContentInfo, err := nbsClient.FillDisk(ctx, diskID, diskSize) require.NoError(t, err) @@ -325,7 +325,7 @@ func TestImageServiceCreateImageFromImage(t *testing.T) { err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") err = nbsClient.ValidateCrc32(ctx, diskID2, diskContentInfo) require.NoError(t, err) @@ -382,7 +382,7 @@ func TestImageServiceCreateImageFromSnapshot(t *testing.T) { err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") diskParams, err := nbsClient.Describe(ctx, diskID) require.NoError(t, err) @@ -561,7 +561,7 @@ func testCreateImageFromURL( err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") err = nbsClient.ValidateCrc32( ctx, diskID, @@ -733,7 +733,7 @@ func 
testImageServiceCreateImageFromURLWhichIsOverwrittenInProcess( err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") err = nbsClient.ValidateCrc32( ctx, @@ -1146,7 +1146,7 @@ func TestImageServiceCreateIncrementalImageFromDisk(t *testing.T) { require.Equal(t, float64(1), meta.Progress) testcommon.RequireCheckpoint(t, ctx, diskID1, imageID1) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") waitForWrite, err := nbsClient.GoWriteRandomBlocksToNbsDisk(ctx, diskID1) require.NoError(t, err) err = waitForWrite() diff --git a/cloud/disk_manager/internal/pkg/facade/private_service_test/private_service_test.go b/cloud/disk_manager/internal/pkg/facade/private_service_test/private_service_test.go index 69d569a4ec3..02298119781 100644 --- a/cloud/disk_manager/internal/pkg/facade/private_service_test/private_service_test.go +++ b/cloud/disk_manager/internal/pkg/facade/private_service_test/private_service_test.go @@ -111,7 +111,7 @@ func TestPrivateServiceRetireBaseDisks(t *testing.T) { for i := 0; i < diskCount; i++ { operationID := operations[i].Id - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") diskID := fmt.Sprintf("%v%v", t.Name(), i) go func() { @@ -224,7 +224,7 @@ func TestPrivateServiceRetireBaseDisksUsingBaseDiskAsSrc(t *testing.T) { err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") err = nbsClient.ValidateCrc32(ctx, diskID, diskContentInfo) require.NoError(t, err) @@ -369,7 +369,7 @@ func TestPrivateServiceOptimizeBaseDisks(t *testing.T) { for i := 0; i < diskCount; i++ { operationID := operations[i].Id - nbsClient := 
testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") diskID := fmt.Sprintf("%v%v", t.Name(), i) go func() { diff --git a/cloud/disk_manager/internal/pkg/facade/snapshot_service_test/snapshot_service_test.go b/cloud/disk_manager/internal/pkg/facade/snapshot_service_test/snapshot_service_test.go index e306ecd808a..844bca351dd 100644 --- a/cloud/disk_manager/internal/pkg/facade/snapshot_service_test/snapshot_service_test.go +++ b/cloud/disk_manager/internal/pkg/facade/snapshot_service_test/snapshot_service_test.go @@ -50,7 +50,7 @@ func testCreateSnapshotFromDisk( err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") _, err = nbsClient.FillDisk(ctx, diskID, 64*4096) require.NoError(t, err) @@ -209,7 +209,7 @@ func testCreateIncrementalSnapshotFromDisk( err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") contentSize := 134217728 bytes := make([]byte, contentSize) @@ -338,7 +338,7 @@ func TestSnapshotServiceCreateIncrementalSnapshotAfterDeletionOfBaseSnapshot(t * err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") bytes := make([]byte, diskSize) for i := 0; i < len(bytes); i++ { @@ -450,7 +450,7 @@ func TestSnapshotServiceCreateIncrementalSnapshotWhileDeletingBaseSnapshot(t *te err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := testcommon.NewNbsClient(t, ctx, "zone-a") + nbsClient := testcommon.NewNbsTestingClient(t, ctx, "zone-a") _, err = nbsClient.FillDisk(ctx, diskID1, uint64(diskSize)) require.NoError(t, 
err) diff --git a/cloud/disk_manager/internal/pkg/facade/testcommon/common.go b/cloud/disk_manager/internal/pkg/facade/testcommon/common.go index dbc45f7c237..92b07b424b6 100644 --- a/cloud/disk_manager/internal/pkg/facade/testcommon/common.go +++ b/cloud/disk_manager/internal/pkg/facade/testcommon/common.go @@ -260,59 +260,55 @@ func NewPrivateClient(ctx context.Context) (internal_client.PrivateClient, error ) } -func NewNbsClient( - t *testing.T, - ctx context.Context, - zoneID string, -) nbs.Client { +func newNbsClientClientConfig() *nbs_config.ClientConfig { rootCertsFile := os.Getenv("DISK_MANAGER_RECIPE_ROOT_CERTS_FILE") durableClientTimeout := "5m" discoveryClientHardTimeout := "8m" discoveryClientSoftTimeout := "15s" - factory, err := nbs.NewFactory( - ctx, - &nbs_config.ClientConfig{ - Zones: map[string]*nbs_config.Zone{ - "zone-a": { - Endpoints: []string{ - fmt.Sprintf( - "localhost:%v", - os.Getenv("DISK_MANAGER_RECIPE_NBS_PORT"), - ), - }, + return &nbs_config.ClientConfig{ + Zones: map[string]*nbs_config.Zone{ + "zone-a": { + Endpoints: []string{ + fmt.Sprintf( + "localhost:%v", + os.Getenv("DISK_MANAGER_RECIPE_NBS_PORT"), + ), }, - "zone-b": { - Endpoints: []string{ - fmt.Sprintf( - "localhost:%v", - os.Getenv("DISK_MANAGER_RECIPE_NBS2_PORT"), - ), - }, + }, + "zone-b": { + Endpoints: []string{ + fmt.Sprintf( + "localhost:%v", + os.Getenv("DISK_MANAGER_RECIPE_NBS2_PORT"), + ), }, - "zone-c": { - Endpoints: []string{ - fmt.Sprintf( - "localhost:%v", - os.Getenv("DISK_MANAGER_RECIPE_NBS3_PORT"), - ), - }, + }, + "zone-c": { + Endpoints: []string{ + fmt.Sprintf( + "localhost:%v", + os.Getenv("DISK_MANAGER_RECIPE_NBS3_PORT"), + ), }, }, - RootCertsFile: &rootCertsFile, - DurableClientTimeout: &durableClientTimeout, - DiscoveryClientHardTimeout: &discoveryClientHardTimeout, - DiscoveryClientSoftTimeout: &discoveryClientSoftTimeout, }, - metrics.NewEmptyRegistry(), - metrics.NewEmptyRegistry(), - ) - require.NoError(t, err) + RootCertsFile: &rootCertsFile, 
+ DurableClientTimeout: &durableClientTimeout, + DiscoveryClientHardTimeout: &discoveryClientHardTimeout, + DiscoveryClientSoftTimeout: &discoveryClientSoftTimeout, + } +} - client, err := factory.GetClient(ctx, zoneID) - require.NoError(t, err) +func NewNbsTestingClient( + t *testing.T, + ctx context.Context, + zoneID string, +) nbs.TestingClient { + client, err := nbs.NewTestingClient(ctx, zoneID, newNbsClientClientConfig()) + require.NoError(t, err) return client } @@ -349,7 +345,7 @@ func RequireCheckpoint( checkpointID string, ) { - nbsClient := NewNbsClient(t, ctx, "zone-a") + nbsClient := NewNbsTestingClient(t, ctx, "zone-a") checkpoints, err := nbsClient.GetCheckpoints(ctx, diskID) require.NoError(t, err) @@ -362,8 +358,10 @@ func RequireNoCheckpoints( ctx context.Context, diskID string, ) { - - nbsClient := NewNbsClient(t, ctx, "zone-a") + // TODO: enable this method after resolving this issue + // https://github.com/ydb-platform/nbs/issues/2008. + return + nbsClient := NewNbsTestingClient(t, ctx, "zone-a") checkpoints, err := nbsClient.GetCheckpoints(ctx, diskID) require.NoError(t, err) require.Empty(t, checkpoints) @@ -375,7 +373,7 @@ func WaitForCheckpointsAreEmpty( diskID string, ) { - nbsClient := NewNbsClient(t, ctx, "zone-a") + nbsClient := NewNbsTestingClient(t, ctx, "zone-a") for { checkpoints, err := nbsClient.GetCheckpoints(ctx, diskID) @@ -428,7 +426,7 @@ func CreateImage( err = internal_client.WaitOperation(ctx, client, operation.Id) require.NoError(t, err) - nbsClient := NewNbsClient(t, ctx, "zone-a") + nbsClient := NewNbsTestingClient(t, ctx, "zone-a") diskContentInfo, err := nbsClient.FillDisk(ctx, diskID, imageSize) require.NoError(t, err) @@ -577,7 +575,7 @@ func CheckBaseDiskSlotReleased( } func CheckConsistency(t *testing.T, ctx context.Context) { - nbsClient := NewNbsClient(t, ctx, "zone-a") + nbsClient := NewNbsTestingClient(t, ctx, "zone-a") for { ok := true diff --git a/cloud/disk_manager/test/remote/cmd/main.go 
b/cloud/disk_manager/test/remote/cmd/main.go index e6dffbcb4a4..c78f7ffc453 100644 --- a/cloud/disk_manager/test/remote/cmd/main.go +++ b/cloud/disk_manager/test/remote/cmd/main.go @@ -21,7 +21,6 @@ import ( "github.com/ydb-platform/nbs/cloud/disk_manager/internal/pkg/clients/nbs" nbs_client_config "github.com/ydb-platform/nbs/cloud/disk_manager/internal/pkg/clients/nbs/config" client_config "github.com/ydb-platform/nbs/cloud/disk_manager/internal/pkg/configs/client/config" - "github.com/ydb-platform/nbs/cloud/disk_manager/internal/pkg/monitoring/metrics" "github.com/ydb-platform/nbs/cloud/disk_manager/pkg/client" test_config "github.com/ydb-platform/nbs/cloud/disk_manager/test/remote/cmd/config" "github.com/ydb-platform/nbs/cloud/tasks/headers" @@ -181,23 +180,13 @@ func newContext(config *client_config.ClientConfig) context.Context { ) } -func newNbsClient( +func newNbsTestingClient( ctx context.Context, config *nbs_client_config.ClientConfig, zoneID string, -) (nbs.Client, error) { +) (nbs.TestingClient, error) { - factory, err := nbs.NewFactory( - ctx, - config, - metrics.NewEmptyRegistry(), - metrics.NewEmptyRegistry(), - ) - if err != nil { - return nil, err - } - - return factory.GetClient(ctx, zoneID) + return nbs.NewTestingClient(ctx, zoneID, config) } //////////////////////////////////////////////////////////////////////////////// @@ -952,7 +941,7 @@ func testCreateDiskFromImageImpl( return resources{}, err } - nbsClient, err := newNbsClient(ctx, nbsConfig, testConfig.GetZoneID()) + nbsClient, err := newNbsTestingClient(ctx, nbsConfig, testConfig.GetZoneID()) if err != nil { return resources{}, err } @@ -1145,7 +1134,7 @@ func testRetireBaseDisks( operation := operations[i] diskID := rs.Disks[i] - nbsClient, err := newNbsClient(ctx, nbsConfig, testConfig.GetZoneID()) + nbsClient, err := newNbsTestingClient(ctx, nbsConfig, testConfig.GetZoneID()) if err != nil { return resources{}, err } @@ -1217,7 +1206,7 @@ func testCreateDiskFromSnapshotImpl( Snapshots: 
[]string{snapshotID}, } - nbsClient, err := newNbsClient(ctx, nbsConfig, zoneID) + nbsClient, err := newNbsTestingClient(ctx, nbsConfig, zoneID) if err != nil { return resources{}, err } @@ -1413,7 +1402,7 @@ func testCreateImageFromImageImpl( Images: []string{imageID1, imageID2}, } - nbsClient, err := newNbsClient(ctx, nbsConfig, testConfig.GetZoneID()) + nbsClient, err := newNbsTestingClient(ctx, nbsConfig, testConfig.GetZoneID()) if err != nil { return resources{}, err } diff --git a/cloud/filestore/config/diagnostics.proto b/cloud/filestore/config/diagnostics.proto index 1e5d689b33a..a9c7f0c0e6e 100644 --- a/cloud/filestore/config/diagnostics.proto +++ b/cloud/filestore/config/diagnostics.proto @@ -4,6 +4,7 @@ package NCloud.NFileStore.NProto; option go_package = "github.com/ydb-platform/nbs/cloud/filestore/config"; +import "cloud/storage/core/protos/diagnostics.proto"; import "cloud/storage/core/protos/trace.proto"; //////////////////////////////////////////////////////////////////////////////// @@ -137,4 +138,7 @@ message TDiagnosticsConfig // Performance profile for SSD filesystems. 
optional TFileSystemPerformanceProfile SSDFileSystemPerformanceProfile = 27; + + // Type of fetching CPU stats + optional NCloud.NProto.EStatsFetcherType StatsFetcherType = 28; } diff --git a/cloud/filestore/config/storage.proto b/cloud/filestore/config/storage.proto index 7838ae1ca7b..f9b66fe810f 100644 --- a/cloud/filestore/config/storage.proto +++ b/cloud/filestore/config/storage.proto @@ -512,4 +512,9 @@ message TStorageConfig // proportional to the aforementioned value multiplied my the size of // TRange optional uint64 MixedBlocksOffloadedRangesCapacity = 415; + + optional bool YdbViewerServiceEnabled = 416; + + // During the InMemoryIndexCache load, the time between the batches loading + optional uint32 InMemoryIndexCacheLoadSchedulePeriod = 417; // in ms } diff --git a/cloud/filestore/libs/daemon/common/bootstrap.cpp b/cloud/filestore/libs/daemon/common/bootstrap.cpp index b11e5b0d6d0..725b06b5e27 100644 --- a/cloud/filestore/libs/daemon/common/bootstrap.cpp +++ b/cloud/filestore/libs/daemon/common/bootstrap.cpp @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -90,7 +90,7 @@ void TBootstrapCommon::Start() FILESTORE_LOG_START_COMPONENT(BackgroundThreadPool); FILESTORE_LOG_START_COMPONENT(ProfileLog); FILESTORE_LOG_START_COMPONENT(RequestStatsUpdater); - FILESTORE_LOG_START_COMPONENT(CgroupStatsFetcher); + FILESTORE_LOG_START_COMPONENT(StatsFetcher); StartComponents(); @@ -119,7 +119,7 @@ void TBootstrapCommon::Stop() StopComponents(); - FILESTORE_LOG_STOP_COMPONENT(CgroupStatsFetcher); + FILESTORE_LOG_STOP_COMPONENT(StatsFetcher); FILESTORE_LOG_STOP_COMPONENT(RequestStatsUpdater); FILESTORE_LOG_STOP_COMPONENT(ProfileLog); FILESTORE_LOG_STOP_COMPONENT(BackgroundThreadPool); @@ -271,16 +271,16 @@ void TBootstrapCommon::InitActorSystem() STORAGE_INFO("TraceSerializer initialized"); auto cpuWaitFilename = Configs->DiagnosticsConfig->GetCpuWaitFilename(); - CgroupStatsFetcher = BuildCgroupStatsFetcher( + StatsFetcher 
= NCloud::NStorage::BuildStatsFetcher( + Configs->DiagnosticsConfig->GetStatsFetcherType(), cpuWaitFilename.empty() ? NCloud::NStorage::BuildCpuWaitStatsFilename( Configs->DiagnosticsConfig->GetCpuWaitServiceName()) : std::move(cpuWaitFilename), Log, - logging, - "FILESTORE_CGROUPS"); + logging); - STORAGE_INFO("CgroupStatsFetcher initialized"); + STORAGE_INFO("StatsFetcher initialized"); NStorage::TActorSystemArgs args; args.NodeId = nodeId; @@ -292,7 +292,7 @@ void TBootstrapCommon::InitActorSystem() args.DiagnosticsConfig = Configs->DiagnosticsConfig; args.Metrics = Metrics; args.UserCounters = UserCounters; - args.CgroupStatsFetcher = CgroupStatsFetcher; + args.StatsFetcher = StatsFetcher; args.ModuleFactories = ModuleFactories; ActorSystem = NStorage::CreateActorSystem(args); diff --git a/cloud/filestore/libs/daemon/common/bootstrap.h b/cloud/filestore/libs/daemon/common/bootstrap.h index 634d2969b78..1172d53cd4e 100644 --- a/cloud/filestore/libs/daemon/common/bootstrap.h +++ b/cloud/filestore/libs/daemon/common/bootstrap.h @@ -73,7 +73,7 @@ class TBootstrapCommon ITaskQueuePtr BackgroundThreadPool; IProfileLogPtr ProfileLog; IActorSystemPtr ActorSystem; - NCloud::NStorage::ICgroupStatsFetcherPtr CgroupStatsFetcher; + NCloud::NStorage::IStatsFetcherPtr StatsFetcher; public: TBootstrapCommon( diff --git a/cloud/filestore/libs/daemon/vhost/bootstrap.cpp b/cloud/filestore/libs/daemon/vhost/bootstrap.cpp index 26143541208..8936e7aa892 100644 --- a/cloud/filestore/libs/daemon/vhost/bootstrap.cpp +++ b/cloud/filestore/libs/daemon/vhost/bootstrap.cpp @@ -32,6 +32,7 @@ #include #include +#include #include #include #include @@ -285,6 +286,7 @@ void TBootstrapVhost::InitEndpoints() auto serviceConfig = std::make_shared( *localServiceConfig); ThreadPool = CreateThreadPool("svc", serviceConfig->GetNumThreads()); + FileIOService = CreateThreadedAIOService(serviceConfig->GetNumThreads()); LocalService = CreateLocalFileStore( std::move(serviceConfig), Timer, diff --git 
a/cloud/filestore/libs/diagnostics/config.cpp b/cloud/filestore/libs/diagnostics/config.cpp index 391da103407..8b8d23bc456 100644 --- a/cloud/filestore/libs/diagnostics/config.cpp +++ b/cloud/filestore/libs/diagnostics/config.cpp @@ -41,6 +41,8 @@ namespace { \ xxx(HDDFileSystemPerformanceProfile, TFileSystemPerformanceProfile, {} )\ xxx(SSDFileSystemPerformanceProfile, TFileSystemPerformanceProfile, {} )\ + \ + xxx(StatsFetcherType, NCloud::NProto::EStatsFetcherType, NCloud::NProto::EStatsFetcherType::CGROUP )\ // FILESTORE_DIAGNOSTICS_CONFIG #define FILESTORE_DIAGNOSTICS_DECLARE_CONFIG(name, type, value) \ @@ -227,3 +229,12 @@ void Out( SerializeToTextFormat(v, out); } + +template <> +void Out( + IOutputStream& out, + NCloud::NProto::EStatsFetcherType statsFetcherType) +{ + out << NCloud::NProto::EStatsFetcherType_Name( + statsFetcherType); +} diff --git a/cloud/filestore/libs/diagnostics/config.h b/cloud/filestore/libs/diagnostics/config.h index fe473ac4d30..b0320cb67c1 100644 --- a/cloud/filestore/libs/diagnostics/config.h +++ b/cloud/filestore/libs/diagnostics/config.h @@ -137,6 +137,8 @@ class TDiagnosticsConfig TFileSystemPerformanceProfile GetHDDFileSystemPerformanceProfile() const; TFileSystemPerformanceProfile GetSSDFileSystemPerformanceProfile() const; + NCloud::NProto::EStatsFetcherType GetStatsFetcherType() const; + void Dump(IOutputStream& out) const; void DumpHtml(IOutputStream& out) const; }; diff --git a/cloud/filestore/libs/diagnostics/profile_log.cpp b/cloud/filestore/libs/diagnostics/profile_log.cpp index e3e0dab69f6..ee8df00df9a 100644 --- a/cloud/filestore/libs/diagnostics/profile_log.cpp +++ b/cloud/filestore/libs/diagnostics/profile_log.cpp @@ -41,6 +41,8 @@ class TProfileLog final { } + ~TProfileLog() override; + public: void Start() override; void Stop() override; @@ -52,6 +54,11 @@ class TProfileLog final void Flush(); }; +TProfileLog::~TProfileLog() +{ + Flush(); +} + void TProfileLog::Start() { ScheduleFlush(); diff --git 
a/cloud/filestore/libs/diagnostics/profile_log_ut.cpp b/cloud/filestore/libs/diagnostics/profile_log_ut.cpp index 0c2528179ae..45c9c67682c 100644 --- a/cloud/filestore/libs/diagnostics/profile_log_ut.cpp +++ b/cloud/filestore/libs/diagnostics/profile_log_ut.cpp @@ -137,9 +137,11 @@ struct TEnv ProfilePath.DeleteIfExists(); } - void ProcessLog() + void ProcessLog(bool runScheduler = true) { - Scheduler->RunAllScheduledTasks(); + if (runScheduler) { + Scheduler->RunAllScheduledTasks(); + } EventProcessor.FlatMessages.clear(); const char* argv[] = {"foo", Settings.FilePath.c_str()}; @@ -368,6 +370,24 @@ Y_UNIT_TEST_SUITE(TProfileLogTest) "fs3\t6000000\t4\t300000\t0", EventProcessor.FlatMessages[5] ); + + // Test flush on destruct + ProfileLog->Write( + {"fs3", + TRequestInfoBuilder() + .SetTimestamp(TInstant::Seconds(9)) + .SetDuration(TDuration::MilliSeconds(100)) + .SetRequestType(4) + .SetError(0) + .Build()}); + + ProfileLog = CreateProfileLogStub(); + ProcessLog(false); + + UNIT_ASSERT_VALUES_EQUAL(7, EventProcessor.FlatMessages.size()); + UNIT_ASSERT_VALUES_EQUAL( + "fs3\t9000000\t4\t100000\t0", + EventProcessor.FlatMessages[6]); } } diff --git a/cloud/filestore/libs/service_local/config.cpp b/cloud/filestore/libs/service_local/config.cpp index b9ca4ac02c7..0db574f37dd 100644 --- a/cloud/filestore/libs/service_local/config.cpp +++ b/cloud/filestore/libs/service_local/config.cpp @@ -19,7 +19,7 @@ constexpr TDuration AsyncHandleOpsPeriod = TDuration::MilliSeconds(50); xxx(PathPrefix, TString, "nfs_" )\ xxx(DefaultPermissions, ui32, 0775 )\ xxx(IdleSessionTimeout, TDuration, TDuration::Seconds(30) )\ - xxx(NumThreads, ui32, 4 )\ + xxx(NumThreads, ui32, 8 )\ xxx(StatePath, TString, "./" )\ xxx(MaxNodeCount, ui32, 1000000 )\ xxx(MaxHandlePerSessionCount, ui32, 10000 )\ diff --git a/cloud/filestore/libs/service_local/index.h b/cloud/filestore/libs/service_local/index.h index 7a5a03048e2..f304a729230 100644 --- a/cloud/filestore/libs/service_local/index.h +++ 
b/cloud/filestore/libs/service_local/index.h @@ -309,17 +309,25 @@ class TLocalIndex // parent already resolved so we can create node and resolve // this entry - auto node = - TIndexNode::Create(**parentNodeIt, pathElemRecord->Name); - node->SetRecordIndex(pathElemIndex); + try { + auto node = + TIndexNode::Create(**parentNodeIt, pathElemRecord->Name); + node->SetRecordIndex(pathElemIndex); + Nodes.insert(node); - Nodes.insert(node); + STORAGE_TRACE( + "Resolve node end, NodeId=" << pathElemRecord->NodeId); + } catch (const TServiceError& e) { + STORAGE_ERROR( + "Resolve node failed, NodeId=" + << pathElemRecord->NodeId + << ", Exception=" << FormatResultCode(e.GetCode()) + << " " << e.GetMessage()); + NodeTable->DeleteRecord(pathElemIndex); + } unresolvedPath.pop(); unresolvedRecords.erase(pathElemRecord->NodeId); - - STORAGE_TRACE( - "Resolve node end, NodeId=" << pathElemRecord->NodeId); } } } diff --git a/cloud/filestore/libs/service_local/index_ut.cpp b/cloud/filestore/libs/service_local/index_ut.cpp index ef124431ca9..efacb87279e 100644 --- a/cloud/filestore/libs/service_local/index_ut.cpp +++ b/cloud/filestore/libs/service_local/index_ut.cpp @@ -269,6 +269,68 @@ Y_UNIT_TEST_SUITE(TLocalIndex) CheckMissingNodes(pathLen, missingNodes); } + + Y_UNIT_TEST_F(ShouldDiscardDeletedNodes, TEnvironment) + { + RootPath.ForceDelete(); + RootPath.MkDir(); + + StatePath.ForceDelete(); + StatePath.MkDir(); + + auto index = std::make_unique(RootPath, StatePath, 100, Log); + auto rootNode = index->LookupNode(RootNodeId); + + // create /dir1 + auto dir1 = RootPath / "dir1"; + dir1.MkDir(); + auto node1 = TIndexNode::Create(*rootNode, dir1.GetName()); + auto inserted = + index->TryInsertNode(node1, RootNodeId, dir1.GetName()); + UNIT_ASSERT_C(inserted, "Failed to insert node: " << dir1.GetName()); + + // create /dir2/dir3/dir4 + auto dir2 = RootPath / "dir2"; + dir2.MkDir(); + auto node2 = TIndexNode::Create(*rootNode, dir2.GetName()); + inserted = + 
index->TryInsertNode(node2, RootNodeId, dir2.GetName()); + UNIT_ASSERT_C(inserted, "Failed to insert node: " << dir2.GetName()); + + auto dir3 = dir2 / "dir3"; + dir3.MkDir(); + auto node3 = TIndexNode::Create(*node2, dir3.GetName()); + inserted = + index->TryInsertNode(node3, node2->GetNodeId(), dir3.GetName()); + UNIT_ASSERT_C(inserted, "Failed to insert node: " << dir3.GetName()); + + auto dir4 = dir3 / "dir4"; + dir4.MkDir(); + auto node4 = TIndexNode::Create(*node3, dir4.GetName()); + inserted = + index->TryInsertNode(node4, node3->GetNodeId(), dir4.GetName()); + UNIT_ASSERT_C(inserted, "Failed to insert node: " << dir4.GetName()); + + // delete dir3 + dir3.ForceDelete(); + index = std::make_unique(RootPath, StatePath, 100, Log); + + // /dir1 and /dir2 restored + UNIT_ASSERT_C(index->LookupNode(node1->GetNodeId()), + "Failed to lookup node id: " << node1->GetNodeId() << + ", node: " << dir1.GetName()); + UNIT_ASSERT_C(index->LookupNode(node2->GetNodeId()), + "Failed to lookup node id: " << node1->GetNodeId() << + ", node: " << dir2.GetName()); + + // dir3/dir4 discarded + UNIT_ASSERT_C(!index->LookupNode(node3->GetNodeId()), + "Did not failed to lookup node id: " << node3->GetNodeId() << + ", node: " << dir3.GetName()); + UNIT_ASSERT_C(!index->LookupNode(node4->GetNodeId()), + "Did not failed to lookup node id: " << node4->GetNodeId() << + ", node: " << dir4.GetName()); + } }; } // namespace NCloud::NFileStore diff --git a/cloud/filestore/libs/service_local/service.cpp b/cloud/filestore/libs/service_local/service.cpp index d4c891a8931..9eb5bfdc08a 100644 --- a/cloud/filestore/libs/service_local/service.cpp +++ b/cloud/filestore/libs/service_local/service.cpp @@ -207,7 +207,9 @@ class TLocalFileStore final std::shared_ptr request) override \ { \ Y_UNUSED(callContext); \ - return ExecuteWithProfileLogAsync(*request); \ + return TaskQueue->Execute([this, request = std::move(request)] { \ + return ExecuteWithProfileLogAsync(*request); \ + }); \ } \ // 
FILESTORE_IMPLEMENT_METHOD_ASYNC diff --git a/cloud/filestore/libs/storage/core/config.cpp b/cloud/filestore/libs/storage/core/config.cpp index d35f7db474c..b238d51bcb6 100644 --- a/cloud/filestore/libs/storage/core/config.cpp +++ b/cloud/filestore/libs/storage/core/config.cpp @@ -219,7 +219,10 @@ using TAliases = NProto::TStorageConfig::TFilestoreAliases; xxx(InMemoryIndexCacheNodeRefsCapacity, ui64, 0 )\ xxx(InMemoryIndexCacheNodesToNodeRefsCapacityRatio, ui64, 0 )\ xxx(InMemoryIndexCacheLoadOnTabletStart, bool, false )\ - xxx(InMemoryIndexCacheLoadOnTabletStartRowsPerTx, ui64, 1000000 )\ + xxx(InMemoryIndexCacheLoadOnTabletStartRowsPerTx, ui64, 1000 )\ + xxx(InMemoryIndexCacheLoadSchedulePeriod, \ + TDuration, \ + TDuration::Seconds(0) )\ \ xxx(NonNetworkMetricsBalancingFactor, ui32, 1_KB )\ \ @@ -243,6 +246,7 @@ using TAliases = NProto::TStorageConfig::TFilestoreAliases; xxx(SSProxyFallbackMode, bool, false )\ \ xxx(MixedBlocksOffloadedRangesCapacity, ui64, 0 )\ + xxx(YdbViewerServiceEnabled, bool, false )\ // FILESTORE_STORAGE_CONFIG #define FILESTORE_STORAGE_CONFIG_REF(xxx) \ diff --git a/cloud/filestore/libs/storage/core/config.h b/cloud/filestore/libs/storage/core/config.h index 5030d3c6a34..c07fd814145 100644 --- a/cloud/filestore/libs/storage/core/config.h +++ b/cloud/filestore/libs/storage/core/config.h @@ -241,6 +241,7 @@ class TStorageConfig ui64 GetInMemoryIndexCacheNodesToNodeRefsCapacityRatio() const; bool GetInMemoryIndexCacheLoadOnTabletStart() const; ui64 GetInMemoryIndexCacheLoadOnTabletStartRowsPerTx() const; + TDuration GetInMemoryIndexCacheLoadSchedulePeriod() const; bool GetAsyncDestroyHandleEnabled() const; TDuration GetAsyncHandleOperationPeriod() const; @@ -302,6 +303,8 @@ class TStorageConfig bool GetGuestWritebackCacheEnabled() const; ui64 GetMixedBlocksOffloadedRangesCapacity() const; + + bool GetYdbViewerServiceEnabled() const; }; } // namespace NCloud::NFileStore::NStorage diff --git 
a/cloud/filestore/libs/storage/init/actorsystem.cpp b/cloud/filestore/libs/storage/init/actorsystem.cpp index e465f0f9437..4c476b90e00 100644 --- a/cloud/filestore/libs/storage/init/actorsystem.cpp +++ b/cloud/filestore/libs/storage/init/actorsystem.cpp @@ -85,7 +85,7 @@ class TStorageServicesInitializer final StatsRegistry, Args.ProfileLog, Args.TraceSerializer, - Args.CgroupStatsFetcher); + Args.StatsFetcher); setup->LocalServices.emplace_back( MakeStorageServiceId(), @@ -358,6 +358,8 @@ void TActorSystem::Init() servicesMask.EnableSchemeBoardMonitoring = 1; servicesMask.EnableConfigsDispatcher = Args.StorageConfig->GetConfigsDispatcherServiceEnabled(); + servicesMask.EnableViewerService = + Args.StorageConfig->GetYdbViewerServiceEnabled(); if (Args.AppConfig->HasAuthConfig()) { servicesMask.EnableSecurityServices = 1; diff --git a/cloud/filestore/libs/storage/init/actorsystem.h b/cloud/filestore/libs/storage/init/actorsystem.h index 9f7dd198996..455f0a04916 100644 --- a/cloud/filestore/libs/storage/init/actorsystem.h +++ b/cloud/filestore/libs/storage/init/actorsystem.h @@ -34,7 +34,7 @@ struct TActorSystemArgs std::shared_ptr UserCounters; - NCloud::NStorage::ICgroupStatsFetcherPtr CgroupStatsFetcher; + NCloud::NStorage::IStatsFetcherPtr StatsFetcher; }; //////////////////////////////////////////////////////////////////////////////// diff --git a/cloud/filestore/libs/storage/model/ut/ya.make b/cloud/filestore/libs/storage/model/ut/ya.make index 76fc9540c36..9d217d6ad5a 100644 --- a/cloud/filestore/libs/storage/model/ut/ya.make +++ b/cloud/filestore/libs/storage/model/ut/ya.make @@ -9,5 +9,3 @@ SRCS( ) END() - -RECURSE_FOR_TESTS(ut) diff --git a/cloud/filestore/libs/storage/service/service.cpp b/cloud/filestore/libs/storage/service/service.cpp index d7d0321816a..5dbff3bc5d4 100644 --- a/cloud/filestore/libs/storage/service/service.cpp +++ b/cloud/filestore/libs/storage/service/service.cpp @@ -13,14 +13,14 @@ IActorPtr CreateStorageService( 
IRequestStatsRegistryPtr statsRegistry, IProfileLogPtr profileLog, ITraceSerializerPtr traceSerialzer, - NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatsFetcher) + NCloud::NStorage::IStatsFetcherPtr statsFetcher) { return std::make_unique( std::move(storageConfig), std::move(statsRegistry), std::move(profileLog), std::move(traceSerialzer), - std::move(cgroupStatsFetcher)); + std::move(statsFetcher)); } } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/service/service.h b/cloud/filestore/libs/storage/service/service.h index 7222307d268..1bb16f2f986 100644 --- a/cloud/filestore/libs/storage/service/service.h +++ b/cloud/filestore/libs/storage/service/service.h @@ -16,6 +16,6 @@ NActors::IActorPtr CreateStorageService( IRequestStatsRegistryPtr statsRegistry, IProfileLogPtr profileLog, ITraceSerializerPtr traceSerialzer, - NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatsFetcher); + NCloud::NStorage::IStatsFetcherPtr xtatsFetcher); } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/service/service_actor.cpp b/cloud/filestore/libs/storage/service/service_actor.cpp index c0bb6d292fb..967c04131e7 100644 --- a/cloud/filestore/libs/storage/service/service_actor.cpp +++ b/cloud/filestore/libs/storage/service/service_actor.cpp @@ -20,11 +20,11 @@ TStorageServiceActor::TStorageServiceActor( IRequestStatsRegistryPtr statsRegistry, IProfileLogPtr profileLog, ITraceSerializerPtr traceSerializer, - NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatsFetcher) + NCloud::NStorage::IStatsFetcherPtr statsFetcher) : StorageConfig{std::move(storageConfig)} , ProfileLog{std::move(profileLog)} , TraceSerializer{std::move(traceSerializer)} - , CgroupStatsFetcher(std::move(cgroupStatsFetcher)) + , StatsFetcher(std::move(statsFetcher)) , State{std::make_unique()} , StatsRegistry{std::move(statsRegistry)} {} diff --git a/cloud/filestore/libs/storage/service/service_actor.h 
b/cloud/filestore/libs/storage/service/service_actor.h index 921f3010f95..8dc4002b3d7 100644 --- a/cloud/filestore/libs/storage/service/service_actor.h +++ b/cloud/filestore/libs/storage/service/service_actor.h @@ -36,7 +36,7 @@ class TStorageServiceActor final const TStorageConfigPtr StorageConfig; const IProfileLogPtr ProfileLog; const ITraceSerializerPtr TraceSerializer; - const NCloud::NStorage::ICgroupStatsFetcherPtr CgroupStatsFetcher; + const NCloud::NStorage::IStatsFetcherPtr StatsFetcher; std::unique_ptr State; ui64 ProxyCounter = 0; @@ -64,7 +64,7 @@ class TStorageServiceActor final IRequestStatsRegistryPtr statsRegistry, IProfileLogPtr profileLog, ITraceSerializerPtr traceSerializer, - NCloud::NStorage::ICgroupStatsFetcherPtr cgroupStatsFetcher); + NCloud::NStorage::IStatsFetcherPtr statsFetcher); ~TStorageServiceActor(); void Bootstrap(const NActors::TActorContext& ctx); diff --git a/cloud/filestore/libs/storage/service/service_actor_update_stats.cpp b/cloud/filestore/libs/storage/service/service_actor_update_stats.cpp index a6167b75774..0a2ff316334 100644 --- a/cloud/filestore/libs/storage/service/service_actor_update_stats.cpp +++ b/cloud/filestore/libs/storage/service/service_actor_update_stats.cpp @@ -2,7 +2,7 @@ #include -#include +#include namespace NCloud::NFileStore::NStorage { @@ -65,11 +65,12 @@ void TStorageServiceActor::HandleUpdateStats( InFlightRequests.erase(it++); } } - if (CgroupStatsFetcher) { + + if (StatsFetcher) { auto now = ctx.Now(); auto interval = (now - LastCpuWaitQuery).MicroSeconds(); - if (auto [cpuWait, error] = CgroupStatsFetcher->GetCpuWait(); + if (auto [cpuWait, error] = StatsFetcher->GetCpuWait(); !HasError(error)) { *CpuWaitFailure = 0; diff --git a/cloud/filestore/libs/storage/tablet/model/read_ahead.cpp b/cloud/filestore/libs/storage/tablet/model/read_ahead.cpp index 5136c2eba94..dc952f79996 100644 --- a/cloud/filestore/libs/storage/tablet/model/read_ahead.cpp +++ 
b/cloud/filestore/libs/storage/tablet/model/read_ahead.cpp @@ -156,7 +156,7 @@ TMaybe TReadAheadCache::RegisterDescribe( void TReadAheadCache::InvalidateCache(ui64 nodeId) { - NodeStates.clear(nodeId); + NodeStates.erase(nodeId); } void TReadAheadCache::OnDestroyHandle(ui64 nodeId, ui64 handle) diff --git a/cloud/filestore/libs/storage/tablet/model/read_ahead_ut.cpp b/cloud/filestore/libs/storage/tablet/model/read_ahead_ut.cpp index 7190cc54b43..7a3a861be5c 100644 --- a/cloud/filestore/libs/storage/tablet/model/read_ahead_ut.cpp +++ b/cloud/filestore/libs/storage/tablet/model/read_ahead_ut.cpp @@ -713,6 +713,29 @@ Y_UNIT_TEST_SUITE(TReadAheadTest) blobPieces[0].GetRanges(0).GetBlobOffset()); } } + + Y_UNIT_TEST(ShouldInvalidateNodes) + { + TDefaultCache cache; + + RegisterResult(cache, 111, 0, 1_MB); + RegisterResult(cache, 111, 1_MB, 1_MB); + RegisterResult(cache, 111, 2_MB, 1_MB); + RegisterResult(cache, 222, 100_MB, 1_MB); + RegisterResult(cache, 222, 105_MB, 1_MB); + + // both nodes should be present in cache + UNIT_ASSERT_VALUES_EQUAL( + Expected(111, 0, 128_KB, 0), + FillResult(cache, 111, 0, 128_KB)); + + cache.InvalidateCache(111); + + // the first node should be evicted, the second should be present + UNIT_ASSERT_VALUES_EQUAL( + Expected(222, 100_MB, 128_KB, 0), + FillResult(cache, 222, 100_MB, 128_KB)); + } } } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_adddata.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_adddata.cpp index 004c8a67f2f..c3221c6e83a 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_adddata.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_adddata.cpp @@ -296,8 +296,6 @@ void TIndexTabletActor::HandleGenerateBlobIds( offset += length; } - // TODO(debnatkh): Throttling - response->Record.SetCommitId(commitId); Metrics.GenerateBlobIds.Count.fetch_add(1, std::memory_order_relaxed); diff --git 
a/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate.cpp index ed3e082d238..93fa0e2351a 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate.cpp @@ -313,21 +313,44 @@ void TIndexTabletActor::CompleteTx_LoadState( { const ui64 maxRows = Config->GetInMemoryIndexCacheLoadOnTabletStartRowsPerTx(); + const TDuration schedulePeriod = + Config->GetInMemoryIndexCacheLoadSchedulePeriod(); // If necessary, code can iteratively call ReadNodeRefs for all nodes. // This will populate cache with node refs and allow us to perform // ListNodes using in-memory index state by knowing that the nodeRefs // cache is exhaustive + LOG_INFO( + ctx, + TFileStoreComponents::TABLET, + "%s LoadNodeRefs started (maxNodeRefs: %lu, period: %s)", + LogTag.c_str(), + maxRows, + schedulePeriod.ToString().c_str()); ctx.Send( SelfId(), - new TEvIndexTabletPrivate::TEvLoadNodeRefsRequest(0, "", maxRows)); + new TEvIndexTabletPrivate::TEvLoadNodeRefsRequest( + 0, + "", + maxRows, + schedulePeriod)); // Same logic is performed for batch loading nodes as well. 
The only // difference is that we do not need to keep track of the exhaustiveness // of the cache + LOG_INFO( + ctx, + TFileStoreComponents::TABLET, + "%s LoadNodes started (maxNodes: %lu, period: %s)", + LogTag.c_str(), + maxRows, + schedulePeriod.ToString().c_str()); ctx.Send( SelfId(), - new TEvIndexTabletPrivate::TEvLoadNodesRequest(0, maxRows)); + new TEvIndexTabletPrivate::TEvLoadNodesRequest( + 0, + maxRows, + schedulePeriod)); } ScheduleSyncSessions(ctx); diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate_noderefs.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate_noderefs.cpp index 9b2996c43ae..aeb01f41e3f 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate_noderefs.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate_noderefs.cpp @@ -10,7 +10,7 @@ bool TIndexTabletActor::ValidateTx_LoadNodeRefs( const TActorContext& ctx, TTxIndexTablet::TLoadNodeRefs& args) { - LOG_INFO( + LOG_DEBUG( ctx, TFileStoreComponents::TABLET, "%s LoadingNodeRefs (nodeId: %lu, name: %s, maxNodeRefs: %lu)", @@ -36,7 +36,7 @@ bool TIndexTabletActor::PrepareTx_LoadNodeRefs( args.NextNodeId, args.NextCookie); - LOG_INFO( + LOG_DEBUG( ctx, TFileStoreComponents::TABLET, "%s LoadingNodeRefs (nodeId: %lu, name: %s, maxNodeRefs: %lu), read " @@ -55,7 +55,7 @@ void TIndexTabletActor::CompleteTx_LoadNodeRefs( const TActorContext& ctx, TTxIndexTablet::TLoadNodeRefs& args) { - LOG_INFO( + LOG_DEBUG( ctx, TFileStoreComponents::TABLET, "%s LoadNodeRefs iteration completed, next nodeId: %lu, next cookie: " @@ -65,12 +65,13 @@ void TIndexTabletActor::CompleteTx_LoadNodeRefs( args.NextCookie.c_str()); if (args.NextCookie || args.NextNodeId) { - ctx.Send( - SelfId(), + ctx.Schedule( + args.SchedulePeriod, new TEvIndexTabletPrivate::TEvLoadNodeRefsRequest( args.NextNodeId, args.NextCookie, - args.MaxNodeRefs)); + args.MaxNodeRefs, + args.SchedulePeriod)); } else { LOG_INFO( ctx, @@ -90,7 +91,7 @@ void 
TIndexTabletActor::HandleLoadNodeRefsRequest( { auto* msg = ev->Get(); - LOG_INFO( + LOG_DEBUG( ctx, TFileStoreComponents::TABLET, "%s LoadNodeRefs iteration started (nodeId: %lu, name: %s, " @@ -109,7 +110,8 @@ void TIndexTabletActor::HandleLoadNodeRefsRequest( std::move(requestInfo), msg->NodeId, msg->Cookie, - msg->MaxNodeRefs); + msg->MaxNodeRefs, + msg->SchedulePeriod); } } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate_nodes.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate_nodes.cpp index 678567de557..5f51ff8f6d1 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate_nodes.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_loadstate_nodes.cpp @@ -10,7 +10,7 @@ bool TIndexTabletActor::ValidateTx_LoadNodes( const TActorContext& ctx, TTxIndexTablet::TLoadNodes& args) { - LOG_INFO( + LOG_DEBUG( ctx, TFileStoreComponents::TABLET, "%s LoadingNodes (nodeId: %lu, maxNodes: %lu)", @@ -31,7 +31,7 @@ bool TIndexTabletActor::PrepareTx_LoadNodes( bool ready = db.ReadNodes(args.NodeId, args.MaxNodes, args.NextNodeId, nodes); - LOG_INFO( + LOG_DEBUG( ctx, TFileStoreComponents::TABLET, "%s LoadingNodes (nodeId: %lu, maxNodes: %lu), read %lu nodes: %s", @@ -48,7 +48,7 @@ void TIndexTabletActor::CompleteTx_LoadNodes( const TActorContext& ctx, TTxIndexTablet::TLoadNodes& args) { - LOG_INFO( + LOG_DEBUG( ctx, TFileStoreComponents::TABLET, "%s LoadNodes iteration completed, next nodeId: %lu", @@ -56,11 +56,12 @@ void TIndexTabletActor::CompleteTx_LoadNodes( args.NextNodeId); if (args.NextNodeId) { - ctx.Send( - SelfId(), + ctx.Schedule( + args.SchedulePeriod, new TEvIndexTabletPrivate::TEvLoadNodesRequest( args.NextNodeId, - args.MaxNodes)); + args.MaxNodes, + args.SchedulePeriod)); } else { LOG_INFO( ctx, @@ -78,7 +79,7 @@ void TIndexTabletActor::HandleLoadNodesRequest( { auto* msg = ev->Get(); - LOG_INFO( + LOG_DEBUG( ctx, TFileStoreComponents::TABLET, "%s LoadNodes 
iteration started (nodeId: %lu, maxNodes: %lu)", @@ -94,7 +95,8 @@ void TIndexTabletActor::HandleLoadNodesRequest( ctx, std::move(requestInfo), msg->NodeId, - msg->MaxNodes); + msg->MaxNodes, + msg->SchedulePeriod); } } // namespace NCloud::NFileStore::NStorage diff --git a/cloud/filestore/libs/storage/tablet/tablet_private.h b/cloud/filestore/libs/storage/tablet/tablet_private.h index f4fc7a822c4..74987181ac3 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_private.h +++ b/cloud/filestore/libs/storage/tablet/tablet_private.h @@ -534,14 +534,17 @@ struct TEvIndexTabletPrivate const ui64 NodeId; const TString Cookie; const ui32 MaxNodeRefs; + const TDuration SchedulePeriod; TLoadNodeRefsRequest( ui64 nodeId, TString cookie, - ui32 maxNodeRefs) + ui32 maxNodeRefs, + TDuration schedulePeriod) : NodeId(nodeId) , Cookie(std::move(cookie)) , MaxNodeRefs(maxNodeRefs) + , SchedulePeriod(schedulePeriod) {} }; @@ -553,12 +556,15 @@ struct TEvIndexTabletPrivate { const ui64 NodeId; const ui32 MaxNodes; + const TDuration SchedulePeriod; TLoadNodesRequest( ui64 nodeId, - ui32 maxNodes) + ui32 maxNodes, + TDuration schedulePeriod) : NodeId(nodeId) , MaxNodes(maxNodes) + , SchedulePeriod(schedulePeriod) {} }; diff --git a/cloud/filestore/libs/storage/tablet/tablet_tx.h b/cloud/filestore/libs/storage/tablet/tablet_tx.h index da39fb64dd9..8b98c11b48f 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_tx.h +++ b/cloud/filestore/libs/storage/tablet/tablet_tx.h @@ -2256,6 +2256,7 @@ struct TTxIndexTablet const ui64 NodeId; const TString Cookie; const ui64 MaxNodeRefs; + const TDuration SchedulePeriod; ui64 NextNodeId = 0; TString NextCookie; @@ -2264,11 +2265,13 @@ struct TTxIndexTablet TRequestInfoPtr requestInfo, ui64 nodeId, TString cookie, - ui64 maxNodeRefs) + ui64 maxNodeRefs, + TDuration schedulePeriod) : RequestInfo(std::move(requestInfo)) , NodeId(nodeId) , Cookie(std::move(cookie)) , MaxNodeRefs(maxNodeRefs) + , SchedulePeriod(schedulePeriod) {} void Clear() @@ 
-2289,16 +2292,19 @@ struct TTxIndexTablet const TRequestInfoPtr RequestInfo; const ui64 NodeId; const ui64 MaxNodes; + const TDuration SchedulePeriod; ui64 NextNodeId = 0; TLoadNodes( TRequestInfoPtr requestInfo, ui64 nodeId, - ui64 maxNodes) + ui64 maxNodes, + TDuration schedulePeriod) : RequestInfo(std::move(requestInfo)) , NodeId(nodeId) , MaxNodes(maxNodes) + , SchedulePeriod(schedulePeriod) {} void Clear() diff --git a/cloud/filestore/libs/storage/tablet/tablet_ut_cache.cpp b/cloud/filestore/libs/storage/tablet/tablet_ut_cache.cpp index 49b49c7a9ad..4863b06c23d 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_ut_cache.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_ut_cache.cpp @@ -920,6 +920,8 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_NodesCache) storageConfig.SetInMemoryIndexCacheNodeRefsCapacity(100); storageConfig.SetInMemoryIndexCacheLoadOnTabletStart(true); storageConfig.SetInMemoryIndexCacheLoadOnTabletStartRowsPerTx(1); + storageConfig.SetInMemoryIndexCacheLoadSchedulePeriod( + TDuration::Seconds(1).MilliSeconds()); TTestEnv env({}, storageConfig); env.CreateSubDomain("nfs"); @@ -930,22 +932,25 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_NodesCache) tablet.InitSession("client", "session"); tablet.CreateNode(TCreateNodeArgs::File(RootNodeId, "test1")); - tablet.CreateNode(TCreateNodeArgs::File(RootNodeId, "test2")); env.GetRuntime().ClearCounters(); tablet.RebootTablet(); + + for (int i = 0; i < 10; ++i) { + tablet.AdvanceTime(TDuration::Seconds(1)); + env.GetRuntime().DispatchEvents({}, TDuration::Seconds(1)); + } + tablet.InitSession("client", "session"); - // It will take 2 iterations to load all the nodeRefs (root -> test1 and - // root -> test2) + // It will take 1 iteration to load all the nodeRefs (root -> test1) UNIT_ASSERT_VALUES_EQUAL( - 2, + 1, env.GetRuntime().GetCounter( TEvIndexTabletPrivate::EEvents::EvLoadNodeRefs)); - // It also will take 3 iterations to load all the nodes (root, test1 and - // test2) + // It also will take 2 
iterations to load all the nodes (root and test1) UNIT_ASSERT_VALUES_EQUAL( - 3, + 2, env.GetRuntime().GetCounter( TEvIndexTabletPrivate::EEvents::EvLoadNodes)); @@ -954,7 +959,7 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_NodesCache) // The noderefs cache is exhaustive thus list nodes should be a cache // hit UNIT_ASSERT_VALUES_EQUAL( - 2, + 1, tablet.ListNodes(RootNodeId)->Record.NodesSize()); auto statsAfter = GetTxStats(env, tablet); @@ -969,28 +974,27 @@ Y_UNIT_TEST_SUITE(TIndexTabletTest_NodesCache) statsBefore = statsAfter; - auto id3 = - tablet.CreateNode(TCreateNodeArgs::Directory(RootNodeId, "test3")) + auto id2 = + tablet.CreateNode(TCreateNodeArgs::Directory(RootNodeId, "test2")) ->Record.GetNode() .GetId(); - tablet.CreateNode(TCreateNodeArgs::File(id3, "test4")); - tablet.CreateNode(TCreateNodeArgs::File(id3, "test5")); - tablet.CreateNode(TCreateNodeArgs::File(id3, "test6")); + tablet.CreateNode(TCreateNodeArgs::File(id2, "test3")); + tablet.CreateNode(TCreateNodeArgs::File(id2, "test4")); + tablet.CreateNode(TCreateNodeArgs::File(id2, "test5")); /* |- test1 |- test2 - |- test3 + |- test3 |- test4 |- test5 - |- test6 */ // The NodeRefs cache is still exhaustive thus list nodes should be a // cache hit - UNIT_ASSERT_VALUES_EQUAL(3, tablet.ListNodes(id3)->Record.NodesSize()); + UNIT_ASSERT_VALUES_EQUAL(3, tablet.ListNodes(id2)->Record.NodesSize()); UNIT_ASSERT_VALUES_EQUAL( - 3, + 2, tablet.ListNodes(RootNodeId)->Record.NodesSize()); statsAfter = GetTxStats(env, tablet); diff --git a/cloud/filestore/libs/storage/testlib/test_env.cpp b/cloud/filestore/libs/storage/testlib/test_env.cpp index 8ff9737af06..7a72c74629e 100644 --- a/cloud/filestore/libs/storage/testlib/test_env.cpp +++ b/cloud/filestore/libs/storage/testlib/test_env.cpp @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include @@ -226,7 +226,7 @@ ui32 TTestEnv::CreateNode(const TString& name) StatsRegistry, ProfileLog, TraceSerializer, - CreateCgroupStatsFetcherStub()); + 
CreateStatsFetcherStub()); auto indexServiceId = Runtime.Register( indexService.release(), nodeIdx, diff --git a/cloud/filestore/libs/vfs_fuse/loop.cpp b/cloud/filestore/libs/vfs_fuse/loop.cpp index 6c7678139ce..e14efd32b9c 100644 --- a/cloud/filestore/libs/vfs_fuse/loop.cpp +++ b/cloud/filestore/libs/vfs_fuse/loop.cpp @@ -477,7 +477,8 @@ class TSessionThread final { STORAGE_INFO("starting FUSE loop"); - ::NCloud::SetCurrentThreadName("FUSE"); + static std::atomic index = 0; + ::NCloud::SetCurrentThreadName("FUSE" + ToString(index++)); AtomicSet(ThreadId, pthread_self()); fuse_session_loop(Session); diff --git a/cloud/filestore/tests/fio_index/mount-kikimr-test/ya.make b/cloud/filestore/tests/fio_index/mount-kikimr-test/ya.make index 21f78a54a00..7bf21c9c797 100644 --- a/cloud/filestore/tests/fio_index/mount-kikimr-test/ya.make +++ b/cloud/filestore/tests/fio_index/mount-kikimr-test/ya.make @@ -15,9 +15,7 @@ TEST_SRCS( test.py ) -IF (SANITIZER_TYPE) - SKIP_TEST("Suppressed until issue 2831 is resolved") -ENDIF() +SKIP_TEST("Suppressed until issue 2831 is resolved") SET(NFS_FORCE_VERBOSE 1) diff --git a/cloud/filestore/tests/python/lib/daemon_config.py b/cloud/filestore/tests/python/lib/daemon_config.py index e09a558812c..1211bb9f6c8 100644 --- a/cloud/filestore/tests/python/lib/daemon_config.py +++ b/cloud/filestore/tests/python/lib/daemon_config.py @@ -46,6 +46,7 @@ def __init__( config_file=None, storage_config_file=None, app_config=None, + diag_config_file=None, profile_log=None, verbose=False, service_type=None, @@ -74,6 +75,11 @@ def __init__( self.__profile_log_path = self.__profile_file_path(profile_log) + if diag_config_file: + self.diag_config_file_path = self.__config_file_path(diag_config_file) + else: + self.diag_config_file_path = None + self.__restart_interval = restart_interval self.__restart_flag = restart_flag @@ -93,7 +99,7 @@ def __init__( with open(self.__app_config_file_path, "w") as config_file: if self.__app_config: 
config_file.write(MessageToString(self.__app_config)) - os.fsync(config_file) + config_file.flush() @property def port(self): @@ -282,7 +288,7 @@ def __write_configs(self): path = self.__config_file_path(name) with open(path, "w") as config_file: config_file.write(MessageToString(proto)) - os.fsync(config_file) + config_file.flush() def generate_configs(self, domains_txt, names_txt): self.__proto_configs = {} @@ -320,6 +326,11 @@ def generate_command(self): self.__profile_log_path, ] + self.generate_aux_params() + if self.diag_config_file_path: + command += [ + "--diag-file", self.diag_config_file_path + ] + if self.__service_type == "kikimr": command += [ "--domain", @@ -387,6 +398,7 @@ def __init__( restart_interval=None, access_service_port=0, storage_config=None, + diag_config_file=None, use_secure_registration=False, secure=False, access_service_type=AccessService, @@ -397,6 +409,7 @@ def __init__( config_file="server.txt", storage_config_file="storage.txt", app_config=app_config, + diag_config_file="diag.txt", profile_log="nfs-profile.log", service_type=service_type, verbose=verbose, @@ -412,6 +425,17 @@ def __init__( ic_port=ic_port ) + self.__diag_config = self.__generate_diag_txt() + with open(self.diag_config_file_path, "w") as config_file: + if self.__diag_config: + config_file.write(MessageToString(self.__diag_config)) + config_file.flush() + + def __generate_diag_txt(self): + diag = TDiagnosticsConfig() + diag.ProfileLogTimeThreshold = 100 + return diag + class FilestoreVhostConfigGenerator(FilestoreDaemonConfigGenerator): def __init__( @@ -430,12 +454,14 @@ def __init__( ic_port=None, access_service_type=AccessService, secure=False, + diag_config_file=None, ): super().__init__( binary_path, config_file="vhost.txt", storage_config_file="storage-nolocal.txt", app_config=app_config, + diag_config_file="diag.txt", profile_log="vhost-profile.log", service_type=service_type, verbose=verbose, @@ -453,9 +479,20 @@ def __init__( self.__local_service_port = 
self._port_manager.get_port() + self.__diag_config = self.__generate_diag_txt() + with open(self.diag_config_file_path, "w") as config_file: + if self.__diag_config: + config_file.write(MessageToString(self.__diag_config)) + config_file.flush() + def generate_aux_params(self): return ["--local-service-port", str(self.__local_service_port)] @property def local_service_port(self): return self.__local_service_port + + def __generate_diag_txt(self): + diag = TDiagnosticsConfig() + diag.ProfileLogTimeThreshold = 100 + return diag diff --git a/cloud/filestore/tests/recipes/service-kikimr/__main__.py b/cloud/filestore/tests/recipes/service-kikimr/__main__.py index 02d91a24962..de30f18bdd4 100644 --- a/cloud/filestore/tests/recipes/service-kikimr/__main__.py +++ b/cloud/filestore/tests/recipes/service-kikimr/__main__.py @@ -42,7 +42,7 @@ def start(argv): kikimr_binary_path = common.binary_path("cloud/storage/core/tools/testing/ydb/bin/ydbd") if args.kikimr_package_path is not None: - kikimr_binary_path = common.build_path("{}/Berkanavt/kikimr/bin/kikimr".format(args.kikimr_package_path)) + kikimr_binary_path = common.build_path("{}/ydbd".format(args.kikimr_package_path)) kikimr_configurator = KikimrConfigGenerator( erasure=None, diff --git a/cloud/storage/core/libs/aio/service.cpp b/cloud/storage/core/libs/aio/service.cpp index 5bb1b50c2ec..e062b7a92c5 100644 --- a/cloud/storage/core/libs/aio/service.cpp +++ b/cloud/storage/core/libs/aio/service.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -223,7 +224,9 @@ class TAIOService final void Run() { SetHighestThreadPriority(); - NCloud::SetCurrentThreadName("AIO"); + + static std::atomic index = 0; + NCloud::SetCurrentThreadName("AIO" + ToString(index++)); timespec timeout = WAIT_TIMEOUT; @@ -248,6 +251,62 @@ class TAIOService final } }; +//////////////////////////////////////////////////////////////////////////////// + +class TThreadedAIOService final + : public IFileIOService +{ +private: + TVector 
IoServices; + std::atomic NextService = 0; + +public: + TThreadedAIOService(ui32 threadCount, size_t maxEvents) + { + Y_ABORT_UNLESS(threadCount > 0); + + for (ui32 i = 0; i < threadCount; i++) { + IoServices.push_back(CreateAIOService(maxEvents)); + } + } + + void AsyncRead( + TFileHandle& file, + i64 offset, + TArrayRef buffer, + TFileIOCompletion* completion) override + { + auto index = NextService++; + IoServices[index % IoServices.size()] + ->AsyncRead(file, offset, buffer, completion); + } + + void AsyncWrite( + TFileHandle& file, + i64 offset, + TArrayRef buffer, + TFileIOCompletion* completion) override + { + auto index = NextService++; + IoServices[index % IoServices.size()] + ->AsyncWrite(file, offset, buffer, completion); + } + + void Start() override + { + for (auto& ioService: IoServices) { + ioService->Start(); + } + } + + void Stop() override + { + for (auto& ioService: IoServices) { + ioService->Stop(); + } + } +}; + } // namespace //////////////////////////////////////////////////////////////////////////////// @@ -257,4 +316,9 @@ IFileIOServicePtr CreateAIOService(size_t maxEvents) return std::make_shared(maxEvents); } +IFileIOServicePtr CreateThreadedAIOService(ui32 threadCount, size_t maxEvents) +{ + return std::make_shared(threadCount, maxEvents); +} + } // namespace NCloud diff --git a/cloud/storage/core/libs/aio/service.h b/cloud/storage/core/libs/aio/service.h index 53e67a769a9..5af7a6f916c 100644 --- a/cloud/storage/core/libs/aio/service.h +++ b/cloud/storage/core/libs/aio/service.h @@ -8,4 +8,6 @@ namespace NCloud { IFileIOServicePtr CreateAIOService(size_t maxEvents = 1024); +IFileIOServicePtr CreateThreadedAIOService(ui32 threadCount, size_t maxEvents = 1024); + } // namespace NCloud diff --git a/cloud/storage/core/libs/common/error.h b/cloud/storage/core/libs/common/error.h index 92ca5e4f961..636a861363b 100644 --- a/cloud/storage/core/libs/common/error.h +++ b/cloud/storage/core/libs/common/error.h @@ -252,10 +252,10 @@ concept 
TAcceptsError = requires(T a) }; template -T ErrorResponse(ui32 code, TString message) +T ErrorResponse(ui32 code, TString message, ui32 flags = 0) { T response; - *response.MutableError() = MakeError(code, std::move(message)); + *response.MutableError() = MakeError(code, std::move(message), flags); return response; } @@ -390,40 +390,48 @@ class TResultOrError class TErrorResponse { private: - const ui32 Code; - const TString Message; + NProto::TError Error; public: - TErrorResponse(ui32 code, TString message = {}) - : Code(code) - , Message(std::move(message)) + TErrorResponse(ui32 code, TString message = {}, ui32 flags = 0) + : Error(MakeError(code, std::move(message), flags)) {} TErrorResponse(const NProto::TError& e) - : Code(e.GetCode()) - , Message(e.GetMessage()) + : Error(e) + {} + + TErrorResponse(NProto::TError&& e) + : Error(std::move(e)) {} TErrorResponse(const TServiceError& e) - : Code(e.GetCode()) - , Message(e.GetMessage()) + : Error(MakeError(e.GetCode(), TString(e.GetMessage()))) {} template operator T() const { - return ErrorResponse(Code, Message); + return ErrorResponse( + Error.GetCode(), + Error.GetMessage(), + Error.GetFlags()); } template operator TResultOrError() const { - return MakeError(Code, Message); + return TResultOrError(Error); } operator NProto::TError() const { - return MakeError(Code, Message); + return Error; + } + + operator NProto::TError&& () && + { + return std::move(Error); } }; diff --git a/cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.cpp b/cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.cpp index e8aebada475..618f6445e8d 100644 --- a/cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.cpp +++ b/cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.cpp @@ -1,13 +1,11 @@ -#include "cgroup_stats_fetcher.h" +#include "stats_fetcher.h" +#include #include #include -#include #include #include -#include -#include #include namespace NCloud::NStorage { @@ -17,7 +15,7 @@ namespace { 
//////////////////////////////////////////////////////////////////////////////// struct TCgroupStatsFetcher final - : public ICgroupStatsFetcher + : public IStatsFetcher { private: const TString ComponentName; @@ -122,30 +120,11 @@ struct TCgroupStatsFetcher final } }; -//////////////////////////////////////////////////////////////////////////////// - -struct TCgroupStatsFetcherStub final - : public ICgroupStatsFetcher -{ - void Start() override - { - } - - void Stop() override - { - } - - TResultOrError GetCpuWait() override - { - return TDuration::Zero(); - } -}; - } // namespace //////////////////////////////////////////////////////////////////////////////// -ICgroupStatsFetcherPtr CreateCgroupStatsFetcher( +IStatsFetcherPtr CreateCgroupStatsFetcher( TString componentName, ILoggingServicePtr logging, TString statsFile) @@ -156,39 +135,4 @@ ICgroupStatsFetcherPtr CreateCgroupStatsFetcher( std::move(statsFile)); } -ICgroupStatsFetcherPtr CreateCgroupStatsFetcherStub() -{ - return std::make_shared(); -} - -TString BuildCpuWaitStatsFilename(const TString& serviceName) -{ - static constexpr auto CpuWaitStatsFilenameTemplate = - "/sys/fs/cgroup/cpu/system.slice/%s.service/cpuacct.wait"; - if (!serviceName.empty()) { - return Sprintf(CpuWaitStatsFilenameTemplate, serviceName.c_str()); - } - return {}; -} - -NCloud::NStorage::ICgroupStatsFetcherPtr BuildCgroupStatsFetcher( - TString cpuWaitFilename, - const TLog& log, - ILoggingServicePtr logging, - TString componentName) -{ - if (cpuWaitFilename.empty()) { - const auto& Log = log; - STORAGE_INFO( - "CpuWaitServiceName and CpuWaitFilename are empty, can't build " - "CgroupStatsFetcher"); - return CreateCgroupStatsFetcherStub(); - } - - return CreateCgroupStatsFetcher( - std::move(componentName), - std::move(logging), - std::move(cpuWaitFilename)); -}; - } // namespace NCloud::NStorage diff --git a/cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher_ut.cpp 
b/cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher_ut.cpp index 1c886361c41..3cfb512ef78 100644 --- a/cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher_ut.cpp +++ b/cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher_ut.cpp @@ -1,4 +1,4 @@ -#include "cgroup_stats_fetcher.h" +#include "stats_fetcher.h" #include "critical_events.h" diff --git a/cloud/storage/core/libs/diagnostics/public.h b/cloud/storage/core/libs/diagnostics/public.h index 09a483d193f..91b0af14d31 100644 --- a/cloud/storage/core/libs/diagnostics/public.h +++ b/cloud/storage/core/libs/diagnostics/public.h @@ -84,8 +84,8 @@ namespace NStorage { //////////////////////////////////////////////////////////////////////////////// -struct ICgroupStatsFetcher; -using ICgroupStatsFetcherPtr = std::shared_ptr; +struct IStatsFetcher; +using IStatsFetcherPtr = std::shared_ptr; } // namespace NStorage diff --git a/cloud/storage/core/libs/diagnostics/qemu_ut/bin/ya.make b/cloud/storage/core/libs/diagnostics/qemu_ut/bin/ya.make new file mode 100644 index 00000000000..37e16a42a35 --- /dev/null +++ b/cloud/storage/core/libs/diagnostics/qemu_ut/bin/ya.make @@ -0,0 +1,9 @@ +UNITTEST_FOR(cloud/storage/core/libs/diagnostics) + +IF (OS_LINUX) + SRCS( + task_stats_fetcher_ut.cpp + ) +ENDIF() + +END() diff --git a/cloud/storage/core/libs/diagnostics/qemu_ut/test.py b/cloud/storage/core/libs/diagnostics/qemu_ut/test.py new file mode 100644 index 00000000000..8ef1266040a --- /dev/null +++ b/cloud/storage/core/libs/diagnostics/qemu_ut/test.py @@ -0,0 +1,9 @@ +import yatest.common as common + +tests_bin = "cloud-storage-core-libs-diagnostics-qemu_ut-bin" +tests_bin_path = "cloud/storage/core/libs/diagnostics/qemu_ut/bin/" + tests_bin + + +def test_qemu_ut(): + test_tool = common.binary_path(tests_bin_path) + common.execute(test_tool) diff --git a/cloud/storage/core/libs/diagnostics/qemu_ut/ya.make b/cloud/storage/core/libs/diagnostics/qemu_ut/ya.make new file mode 100644 index 00000000000..8ea9261a722 --- 
/dev/null +++ b/cloud/storage/core/libs/diagnostics/qemu_ut/ya.make @@ -0,0 +1,16 @@ +PY3TEST() + +INCLUDE(${ARCADIA_ROOT}/cloud/storage/core/tests/recipes/medium.inc) +SPLIT_FACTOR(1) + +DEPENDS( + cloud/storage/core/libs/diagnostics/qemu_ut/bin +) + +TEST_SRCS( + test.py +) + +INCLUDE(${ARCADIA_ROOT}/cloud/storage/core/tests/recipes/qemu.inc) + +END() diff --git a/cloud/storage/core/libs/diagnostics/stats_fetcher.cpp b/cloud/storage/core/libs/diagnostics/stats_fetcher.cpp new file mode 100644 index 00000000000..6770576f501 --- /dev/null +++ b/cloud/storage/core/libs/diagnostics/stats_fetcher.cpp @@ -0,0 +1,79 @@ +#include "stats_fetcher.h" + +#include +#include + +#include + +namespace NCloud::NStorage { + +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +struct TStatsFetcherStub final + : public IStatsFetcher +{ + void Start() override + { + } + + void Stop() override + { + } + + TResultOrError GetCpuWait() override + { + return TDuration::Zero(); + } +}; + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +IStatsFetcherPtr CreateStatsFetcherStub() +{ + return std::make_shared(); +} + +TString BuildCpuWaitStatsFilename(const TString& serviceName) +{ + static constexpr auto CpuWaitStatsFilenameTemplate = + "/sys/fs/cgroup/cpu/system.slice/%s.service/cpuacct.wait"; + if (!serviceName.empty()) { + return Sprintf(CpuWaitStatsFilenameTemplate, serviceName.c_str()); + } + return {}; +} + +IStatsFetcherPtr BuildStatsFetcher( + NProto::EStatsFetcherType statsFetcherType, + const TString& cpuWaitFilename, + const TLog& log, + ILoggingServicePtr logging) +{ + switch (statsFetcherType) { + case NCloud::NProto::CGROUP: { + if (cpuWaitFilename.empty()) { + const auto& Log = log; + STORAGE_INFO( + "CpuWaitFilename is empty, can't build " + "CgroupStatsFetcher"); + return CreateStatsFetcherStub(); + } + + return CreateCgroupStatsFetcher( + "STORAGE_STATS", + 
std::move(logging), + std::move(cpuWaitFilename)); + } + case NCloud::NProto::TASKSTATS: + return CreateTaskStatsFetcher( + "STORAGE_STATS", + std::move(logging), + getpid()); + } +} + +} // namespace NCloud::NStorage diff --git a/cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.h b/cloud/storage/core/libs/diagnostics/stats_fetcher.h similarity index 60% rename from cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.h rename to cloud/storage/core/libs/diagnostics/stats_fetcher.h index 19051ce8621..9f5b7ef7ac3 100644 --- a/cloud/storage/core/libs/diagnostics/cgroup_stats_fetcher.h +++ b/cloud/storage/core/libs/diagnostics/stats_fetcher.h @@ -4,6 +4,7 @@ #include #include +#include #include @@ -17,31 +18,36 @@ namespace NCloud::NStorage { //////////////////////////////////////////////////////////////////////////////// -struct ICgroupStatsFetcher +struct IStatsFetcher : public IStartable { - virtual ~ICgroupStatsFetcher() = default; + virtual ~IStatsFetcher() = default; virtual TResultOrError GetCpuWait() = 0; }; -using ICgroupStatsFetcherPtr = std::shared_ptr; +using IStatsFetcherPtr = std::shared_ptr; //////////////////////////////////////////////////////////////////////////////// -ICgroupStatsFetcherPtr CreateCgroupStatsFetcher( +IStatsFetcherPtr CreateCgroupStatsFetcher( TString componentName, ILoggingServicePtr logging, TString statsFile); -ICgroupStatsFetcherPtr CreateCgroupStatsFetcherStub(); +IStatsFetcherPtr CreateTaskStatsFetcher( + TString componentName, + ILoggingServicePtr logging, + int pid); + +IStatsFetcherPtr CreateStatsFetcherStub(); TString BuildCpuWaitStatsFilename(const TString& serviceName); -ICgroupStatsFetcherPtr BuildCgroupStatsFetcher( - TString cpuWaitFilename, +IStatsFetcherPtr BuildStatsFetcher( + NProto::EStatsFetcherType statsFetcherType, + const TString& cpuWaitFilename, const TLog& log, - ILoggingServicePtr logging, - TString componentName); + ILoggingServicePtr logging); } // namespace NCloud::NStorage diff --git 
a/cloud/storage/core/libs/diagnostics/task_stats_fetcher.cpp b/cloud/storage/core/libs/diagnostics/task_stats_fetcher.cpp new file mode 100644 index 00000000000..b929cb6bea1 --- /dev/null +++ b/cloud/storage/core/libs/diagnostics/task_stats_fetcher.cpp @@ -0,0 +1,252 @@ +#include "stats_fetcher.h" + +#include +#include + +#include +#include +#include + +#include +#include + +namespace NCloud::NStorage { + +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +void ValidateAttribute(const ::nlattr& attribute, ui16 expectedAttribute) +{ + if (attribute.nla_type != expectedAttribute) { + throw yexception() << "Invalid attribute type: " << attribute.nla_type + << " Expected attribute type: " << expectedAttribute; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +// Documentation: +// https://github.com/torvalds/linux/blob/master/Documentation/accounting/taskstats.rst + +#pragma pack(push, NLMSG_ALIGNTO) + +struct TTaskStatsFamilyIdRequest +{ + ::nlmsghdr MessageHeader = + {sizeof(TTaskStatsFamilyIdRequest), GENL_ID_CTRL, NLM_F_REQUEST, 0, 0}; + ::genlmsghdr GenericHeader = {CTRL_CMD_GETFAMILY, 1, 0}; + ::nlattr FamilyNameAttr = { + sizeof(FamilyName) + NLA_HDRLEN, + CTRL_ATTR_FAMILY_NAME}; + const char FamilyName[sizeof(TASKSTATS_GENL_NAME)] = TASKSTATS_GENL_NAME; +}; + +struct TTaskStatsFamilyIdResponse +{ + ::nlmsghdr MessageHeader; + ::genlmsghdr GenericHeader; + ::nlattr FamilyNameAttr; + char FamilyName[sizeof(TASKSTATS_GENL_NAME)]; + alignas(NLMSG_ALIGNTO)::nlattr FamilyIdAttr; + ui16 FamilyId; + + void Validate() + { + ValidateAttribute(FamilyNameAttr, CTRL_ATTR_FAMILY_NAME); + ValidateAttribute(FamilyIdAttr, CTRL_ATTR_FAMILY_ID); + } +}; + +struct TTaskStatsRequest +{ + ::nlmsghdr MessageHeader; + ::genlmsghdr GenericHeader; + ::nlattr PidAttr; + ui32 Pid; + + TTaskStatsRequest(ui16 familyId, ui32 pid) + : MessageHeader{sizeof(TTaskStatsRequest), familyId, NLM_F_REQUEST, 0, 
0} + , GenericHeader{TASKSTATS_CMD_GET, 1, 0} + , PidAttr{sizeof(Pid) + NLA_HDRLEN, TASKSTATS_CMD_ATTR_PID} + , Pid(pid) + {} +}; + +struct TTaskStatsResponse +{ + ::nlmsghdr MessageHeader; + ::genlmsghdr GenericHeader; + ::nlattr AggrPidAttr; + ::nlattr PidAttr; + ui32 Pid; + ::nlattr TaskStatsAttr; + ::taskstats TaskStats; + + void Validate() + { + ValidateAttribute(AggrPidAttr, TASKSTATS_TYPE_AGGR_PID); + ValidateAttribute(PidAttr, TASKSTATS_TYPE_PID); + ValidateAttribute(TaskStatsAttr, TASKSTATS_TYPE_STATS); + } +}; + +#pragma pack(pop) + +//////////////////////////////////////////////////////////////////////////////// + +template +union TNetlinkResponse { + T Msg; + ui8 Buffer[MaxMsgSize]; + + TNetlinkResponse() { + static_assert(sizeof(T) < MaxMsgSize); + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TNetlinkSocket +{ +private: + TSocket Socket; + ui32 SocketTimeoutMs = 100; + +public: + TNetlinkSocket(ui32 socketTimeoutMs = 100) + : Socket(::socket(PF_NETLINK, SOCK_RAW, NETLINK_GENERIC)) + , SocketTimeoutMs(socketTimeoutMs) + { + if (Socket < 0) { + throw yexception() << "Failed to create netlink socket"; + } + Socket.SetSocketTimeout(0, SocketTimeoutMs); + } + + template + void Send(const TNetlinkMessage& msg) + { + auto ret = Socket.Send(&msg, sizeof(msg)); + if (ret == -1) { + throw yexception() + << "Failed to send netlink message: " << strerror(errno); + } + } + + template + void Receive(TNetlinkResponse& response) + { + auto ret = Socket.Recv(&response, sizeof(response)); + if (ret < 0) { + throw yexception() + << "Failed to receive netlink message: " << strerror(errno); + } + + if (response.Msg.MessageHeader.nlmsg_type == NLMSG_ERROR) { + throw yexception() + << "Failed to receive netlink message: kernel returned error"; + } + + if (!NLMSG_OK(&response.Msg.MessageHeader, ret)) { + throw yexception() + << "Failed to parse netlink message: incorrect format"; + } + return; + } +}; + 
+//////////////////////////////////////////////////////////////////////////////// + +struct TTaskStatsFetcher final: public IStatsFetcher +{ +private: + const TString ComponentName; + const ILoggingServicePtr Logging; + int Pid; + TLog Log; + const TDuration NetlinkSocketTimeout = TDuration::Seconds(1); + TDuration Last; + ui16 FamilyId; + + ui16 GetFamilyId() + { + TNetlinkSocket socket; + socket.Send(TTaskStatsFamilyIdRequest()); + TNetlinkResponse response; + socket.Receive(response); + response.Msg.Validate(); + return response.Msg.FamilyId; + } + +public: + TTaskStatsFetcher( + TString componentName, + ILoggingServicePtr logging, + int pid) + : ComponentName(std::move(componentName)) + , Logging(std::move(logging)) + , Pid(pid) + , FamilyId(0) + { + } + + ~TTaskStatsFetcher() override + { + Stop(); + } + + void Start() override + { + Log = Logging->CreateLog(ComponentName); + } + + void Stop() override + { + } + + TResultOrError GetCpuWait() override + { + try { + if (FamilyId == 0) { + FamilyId = GetFamilyId(); + } + + TNetlinkSocket socket; + socket.Send(TTaskStatsRequest(FamilyId, Pid)); + TNetlinkResponse response; + socket.Receive(response); + response.Msg.Validate(); + auto cpuLack = TDuration::MilliSeconds( + response.Msg.TaskStats.cpu_delay_total / 1000); + auto retval = cpuLack - Last; + Last = cpuLack; + return retval; + } catch (...) 
{ + auto errorMessage = BuildErrorMessageFromException(); + return MakeError(E_FAIL, errorMessage); + } + } + + TString BuildErrorMessageFromException() + { + auto msg = TStringBuilder() << "IO error"; + msg << " with exception " << CurrentExceptionMessage(); + return msg; + } +}; + +} // namespace + +IStatsFetcherPtr CreateTaskStatsFetcher( + TString componentName, + ILoggingServicePtr logging, + int pid) +{ + return std::make_shared( + std::move(componentName), + std::move(logging), + pid); +} + +} // namespace NCloud::NStorage diff --git a/cloud/storage/core/libs/diagnostics/task_stats_fetcher_ut.cpp b/cloud/storage/core/libs/diagnostics/task_stats_fetcher_ut.cpp new file mode 100644 index 00000000000..eb329ac2102 --- /dev/null +++ b/cloud/storage/core/libs/diagnostics/task_stats_fetcher_ut.cpp @@ -0,0 +1,39 @@ +#include "stats_fetcher.h" + +#include "critical_events.h" + +#include +#include + +#include +#include + +#include + +namespace NCloud::NStorage { + +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +const TString ComponentName = "STORAGE_STATS"; + +} // namespace + +//////////////////////////////////////////////////////////////////////////////// + +Y_UNIT_TEST_SUITE(TaskStatsFetcherTest) +{ + Y_UNIT_TEST(ShouldGetCpuWait) + { + auto fetcher = CreateTaskStatsFetcher( + ComponentName, + CreateLoggingService("console"), + getpid()); + fetcher->Start(); + auto [cpuWait, error] = fetcher->GetCpuWait(); + UNIT_ASSERT_C(!HasError(error), error); + } +} + +} // namespace NCloud::NStorage diff --git a/cloud/storage/core/libs/diagnostics/ya.make b/cloud/storage/core/libs/diagnostics/ya.make index 78876ea5f3a..57be7f8c1ac 100644 --- a/cloud/storage/core/libs/diagnostics/ya.make +++ b/cloud/storage/core/libs/diagnostics/ya.make @@ -2,8 +2,8 @@ LIBRARY() SRCS( busy_idle_calculator.cpp - cgroup_stats_fetcher.cpp counters_helper.cpp + cgroup_stats_fetcher.cpp critical_events.cpp executor_counters.cpp histogram_types.cpp 
@@ -15,7 +15,9 @@ SRCS( postpone_time_predictor.cpp request_counters.cpp solomon_counters.cpp + stats_fetcher.cpp stats_updater.cpp + task_stats_fetcher.cpp trace_processor_mon.cpp trace_processor.cpp trace_reader.cpp @@ -46,4 +48,5 @@ PEERDIR( END() +RECURSE_FOR_TESTS(qemu_ut) RECURSE_FOR_TESTS(ut) diff --git a/cloud/storage/core/libs/hive_proxy/hive_proxy_events_private.h b/cloud/storage/core/libs/hive_proxy/hive_proxy_events_private.h index 3657b010784..18fa768bd24 100644 --- a/cloud/storage/core/libs/hive_proxy/hive_proxy_events_private.h +++ b/cloud/storage/core/libs/hive_proxy/hive_proxy_events_private.h @@ -2,6 +2,8 @@ #include "public.h" +#include "tablet_boot_info.h" + #include #include @@ -95,6 +97,24 @@ struct TEvHiveProxyPrivate {} }; + // + // ListTabletBootInfoBackups + // + + struct TListTabletBootInfoBackupsRequest + { + }; + + struct TListTabletBootInfoBackupsResponse + { + TVector TabletBootInfos; + + explicit TListTabletBootInfoBackupsResponse( + TVector tabletBootInfos) + : TabletBootInfos(std::move(tabletBootInfos)) + {} + }; + // // Events declaration // @@ -110,6 +130,8 @@ struct TEvHiveProxyPrivate EvReadTabletBootInfoBackupRequest, EvReadTabletBootInfoBackupResponse, EvUpdateTabletBootInfoBackupRequest, + EvListTabletBootInfoBackupsRequest, + EvListTabletBootInfoBackupsResponse, EvEnd }; @@ -128,6 +150,10 @@ struct TEvHiveProxyPrivate TReadTabletBootInfoBackupResponse, EvReadTabletBootInfoBackupResponse>; using TEvUpdateTabletBootInfoBackupRequest = TRequestEvent< TUpdateTabletBootInfoBackupRequest, EvUpdateTabletBootInfoBackupRequest>; + using TEvListTabletBootInfoBackupsRequest = TRequestEvent< + TListTabletBootInfoBackupsRequest, EvListTabletBootInfoBackupsRequest>; + using TEvListTabletBootInfoBackupsResponse = TRequestEvent< + TListTabletBootInfoBackupsResponse, EvListTabletBootInfoBackupsResponse>; }; } // namespace NCloud::NStorage diff --git a/cloud/storage/core/libs/hive_proxy/tablet_boot_info.h 
b/cloud/storage/core/libs/hive_proxy/tablet_boot_info.h new file mode 100644 index 00000000000..2e6e8170153 --- /dev/null +++ b/cloud/storage/core/libs/hive_proxy/tablet_boot_info.h @@ -0,0 +1,26 @@ +#pragma once + +#include "public.h" + +#include + +namespace NCloud::NStorage { + +//////////////////////////////////////////////////////////////////////////////// + +struct TTabletBootInfo +{ + TTabletBootInfo() = default; + + TTabletBootInfo( + NKikimr::TTabletStorageInfoPtr storageInfo, + ui64 suggestedGeneration) + : StorageInfo(std::move(storageInfo)) + , SuggestedGeneration(suggestedGeneration) + {} + + NKikimr::TTabletStorageInfoPtr StorageInfo; + ui64 SuggestedGeneration = 0; +}; + +} // namespace NCloud::NStorage diff --git a/cloud/storage/core/libs/hive_proxy/tablet_boot_info_backup.cpp b/cloud/storage/core/libs/hive_proxy/tablet_boot_info_backup.cpp index 3d50728db5d..ea9eed5d417 100644 --- a/cloud/storage/core/libs/hive_proxy/tablet_boot_info_backup.cpp +++ b/cloud/storage/core/libs/hive_proxy/tablet_boot_info_backup.cpp @@ -187,6 +187,23 @@ void TTabletBootInfoBackup::HandleBackupTabletBootInfos( NCloud::Reply(ctx, *ev, std::move(response)); } +void TTabletBootInfoBackup::HandleListTabletBootInfoBackups( + const TEvHiveProxyPrivate::TEvListTabletBootInfoBackupsRequest::TPtr& ev, + const TActorContext& ctx) +{ + TVector infos; + for (const auto& [_, info]: BackupProto.GetData()) { + infos.emplace_back( + NKikimr::TabletStorageInfoFromProto(info.GetStorageInfo()), + info.GetSuggestedGeneration()); + } + + auto response = + std::make_unique( + std::move(infos)); + NCloud::Reply(ctx, *ev, std::move(response)); +} + //////////////////////////////////////////////////////////////////////////////// STFUNC(TTabletBootInfoBackup::StateWork) @@ -196,6 +213,7 @@ STFUNC(TTabletBootInfoBackup::StateWork) HFunc(TEvHiveProxyPrivate::TEvReadTabletBootInfoBackupRequest, HandleReadTabletBootInfoBackup); HFunc(TEvHiveProxyPrivate::TEvUpdateTabletBootInfoBackupRequest, 
HandleUpdateTabletBootInfoBackup); HFunc(TEvHiveProxy::TEvBackupTabletBootInfosRequest, HandleBackupTabletBootInfos); + HFunc(TEvHiveProxyPrivate::TEvListTabletBootInfoBackupsRequest, HandleListTabletBootInfoBackups); default: HandleUnexpectedEvent(ev, LogComponent); break; diff --git a/cloud/storage/core/libs/hive_proxy/tablet_boot_info_backup.h b/cloud/storage/core/libs/hive_proxy/tablet_boot_info_backup.h index 17e21733acb..458e7a256f3 100644 --- a/cloud/storage/core/libs/hive_proxy/tablet_boot_info_backup.h +++ b/cloud/storage/core/libs/hive_proxy/tablet_boot_info_backup.h @@ -21,23 +21,6 @@ namespace NCloud::NStorage { //////////////////////////////////////////////////////////////////////////////// -struct TTabletBootInfo -{ - TTabletBootInfo() = default; - - TTabletBootInfo( - NKikimr::TTabletStorageInfoPtr storageInfo, - ui64 suggestedGeneration) - : StorageInfo(std::move(storageInfo)) - , SuggestedGeneration(suggestedGeneration) - {} - - NKikimr::TTabletStorageInfoPtr StorageInfo; - ui64 SuggestedGeneration = 0; -}; - -//////////////////////////////////////////////////////////////////////////////// - class TTabletBootInfoBackup final : public NActors::TActorBootstrapped { @@ -78,6 +61,10 @@ class TTabletBootInfoBackup final void HandleBackupTabletBootInfos( const TEvHiveProxy::TEvBackupTabletBootInfosRequest::TPtr& ev, const NActors::TActorContext& ctx); + + void HandleListTabletBootInfoBackups( + const TEvHiveProxyPrivate::TEvListTabletBootInfoBackupsRequest::TPtr& ev, + const NActors::TActorContext& ctx); }; } // namespace NCloud::NStorage diff --git a/cloud/storage/core/protos/diagnostics.proto b/cloud/storage/core/protos/diagnostics.proto new file mode 100644 index 00000000000..79cfffcc6b5 --- /dev/null +++ b/cloud/storage/core/protos/diagnostics.proto @@ -0,0 +1,15 @@ +syntax = "proto2"; + +package NCloud.NProto; + +option go_package = "github.com/ydb-platform/nbs/cloud/storage/core/protos"; + 
+//////////////////////////////////////////////////////////////////////////////// +// CPU stats monitoring type + +enum EStatsFetcherType +{ + CGROUP = 0; + TASKSTATS = 1; +}; + diff --git a/cloud/storage/core/protos/ya.make b/cloud/storage/core/protos/ya.make index 1546617f38e..84f654a146d 100644 --- a/cloud/storage/core/protos/ya.make +++ b/cloud/storage/core/protos/ya.make @@ -11,6 +11,7 @@ SRCS( authorization_mode.proto certificate.proto config_dispatcher_settings.proto + diagnostics.proto endpoints.proto error.proto media.proto diff --git a/cloud/storage/core/tools/analytics/cpu-wait-monitor/main.cpp b/cloud/storage/core/tools/analytics/cpu-wait-monitor/main.cpp index e8663e38414..e63a6448e36 100644 --- a/cloud/storage/core/tools/analytics/cpu-wait-monitor/main.cpp +++ b/cloud/storage/core/tools/analytics/cpu-wait-monitor/main.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/contrib/libs/grpc/src/core/lib/iomgr/tcp_client_posix.cc b/contrib/libs/grpc/src/core/lib/iomgr/tcp_client_posix.cc index 720cf269f2c..4e437e034cb 100644 --- a/contrib/libs/grpc/src/core/lib/iomgr/tcp_client_posix.cc +++ b/contrib/libs/grpc/src/core/lib/iomgr/tcp_client_posix.cc @@ -329,6 +329,7 @@ int64_t grpc_tcp_client_create_from_prepared_fd( err = connect(fd, reinterpret_cast(addr->addr), addr->len); } while (err < 0 && errno == EINTR); + int connect_errno = (err < 0) ? errno : 0; auto addr_uri = grpc_sockaddr_to_uri(addr); if (!addr_uri.ok()) { @@ -340,7 +341,7 @@ int64_t grpc_tcp_client_create_from_prepared_fd( TString name = y_absl::StrCat("tcp-client:", addr_uri.value()); grpc_fd* fdobj = grpc_fd_create(fd, name.c_str(), true); int64_t connection_id = 0; - if (errno == EWOULDBLOCK || errno == EINPROGRESS) { + if (connect_errno == EWOULDBLOCK || connect_errno == EINPROGRESS) { // Connection is still in progress.
connection_id = g_connection_id.fetch_add(1, std::memory_order_acq_rel); } @@ -352,10 +353,10 @@ int64_t grpc_tcp_client_create_from_prepared_fd( grpc_core::ExecCtx::Run(DEBUG_LOCATION, closure, y_absl::OkStatus()); return 0; } - if (errno != EWOULDBLOCK && errno != EINPROGRESS) { + if (connect_errno != EWOULDBLOCK && connect_errno != EINPROGRESS) { // Connection already failed. Return 0 to discourage any cancellation // attempts. - grpc_error_handle error = GRPC_OS_ERROR(errno, "connect"); + grpc_error_handle error = GRPC_OS_ERROR(connect_errno, "connect"); error = grpc_error_set_str( error, grpc_core::StatusStrProperty::kTargetAddress, addr_uri.value()); grpc_fd_orphan(fdobj, nullptr, nullptr, "tcp_client_connect_error"); diff --git a/example/0-setup.sh b/example/0-setup.sh index 3b67a81a6d5..09971cb5700 100755 --- a/example/0-setup.sh +++ b/example/0-setup.sh @@ -77,6 +77,13 @@ FileDevices: { BlockSize: 4096 } +ThrottlingConfig: { + InfraThrottlingConfigPath: "nbs/nbs-throttling.json" + DefaultNetworkMbitThroughput: 100 + DirectCopyBandwidthFraction: 0.5 + MaxDeviceBandwidthMiB: 15 +} + EOF cat > $BIN_DIR/nbs/nbs-location-$1.txt < $BIN_DIR/nbs/nbs-location-0.txt <