From a103bc2df39f5ae61b976406132168634b1a4c2a Mon Sep 17 00:00:00 2001 From: Evgenik2 Date: Tue, 30 Sep 2025 17:34:23 +0300 Subject: [PATCH] Add overrides in state storage self heal (#26064) --- ydb/core/blobstorage/nodewarden/distconf.h | 5 ++- .../nodewarden/distconf_generate.cpp | 7 ++-- .../blobstorage/nodewarden/distconf_invoke.h | 4 +-- .../distconf_invoke_state_storage.cpp | 21 +++++------ ...distconf_statestorage_config_generator.cpp | 30 ++++++++++------ .../distconf_statestorage_config_generator.h | 7 +++- .../blobstorage/nodewarden/distconf_ut.cpp | 36 ++++++++++++++++--- ydb/core/cms/config.h | 6 ++++ ydb/core/cms/sentinel.cpp | 2 ++ .../blobstorage_distributed_config.proto | 6 ++++ ydb/core/protos/cms.proto | 2 ++ .../test_distconf_sentinel_node_status.py | 20 ++++++++++- 12 files changed, 114 insertions(+), 32 deletions(-) diff --git a/ydb/core/blobstorage/nodewarden/distconf.h b/ydb/core/blobstorage/nodewarden/distconf.h index b6794e174aaf..0c2b9c5ebae7 100644 --- a/ydb/core/blobstorage/nodewarden/distconf.h +++ b/ydb/core/blobstorage/nodewarden/distconf.h @@ -469,7 +469,10 @@ namespace NKikimr::NStorage { bool GenerateStateStorageConfig(NKikimrConfig::TDomainsConfig::TStateStorage *ss , const NKikimrBlobStorage::TStorageConfig& baseConfig , std::unordered_set& usedNodes - , const NKikimrConfig::TDomainsConfig::TStateStorage& oldConfig = {}); + , const NKikimrConfig::TDomainsConfig::TStateStorage& oldConfig = {} + , ui32 overrideReplicasInRingCount = 0 + , ui32 overrideRingsCount = 0 + ); //////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Bridge ops diff --git a/ydb/core/blobstorage/nodewarden/distconf_generate.cpp b/ydb/core/blobstorage/nodewarden/distconf_generate.cpp index 03fb7219b153..fc1406776e98 100644 --- a/ydb/core/blobstorage/nodewarden/distconf_generate.cpp +++ b/ydb/core/blobstorage/nodewarden/distconf_generate.cpp @@ -583,7 +583,10 @@ namespace NKikimr::NStorage { bool TDistributedConfigKeeper::GenerateStateStorageConfig(NKikimrConfig::TDomainsConfig::TStateStorage *ss , const NKikimrBlobStorage::TStorageConfig& baseConfig, std::unordered_set& usedNodes - , const NKikimrConfig::TDomainsConfig::TStateStorage& oldConfig) { + , const NKikimrConfig::TDomainsConfig::TStateStorage& oldConfig + , ui32 overrideReplicasInRingCount + , ui32 overrideRingsCount + ) { std::map>>> nodes; bool goodConfig = true; for (const auto& node : baseConfig.GetAllNodes()) { @@ -592,7 +595,7 @@ namespace NKikimr::NStorage { nodes[pileId][location.GetDataCenterId()].emplace_back(node.GetNodeId(), location); } for (auto& [pileId, nodesByDataCenter] : nodes) { - TStateStoragePerPileGenerator generator(nodesByDataCenter, SelfHealNodesState, pileId, usedNodes, oldConfig); + TStateStoragePerPileGenerator generator(nodesByDataCenter, SelfHealNodesState, pileId, usedNodes, oldConfig, overrideReplicasInRingCount, overrideRingsCount); generator.AddRingGroup(ss); goodConfig &= generator.IsGoodConfig(); } diff --git a/ydb/core/blobstorage/nodewarden/distconf_invoke.h b/ydb/core/blobstorage/nodewarden/distconf_invoke.h index 59598425717f..dca0e49ef7e4 100644 --- a/ydb/core/blobstorage/nodewarden/distconf_invoke.h +++ b/ydb/core/blobstorage/nodewarden/distconf_invoke.h @@ -107,12 +107,12 @@ namespace NKikimr::NStorage { void ReassignStateStorageNode(const TQuery::TReassignStateStorageNode& cmd); void ReconfigStateStorage(const NKikimrBlobStorage::TStateStorageConfig& cmd); void SelfHealStateStorage(const TQuery::TSelfHealStateStorage& cmd); - void SelfHealStateStorage(ui32 waitForConfigStep, bool forceHeal, bool pileupReplicas); + void SelfHealStateStorage(ui32 waitForConfigStep, bool forceHeal, bool pileupReplicas, ui32 overrideReplicasInRingCount, ui32 overrideRingsCount); void SelfHealNodesStateUpdate(const TQuery::TSelfHealNodesStateUpdate& cmd); void GetStateStorageConfig(const TQuery::TGetStateStorageConfig& cmd); void GetCurrentStateStorageConfig(NKikimrBlobStorage::TStateStorageConfig* currentConfig, bool getNodesState); - bool GetRecommendedStateStorageConfig(NKikimrBlobStorage::TStateStorageConfig* currentConfig, bool pileupReplicas); + bool GetRecommendedStateStorageConfig(NKikimrBlobStorage::TStateStorageConfig* currentConfig, bool pileupReplicas, ui32 overrideReplicasInRingCount, ui32 overrideRingsCount); void AdjustRingGroupActorIdOffsetInRecommendedStateStorageConfig(NKikimrBlobStorage::TStateStorageConfig* currentConfig); //////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Storage configuration YAML manipulation diff --git a/ydb/core/blobstorage/nodewarden/distconf_invoke_state_storage.cpp b/ydb/core/blobstorage/nodewarden/distconf_invoke_state_storage.cpp index ef9ab8fe8cf7..19e645d9c15a 100644 --- a/ydb/core/blobstorage/nodewarden/distconf_invoke_state_storage.cpp +++ b/ydb/core/blobstorage/nodewarden/distconf_invoke_state_storage.cpp @@ -9,19 +9,19 @@ namespace NKikimr::NStorage { using TInvokeRequestHandlerActor = TDistributedConfigKeeper::TInvokeRequestHandlerActor; - bool TInvokeRequestHandlerActor::GetRecommendedStateStorageConfig(NKikimrBlobStorage::TStateStorageConfig* currentConfig, bool pileupReplicas) { + bool TInvokeRequestHandlerActor::GetRecommendedStateStorageConfig(NKikimrBlobStorage::TStateStorageConfig* currentConfig, bool pileupReplicas, ui32 overrideReplicasInRingCount, ui32 overrideRingsCount) { const NKikimrBlobStorage::TStorageConfig& config = *Self->StorageConfig; bool result = true; std::unordered_set usedNodes; - result &= Self->GenerateStateStorageConfig(currentConfig->MutableStateStorageConfig(), config, usedNodes, config.GetStateStorageConfig()); + result &= Self->GenerateStateStorageConfig(currentConfig->MutableStateStorageConfig(), config, usedNodes, config.GetStateStorageConfig(), overrideReplicasInRingCount, overrideRingsCount); if (pileupReplicas) { usedNodes.clear(); } - result &= Self->GenerateStateStorageConfig(currentConfig->MutableStateStorageBoardConfig(), config, usedNodes, config.GetStateStorageBoardConfig()); + result &= Self->GenerateStateStorageConfig(currentConfig->MutableStateStorageBoardConfig(), config, usedNodes, config.GetStateStorageBoardConfig(), overrideReplicasInRingCount, overrideRingsCount); if (pileupReplicas) { usedNodes.clear(); } - result &= Self->GenerateStateStorageConfig(currentConfig->MutableSchemeBoardConfig(), config, usedNodes, config.GetSchemeBoardConfig()); + result &= Self->GenerateStateStorageConfig(currentConfig->MutableSchemeBoardConfig(), config, usedNodes, config.GetSchemeBoardConfig(), overrideReplicasInRingCount, overrideRingsCount); return result; } @@ -97,7 +97,7 @@ namespace NKikimr::NStorage { auto* currentConfig = record->MutableStateStorageConfig(); if (cmd.GetRecommended()) { - GetRecommendedStateStorageConfig(currentConfig, cmd.GetPileupReplicas()); + GetRecommendedStateStorageConfig(currentConfig, cmd.GetPileupReplicas(), cmd.GetOverrideReplicasInRingCount(), cmd.GetOverrideRingsCount()); AdjustRingGroupActorIdOffsetInRecommendedStateStorageConfig(currentConfig); } else { GetCurrentStateStorageConfig(currentConfig, cmd.GetNodesState()); @@ -112,19 +112,20 @@ namespace NKikimr::NStorage { Self->SelfHealNodesState[node.GetNodeId()] = node.GetState(); } if (cmd.GetEnableSelfHealStateStorage()) { - SelfHealStateStorage(cmd.GetWaitForConfigStep(), true, cmd.GetPileupReplicas()); + SelfHealStateStorage(cmd.GetWaitForConfigStep(), true, cmd.GetPileupReplicas(), cmd.GetOverrideReplicasInRingCount(), cmd.GetOverrideRingsCount()); } } void TInvokeRequestHandlerActor::SelfHealStateStorage(const TQuery::TSelfHealStateStorage& cmd) { - SelfHealStateStorage(cmd.GetWaitForConfigStep(), cmd.GetForceHeal(), cmd.GetPileupReplicas()); + SelfHealStateStorage(cmd.GetWaitForConfigStep(), cmd.GetForceHeal(), cmd.GetPileupReplicas(), cmd.GetOverrideReplicasInRingCount(), cmd.GetOverrideRingsCount()); } - void TInvokeRequestHandlerActor::SelfHealStateStorage(ui32 waitForConfigStep, bool forceHeal, bool pileupReplicas) { + void TInvokeRequestHandlerActor::SelfHealStateStorage(ui32 waitForConfigStep, bool forceHeal, bool pileupReplicas, ui32 overrideReplicasInRingCount, ui32 overrideRingsCount) { RunCommonChecks(); - STLOG(PRI_DEBUG, BS_NODE, NW105, "TInvokeRequestHandlerActor::SelfHealStateStorage", (waitForConfigStep, waitForConfigStep), (forceHeal, forceHeal), (pileupReplicas, pileupReplicas)); + STLOG(PRI_DEBUG, BS_NODE, NW105, "TInvokeRequestHandlerActor::SelfHealStateStorage", (waitForConfigStep, waitForConfigStep), + (forceHeal, forceHeal), (pileupReplicas, pileupReplicas), (overrideReplicasInRingCount, overrideReplicasInRingCount), (overrideRingsCount, overrideRingsCount)); NKikimrBlobStorage::TStateStorageConfig targetConfig; - if (!GetRecommendedStateStorageConfig(&targetConfig, pileupReplicas) && !forceHeal) { + if (!GetRecommendedStateStorageConfig(&targetConfig, pileupReplicas, overrideReplicasInRingCount, overrideRingsCount) && !forceHeal) { throw TExError() << "Recommended configuration has faulty nodes and can not be applyed"; } diff --git a/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.cpp b/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.cpp index e175348c621e..88b4cb8d867e 100644 --- a/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.cpp +++ b/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.cpp @@ -15,11 +15,15 @@ namespace NKikimr::NStorage { , TBridgePileId pileId , std::unordered_set& usedNodes , const NKikimrConfig::TDomainsConfig::TStateStorage& oldConfig + , ui32 overrideReplicasInRingCount + , ui32 overrideRingsCount ) : PileId(pileId) , SelfHealNodesState(selfHealNodesState) , UsedNodes(usedNodes) , OldConfig(oldConfig) + , OverrideReplicasInRingCount(overrideReplicasInRingCount) + , OverrideRingsCount(overrideRingsCount) { FillNodeGroups(nodes); CalculateRingsParameters(); @@ -56,25 +60,29 @@ namespace NKikimr::NStorage { void TStateStoragePerPileGenerator::CalculateRingsParameters() { ui32 minNodesInGroup = NodeGroups[0].Nodes.size(); if (NodeGroups.size() == 1) { - if (minNodesInGroup < 5) { - RingsInGroupCount = minNodesInGroup; - NToSelect = minNodesInGroup < 3 ? 1 : 3; - } else { + RingsInGroupCount = OverrideRingsCount; + if (RingsInGroupCount == 0) RingsInGroupCount = minNodesInGroup < 8 ? minNodesInGroup : 8; - NToSelect = 5; + if (RingsInGroupCount > minNodesInGroup) { + RingsInGroupCount = minNodesInGroup; } - ReplicasInRingCount = 1 + minNodesInGroup / 1000; + NToSelect = RingsInGroupCount < 3 ? 1 : (RingsInGroupCount < 5 ? 3 : 5); + ReplicasInRingCount = OverrideReplicasInRingCount > 0 ? OverrideReplicasInRingCount : (1 + minNodesInGroup / 1000); } else { - RingsInGroupCount = minNodesInGroup < 3 ? 1 : 3; + if (OverrideRingsCount == 3 || OverrideRingsCount == 9) { + RingsInGroupCount = OverrideRingsCount / 3; + } else { + RingsInGroupCount = minNodesInGroup < 3 ? 1 : 3; + } NToSelect = RingsInGroupCount < 3 ? 3 : 9; ui32 nodesCnt = 0; for (auto& n : NodeGroups) { nodesCnt += n.Nodes.size(); } - ReplicasInRingCount = 1 + nodesCnt / 1000; - if (ReplicasInRingCount * RingsInGroupCount > minNodesInGroup) { - ReplicasInRingCount = 1; - } + ReplicasInRingCount = OverrideReplicasInRingCount > 0 ? OverrideReplicasInRingCount : (1 + nodesCnt / 1000); + } + if (ReplicasInRingCount * RingsInGroupCount > minNodesInGroup) { + ReplicasInRingCount = 1; } } diff --git a/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.h b/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.h index 6a99b697872a..4f8f98c5aa60 100644 --- a/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.h +++ b/ydb/core/blobstorage/nodewarden/distconf_statestorage_config_generator.h @@ -12,7 +12,10 @@ namespace NKikimr::NStorage { const std::unordered_map& selfHealNodesState, TBridgePileId pileId, std::unordered_set& usedNodes, - const NKikimrConfig::TDomainsConfig::TStateStorage& oldConfig); + const NKikimrConfig::TDomainsConfig::TStateStorage& oldConfig, + ui32 overrideReplicasInRingCount, + ui32 overrideRingsCount + ); bool IsGoodConfig() const; void AddRingGroup(NKikimrConfig::TDomainsConfig::TStateStorage *ss); @@ -41,5 +44,7 @@ namespace NKikimr::NStorage { ui32 RingsInGroupCount = 1; ui32 ReplicasInRingCount = 1; ui32 NToSelect = 1; + ui32 OverrideReplicasInRingCount = 0; + ui32 OverrideRingsCount = 0; }; } diff --git a/ydb/core/blobstorage/nodewarden/distconf_ut.cpp b/ydb/core/blobstorage/nodewarden/distconf_ut.cpp index 2d2dfd5049bf..c602e6560a5a 100644 --- a/ydb/core/blobstorage/nodewarden/distconf_ut.cpp +++ b/ydb/core/blobstorage/nodewarden/distconf_ut.cpp @@ -23,7 +23,7 @@ namespace NBlobStorageNodeWardenTest{ Y_UNIT_TEST_SUITE(TDistconfGenerateConfigTest) { - NKikimrConfig::TDomainsConfig::TStateStorage GenerateSimpleStateStorage(ui32 nodes, std::unordered_set usedNodes = {}) { + NKikimrConfig::TDomainsConfig::TStateStorage GenerateSimpleStateStorage(ui32 nodes, std::unordered_set usedNodes = {}, ui32 overrideReplicasInRingCount = 0, ui32 overrideRingsCount = 0) { NKikimr::NStorage::TDistributedConfigKeeper keeper(nullptr, nullptr, true); NKikimrConfig::TDomainsConfig::TStateStorage ss; NKikimrBlobStorage::TStorageConfig config; @@ -31,11 +31,21 @@ Y_UNIT_TEST_SUITE(TDistconfGenerateConfigTest) { auto *node = config.AddAllNodes(); node->SetNodeId(i + 1); } - keeper.GenerateStateStorageConfig(&ss, config, usedNodes); + keeper.GenerateStateStorageConfig(&ss, config, usedNodes, {}, overrideReplicasInRingCount, overrideRingsCount); return ss; } - NKikimrConfig::TDomainsConfig::TStateStorage GenerateDCStateStorage(ui32 dcCnt, ui32 racksCnt, ui32 nodesInRack, std::unordered_map nodesState = {}, std::unordered_set usedNodes = {}, std::vector oldConfig = {}, ui32 oldNToSelect = 9) { + NKikimrConfig::TDomainsConfig::TStateStorage GenerateDCStateStorage( + ui32 dcCnt + , ui32 racksCnt + , ui32 nodesInRack + , std::unordered_map nodesState = {} + , std::unordered_set usedNodes = {} + , std::vector oldConfig = {} + , ui32 oldNToSelect = 9 + , ui32 overrideReplicasInRingCount = 0 + , ui32 overrideRingsCount = 0 + ) { NKikimrBlobStorage::TStorageConfig config; ui32 nodeId = 1; NKikimr::NStorage::TDistributedConfigKeeper keeper(nullptr, nullptr, true); @@ -63,7 +73,7 @@ Y_UNIT_TEST_SUITE(TDistconfGenerateConfigTest) { for (auto [nodeId, state] : nodesState) { keeper.SelfHealNodesState[nodeId] = state; } - keeper.GenerateStateStorageConfig(&ss, config, usedNodes, oldSS); + keeper.GenerateStateStorageConfig(&ss, config, usedNodes, oldSS, overrideReplicasInRingCount, overrideRingsCount); return ss; } @@ -173,6 +183,24 @@ Y_UNIT_TEST_SUITE(TDistconfGenerateConfigTest) { // DC disconnected - use previous config for this DC CheckStateStorage(GenerateDCStateStorage(3, 3, 3, { {10, 2}, {11, 4}, {13, 3}, {14, 4}, {15, 2} }, {}, {1, 5, 8, 10, 14, 17, 19, 22, 25}, 9), 9, {1, 4, 7, 10, 14, 17, 19, 22, 25}); } + + Y_UNIT_TEST(GenerateConfigReplicasDensity) { + CheckStateStorage(GenerateSimpleStateStorage(100, {}, 0, 0), 5, {1, 2, 3, 4, 5, 6, 7, 8}); + CheckStateStorage(GenerateSimpleStateStorage(100, {}, 1, 1), 1, {1}); + CheckStateStorage2(GenerateSimpleStateStorage(100, {}, 3, 3), + "{ RingGroups { NToSelect: 3 Ring { Node: 1 Node: 2 Node: 3 }" + " Ring { Node: 4 Node: 5 Node: 6 } Ring { Node: 7 Node: 8 Node: 9 } } }"); + CheckStateStorage(GenerateSimpleStateStorage(100, {}, 20, 10), 5, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + CheckStateStorage2(GenerateSimpleStateStorage(16, {}, 4, 2), "{ RingGroups { NToSelect: 1 Ring { Node: 1 Node: 2 Node: 3 Node: 4 } Ring { Node: 5 Node: 6 Node: 7 Node: 8 } } }"); + CheckStateStorage(GenerateDCStateStorage(3, 3, 3, {}, {}, {}, 9, 0, 0), 9, {1, 4, 7, 10, 13, 16, 19, 22, 25}); + CheckStateStorage(GenerateDCStateStorage(3, 3, 3, {}, {}, {}, 9, 1, 3), 3, {1, 10, 19}); + CheckStateStorage2(GenerateDCStateStorage(3, 3, 3, {}, {}, {}, 9, 2, 3), "{ RingGroups { NToSelect: 3 Ring { Node: 1 Node: 2 } Ring { Node: 10 Node: 11 } Ring { Node: 19 Node: 20 } } }"); + CheckStateStorage2(GenerateDCStateStorage(3, 3, 3, {}, {}, {}, 9, 3, 3), "{ RingGroups { NToSelect: 3 Ring { Node: 1 Node: 2 Node: 3 } Ring { Node: 10 Node: 11 Node: 12 } Ring { Node: 19 Node: 20 Node: 21 } } }"); + CheckStateStorage2(GenerateDCStateStorage(3, 3, 3, {}, {}, {}, 9, 2, 1),"{ RingGroups { NToSelect: 9 " + "Ring { Node: 1 Node: 2 } Ring { Node: 4 Node: 5 } Ring { Node: 7 Node: 8 } " + "Ring { Node: 10 Node: 11 } Ring { Node: 13 Node: 14 } Ring { Node: 16 Node: 17 } " + "Ring { Node: 19 Node: 20 } Ring { Node: 22 Node: 23 } Ring { Node: 25 Node: 26 } } }"); + } } } } diff --git a/ydb/core/cms/config.h b/ydb/core/cms/config.h index 3cfa9825e4e2..2d521a2030e5 100644 --- a/ydb/core/cms/config.h +++ b/ydb/core/cms/config.h @@ -21,6 +21,8 @@ struct TCmsSentinelConfig { TDuration WaitForConfigStep; TDuration RelaxTime; bool PileupReplicas; + ui32 OverrideReplicasInRingCount; + ui32 OverrideRingsCount; void Serialize(NKikimrCms::TCmsConfig::TSentinelConfig::TStateStorageSelfHealConfig &config) const { config.SetEnable(Enable); @@ -30,6 +32,8 @@ struct TCmsSentinelConfig { config.SetWaitForConfigStep(WaitForConfigStep.GetValue()); config.SetRelaxTime(RelaxTime.GetValue()); config.SetPileupReplicas(PileupReplicas); + config.SetOverrideReplicasInRingCount(OverrideReplicasInRingCount); + config.SetOverrideRingsCount(OverrideRingsCount); } void Deserialize(const NKikimrCms::TCmsConfig::TSentinelConfig::TStateStorageSelfHealConfig &config) { @@ -40,6 +44,8 @@ struct TCmsSentinelConfig { WaitForConfigStep = TDuration::MicroSeconds(config.GetWaitForConfigStep()); RelaxTime = TDuration::MicroSeconds(config.GetRelaxTime()); PileupReplicas = config.GetPileupReplicas(); + OverrideReplicasInRingCount = config.GetOverrideReplicasInRingCount(); + OverrideRingsCount = config.GetOverrideRingsCount(); } }; diff --git a/ydb/core/cms/sentinel.cpp b/ydb/core/cms/sentinel.cpp index 1e42871ebafd..02a48b51566a 100644 --- a/ydb/core/cms/sentinel.cpp +++ b/ydb/core/cms/sentinel.cpp @@ -1151,6 +1151,8 @@ class TSentinel: public TActorBootstrapped { updateRequest->SetWaitForConfigStep(Config.StateStorageSelfHealConfig.WaitForConfigStep.GetValue() / 1000000); // milliseconds -> seconds updateRequest->SetEnableSelfHealStateStorage(Config.StateStorageSelfHealConfig.Enable); updateRequest->SetPileupReplicas(Config.StateStorageSelfHealConfig.PileupReplicas); + updateRequest->SetOverrideReplicasInRingCount(Config.StateStorageSelfHealConfig.OverrideReplicasInRingCount); + updateRequest->SetOverrideRingsCount(Config.StateStorageSelfHealConfig.OverrideRingsCount); for (auto& [nodeId, node] : SentinelState->Nodes) { SentinelState->NeedSelfHealStateStorage |= node.Compute(); auto* nodeState = updateRequest->AddNodesState(); diff --git a/ydb/core/protos/blobstorage_distributed_config.proto b/ydb/core/protos/blobstorage_distributed_config.proto index 5748a2c916a7..296a57c76009 100644 --- a/ydb/core/protos/blobstorage_distributed_config.proto +++ b/ydb/core/protos/blobstorage_distributed_config.proto @@ -225,12 +225,16 @@ message TEvNodeConfigInvokeOnRoot { optional bool Recommended = 1; optional bool PileupReplicas = 2; optional bool NodesState = 3; + optional uint32 OverrideReplicasInRingCount = 4; + optional uint32 OverrideRingsCount = 5; } message TSelfHealStateStorage { optional uint32 WaitForConfigStep = 1; optional bool ForceHeal = 2; optional bool PileupReplicas = 3; + optional uint32 OverrideReplicasInRingCount = 4; + optional uint32 OverrideRingsCount = 5; } message TSelfHealNodesStateUpdate { @@ -242,6 +246,8 @@ message TEvNodeConfigInvokeOnRoot { optional uint32 WaitForConfigStep = 2; repeated TNodeState NodesState = 3; optional bool PileupReplicas = 4; + optional uint32 OverrideReplicasInRingCount = 5; + optional uint32 OverrideRingsCount = 6; } message TAdvanceGeneration diff --git a/ydb/core/protos/cms.proto b/ydb/core/protos/cms.proto index 17caf4b7872b..78613b0839cd 100644 --- a/ydb/core/protos/cms.proto +++ b/ydb/core/protos/cms.proto @@ -439,6 +439,8 @@ message TCmsConfig { optional uint32 WaitForConfigStep = 5 [default = 60000000]; optional uint32 RelaxTime = 6 [default = 600000000]; optional bool PileupReplicas = 7 [default = false]; + optional uint32 OverrideReplicasInRingCount = 8 [default = 0]; + optional uint32 OverrideRingsCount = 9 [default = 0]; } message TStateLimit { diff --git a/ydb/tests/functional/config/test_distconf_sentinel_node_status.py b/ydb/tests/functional/config/test_distconf_sentinel_node_status.py index 97e5e8e12870..f5836b5d11d5 100644 --- a/ydb/tests/functional/config/test_distconf_sentinel_node_status.py +++ b/ydb/tests/functional/config/test_distconf_sentinel_node_status.py @@ -37,6 +37,8 @@ class KiKiMRDistConfNodeStatusTest(object): pileup_replicas = False state_storage_rings = None n_to_select = None + override_rings_count = 0 + override_replicas_in_ring_count = 0 metadata_section = { "kind": "MainConfig", "version": 0, @@ -53,7 +55,9 @@ def setup_class(cls): "node_bad_state_limit": 3, "wait_for_config_step": 1000000, "relax_time": 10000000, - "pileup_replicas": cls.pileup_replicas + "pileup_replicas": cls.pileup_replicas, + "override_rings_count": cls.override_rings_count, + "override_replicas_in_ring_count": cls.override_replicas_in_ring_count }, "default_state_limit": 2, "update_config_interval": 2000000, @@ -246,3 +250,17 @@ def do_test(self, configName): assert_eq(rgSS["Ring"], rgSSB2["Ring"]) assert_eq(rgSS["Ring"], rgSB2["Ring"]) assert_ne(rgSSB["Ring"], rgSSB2["Ring"]) + + +class TestKiKiMRDistConfSelfHealOverrides(KiKiMRDistConfNodeStatusTest): + erasure = Erasure.MIRROR_3_DC + nodes_count = 12 + override_replicas_in_ring_count = 2 + override_rings_count = 3 + + def do_test(self, configName): + time.sleep(25) + rg2 = get_ring_group(self.do_request_config(), configName) + assert_eq(rg2["NToSelect"], 3) + assert_eq(len(rg2["Ring"]), 3) + assert_eq(len(rg2["Ring"][0]["Node"]), 2)