Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Release-7.3] Do not select a machine to build team if each SS of the machine has too many ServerTeams #11666

Draft
wants to merge 5 commits into
base: release-7.3
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions fdbclient/ServerKnobs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ void ServerKnobs::initialize(Randomize randomize, ClientKnobs* clientKnobs, IsSi
init( REBALANCE_STORAGE_QUEUE_SHARD_PER_KSEC_MIN, SHARD_MIN_BYTES_PER_KSEC);
init( DD_ENABLE_REBALANCE_STORAGE_QUEUE_WITH_LIGHT_WRITE_SHARD, true ); if ( isSimulated ) DD_ENABLE_REBALANCE_STORAGE_QUEUE_WITH_LIGHT_WRITE_SHARD = deterministicRandom()->coinflip();
init( DD_WAIT_TSS_DATA_MOVE_DELAY, 15.0 ); if (isSimulated) DD_WAIT_TSS_DATA_MOVE_DELAY = deterministicRandom()->randomInt(5, 30);
init( BUDGET_CHECK_SERVER_CONTEXT_WHEN_BUILD_TEAM, 0 ); if (isSimulated) BUDGET_CHECK_SERVER_CONTEXT_WHEN_BUILD_TEAM = deterministicRandom()->randomInt(0, 10);

// Large teams are disabled when SHARD_ENCODE_LOCATION_METADATA is enabled
init( DD_MAX_SHARDS_ON_LARGE_TEAMS, 100 ); if( randomize && BUGGIFY ) DD_MAX_SHARDS_ON_LARGE_TEAMS = deterministicRandom()->randomInt(0, 3);
Expand Down
2 changes: 2 additions & 0 deletions fdbclient/include/fdbclient/ServerKnobs.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,8 @@ class ServerKnobs : public KnobsImpl<ServerKnobs> {
bool DD_ENABLE_REBALANCE_STORAGE_QUEUE_WITH_LIGHT_WRITE_SHARD; // Enable to allow storage queue rebalancer to move
// light-traffic shards out of the overloading server
double DD_WAIT_TSS_DATA_MOVE_DELAY;
int BUDGET_CHECK_SERVER_CONTEXT_WHEN_BUILD_TEAM; // Budget of team-building attempts during which server context (whether a
// server already has enough server teams) is checked; 0 disables the check

// TeamRemover to remove redundant teams
bool TR_FLAG_DISABLE_MACHINE_TEAM_REMOVER; // disable the machineTeamRemover actor
Expand Down
100 changes: 78 additions & 22 deletions fdbserver/DDTeamCollection.actor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
#include "fdbserver/DDTeamCollection.h"
#include "fdbserver/ExclusionTracker.actor.h"
#include "fdbserver/DataDistributionTeam.h"
#include "fdbserver/Knobs.h"
#include "flow/Error.h"
#include "flow/IRandom.h"
#include "flow/Trace.h"
#include "flow/network.h"
Expand Down Expand Up @@ -1226,8 +1228,7 @@ class DDTeamCollectionImpl {
state bool hasWrongDC = !self->isCorrectDC(*server);
state bool hasInvalidLocality =
!self->isValidLocality(self->configuration.storagePolicy, server->getLastKnownInterface().locality);
state int targetTeamNumPerServer =
(SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (self->configuration.storageTeamSize + 1)) / 2;
state int targetTeamNumPerServer = self->getTargetTeamNumPerServer();
state Future<Void> storageMetadataTracker = self->updateStorageMetadata(server);
try {
loop {
Expand Down Expand Up @@ -5080,13 +5081,53 @@ Reference<TCServerInfo> DDTeamCollection::findOneLeastUsedServer() const {
}
}

Reference<TCMachineTeamInfo> DDTeamCollection::findOneRandomMachineTeam(TCServerInfo const& chosenServer) const {
int DDTeamCollection::getTargetTeamNumPerServer() const {
	// The target is the average of the desired and the ideal number of teams per server:
	//   (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER + ideal_num_of_teams_per_server) / 2
	// where ideal_num_of_teams_per_server is (#teams * storageTeamSize) / #servers, i.e.
	//   (#servers * DESIRED_TEAMS_PER_SERVER * storageTeamSize) / #servers
	// which simplifies to DESIRED_TEAMS_PER_SERVER * storageTeamSize.
	const auto desiredTeamsPerServer = SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER;
	const auto teamSizePlusOne = configuration.storageTeamSize + 1;
	return (desiredTeamsPerServer * teamSizePlusOne) / 2;
}

bool DDTeamCollection::isServerAvailableToBuildMoreServerTeam(const TCServerInfo& server) const {
	// Callers must only use this check when the context-check knob is enabled.
	ASSERT(SERVER_KNOBS->BUDGET_CHECK_SERVER_CONTEXT_WHEN_BUILD_TEAM > 0);

	// A server that already has at least the target number of server teams has no room for more.
	if (server.getTeams().size() >= getTargetTeamNumPerServer()) {
		return false;
	}

	// Otherwise the server is available exactly when it is not unhealthy.
	return !server_status.get(server.getId()).isUnhealthy();
}

bool DDTeamCollection::isMachineAvailableToBuildMoreServerTeam(const TCMachineInfo& machine) const {
	// Callers must only use this check when the context-check knob is enabled.
	ASSERT(SERVER_KNOBS->BUDGET_CHECK_SERVER_CONTEXT_WHEN_BUILD_TEAM > 0);

	// One available server is enough to make the whole machine available.
	for (const auto& candidate : machine.serversOnMachine) {
		if (!isServerAvailableToBuildMoreServerTeam(*candidate)) {
			continue;
		}
		return true;
	}

	// No server on this machine can join another server team.
	return false;
}

bool DDTeamCollection::isMachineTeamAvailableToBuildMoreServerTeam(const TCMachineTeamInfo& machineTeam) const {
	// This check takes effect only if BUDGET_CHECK_SERVER_CONTEXT_WHEN_BUILD_TEAM is set > 0.
	ASSERT(SERVER_KNOBS->BUDGET_CHECK_SERVER_CONTEXT_WHEN_BUILD_TEAM > 0);

	// Every machine in the team must be available; a single unavailable machine
	// makes the whole machine team unavailable.
	for (const auto& member : machineTeam.getMachines()) {
		if (isMachineAvailableToBuildMoreServerTeam(*member)) {
			continue;
		}
		return false;
	}
	return true;
}

Reference<TCMachineTeamInfo> DDTeamCollection::findOneRandomMachineTeam(TCServerInfo const& chosenServer,
bool considerContext) const {
if (!chosenServer.machine->machineTeams.empty()) {
std::vector<Reference<TCMachineTeamInfo>> healthyMachineTeamsForChosenServer;
for (auto& mt : chosenServer.machine->machineTeams) {
if (isMachineTeamHealthy(*mt)) {
healthyMachineTeamsForChosenServer.push_back(mt);
if (!isMachineTeamHealthy(*mt)) {
continue;
}
if (considerContext && !isMachineTeamAvailableToBuildMoreServerTeam(*mt)) {
continue;
}
healthyMachineTeamsForChosenServer.push_back(mt);
}
if (!healthyMachineTeamsForChosenServer.empty()) {
return deterministicRandom()->randomChoice(healthyMachineTeamsForChosenServer);
Expand Down Expand Up @@ -5217,8 +5258,7 @@ std::pair<Reference<TCMachineTeamInfo>, int> DDTeamCollection::getMachineTeamWit
std::pair<Reference<TCMachineTeamInfo>, int> DDTeamCollection::getMachineTeamWithMostMachineTeams() const {
Reference<TCMachineTeamInfo> retMT;
int maxNumMachineTeams = 0;
int targetMachineTeamNumPerMachine =
(SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (configuration.storageTeamSize + 1)) / 2;
int targetMachineTeamNumPerMachine = getTargetTeamNumPerServer();

for (auto& mt : machineTeams) {
// The representative team number for the machine team mt is
Expand All @@ -5240,7 +5280,7 @@ std::pair<Reference<TCMachineTeamInfo>, int> DDTeamCollection::getMachineTeamWit
std::pair<Reference<TCTeamInfo>, int> DDTeamCollection::getServerTeamWithMostProcessTeams() const {
Reference<TCTeamInfo> retST;
int maxNumProcessTeams = 0;
int targetTeamNumPerServer = (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (configuration.storageTeamSize + 1)) / 2;
int targetTeamNumPerServer = getTargetTeamNumPerServer();

for (auto& t : teams) {
// The minimum number of teams of a server in a team is the representative team number for the team t
Expand Down Expand Up @@ -5275,10 +5315,9 @@ int DDTeamCollection::getHealthyMachineTeamCount() const {
bool DDTeamCollection::notEnoughMachineTeamsForAMachine() const {
// If we want to remove the machine team with most machine teams, we use the same logic as
// notEnoughTeamsForAServer
int targetMachineTeamNumPerMachine =
SERVER_KNOBS->TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS
? (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (configuration.storageTeamSize + 1)) / 2
: SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER;
int targetMachineTeamNumPerMachine = SERVER_KNOBS->TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS
? getTargetTeamNumPerServer()
: SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER;
for (auto& [_, machine] : machine_info) {
// If SERVER_KNOBS->TR_FLAG_REMOVE_MT_WITH_MOST_TEAMS is false,
// The desired machine team number is not the same with the desired server team number
Expand All @@ -5296,11 +5335,7 @@ bool DDTeamCollection::notEnoughTeamsForAServer() const {
// We build more teams than we finally want so that we can use serverTeamRemover() actor to remove the teams
// whose member belong to too many teams. This allows us to get a more balanced number of teams per server.
// We want to ensure every server has targetTeamNumPerServer teams.
// The numTeamsPerServerFactor is calculated as
// (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER + ideal_num_of_teams_per_server) / 2
// ideal_num_of_teams_per_server is (#teams * storageTeamSize) / #servers, which is
// (#servers * DESIRED_TEAMS_PER_SERVER * storageTeamSize) / #servers.
int targetTeamNumPerServer = (SERVER_KNOBS->DESIRED_TEAMS_PER_SERVER * (configuration.storageTeamSize + 1)) / 2;
int targetTeamNumPerServer = getTargetTeamNumPerServer();
ASSERT_GT(targetTeamNumPerServer, 0);
for (auto& [serverID, server] : server_info) {
if (server->getTeams().size() < targetTeamNumPerServer && !server_status.get(serverID).isUnhealthy()) {
Expand Down Expand Up @@ -5370,6 +5405,8 @@ int DDTeamCollection::addTeamsBestOf(int teamsToBuild, int desiredTeams, int max
int bestScore = std::numeric_limits<int>::max();
int maxAttempts = SERVER_KNOBS->BEST_OF_AMT; // BEST_OF_AMT = 4
bool earlyQuitBuild = false;
int considerContextBudget = SERVER_KNOBS->BUDGET_CHECK_SERVER_CONTEXT_WHEN_BUILD_TEAM;
// While considerContextBudget > 0, attempts check whether a server's team count has reached the target team number per server (see getTargetTeamNumPerServer())
for (int i = 0; i < maxAttempts && i < 100; ++i) {
// Step 1: Choose 1 least used server and then choose 1 least used machine team from the server
Reference<TCServerInfo> chosenServer = findOneLeastUsedServer();
Expand All @@ -5382,20 +5419,29 @@ int DDTeamCollection::addTeamsBestOf(int teamsToBuild, int desiredTeams, int max
// instead of choosing the least used machine team.
// The correlation happens, for example, when we add two new machines, we may always choose the machine
// team with these two new machines because they are typically less used.
Reference<TCMachineTeamInfo> chosenMachineTeam = findOneRandomMachineTeam(*chosenServer);
Reference<TCMachineTeamInfo> chosenMachineTeam =
findOneRandomMachineTeam(*chosenServer, considerContextBudget > 0);

if (!chosenMachineTeam.isValid()) {
// We may face the situation that temporarily we have no healthy machine.
TraceEvent(SevWarn, "MachineTeamNotFound")
.detail("Primary", primary)
.detail("MachineTeams", machineTeams.size());
if (considerContextBudget > 0) {
// If no machine team can be found while considering server context,
// disable the context check (set considerContextBudget to 0), grant one extra
// attempt (as if this attempt had not considered context), and retry
considerContextBudget = 0;
maxAttempts++;
}
continue; // try randomly to find another least used server
}

// From here, chosenMachineTeam must have a healthy server team
// Step 2: Randomly pick 1 server from each machine in the chosen machine team to form a server team
std::vector<UID> serverTeam;
int chosenServerCount = 0;
Optional<Reference<TCMachineInfo>> unavailableMachine;
for (auto& machine : chosenMachineTeam->getMachines()) {
UID serverID;
if (machine == chosenServer->machine) {
Expand All @@ -5404,13 +5450,22 @@ int DDTeamCollection::addTeamsBestOf(int teamsToBuild, int desiredTeams, int max
serverID = chosenServer->getId();
++chosenServerCount;
} else {
std::vector<Reference<TCServerInfo>> healthyProcesses;
std::vector<Reference<TCServerInfo>> candidateProcesses;
for (auto it : machine->serversOnMachine) {
if (!server_status.get(it->getId()).isUnhealthy()) {
healthyProcesses.push_back(it);
if (server_status.get(it->getId()).isUnhealthy()) {
continue;
}
if (considerContextBudget > 0) {
// When BUDGET_CHECK_SERVER_CONTEXT_WHEN_BUILD_TEAM is set > 0 and the budget remains,
// only pick servers that do not already have the target number of server teams.
if (!isServerAvailableToBuildMoreServerTeam(*it)) {
continue;
}
}
candidateProcesses.push_back(it);
}
serverID = deterministicRandom()->randomChoice(healthyProcesses)->getId();
ASSERT_WE_THINK(candidateProcesses.size() > 0);
serverID = deterministicRandom()->randomChoice(candidateProcesses)->getId();
}
serverTeam.push_back(serverID);
}
Expand All @@ -5422,6 +5477,7 @@ int DDTeamCollection::addTeamsBestOf(int teamsToBuild, int desiredTeams, int max
int overlap = overlappingMembers(serverTeam);
if (overlap == serverTeam.size()) {
maxAttempts += 1;
considerContextBudget--;
continue;
}

Expand Down
15 changes: 14 additions & 1 deletion fdbserver/include/fdbserver/DDTeamCollection.h
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ class DDTeamCollection : public ReferenceCounted<DDTeamCollection> {

// Randomly choose one machine team that has chosenServer and has the correct size
// When configuration is changed, we may have machine teams with old storageTeamSize
Reference<TCMachineTeamInfo> findOneRandomMachineTeam(TCServerInfo const& chosenServer) const;
Reference<TCMachineTeamInfo> findOneRandomMachineTeam(TCServerInfo const& chosenServer, bool considerContext) const;

// Returns a server team from given "servers", empty team if not found.
// When "wantHealthy" is true, only return if the team is healthy.
Expand Down Expand Up @@ -389,10 +389,23 @@ class DDTeamCollection : public ReferenceCounted<DDTeamCollection> {
// Return the healthy server with the least number of correct-size server teams
Reference<TCServerInfo> findOneLeastUsedServer() const;

// Return true if the input server is healthy and has fewer server teams than the target number of server teams
bool isServerAvailableToBuildMoreServerTeam(const TCServerInfo& server) const;

// Return true if any server on the input machine has fewer server teams than the target number of server teams
bool isMachineAvailableToBuildMoreServerTeam(const TCMachineInfo& machine) const;

// Return true if each machine of the input machineTeam has at least one server whose number of server teams is
// smaller than the target number of server teams, i.e., it still has room to join more server teams
bool isMachineTeamAvailableToBuildMoreServerTeam(const TCMachineTeamInfo& machineTeam) const;

// A server team should always come from servers on a machine team
// Check if it is true
bool isOnSameMachineTeam(TCTeamInfo const& team) const;

// Return the targetTeamNumPerServer. For the calculation, see comments of the implementation
int getTargetTeamNumPerServer() const;

int calculateHealthyServerCount() const;

int calculateHealthyMachineCount() const;
Expand Down