Skip to content

Commit

Permalink
Address PR comments
Browse files Browse the repository at this point in the history
Signed-off-by: Eduardo J. Ortega U <[email protected]>
  • Loading branch information
ejortegau committed Nov 4, 2024
1 parent 92a11e0 commit 6db5006
Show file tree
Hide file tree
Showing 5 changed files with 89 additions and 79 deletions.
21 changes: 7 additions & 14 deletions go/vt/vtctl/reparentutil/emergency_reparenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
// Here we also check for split brain scenarios and check that the selected replica must be more advanced than all the other valid candidates.
// We fail in case there is a split brain detected.
// The validCandidateTablets list is sorted by the replication positions with ties broken by promotion rules.
intermediateSource, validCandidateTablets, err = erp.findMostAdvanced(validCandidates, tabletMap, opts)
intermediateSource, validCandidateTablets, err = erp.findMostAdvanced(validCandidates, tabletMap, stoppedReplicationSnapshot.backingUpTablets, opts)
if err != nil {
return err
}
Expand All @@ -258,7 +258,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
// 2. Remove the tablets with the Must_not promote rule
// 3. Remove cross-cell tablets if PreventCrossCellPromotion is specified
// Our final primary candidate MUST belong to this list of valid candidates
validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, stoppedReplicationSnapshot.backingUpTablets, prevPrimary, opts)
validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, prevPrimary, opts)
if err != nil {
return err
}
Expand Down Expand Up @@ -397,6 +397,7 @@ func (erp *EmergencyReparenter) waitForAllRelayLogsToApply(
func (erp *EmergencyReparenter) findMostAdvanced(
validCandidates map[string]replication.Position,
tabletMap map[string]*topo.TabletInfo,
backingUpTablets map[string]bool,
opts EmergencyReparentOptions,
) (*topodatapb.Tablet, []*topodatapb.Tablet, error) {
erp.logger.Infof("started finding the intermediate source")
Expand All @@ -407,7 +408,7 @@ func (erp *EmergencyReparenter) findMostAdvanced(
}

// sort the tablets for finding the best intermediate source in ERS
err = sortTabletsForReparent(validTablets, tabletPositions, nil, opts.durability)
err = sortTabletsForReparent(validTablets, tabletPositions, nil, backingUpTablets, opts.durability)
if err != nil {
return nil, nil, err
}
Expand Down Expand Up @@ -738,9 +739,8 @@ func (erp *EmergencyReparenter) identifyPrimaryCandidate(
}

// filterValidCandidates filters valid tablets, keeping only the ones which can successfully be promoted without any constraint failures and can make forward progress on being promoted
func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, tabletsBackingUp map[string]bool, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
var restrictedValidTablets []*topodatapb.Tablet
var notPreferredValidTablets []*topodatapb.Tablet
for _, tablet := range validTablets {
tabletAliasStr := topoproto.TabletAliasString(tablet.Alias)
// Remove tablets which have MustNot promote rule since they must never be promoted
Expand All @@ -767,16 +767,9 @@ func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb
}
continue
}
// Put candidates that are running a backup in a separate list
backingUp, ok := tabletsBackingUp[tabletAliasStr]
if ok && backingUp {
erp.logger.Infof("Setting %s in list of valid candidates taking a backup", tabletAliasStr)
notPreferredValidTablets = append(notPreferredValidTablets, tablet)
} else {
restrictedValidTablets = append(restrictedValidTablets, tablet)
}
restrictedValidTablets = append(restrictedValidTablets, tablet)
}
return append(restrictedValidTablets, notPreferredValidTablets...), nil
return restrictedValidTablets, nil
}

// findErrantGTIDs tries to find errant GTIDs for the valid candidates and returns the updated list of valid candidates.
Expand Down
42 changes: 25 additions & 17 deletions go/vt/vtctl/reparentutil/emergency_reparenter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2813,6 +2813,7 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) {
name string
validCandidates map[string]replication.Position
tabletMap map[string]*topo.TabletInfo
backingUpTablets map[string]bool
emergencyReparentOps EmergencyReparentOptions
result *topodatapb.Tablet
err string
Expand Down Expand Up @@ -2859,6 +2860,7 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
result: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Expand Down Expand Up @@ -2909,6 +2911,7 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
result: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Expand Down Expand Up @@ -2963,6 +2966,7 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
result: &topodatapb.Tablet{
Alias: &topodatapb.TabletAlias{
Cell: "zone1",
Expand Down Expand Up @@ -3017,7 +3021,8 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) {
},
},
},
err: "split brain detected between servers",
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
err: "split brain detected between servers",
},
}

Expand All @@ -3028,7 +3033,7 @@ func TestEmergencyReparenter_findMostAdvanced(t *testing.T) {
erp := NewEmergencyReparenter(nil, nil, logutil.NewMemoryLogger())

test.emergencyReparentOps.durability = durability
winningTablet, _, err := erp.findMostAdvanced(test.validCandidates, test.tabletMap, test.emergencyReparentOps)
winningTablet, _, err := erp.findMostAdvanced(test.validCandidates, test.tabletMap, test.backingUpTablets, test.emergencyReparentOps)
if test.err != "" {
assert.Error(t, err)
assert.Contains(t, err.Error(), test.err)
Expand Down Expand Up @@ -4468,11 +4473,6 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
topoproto.TabletAliasString(rdonlyTablet.Alias): false, topoproto.TabletAliasString(replicaCrossCellTablet.Alias): false,
topoproto.TabletAliasString(rdonlyCrossCellTablet.Alias): false,
}
replicaBackingUp := map[string]bool{
topoproto.TabletAliasString(primaryTablet.Alias): false, topoproto.TabletAliasString(replicaTablet.Alias): true,
topoproto.TabletAliasString(rdonlyTablet.Alias): false, topoproto.TabletAliasString(replicaCrossCellTablet.Alias): false,
topoproto.TabletAliasString(rdonlyCrossCellTablet.Alias): false,
}
tests := []struct {
name string
durability string
Expand All @@ -4491,13 +4491,6 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
tabletsReachable: allTablets,
tabletsBackingUp: noTabletsBackingUp,
filteredTablets: []*topodatapb.Tablet{primaryTablet, replicaTablet, replicaCrossCellTablet},
}, {
name: "host backing up must be last in the list",
durability: "none",
validTablets: allTablets,
tabletsReachable: allTablets,
tabletsBackingUp: replicaBackingUp,
filteredTablets: []*topodatapb.Tablet{primaryTablet, replicaCrossCellTablet, replicaTablet},
}, {
name: "filter cross cell",
durability: "none",
Expand Down Expand Up @@ -4575,7 +4568,7 @@ func TestEmergencyReparenter_filterValidCandidates(t *testing.T) {
tt.opts.durability = durability
logger := logutil.NewMemoryLogger()
erp := NewEmergencyReparenter(nil, nil, logger)
tabletList, err := erp.filterValidCandidates(tt.validTablets, tt.tabletsReachable, tt.tabletsBackingUp, tt.prevPrimary, tt.opts)
tabletList, err := erp.filterValidCandidates(tt.validTablets, tt.tabletsReachable, tt.prevPrimary, tt.opts)
if tt.errShouldContain != "" {
require.Error(t, err)
require.Contains(t, err.Error(), tt.errShouldContain)
Expand Down Expand Up @@ -4620,6 +4613,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
statusMap map[string]*replicationdatapb.StopReplicationStatus
primaryStatusMap map[string]*replicationdatapb.PrimaryStatus
tabletMap map[string]*topo.TabletInfo
backingUpTablets map[string]bool
wantedCandidates []string
wantMostAdvancedPossible []string
wantErr string
Expand Down Expand Up @@ -4685,6 +4679,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000102", "zone1-0000000103", "zone1-0000000104"},
wantMostAdvancedPossible: []string{"zone1-0000000102"},
},
Expand Down Expand Up @@ -4749,6 +4744,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000102", "zone1-0000000103", "zone1-0000000104"},
wantMostAdvancedPossible: []string{"zone1-0000000102"},
},
Expand Down Expand Up @@ -4813,6 +4809,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000102", "zone1-0000000103", "zone1-0000000104"},
wantMostAdvancedPossible: []string{"zone1-0000000104"},
},
Expand Down Expand Up @@ -4877,6 +4874,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000102", "zone1-0000000103", "zone1-0000000104"},
wantMostAdvancedPossible: []string{"zone1-0000000102"},
},
Expand Down Expand Up @@ -4941,6 +4939,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000102", "zone1-0000000103", "zone1-0000000104"},
wantMostAdvancedPossible: []string{"zone1-0000000102", "zone1-0000000103"},
},
Expand Down Expand Up @@ -5004,6 +5003,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
Position: getRelayLogPosition("", "1-31", "1-50"),
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000103", "zone1-0000000104"},
wantMostAdvancedPossible: []string{"zone1-0000000103"},
},
Expand Down Expand Up @@ -5068,6 +5068,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000102", "zone1-0000000103"},
wantMostAdvancedPossible: []string{"zone1-0000000103"},
},
Expand Down Expand Up @@ -5131,6 +5132,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
Position: getRelayLogPosition("", "1-31", "1-50"),
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000103"},
wantMostAdvancedPossible: []string{"zone1-0000000103"},
},
Expand Down Expand Up @@ -5195,6 +5197,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000103", "zone1-0000000104"},
wantMostAdvancedPossible: []string{"zone1-0000000103", "zone1-0000000104"},
},
Expand Down Expand Up @@ -5259,6 +5262,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000103", "zone1-0000000104"},
wantMostAdvancedPossible: []string{"zone1-0000000103"},
},
Expand Down Expand Up @@ -5306,6 +5310,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000103"},
wantMostAdvancedPossible: []string{"zone1-0000000103"},
},
Expand Down Expand Up @@ -5370,6 +5375,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
},
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000104"},
wantMostAdvancedPossible: []string{"zone1-0000000104"},
},
Expand Down Expand Up @@ -5433,6 +5439,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
Position: getRelayLogPosition("", "1-31", "1-50"),
},
},
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantedCandidates: []string{"zone1-0000000103"},
wantMostAdvancedPossible: []string{"zone1-0000000103"},
},
Expand Down Expand Up @@ -5492,7 +5499,8 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
Position: getRelayLogPosition("", "1-31", "1-50"),
},
},
wantErr: "could not read reparent journal information",
backingUpTablets: map[string]bool{"zone1-0000000100": false, "zone1-0000000101": false, "zone1-0000000102": false},
wantErr: "could not read reparent journal information",
},
}
for _, tt := range tests {
Expand All @@ -5519,7 +5527,7 @@ func TestEmergencyReparenterFindErrantGTIDs(t *testing.T) {
dp, err := GetDurabilityPolicy("semi_sync")
require.NoError(t, err)
ers := EmergencyReparenter{logger: logutil.NewCallbackLogger(func(*logutilpb.Event) {})}
winningPrimary, _, err := ers.findMostAdvanced(candidates, tt.tabletMap, EmergencyReparentOptions{durability: dp})
winningPrimary, _, err := ers.findMostAdvanced(candidates, tt.tabletMap, tt.backingUpTablets, EmergencyReparentOptions{durability: dp})
require.NoError(t, err)
require.True(t, slices.Contains(tt.wantMostAdvancedPossible, winningPrimary.Hostname), winningPrimary.Hostname)
})
Expand Down
17 changes: 14 additions & 3 deletions go/vt/vtctl/reparentutil/reparent_sorter.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package reparentutil

import (
"sort"
"vitess.io/vitess/go/vt/topo/topoproto"

"vitess.io/vitess/go/mysql/replication"
"vitess.io/vitess/go/vt/vterrors"
Expand All @@ -32,16 +33,18 @@ type reparentSorter struct {
tablets []*topodatapb.Tablet
positions []replication.Position
innodbBufferPool []int
backingUp map[string]bool
durability Durabler
}

// newReparentSorter creates a new reparentSorter
func newReparentSorter(tablets []*topodatapb.Tablet, positions []replication.Position, innodbBufferPool []int, durability Durabler) *reparentSorter {
func newReparentSorter(tablets []*topodatapb.Tablet, positions []replication.Position, innodbBufferPool []int, backingUp map[string]bool, durability Durabler) *reparentSorter {
return &reparentSorter{
tablets: tablets,
positions: positions,
durability: durability,
innodbBufferPool: innodbBufferPool,
backingUp: backingUp,
}
}

Expand Down Expand Up @@ -71,6 +74,14 @@ func (rs *reparentSorter) Less(i, j int) bool {
return true
}

// We want tablets backing up always at the end of the list, so that's the first thing we check
if !rs.backingUp[topoproto.TabletAliasString(rs.tablets[i].Alias)] && rs.backingUp[topoproto.TabletAliasString(rs.tablets[j].Alias)] {
return true
}
if rs.backingUp[topoproto.TabletAliasString(rs.tablets[i].Alias)] && !rs.backingUp[topoproto.TabletAliasString(rs.tablets[j].Alias)] {
return false
}

if !rs.positions[i].AtLeast(rs.positions[j]) {
// [i] does not have all GTIDs that [j] does
return false
Expand Down Expand Up @@ -100,13 +111,13 @@ func (rs *reparentSorter) Less(i, j int) bool {

// sortTabletsForReparent sorts the tablets, given their positions for emergency reparent shard and planned reparent shard.
// Tablets are sorted first by their replication positions, with ties broken by the promotion rules.
func sortTabletsForReparent(tablets []*topodatapb.Tablet, positions []replication.Position, innodbBufferPool []int, durability Durabler) error {
func sortTabletsForReparent(tablets []*topodatapb.Tablet, positions []replication.Position, innodbBufferPool []int, backingUp map[string]bool, durability Durabler) error {
// throw an error internal error in case of unequal number of tablets and positions
// fail-safe code prevents panic in sorting in case the lengths are unequal
if len(tablets) != len(positions) {
return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unequal number of tablets and positions")
}

sort.Sort(newReparentSorter(tablets, positions, innodbBufferPool, durability))
sort.Sort(newReparentSorter(tablets, positions, innodbBufferPool, backingUp, durability))
return nil
}
Loading

0 comments on commit 6db5006

Please sign in to comment.