Skip to content

Commit 546e38c

Browse files
committed
Auto sync members in v3store is IsLearner differs between v2 and v3 store
Signed-off-by: Benjamin Wang <benjamin.ahrtr@gmail.com>
1 parent abaa1f0 commit 546e38c

File tree

10 files changed

+464
-16
lines changed

10 files changed

+464
-16
lines changed

server/etcdserver/api/membership/cluster.go

Lines changed: 57 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -524,26 +524,72 @@ func (c *RaftCluster) PromoteMember(id types.ID, shouldApplyV3 ShouldApplyV3) {
524524
isLearner.Set(0)
525525
}
526526

527-
if c.be != nil && shouldApplyV3 {
528-
c.members[id].RaftAttributes.IsLearner = false
529-
c.updateMembershipMetric(id, true)
530-
c.be.MustSaveMemberToBackend(c.members[id])
527+
if c.be != nil {
528+
m := c.members[id]
529+
if shouldApplyV3 {
530+
m.RaftAttributes.IsLearner = false
531+
c.updateMembershipMetric(id, true)
532+
c.be.MustSaveMemberToBackend(m)
531533

532-
c.lg.Info(
533-
"promote member",
534-
zap.String("cluster-id", c.cid.String()),
535-
zap.String("local-member-id", c.localID.String()),
536-
zap.String("promoted-member-id", id.String()),
537-
)
534+
c.lg.Info(
535+
"promote member",
536+
zap.String("cluster-id", c.cid.String()),
537+
zap.String("local-member-id", c.localID.String()),
538+
zap.String("promoted-member-id", id.String()),
539+
)
540+
} else {
541+
// Workaround the issues which have already been affected by
542+
// https://github.com/etcd-io/etcd/issues/19557. The learner
543+
// promotion request had been applied to v3store, but not saved
544+
// to v2snapshot yet when in 3.5. Once upgrading to 3.6, the
545+
// patch here ensure the issue can be automatically fixed.
546+
if m.IsLearner {
547+
m.RaftAttributes.IsLearner = false
548+
c.lg.Info("Forcibly apply member promotion request", zap.String("member", fmt.Sprintf("%+v", *m)))
549+
c.be.MustSaveMemberToBackend(m)
550+
} else {
551+
c.lg.Info(
552+
"ignore already promoted member",
553+
zap.String("cluster-id", c.cid.String()),
554+
zap.String("local-member-id", c.localID.String()),
555+
)
556+
}
557+
}
538558
} else {
539559
c.lg.Info(
540-
"ignore already promoted member",
560+
"ignore already promoted member due to backend being nil",
541561
zap.String("cluster-id", c.cid.String()),
542562
zap.String("local-member-id", c.localID.String()),
543563
)
544564
}
545565
}
546566

567+
// SyncLearnerPromotionIfNeeded provides a workaround solution to fix the issues
568+
// which have already been affected by https://github.com/etcd-io/etcd/issues/19557.
569+
func (c *RaftCluster) SyncLearnerPromotionIfNeeded() {
570+
c.Lock()
571+
defer c.Unlock()
572+
573+
v2Members, _ := membersFromStore(c.lg, c.v2store)
574+
v3Members, _ := c.be.MustReadMembersFromBackend()
575+
576+
for id, v3Member := range v3Members {
577+
v2Member, ok := v2Members[id]
578+
if !ok {
579+
// This isn't an error. The conf change on the member hasn't been saved to the v2 snapshot yet.
580+
c.lg.Info("Detected member only in v3store but missing in v2store", zap.String("member", fmt.Sprintf("%+v", *v3Member)))
581+
continue
582+
}
583+
584+
if !v2Member.IsLearner && v3Member.IsLearner {
585+
syncedV3Member := v3Member.Clone()
586+
syncedV3Member.IsLearner = false
587+
c.lg.Warn("Syncing member in v3store", zap.String("member", fmt.Sprintf("%+v", *syncedV3Member)))
588+
c.be.MustHackySaveMemberToBackend(syncedV3Member)
589+
}
590+
}
591+
}
592+
547593
func (c *RaftCluster) UpdateRaftAttributes(id types.ID, raftAttr RaftAttributes, shouldApplyV3 ShouldApplyV3) {
548594
c.Lock()
549595
defer c.Unlock()

server/etcdserver/api/membership/cluster_test.go

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import (
2323
"testing"
2424

2525
"github.com/stretchr/testify/assert"
26+
"github.com/stretchr/testify/require"
2627
"go.uber.org/zap/zaptest"
2728

2829
"go.etcd.io/etcd/client/pkg/v3/testutil"
@@ -1048,3 +1049,128 @@ func TestClusterStore(t *testing.T) {
10481049
})
10491050
}
10501051
}
1052+
1053+
func TestSyncLearnerPromotion(t *testing.T) {
1054+
tcs := []struct {
1055+
name string
1056+
1057+
storeV2Members []*Member
1058+
backendMembers []*Member
1059+
1060+
expectV3Members map[types.ID]*Member
1061+
}{
1062+
{
1063+
name: "v3store should keep unchanged if the member doesn't exist in v2store",
1064+
storeV2Members: []*Member{
1065+
{
1066+
ID: 100,
1067+
RaftAttributes: RaftAttributes{
1068+
PeerURLs: []string{"http://10.0.0.10:2380"},
1069+
IsLearner: false,
1070+
},
1071+
},
1072+
},
1073+
backendMembers: []*Member{
1074+
{
1075+
ID: 200,
1076+
RaftAttributes: RaftAttributes{
1077+
PeerURLs: []string{"http://10.0.0.9:2380"},
1078+
IsLearner: true,
1079+
},
1080+
},
1081+
},
1082+
expectV3Members: map[types.ID]*Member{
1083+
200: {
1084+
ID: 200,
1085+
RaftAttributes: RaftAttributes{
1086+
PeerURLs: []string{"http://10.0.0.9:2380"},
1087+
IsLearner: true,
1088+
},
1089+
},
1090+
},
1091+
},
1092+
{
1093+
name: "v3store should keep unchanged if v2store.IsLearner is true",
1094+
storeV2Members: []*Member{
1095+
{
1096+
ID: 100,
1097+
RaftAttributes: RaftAttributes{
1098+
PeerURLs: []string{"http://10.0.0.9:2380"},
1099+
IsLearner: true,
1100+
},
1101+
},
1102+
},
1103+
backendMembers: []*Member{
1104+
{
1105+
ID: 100,
1106+
RaftAttributes: RaftAttributes{
1107+
PeerURLs: []string{"http://10.0.0.9:2380"},
1108+
IsLearner: true,
1109+
},
1110+
},
1111+
},
1112+
expectV3Members: map[types.ID]*Member{
1113+
100: {
1114+
ID: 100,
1115+
RaftAttributes: RaftAttributes{
1116+
PeerURLs: []string{"http://10.0.0.9:2380"},
1117+
IsLearner: true,
1118+
},
1119+
},
1120+
},
1121+
},
1122+
{
1123+
name: "v3store should be updated if v2.IsLearner is false and v3store.IsLearner is true",
1124+
storeV2Members: []*Member{
1125+
{
1126+
ID: 100,
1127+
RaftAttributes: RaftAttributes{
1128+
PeerURLs: []string{"http://10.0.0.9:2380"},
1129+
IsLearner: false,
1130+
},
1131+
},
1132+
},
1133+
backendMembers: []*Member{
1134+
{
1135+
ID: 100,
1136+
RaftAttributes: RaftAttributes{
1137+
PeerURLs: []string{"http://10.0.0.9:2380"},
1138+
IsLearner: true,
1139+
},
1140+
},
1141+
},
1142+
expectV3Members: map[types.ID]*Member{
1143+
100: {
1144+
ID: 100,
1145+
RaftAttributes: RaftAttributes{
1146+
PeerURLs: []string{"http://10.0.0.9:2380"},
1147+
IsLearner: false,
1148+
},
1149+
},
1150+
},
1151+
},
1152+
}
1153+
for _, tc := range tcs {
1154+
t.Run(tc.name, func(t *testing.T) {
1155+
lg := zaptest.NewLogger(t)
1156+
be := newMembershipBackend()
1157+
1158+
for _, m := range tc.backendMembers {
1159+
be.MustSaveMemberToBackend(m)
1160+
}
1161+
1162+
st := v2store.New()
1163+
for _, m := range tc.storeV2Members {
1164+
mustSaveMemberToStore(lg, st, m)
1165+
}
1166+
1167+
cluster := NewCluster(lg)
1168+
cluster.SetBackend(be)
1169+
cluster.SetStore(st)
1170+
1171+
cluster.SyncLearnerPromotionIfNeeded()
1172+
v3Members, _ := cluster.be.MustReadMembersFromBackend()
1173+
require.Equal(t, tc.expectV3Members, v3Members)
1174+
})
1175+
}
1176+
}

server/etcdserver/api/membership/membership_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,10 @@ func (b *backendMock) MustSaveMemberToBackend(m *Member) {
8585
b.members[m.ID] = m
8686
}
8787

88+
func (b *backendMock) MustHackySaveMemberToBackend(m *Member) {
89+
b.members[m.ID] = m
90+
}
91+
8892
func (b *backendMock) TrimMembershipFromBackend() error {
8993
b.members = make(map[types.ID]*Member)
9094
b.removed = make(map[types.ID]bool)

server/etcdserver/api/membership/store.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ type ClusterVersionBackend interface {
3939
type MemberBackend interface {
4040
MustReadMembersFromBackend() (map[types.ID]*Member, map[types.ID]bool)
4141
MustSaveMemberToBackend(*Member)
42+
MustHackySaveMemberToBackend(*Member)
4243
TrimMembershipFromBackend() error
4344
MustDeleteMemberFromBackend(types.ID)
4445
}

server/etcdserver/bootstrap.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,11 @@ func (c *bootstrappedCluster) Finalize(cfg config.ServerConfig, s *bootstrappedS
448448
}
449449
c.cl.SetStore(s.st)
450450
c.cl.SetBackend(schema.NewMembershipBackend(cfg.Logger, s.backend.be))
451+
452+
// Workaround the issues which have already been affected
453+
// by https://github.com/etcd-io/etcd/issues/19557.
454+
c.cl.SyncLearnerPromotionIfNeeded()
455+
451456
if s.wal.haveWAL {
452457
c.cl.Recover(api.UpdateCapability)
453458
if c.databaseFileMissing(s) {

server/storage/backend/verify.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,14 @@ func verifyLockEnabled() bool {
6262

6363
func insideApply() bool {
6464
stackTraceStr := string(debug.Stack())
65+
66+
// Exclude the case of `MustHackySaveMemberToBackend`, which is
67+
// used to workaround the situations which are already affected
68+
// by https://github.com/etcd-io/etcd/issues/19557.
69+
if strings.Contains(stackTraceStr, "MustHackySaveMemberToBackend") {
70+
return false
71+
}
72+
6573
return strings.Contains(stackTraceStr, ".applyEntries")
6674
}
6775

server/storage/schema/membership.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,22 @@ func (s *membershipBackend) MustSaveMemberToBackend(m *membership.Member) {
5757
tx.UnsafePut(Members, mkey, mvalue)
5858
}
5959

60+
// MustHackySaveMemberToBackend updates the member in a hacky way.
61+
// It's only used to fix the issues which are already affected by
62+
// https://github.com/etcd-io/etcd/issues/19557.
63+
func (s *membershipBackend) MustHackySaveMemberToBackend(m *membership.Member) {
64+
mkey := BackendMemberKey(m.ID)
65+
mvalue, err := json.Marshal(m)
66+
if err != nil {
67+
s.lg.Panic("failed to marshal member", zap.Error(err))
68+
}
69+
70+
tx := s.be.BatchTx()
71+
tx.LockOutsideApply()
72+
defer tx.Unlock()
73+
tx.UnsafePut(Members, mkey, mvalue)
74+
}
75+
6076
// TrimClusterFromBackend removes all information about cluster (versions)
6177
// from the v3 backend.
6278
func (s *membershipBackend) TrimClusterFromBackend() error {

0 commit comments

Comments
 (0)