Skip to content

Commit 8785910

Browse files
committed
Add e2e downgrade automatic cancellation test
Verify that the downgrade can be cancelled automatically when the downgrade is completed (using `no inflight downgrade job` as the indicator). Please see: #19365 (comment) Reference: #17976 Signed-off-by: Chun-Hung Tseng <henrytseng@google.com>
1 parent 53b88df commit 8785910

File tree

2 files changed

+114
-10
lines changed

2 files changed

+114
-10
lines changed

tests/e2e/cluster_downgrade_test.go

Lines changed: 93 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
127127
})
128128
}
129129
cc := epc.Etcdctl()
130-
t.Logf("Cluster created")
130+
t.Log("Cluster created")
131131
if len(epc.Procs) > 1 {
132132
t.Log("Waiting health interval to required to make membership changes")
133133
time.Sleep(etcdserver.HealthInterval)
@@ -140,7 +140,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
140140
resp, err := cc.MemberAddAsLearner(t.Context(), "fake1", []string{"http://127.0.0.1:1001"})
141141
require.NoError(t, err)
142142
if triggerSnapshot {
143-
t.Logf("Generating snapshot")
143+
t.Log("Generating snapshot")
144144
generateSnapshot(t, snapshotCount, cc)
145145
verifySnapshot(t, epc)
146146
}
@@ -150,7 +150,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
150150
beforeMembers, beforeKV := getMembersAndKeys(t, cc)
151151

152152
if triggerCancellation == cancelRightBeforeEnable {
153-
t.Logf("Cancelling downgrade before enabling")
153+
t.Log("Cancelling downgrade before enabling")
154154
e2e.DowngradeCancel(t, epc)
155155
t.Log("Downgrade cancelled, validating if cluster is in the right state")
156156
e2e.ValidateMemberVersions(t, epc, generateIdenticalVersions(clusterSize, currentVersion))
@@ -163,7 +163,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
163163
e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: true, TargetVersion: lastClusterVersion.String()})
164164

165165
if triggerCancellation == cancelRightAfterEnable {
166-
t.Logf("Cancelling downgrade right after enabling (no node is downgraded yet)")
166+
t.Log("Cancelling downgrade right after enabling (no node is downgraded yet)")
167167
e2e.DowngradeCancel(t, epc)
168168
t.Log("Downgrade cancelled, validating if cluster is in the right state")
169169
e2e.ValidateMemberVersions(t, epc, generateIdenticalVersions(clusterSize, currentVersion))
@@ -200,7 +200,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
200200
resp, err = cc.MemberAddAsLearner(t.Context(), "fake2", []string{"http://127.0.0.1:1002"})
201201
require.NoError(t, err)
202202
if triggerSnapshot {
203-
t.Logf("Generating snapshot")
203+
t.Log("Generating snapshot")
204204
generateSnapshot(t, snapshotCount, cc)
205205
verifySnapshot(t, epc)
206206
}
@@ -228,6 +228,85 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
228228
assert.Equal(t, beforeMembers.Members, afterMembers.Members)
229229
}
230230

231+
func TestDowngradeAutoCancelAfterCompletion(t *testing.T) {
232+
clusterSize := 3
233+
234+
currentEtcdBinary := e2e.BinPath.Etcd
235+
lastReleaseBinary := e2e.BinPath.EtcdLastRelease
236+
if !fileutil.Exist(lastReleaseBinary) {
237+
t.Skipf("%q does not exist", lastReleaseBinary)
238+
}
239+
240+
currentVersion, err := e2e.GetVersionFromBinary(currentEtcdBinary)
241+
require.NoError(t, err)
242+
// wipe any pre-release suffix like -alpha.0 we see commonly in builds
243+
currentVersion.PreRelease = ""
244+
245+
lastVersion, err := e2e.GetVersionFromBinary(lastReleaseBinary)
246+
require.NoError(t, err)
247+
248+
require.Equalf(t, lastVersion.Minor, currentVersion.Minor-1, "unexpected minor version difference")
249+
currentVersionStr := currentVersion.String()
250+
lastVersionStr := lastVersion.String()
251+
252+
lastClusterVersion := semver.New(lastVersionStr)
253+
lastClusterVersion.Patch = 0
254+
255+
e2e.BeforeTest(t)
256+
257+
t.Logf("Create cluster with version %s", currentVersionStr)
258+
var snapshotCount uint64 = 10
259+
epc := newCluster(t, clusterSize, snapshotCount)
260+
for i := 0; i < len(epc.Procs); i++ {
261+
e2e.ValidateVersion(t, epc.Cfg, epc.Procs[i], version.Versions{
262+
Cluster: currentVersionStr,
263+
Server: version.Version,
264+
Storage: currentVersionStr,
265+
})
266+
}
267+
cc := epc.Etcdctl()
268+
t.Log("Cluster created")
269+
if len(epc.Procs) > 1 {
270+
t.Log("Waiting health interval to required to make membership changes")
271+
time.Sleep(etcdserver.HealthInterval)
272+
}
273+
274+
t.Log("Downgrade should be disabled")
275+
e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: false})
276+
277+
t.Log("Adding member to test membership, but a learner avoid breaking quorum")
278+
resp, err := cc.MemberAddAsLearner(context.Background(), "fake1", []string{"http://127.0.0.1:1001"})
279+
require.NoError(t, err)
280+
t.Log("Removing learner to test membership")
281+
_, err = cc.MemberRemove(context.Background(), resp.Member.ID)
282+
require.NoError(t, err)
283+
beforeMembers, beforeKV := getMembersAndKeys(t, cc)
284+
285+
e2e.DowngradeEnable(t, epc, lastVersion)
286+
287+
t.Log("Downgrade should be enabled")
288+
e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: true, TargetVersion: lastClusterVersion.String()})
289+
290+
t.Logf("Starting downgrade process for all nodes to %q", lastVersionStr)
291+
err = e2e.DowngradeUpgradeMembersByID(t, nil, epc, []int{0, 1, 2}, true, currentVersion, lastClusterVersion)
292+
require.NoError(t, err)
293+
294+
afterMembers, afterKV := getMembersAndKeys(t, cc)
295+
assert.Equal(t, beforeKV.Kvs, afterKV.Kvs)
296+
assert.Equal(t, beforeMembers.Members, afterMembers.Members)
297+
298+
if len(epc.Procs) > 1 {
299+
t.Log("Waiting health interval to required to make membership changes")
300+
time.Sleep(etcdserver.HealthInterval)
301+
}
302+
303+
t.Log("Downgrade should be disabled")
304+
e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: false})
305+
306+
t.Log("Downgrade cancellation is automatically cancelled since the cluster has been downgraded, validating if cluster is in the right state")
307+
e2e.ValidateMemberVersions(t, epc, generateIdenticalVersions(clusterSize, lastClusterVersion))
308+
}
309+
231310
func newCluster(t *testing.T, clusterSize int, snapshotCount uint64) *e2e.EtcdProcessCluster {
232311
epc, err := e2e.NewEtcdProcessCluster(t.Context(), t,
233312
e2e.WithClusterSize(clusterSize),
@@ -250,7 +329,7 @@ func generateSnapshot(t *testing.T, snapshotCount uint64, cc *e2e.EtcdctlV3) {
250329
defer cancel()
251330

252331
var i uint64
253-
t.Logf("Adding keys")
332+
t.Log("Adding keys")
254333
for i = 0; i < snapshotCount*3; i++ {
255334
err := cc.Put(ctx, fmt.Sprintf("%d", i), "1", config.PutOptions{})
256335
assert.NoError(t, err)
@@ -264,7 +343,7 @@ func verifySnapshot(t *testing.T, epc *e2e.EtcdProcessCluster) {
264343
_, err := ss.Load()
265344
require.NoError(t, err)
266345
}
267-
t.Logf("All members have a valid snapshot")
346+
t.Log("All members have a valid snapshot")
268347
}
269348

270349
func verifySnapshotMembers(t *testing.T, epc *e2e.EtcdProcessCluster, expectedMembers *clientv3.MemberListResponse) {
@@ -301,11 +380,17 @@ func getMembersAndKeys(t *testing.T, cc *e2e.EtcdctlV3) (*clientv3.MemberListRes
301380
func generateIdenticalVersions(clusterSize int, ver *semver.Version) []*version.Versions {
302381
ret := make([]*version.Versions, clusterSize)
303382

383+
// storage version string is non-empty starting from 3.6.0
384+
storageStr := ver.String()
385+
if ver.LessThan(version.V3_6) {
386+
storageStr = ""
387+
}
388+
304389
for i := range clusterSize {
305390
ret[i] = &version.Versions{
306391
Cluster: ver.String(),
307392
Server: ver.String(),
308-
Storage: ver.String(),
393+
Storage: storageStr,
309394
}
310395
}
311396

tests/framework/e2e/downgrade.go

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
package e2e
1616

1717
import (
18+
"context"
1819
"encoding/json"
1920
"fmt"
2021
"math/rand"
@@ -28,6 +29,7 @@ import (
2829

2930
pb "go.etcd.io/etcd/api/v3/etcdserverpb"
3031
"go.etcd.io/etcd/api/v3/version"
32+
"go.etcd.io/etcd/pkg/v3/expect"
3133
"go.etcd.io/etcd/server/v3/etcdserver"
3234
"go.etcd.io/etcd/tests/v3/framework/testutils"
3335
)
@@ -58,7 +60,7 @@ func DowngradeCancel(t *testing.T, epc *EtcdProcessCluster) {
5860
var err error
5961
testutils.ExecuteWithTimeout(t, 1*time.Minute, func() {
6062
for {
61-
t.Logf("etcdctl downgrade cancel")
63+
t.Log("etcdctl downgrade cancel")
6264
err = c.DowngradeCancel(t.Context())
6365
if err != nil {
6466
if strings.Contains(err.Error(), "no inflight downgrade job") {
@@ -72,7 +74,7 @@ func DowngradeCancel(t *testing.T, epc *EtcdProcessCluster) {
7274
continue
7375
}
7476

75-
t.Logf("etcdctl downgrade cancel executed successfully")
77+
t.Log("etcdctl downgrade cancel executed successfully")
7678
break
7779
}
7880
})
@@ -165,6 +167,23 @@ func DowngradeUpgradeMembersByID(t *testing.T, lg *zap.Logger, clus *EtcdProcess
165167
t.Log("Waiting health interval to make sure the leader propagates version to new processes")
166168
time.Sleep(etcdserver.HealthInterval)
167169

170+
if opString == "downgrading" && len(membersToChange) == len(clus.Procs) {
171+
noError := false
172+
var err error
173+
for i := 0; i < 3 && !noError; i++ {
174+
testutils.ExecuteWithTimeout(t, 15*time.Second, func() {
175+
lg.Info("Waiting for downgrade completion log line")
176+
leader := clus.WaitLeader(t)
177+
_, err = clus.Procs[leader].Logs().ExpectWithContext(context.Background(), expect.ExpectedResponse{Value: "the cluster has been downgraded"})
178+
if err == nil {
179+
noError = true
180+
}
181+
})
182+
}
183+
184+
require.NoError(t, err)
185+
}
186+
168187
lg.Info("Validating versions")
169188
clusterVersion := targetVersion
170189
if !isDowngrade {

0 commit comments

Comments
 (0)