Skip to content

Commit 7d169b2

Browse files
damzIbrahim Jarif
authored andcommitted
levels: Compaction incorrectly drops some delete markers (#1422)
fd89894 ("Compaction: Expired keys and delete markers are never purged") revealed a bug in the compaction logic that leads to delete markers being incorrectly dropped during compaction. During compaction, `*levelsController.compactBuildTables` decides to drop a delete key if there is no overlap with levels lower than the bottom layer of the compaction definition. It does that by checking only the `top` table, thinking that proving that the `top` table doesn't overlap is sufficient to prove that the `bottom` table doesn't. Unfortunately, this is not the case. Not in general, and even less in the case of `DropPrefix()` where we run a same-level compaction and `top` is empty. The faulty condition was introduced way back when in e597fb7 ("Discard key versions during compaction"). (cherry picked from commit 5b90893)
1 parent c30daf9 commit 7d169b2

File tree

2 files changed

+43
-5
lines changed

2 files changed

+43
-5
lines changed

levels.go

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -484,11 +484,8 @@ func (s *levelsController) compactBuildTables(
484484
botTables := cd.bot
485485

486486
// Check overlap of the top level with the levels which are not being
487-
// compacted in this compaction. We don't need to check overlap of the bottom
488-
// tables with other levels because if the top tables overlap with any of the lower
489-
// levels, it implies bottom level also overlaps because top and bottom tables
490-
// overlap with each other.
491-
hasOverlap := s.checkOverlap(cd.top, cd.nextLevel.level+1)
487+
// compacted in this compaction.
488+
hasOverlap := s.checkOverlap(cd.allTables(), cd.nextLevel.level+1)
492489

493490
// Try to collect stats so that we can inform value log about GC. That would help us find which
494491
// value log file should be GCed.
@@ -800,6 +797,13 @@ func (cd *compactDef) unlockLevels() {
800797
cd.thisLevel.RUnlock()
801798
}
802799

800+
func (cd *compactDef) allTables() []*table.Table {
801+
ret := make([]*table.Table, 0, len(cd.top)+len(cd.bot))
802+
ret = append(ret, cd.top...)
803+
ret = append(ret, cd.bot...)
804+
return ret
805+
}
806+
803807
func (s *levelsController) fillTablesL0(cd *compactDef) bool {
804808
cd.lockLevels()
805809
defer cd.unlockLevels()

levels_test.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,40 @@ func TestCompaction(t *testing.T) {
303303
getAllAndCheck(t, db, []keyValVersion{})
304304
})
305305
})
306+
t.Run("with bottom overlap", func(t *testing.T) {
307+
runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
308+
l1 := []keyValVersion{{"foo", "bar", 3, bitDelete}}
309+
l2 := []keyValVersion{{"foo", "bar", 2, 0}, {"fooz", "baz", 2, bitDelete}}
310+
l3 := []keyValVersion{{"fooz", "baz", 1, 0}}
311+
createAndOpen(db, l1, 1)
312+
createAndOpen(db, l2, 2)
313+
createAndOpen(db, l3, 3)
314+
315+
// Set a high discard timestamp so that all the keys are below the discard timestamp.
316+
db.SetDiscardTs(10)
317+
318+
getAllAndCheck(t, db, []keyValVersion{
319+
{"foo", "bar", 3, bitDelete},
320+
{"foo", "bar", 2, 0},
321+
{"fooz", "baz", 2, bitDelete},
322+
{"fooz", "baz", 1, 0},
323+
})
324+
cdef := compactDef{
325+
thisLevel: db.lc.levels[1],
326+
nextLevel: db.lc.levels[2],
327+
top: db.lc.levels[1].tables,
328+
bot: db.lc.levels[2].tables,
329+
}
330+
require.NoError(t, db.lc.runCompactDef(1, cdef))
331+
// the top table at L1 doesn't overlap L3, but the bottom table at L2
332+
// does, delete keys should not be removed.
333+
getAllAndCheck(t, db, []keyValVersion{
334+
{"foo", "bar", 3, bitDelete},
335+
{"fooz", "baz", 2, bitDelete},
336+
{"fooz", "baz", 1, 0},
337+
})
338+
})
339+
})
306340
t.Run("without overlap", func(t *testing.T) {
307341
runBadgerTest(t, &opt, func(t *testing.T, db *DB) {
308342
l1 := []keyValVersion{{"foo", "bar", 3, bitDelete}, {"fooz", "baz", 1, bitDelete}}

0 commit comments

Comments
 (0)