Skip to content

Commit 8b1006b

Browse files
authored
Use the window size for sampling, instead of a fixed 10MB size. (#501)
* Use the window size for sampling, instead of a fixed 10MB size.
* Use count window, just like size window.
* Ensure that we return ErrNoRewrite if no logs are picked.
1 parent e201d7b commit 8b1006b

File tree

2 files changed

+23
-10
lines changed

2 files changed

+23
-10
lines changed

integration/testgc/main.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,10 @@ func main() {
128128
// Run value log GC.
129129
defer closer.Done()
130130
var count int
131-
ticker := time.NewTicker(30 * time.Second)
131+
ticker := time.NewTicker(5 * time.Second)
132132
defer ticker.Stop()
133133
for range ticker.C {
134+
again:
134135
select {
135136
case <-closer.HasBeenClosed():
136137
log.Printf("Num times value log GC was successful: %d\n", count)
@@ -142,6 +143,7 @@ func main() {
142143
log.Printf("Result of value log GC: %v\n", err)
143144
if err == nil {
144145
count++
146+
goto again
145147
}
146148
}
147149
}()
@@ -218,4 +220,5 @@ func main() {
218220
log.Fatalf("Error while iterating: %v", err)
219221
}
220222
log.Println("Iteration done. Test successful.")
223+
time.Sleep(time.Minute) // Time to do some poking around.
221224
}

value.go

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1068,11 +1068,15 @@ func (vlog *valueLog) doRunGC(lf *logFile, discardRatio float64, tr trace.Trace)
10681068
tr.SetError()
10691069
return err
10701070
}
1071-
window := float64(fi.Size()) * 0.1 // 10% of the file as window.
1071+
1072+
// Set up the sampling window sizes.
1073+
sizeWindow := float64(fi.Size()) * 0.1 // 10% of the file as window.
1074+
countWindow := int(float64(vlog.opt.ValueLogMaxEntries) * 0.01) // 1% of num entries.
1075+
tr.LazyPrintf("Size window: %5.2f. Count window: %d.", sizeWindow, countWindow)
10721076

10731077
// Pick a random start point for the log.
10741078
skipFirstM := float64(rand.Int63n(fi.Size())) // Pick a random starting location.
1075-
skipFirstM -= window // Avoid hitting EOF by moving back by window.
1079+
skipFirstM -= sizeWindow // Avoid hitting EOF by moving back by window.
10761080
skipFirstM /= float64(mi) // Convert to MBs.
10771081
tr.LazyPrintf("Skip first %5.2f MB of file of size: %d MB", skipFirstM, fi.Size()/mi)
10781082
var skipped float64
@@ -1084,18 +1088,18 @@ func (vlog *valueLog) doRunGC(lf *logFile, discardRatio float64, tr trace.Trace)
10841088
var numIterations int
10851089
err = vlog.iterate(lf, 0, func(e Entry, vp valuePointer) error {
10861090
numIterations++
1087-
esz := float64(vp.Len) / (1 << 20) // in MBs. +4 for the CAS stuff.
1091+
esz := float64(vp.Len) / (1 << 20) // in MBs.
10881092
if skipped < skipFirstM {
10891093
skipped += esz
10901094
return nil
10911095
}
10921096

1093-
// Sample until we reach window size or 10K entries or exceed 10 seconds.
1094-
if r.count > 10000 {
1095-
tr.LazyPrintf("Stopping sampling after 10K entries.")
1097+
// Sample until we reach the window sizes or exceed 10 seconds.
1098+
if r.count > countWindow {
1099+
tr.LazyPrintf("Stopping sampling after %d entries.", countWindow)
10961100
return errStop
10971101
}
1098-
if r.total > window {
1102+
if r.total > sizeWindow {
10991103
tr.LazyPrintf("Stopping sampling after reaching window size.")
11001104
return errStop
11011105
}
@@ -1158,8 +1162,9 @@ func (vlog *valueLog) doRunGC(lf *logFile, discardRatio float64, tr trace.Trace)
11581162
tr.LazyPrintf("Fid: %d. Skipped: %5.2fMB Num iterations: %d. Data status=%+v\n",
11591163
lf.fid, skipped, numIterations, r)
11601164

1161-
// If we sampled at least 10MB, we can make a call about rewrite.
1162-
if (r.count < 10000 && r.total < 10.0) || r.discard < discardRatio*r.total {
1165+
// If we sampled fewer than countWindow entries and less than 75% of the size window,
1166+
// or if what we can discard is below the threshold, we should skip the rewrite.
1167+
if (r.count < countWindow && r.total < sizeWindow*0.75) || r.discard < discardRatio*r.total {
11631168
tr.LazyPrintf("Skipping GC on fid: %d", lf.fid)
11641169
return ErrNoRewrite
11651170
}
@@ -1185,13 +1190,18 @@ func (vlog *valueLog) runGC(discardRatio float64, head valuePointer) error {
11851190
case vlog.garbageCh <- struct{}{}:
11861191
// Pick a log file for GC.
11871192
tr := trace.New("Badger.ValueLog", "GC")
1193+
tr.SetMaxEvents(100)
11881194
defer func() {
11891195
tr.Finish()
11901196
<-vlog.garbageCh
11911197
}()
11921198

11931199
var err error
11941200
files := vlog.pickLog(head, tr)
1201+
if len(files) == 0 {
1202+
tr.LazyPrintf("PickLog returned zero results.")
1203+
return ErrNoRewrite
1204+
}
11951205
tried := make(map[uint32]bool)
11961206
for _, lf := range files {
11971207
if _, done := tried[lf.fid]; done {

0 commit comments

Comments
 (0)