@@ -1068,11 +1068,15 @@ func (vlog *valueLog) doRunGC(lf *logFile, discardRatio float64, tr trace.Trace)
10681068 tr .SetError ()
10691069 return err
10701070 }
1071- window := float64 (fi .Size ()) * 0.1 // 10% of the file as window.
1071+
1072+ // Set up the sampling window sizes.
1073+ sizeWindow := float64 (fi .Size ()) * 0.1 // 10% of the file as window.
1074+ countWindow := int (float64 (vlog .opt .ValueLogMaxEntries ) * 0.01 ) // 1% of num entries.
1075+ tr .LazyPrintf ("Size window: %5.2f. Count window: %d." , sizeWindow , countWindow )
10721076
10731077 // Pick a random start point for the log.
10741078 skipFirstM := float64 (rand .Int63n (fi .Size ())) // Pick a random starting location.
1075- skipFirstM -= window // Avoid hitting EOF by moving back by window.
1079+ skipFirstM -= sizeWindow // Avoid hitting EOF by moving back by window.
10761080 skipFirstM /= float64 (mi ) // Convert to MBs.
10771081 tr .LazyPrintf ("Skip first %5.2f MB of file of size: %d MB" , skipFirstM , fi .Size ()/ mi )
10781082 var skipped float64
@@ -1084,18 +1088,18 @@ func (vlog *valueLog) doRunGC(lf *logFile, discardRatio float64, tr trace.Trace)
10841088 var numIterations int
10851089 err = vlog .iterate (lf , 0 , func (e Entry , vp valuePointer ) error {
10861090 numIterations ++
1087- esz := float64 (vp .Len ) / (1 << 20 ) // in MBs. +4 for the CAS stuff.
1091+ esz := float64 (vp .Len ) / (1 << 20 ) // in MBs.
10881092 if skipped < skipFirstM {
10891093 skipped += esz
10901094 return nil
10911095 }
10921096
1093- // Sample until we reach window size or 10K entries or exceed 10 seconds.
1094- if r .count > 10000 {
1095- tr .LazyPrintf ("Stopping sampling after 10K entries." )
1097+ // Sample until we reach the window sizes or exceed 10 seconds.
1098+ if r .count > countWindow {
1099+ tr .LazyPrintf ("Stopping sampling after %d entries." , countWindow )
10961100 return errStop
10971101 }
1098- if r .total > window {
1102+ if r .total > sizeWindow {
10991103 tr .LazyPrintf ("Stopping sampling after reaching window size." )
11001104 return errStop
11011105 }
@@ -1158,8 +1162,9 @@ func (vlog *valueLog) doRunGC(lf *logFile, discardRatio float64, tr trace.Trace)
11581162 tr .LazyPrintf ("Fid: %d. Skipped: %5.2fMB Num iterations: %d. Data status=%+v\n " ,
11591163 lf .fid , skipped , numIterations , r )
11601164
1161- // If we sampled at least 10MB, we can make a call about rewrite.
1162- if (r .count < 10000 && r .total < 10.0 ) || r .discard < discardRatio * r .total {
1165+ // If we sampled fewer than countWindow KV pairs and less than 75% of the size window,
1166+ // or what we can discard is below the threshold, we should skip the rewrite.
1167+ if (r .count < countWindow && r .total < sizeWindow * 0.75 ) || r .discard < discardRatio * r .total {
11631168 tr .LazyPrintf ("Skipping GC on fid: %d" , lf .fid )
11641169 return ErrNoRewrite
11651170 }
@@ -1185,13 +1190,18 @@ func (vlog *valueLog) runGC(discardRatio float64, head valuePointer) error {
11851190 case vlog .garbageCh <- struct {}{}:
11861191 // Pick a log file for GC.
11871192 tr := trace .New ("Badger.ValueLog" , "GC" )
1193+ tr .SetMaxEvents (100 )
11881194 defer func () {
11891195 tr .Finish ()
11901196 <- vlog .garbageCh
11911197 }()
11921198
11931199 var err error
11941200 files := vlog .pickLog (head , tr )
1201+ if len (files ) == 0 {
1202+ tr .LazyPrintf ("PickLog returned zero results." )
1203+ return ErrNoRewrite
1204+ }
11951205 tried := make (map [uint32 ]bool )
11961206 for _ , lf := range files {
11971207 if _ , done := tried [lf .fid ]; done {
0 commit comments