@@ -71,16 +71,16 @@ class TopN {
71
71
// UNORDERED and a peek_bottom() function call is invoked.
72
72
//
73
73
// o HEAP_SORTED: in this state, the array is kept as a heap and
74
- // there are exactly ( limit_+1) elements in the array. This
74
+ // there are exactly limit_ elements in the array. This
75
75
// state is reached when at least (limit_+1) elements are
76
76
// pushed in.
77
77
//
78
78
// The state transition graph is at follows:
79
79
//
80
- // peek_bottom() (limit_+1) elements
80
+ // peek_bottom() (limit_+1) elements pushed
81
81
// UNORDERED --------------> BOTTOM_KNOWN --------------------> HEAP_SORTED
82
82
// | ^
83
- // | (limit_+1) elements |
83
+ // | (limit_+1) elements pushed |
84
84
// +-----------------------------------------------------------+
85
85
86
86
enum State { UNORDERED, BOTTOM_KNOWN, HEAP_SORTED };
@@ -94,14 +94,18 @@ class TopN {
94
94
95
95
// Number of elements currently held by this TopN object. This
96
96
// will be no greater than 'limit' passed to the constructor.
97
- size_t size () const { return std::min ( elements_.size (), limit_ ); }
97
+ size_t size () const { return elements_.size (); }
98
98
99
99
bool empty () const { return size () == 0 ; }
100
100
101
101
// If you know how many elements you will push at the time you create the
102
102
// TopN object, you can call reserve to preallocate the memory that TopN
103
103
// will need to process all 'n' pushes. Calling this method is optional.
104
- void reserve (size_t n) { elements_.reserve (std::min (n, limit_ + 1 )); }
104
+ void reserve (size_t n) {
105
+ // We may need limit_+1 for the case where we transition from an unsorted
106
+ // set of limit_ elements to a heap.
107
+ elements_.reserve (std::min (n, limit_ + 1 ));
108
+ }
105
109
106
110
// Push 'v'. If the maximum number of elements was exceeded, drop the
107
111
// lowest element and return it in 'dropped' (if given). If the maximum is not
@@ -170,7 +174,7 @@ class TopN {
170
174
// with no guarantees about the order of iteration. These iterators are
171
175
// invalidated by mutation of the data structure.
172
176
UnsortedIterator unsorted_begin () const { return elements_.begin (); }
173
- UnsortedIterator unsorted_end () const { return elements_.begin () + size (); }
177
+ UnsortedIterator unsorted_end () const { return elements_.end (); }
174
178
175
179
// Accessor for comparator template argument.
176
180
Cmp *comparator () { return &cmp_; }
@@ -184,13 +188,10 @@ class TopN {
184
188
void PushInternal (U &&v, T *dropped); // NOLINT(build/c++11)
185
189
186
190
// elements_ can be in one of two states:
187
- // elements_.size() <= limit_: elements_ is an unsorted vector of elements
188
- // pushed so far.
189
- // elements_.size() > limit_: The last element of elements_ is unused;
190
- // the other elements of elements_ are an stl heap whose size is exactly
191
- // limit_. In this case elements_.size() is exactly one greater than
192
- // limit_, but don't use "elements_.size() == limit_ + 1" to check for
193
- // that because you'll get a false positive if limit_ == size_t(-1).
191
+ // elements_.size() <= limit_ && state_ != HEAP_SORTED:
192
+ // elements_ is an unsorted vector of elements pushed so far.
193
+ // elements_.size() == limit_ && state_ == HEAP_SORTED:
194
+ // elements_ is an stl heap.
194
195
std::vector<T> elements_;
195
196
size_t limit_; // Maximum number of elements to find
196
197
Cmp cmp_; // Greater-than comparison function
@@ -208,6 +209,8 @@ void TopN<T, Cmp>::PushInternal(U &&v, T *dropped) { // NOLINT(build/c++11)
208
209
return ;
209
210
}
210
211
if (state_ != HEAP_SORTED) {
212
+ // We may temporarily extend one beyond limit_ elements here. This is
213
+ // necessary for finding and removing the smallest element.
211
214
elements_.push_back (std::forward<U>(v)); // NOLINT(build/c++11)
212
215
if (state_ == UNORDERED || cmp_ (elements_.back (), elements_.front ())) {
213
216
// Easy case: we just pushed the new element back
@@ -223,25 +226,32 @@ void TopN<T, Cmp>::PushInternal(U &&v, T *dropped) { // NOLINT(build/c++11)
223
226
if (elements_.size () == limit_ + 1 ) {
224
227
// Transition from unsorted vector to a heap.
225
228
std::make_heap (elements_.begin (), elements_.end (), cmp_);
226
- if (dropped) *dropped = std::move (elements_.front ());
227
229
std::pop_heap (elements_.begin (), elements_.end (), cmp_);
230
+ if (dropped) *dropped = std::move (elements_.back ());
231
+ elements_.pop_back (); // Restore to size limit_.
228
232
state_ = HEAP_SORTED;
233
+ } else if (state_ == UNORDERED ||
234
+ cmp_ (elements_.back (), elements_.front ())) {
235
+ // Easy case: we just push the new element back
236
+ } else {
237
+ // To maintain the BOTTOM_KNOWN state, we need to make sure that
238
+ // the element at position 0 is always the smallest. So we put
239
+ // the new element at position 0 and push the original bottom
240
+ // element in the back.
241
+ // Warning: this code is subtle.
242
+ using std::swap;
243
+ swap (elements_.front (), elements_.back ());
229
244
}
245
+
230
246
} else {
231
247
// Only insert the new element if it is greater than the least element.
232
248
if (cmp_ (v, elements_.front ())) {
233
- // Store new element in the last slot of elements_. Remember from the
234
- // comments on elements_ that this last slot is unused, so we don't
235
- // overwrite anything useful.
236
- elements_.back () = std::forward<U>(v); // NOLINT(build/c++11)
237
-
238
- // stp::pop_heap() swaps elements_.front() and elements_.back() and
239
- // rearranges elements from [elements_.begin(), elements_.end() - 1) such
240
- // that they are a heap according to cmp_. Net effect: remove
241
- // elements_.front() from the heap, and add the new element instead. For
242
- // more info, see https://en.cppreference.com/w/cpp/algorithm/pop_heap.
249
+ // Remove the top (smallest) element of the min heap, then push the new
250
+ // value in.
243
251
std::pop_heap (elements_.begin (), elements_.end (), cmp_);
244
252
if (dropped) *dropped = std::move (elements_.back ());
253
+ elements_.back () = std::forward<U>(v);
254
+ std::push_heap (elements_.begin (), elements_.end (), cmp_);
245
255
} else {
246
256
if (dropped) *dropped = std::forward<U>(v); // NOLINT(build/c++11)
247
257
}
@@ -277,7 +287,6 @@ std::unique_ptr<std::vector<T>> TopN<T, Cmp>::Extract() {
277
287
if (state_ != HEAP_SORTED) {
278
288
std::sort (out->begin (), out->end (), cmp_);
279
289
} else {
280
- out->pop_back ();
281
290
std::sort_heap (out->begin (), out->end (), cmp_);
282
291
}
283
292
return out;
@@ -287,10 +296,6 @@ template <class T, class Cmp>
287
296
std::unique_ptr<std::vector<T>> TopN<T, Cmp>::ExtractUnsorted() {
288
297
std::unique_ptr<std::vector<T>> out (new std::vector<T>);
289
298
out->swap (elements_);
290
- if (state_ == HEAP_SORTED) {
291
- // Remove the limit_+1'th element.
292
- out->pop_back ();
293
- }
294
299
return out;
295
300
}
296
301
@@ -308,7 +313,6 @@ void TopN<T, Cmp>::ExtractNondestructive(std::vector<T> *output) const {
308
313
if (state_ != HEAP_SORTED) {
309
314
std::sort (output->begin (), output->end (), cmp_);
310
315
} else {
311
- output->pop_back ();
312
316
std::sort_heap (output->begin (), output->end (), cmp_);
313
317
}
314
318
}
@@ -324,10 +328,6 @@ template <class T, class Cmp>
324
328
void TopN<T, Cmp>::ExtractUnsortedNondestructive(std::vector<T> *output) const {
325
329
CHECK (output != nullptr );
326
330
*output = elements_;
327
- if (state_ == HEAP_SORTED) {
328
- // Remove the limit_+1'th element.
329
- output->pop_back ();
330
- }
331
331
}
332
332
333
333
template <class T , class Cmp >
0 commit comments