7
7
#include < iterator>
8
8
#include < limits>
9
9
#include < memory>
10
+ #include < type_traits>
10
11
#include < vector>
11
12
12
13
namespace ylt ::metric::detail {
13
14
14
- template <std::size_t frac_bit = 6 >
15
+ template <typename uint_type, std::size_t frac_bit = 6 >
15
16
class summary_impl {
17
+ static_assert (sizeof (uint_type) >= 4 );
18
+ static_assert (std::is_unsigned_v<uint_type>);
16
19
constexpr static uint32_t decode_impl (uint16_t float16_value) {
17
20
float16_value <<= (8 - frac_bit);
18
21
uint32_t sign = float16_value >> 15 ;
@@ -57,7 +60,8 @@ class summary_impl {
57
60
static constexpr float float16_max = (1ull << 63 ) * 2 .0f ; // 2^64
58
61
59
62
static uint16_t encode (float flt) {
60
- unsigned int & fltInt32 = *(unsigned int *)&flt;
63
+ static_assert (sizeof (float ) == 4 );
64
+ uint32_t & fltInt32 = *(uint32_t *)&flt;
61
65
if (std::abs (flt) >= float16_max || std::isnan (flt)) {
62
66
flt = (fltInt32 & 0x8000'0000 ) ? (-float16_max) : (float16_max);
63
67
}
@@ -88,9 +92,9 @@ class summary_impl {
88
92
89
93
struct data_t {
90
94
static constexpr size_t piece_size = bucket_size / piece_cnt;
91
- using piece_t = std::array<std::atomic<uint32_t >, piece_size>;
95
+ using piece_t = std::array<std::atomic<uint_type >, piece_size>;
92
96
93
- std::atomic<uint32_t >& operator [](std::size_t index) {
97
+ std::atomic<uint_type >& operator [](std::size_t index) {
94
98
piece_t * piece = arr[index / piece_size];
95
99
if (piece == nullptr ) {
96
100
auto ptr = new piece_t {};
@@ -122,7 +126,7 @@ class summary_impl {
122
126
}
123
127
template <bool inc_order>
124
128
void stat_impl (uint64_t & count,
125
- std::vector<std::pair<int16_t , uint32_t >>& result, int i) {
129
+ std::vector<std::pair<int16_t , uint_type >>& result, int i) {
126
130
auto piece = arr[i].load (std::memory_order_relaxed);
127
131
if (piece) {
128
132
if constexpr (inc_order) {
@@ -146,7 +150,7 @@ class summary_impl {
146
150
}
147
151
}
148
152
void stat (uint64_t & count,
149
- std::vector<std::pair<int16_t , uint32_t >>& result) {
153
+ std::vector<std::pair<int16_t , uint_type >>& result) {
150
154
for (int i = piece_cnt - 1 ; i >= piece_cnt / 2 ; --i) {
151
155
stat_impl<false >(count, result, i);
152
156
}
@@ -182,36 +186,38 @@ class summary_impl {
182
186
static inline const unsigned long ms_count =
183
187
std::chrono::steady_clock::duration{std::chrono::milliseconds{1 }}.count();
184
188
185
- constexpr static unsigned int near_uint32_max = 4290000000U ;
189
+ constexpr static uint32_t near_uint32_max = 4290000000U ;
186
190
187
191
// Records one sample in bucket `pos` of `arr`.
//
// The bucket counter is bumped with a relaxed atomic add. When the counters
// are 32-bit, a bucket whose previous value was already above
// `near_uint32_max` is treated as saturated: the increment is rolled back
// and the sample is credited to the closest non-saturated bucket instead.
// The search stays inside the half of the bucket range that contains `pos`
// ([0, bucket_size / 2) or [bucket_size / 2, bucket_size)) and, for each
// distance, tries the higher neighbour before the lower one.
// NOTE(review): the two halves presumably separate the two signs of the
// encoded value - confirm against encode()/decode_impl().
// If every bucket in that half is saturated the sample is dropped.
//
// For wider counter types the saturation check is compiled out.
//
// @param arr  bucket storage to update.
// @param pos  index of the bucket the sample belongs to.
void increase(data_t& arr, uint16_t pos) {
  auto res = arr[pos].fetch_add(1, std::memory_order_relaxed);
  if constexpr (std::is_same_v<uint_type, uint32_t>) {
    // Saturation is the rare path: it needs ~4.29e9 hits on a single bucket.
    if (res > near_uint32_max) /* counter about to overflow */ [[unlikely]] {
      // Undo our own increment so the counter cannot creep towards 2^32.
      arr[pos].fetch_sub(1, std::memory_order_relaxed);
      int upper = (pos < bucket_size / 2) ? (bucket_size / 2) : (bucket_size);
      int lower = (pos < bucket_size / 2) ? (0) : (bucket_size / 2);
      // `lim` is one past the largest distance that can still hit a bucket
      // inside [lower, upper) on either side of `pos`.
      for (int delta = 1, lim = (std::max)(upper - pos, pos - lower + 1);
           delta < lim; ++delta) {
        if (pos + delta < upper) {
          if (arr[pos + delta].fetch_add(1, std::memory_order_relaxed) <=
              near_uint32_max) {
            break;  // the higher neighbour accepted the sample
          }
          arr[pos + delta].fetch_sub(1, std::memory_order_relaxed);
        }
        if (pos - delta >= lower) {
          if (arr[pos - delta].fetch_add(1, std::memory_order_relaxed) <=
              near_uint32_max) {
            break;  // the lower neighbour accepted the sample
          }
          arr[pos - delta].fetch_sub(1, std::memory_order_relaxed);
        }
      }
    }
  }
}
212
218
213
219
struct data_copy_t {
214
- std::vector<std::pair<int16_t , uint32_t >> arr[2 ];
220
+ std::vector<std::pair<int16_t , uint_type >> arr[2 ];
215
221
int index[2 ] = {}, smaller_one;
216
222
void init () {
217
223
if (arr[0 ][0 ] <= arr[1 ][0 ]) {
@@ -231,7 +237,7 @@ class summary_impl {
231
237
}
232
238
}
233
239
int16_t value () { return arr[smaller_one][index[smaller_one]].first ; }
234
- uint32_t count () { return arr[smaller_one][index[smaller_one]].second ; }
240
+ uint_type count () { return arr[smaller_one][index[smaller_one]].second ; }
235
241
};
236
242
237
243
public:
@@ -304,6 +310,9 @@ class summary_impl {
304
310
e = 1 ;
305
311
}
306
312
auto target_count = std::min<double >(e * count, count);
313
+ if (e == 0 ) {
314
+ target_count = std::min (uint64_t {1 }, count);
315
+ }
307
316
while (true ) {
308
317
if (target_count <= count_now) [[unlikely]] {
309
318
result.push_back (v);
0 commit comments