@@ -319,7 +319,7 @@ impl Segment {
319
319
#[ must_use]
320
320
#[ allow( clippy:: iter_without_into_iter) ]
321
321
#[ doc( hidden) ]
322
- pub fn iter ( & self ) -> impl DoubleEndedIterator < Item = crate :: Result < InternalValue > > {
322
+ pub fn iter ( & self ) -> Option < impl DoubleEndedIterator < Item = crate :: Result < InternalValue > > > {
323
323
self . range ( ..)
324
324
}
325
325
@@ -366,7 +366,65 @@ impl Segment {
366
366
false
367
367
}
368
368
369
+ /// Extracts the common prefix from a range's start and end bounds
370
+ fn extract_common_prefix_from_range < R : RangeBounds < UserKey > > (
371
+ & self ,
372
+ range : & R ,
373
+ ) -> Option < Vec < u8 > > {
374
+ // If we have a prefix extractor, we need to compare the extracted prefixes
375
+ // Otherwise, we compare the raw keys
376
+ let Some ( ref prefix_extractor) = self . prefix_extractor else {
377
+ // No prefix extractor, don't do bloom filter optimization
378
+ return None ;
379
+ } ;
380
+
381
+ let start_key = match range. start_bound ( ) {
382
+ Bound :: Included ( key) | Bound :: Excluded ( key) => Some ( key. as_ref ( ) ) ,
383
+ Bound :: Unbounded => None ,
384
+ } ;
385
+
386
+ let end_key = match range. end_bound ( ) {
387
+ Bound :: Included ( key) | Bound :: Excluded ( key) => Some ( key. as_ref ( ) ) ,
388
+ Bound :: Unbounded => None ,
389
+ } ;
390
+
391
+ match ( start_key, end_key) {
392
+ ( Some ( start) , Some ( end) ) => {
393
+ // Extract prefixes using the prefix extractor
394
+ let start_prefixes: Vec < _ > = prefix_extractor. extract ( start) . collect ( ) ;
395
+ let end_prefixes: Vec < _ > = prefix_extractor. extract ( end) . collect ( ) ;
396
+
397
+ // If either key is out of domain (no prefixes), we can't optimize
398
+ if start_prefixes. is_empty ( ) || end_prefixes. is_empty ( ) {
399
+ return None ;
400
+ }
401
+
402
+ // Check if the first prefix of each is the same
403
+ // For most extractors, there's only one prefix
404
+ if let ( Some ( start_prefix) , Some ( end_prefix) ) =
405
+ ( start_prefixes. first ( ) , end_prefixes. first ( ) )
406
+ {
407
+ if start_prefix == end_prefix {
408
+ Some ( start_prefix. to_vec ( ) )
409
+ } else {
410
+ // Different prefixes, no common prefix
411
+ None
412
+ }
413
+ } else {
414
+ None
415
+ }
416
+ }
417
+ ( Some ( key) , None ) | ( None , Some ( key) ) => {
418
+ // If only one bound exists, use its extracted prefix
419
+ let prefixes: Vec < _ > = prefix_extractor. extract ( key) . collect ( ) ;
420
+ prefixes. first ( ) . map ( |p| p. to_vec ( ) )
421
+ }
422
+ ( None , None ) => None ,
423
+ }
424
+ }
425
+
369
426
/// Creates a ranged iterator over the `Segment`.
427
+ /// Returns `None` when the bloom filter indicates that no key with the range's common prefix exists in this segment.
370
428
///
371
429
/// # Errors
372
430
///
@@ -377,17 +435,20 @@ impl Segment {
377
435
pub fn range < R : RangeBounds < UserKey > > (
378
436
& self ,
379
437
range : R ,
380
- ) -> impl DoubleEndedIterator < Item = crate :: Result < InternalValue > > {
438
+ ) -> Option < impl DoubleEndedIterator < Item = crate :: Result < InternalValue > > > {
381
439
use crate :: fallible_clipping_iter:: FallibleClippingIter ;
382
440
use block_index:: iter:: create_index_block_reader;
383
441
384
- // Check bloom filter for prefix existence if we have a start bound
385
- let skip_by_bloom = if let Bound :: Included ( key) | Bound :: Excluded ( key) = range. start_bound ( )
386
- {
387
- self . should_skip_by_bloom_filter ( key)
388
- } else {
389
- false
390
- } ;
442
+ // Check bloom filter using common prefix from range bounds
443
+ if let Some ( common_prefix) = self . extract_common_prefix_from_range ( & range) {
444
+ if self . should_skip_by_bloom_filter ( & UserKey :: from ( common_prefix) ) {
445
+ #[ cfg( feature = "metrics" ) ]
446
+ self . metrics
447
+ . bloom_filter_hits
448
+ . fetch_add ( 1 , std:: sync:: atomic:: Ordering :: Relaxed ) ;
449
+ return None ;
450
+ }
451
+ }
391
452
392
453
// TODO: enum_dispatch BlockIndex::iter
393
454
let index_block = match & * self . block_index {
@@ -417,28 +478,16 @@ impl Segment {
417
478
self . metrics . clone ( ) ,
418
479
) ;
419
480
420
- // Handle bloom filter skip case
421
- if skip_by_bloom {
422
- #[ cfg( feature = "metrics" ) ]
423
- self . metrics
424
- . bloom_filter_hits
425
- . fetch_add ( 1 , std:: sync:: atomic:: Ordering :: Relaxed ) ;
426
-
427
- // Set invalid bounds to ensure no items are yielded
428
- iter. set_lower_bound ( UserKey :: from ( & [ 255u8 ; 1 ] ) ) ;
429
- iter. set_upper_bound ( UserKey :: from ( & [ 0u8 ; 1 ] ) ) ;
430
- } else {
431
- // Set normal iterator bounds based on range
432
- if let Bound :: Excluded ( key) | Bound :: Included ( key) = range. start_bound ( ) {
433
- iter. set_lower_bound ( key. clone ( ) ) ;
434
- }
481
+ // Set normal iterator bounds based on range
482
+ if let Bound :: Excluded ( key) | Bound :: Included ( key) = range. start_bound ( ) {
483
+ iter. set_lower_bound ( key. clone ( ) ) ;
484
+ }
435
485
436
- if let Bound :: Excluded ( key) | Bound :: Included ( key) = range. end_bound ( ) {
437
- iter. set_upper_bound ( key. clone ( ) ) ;
438
- }
486
+ if let Bound :: Excluded ( key) | Bound :: Included ( key) = range. end_bound ( ) {
487
+ iter. set_upper_bound ( key. clone ( ) ) ;
439
488
}
440
489
441
- FallibleClippingIter :: new ( iter, range)
490
+ Some ( FallibleClippingIter :: new ( iter, range) )
442
491
}
443
492
444
493
/// Tries to recover a segment from a file.
@@ -804,10 +853,12 @@ mod tests {
804
853
"should use full index, so only TLI exists" ,
805
854
) ;
806
855
807
- assert_eq ! ( items, & * segment. iter( ) . flatten( ) . collect:: <Vec <_>>( ) ) ;
856
+ let iter = segment. iter ( ) . unwrap ( ) ;
857
+ assert_eq ! ( items, & * iter. flatten( ) . collect:: <Vec <_>>( ) ) ;
858
+ let iter = segment. iter ( ) . unwrap ( ) ;
808
859
assert_eq ! (
809
860
items. iter( ) . rev( ) . cloned( ) . collect:: <Vec <_>>( ) ,
810
- & * segment . iter( ) . rev( ) . flatten( ) . collect:: <Vec <_>>( ) ,
861
+ & * iter. rev( ) . flatten( ) . collect:: <Vec <_>>( ) ,
811
862
) ;
812
863
}
813
864
@@ -865,6 +916,7 @@ mod tests {
865
916
items. iter( ) . skip( 1 ) . cloned( ) . collect:: <Vec <_>>( ) ,
866
917
& * segment
867
918
. range( UserKey :: from( "b" ) ..)
919
+ . unwrap( )
868
920
. flatten( )
869
921
. collect:: <Vec <_>>( )
870
922
) ;
@@ -873,6 +925,7 @@ mod tests {
873
925
items. iter( ) . skip( 1 ) . rev( ) . cloned( ) . collect:: <Vec <_>>( ) ,
874
926
& * segment
875
927
. range( UserKey :: from( "b" ) ..)
928
+ . unwrap( )
876
929
. rev( )
877
930
. flatten( )
878
931
. collect:: <Vec <_>>( ) ,
@@ -929,8 +982,7 @@ mod tests {
929
982
"should use full index, so only TLI exists" ,
930
983
) ;
931
984
932
- let mut iter = segment
933
- . range ( UserKey :: from ( 5u64 . to_be_bytes ( ) ) ..UserKey :: from ( 10u64 . to_be_bytes ( ) ) ) ;
985
+ let mut iter = segment. range ( ..) . unwrap ( ) ;
934
986
935
987
let mut count = 0 ;
936
988
@@ -950,7 +1002,7 @@ mod tests {
950
1002
}
951
1003
}
952
1004
953
- assert_eq ! ( 5 , count) ;
1005
+ assert_eq ! ( 10 , count) ;
954
1006
}
955
1007
956
1008
Ok ( ( ) )
@@ -1009,6 +1061,7 @@ mod tests {
1009
1061
items. iter( ) . skip( 1 ) . take( 3 ) . cloned( ) . collect:: <Vec <_>>( ) ,
1010
1062
& * segment
1011
1063
. range( UserKey :: from( "b" ) ..=UserKey :: from( "d" ) )
1064
+ . unwrap( )
1012
1065
. flatten( )
1013
1066
. collect:: <Vec <_>>( )
1014
1067
) ;
@@ -1023,6 +1076,7 @@ mod tests {
1023
1076
. collect:: <Vec <_>>( ) ,
1024
1077
& * segment
1025
1078
. range( UserKey :: from( "b" ) ..=UserKey :: from( "d" ) )
1079
+ . unwrap( )
1026
1080
. rev( )
1027
1081
. flatten( )
1028
1082
. collect:: <Vec <_>>( ) ,
0 commit comments