Skip to content

Commit c09ffa3

Browse files
committed
Add prefix bloom filter support
1 parent 1486943 commit c09ffa3

File tree

20 files changed

+3389
-69
lines changed

20 files changed

+3389
-69
lines changed

benches/run_reader.rs

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
2+
use lsm_tree::prefix::FixedPrefixExtractor;
3+
use lsm_tree::{AbstractTree, Config};
4+
use std::sync::Arc;
5+
use std::time::Instant;
6+
use tempfile::TempDir;
7+
8+
fn create_tree_with_segments(
9+
segment_count: usize,
10+
with_prefix_extractor: bool,
11+
) -> (TempDir, lsm_tree::Tree) {
12+
let tempdir = tempfile::tempdir().unwrap();
13+
14+
let mut config = Config::new(&tempdir);
15+
if with_prefix_extractor {
16+
config = config.prefix_extractor(Arc::new(FixedPrefixExtractor::new(8)));
17+
}
18+
19+
let tree = config.open().unwrap();
20+
21+
// Create segments with distinct prefixes
22+
for segment_idx in 0..segment_count {
23+
let prefix = format!("seg{:04}", segment_idx);
24+
25+
// Add 100 keys per segment
26+
for key_idx in 0..100 {
27+
let key = format!("{}_{:04}", prefix, key_idx);
28+
tree.insert(key.as_bytes(), vec![0u8; 100], 0);
29+
}
30+
31+
// Flush to create a segment
32+
tree.flush_active_memtable(0).unwrap();
33+
}
34+
35+
(tempdir, tree)
36+
}
37+
38+
fn benchmark_range_query(c: &mut Criterion) {
39+
let mut group = c.benchmark_group("range_query");
40+
41+
// Test different segment counts
42+
for segment_count in [10, 100, 500, 1000] {
43+
// Benchmark without prefix extractor
44+
group.bench_with_input(
45+
BenchmarkId::new("no_prefix", segment_count),
46+
&segment_count,
47+
|b, &count| {
48+
let (_tempdir, tree) = create_tree_with_segments(count, false);
49+
50+
b.iter(|| {
51+
// Query for a range that doesn't exist
52+
let start: &[u8] = b"zzz_0000";
53+
let end: &[u8] = b"zzz_9999";
54+
let iter = tree.range(start..=end, 0, None);
55+
// Force evaluation by counting
56+
let count = iter.count();
57+
black_box(count);
58+
});
59+
},
60+
);
61+
62+
// Benchmark with prefix extractor
63+
group.bench_with_input(
64+
BenchmarkId::new("with_prefix", segment_count),
65+
&segment_count,
66+
|b, &count| {
67+
let (_tempdir, tree) = create_tree_with_segments(count, true);
68+
69+
b.iter(|| {
70+
// Query for a range that doesn't exist (will check filters)
71+
let start: &[u8] = b"zzz_0000";
72+
let end: &[u8] = b"zzz_9999";
73+
let iter = tree.range(start..=end, 0, None);
74+
// Force evaluation by counting
75+
let count = iter.count();
76+
black_box(count);
77+
});
78+
},
79+
);
80+
81+
// Benchmark with prefix extractor - existing prefix
82+
group.bench_with_input(
83+
BenchmarkId::new("with_prefix_exists", segment_count),
84+
&segment_count,
85+
|b, &count| {
86+
let (_tempdir, tree) = create_tree_with_segments(count, true);
87+
88+
b.iter(|| {
89+
// Query for a range that exists in the middle
90+
let mid = count / 2;
91+
let prefix = format!("seg{:04}", mid);
92+
let start_str = format!("{}_0000", prefix);
93+
let end_str = format!("{}_0099", prefix);
94+
let start: &[u8] = start_str.as_bytes();
95+
let end: &[u8] = end_str.as_bytes();
96+
let iter = tree.range(start..=end, 0, None);
97+
// Force evaluation by counting
98+
let count = iter.count();
99+
black_box(count);
100+
});
101+
},
102+
);
103+
}
104+
105+
group.finish();
106+
}
107+
108+
fn benchmark_timing_comparison(_c: &mut Criterion) {
109+
println!("\n=== RunReader Performance Benchmark ===");
110+
println!("Testing impact of prefix filter checks on large runs\n");
111+
112+
for segment_count in [100, 500, 1000] {
113+
println!("\n--- Testing with {} segments ---", segment_count);
114+
115+
// Test without prefix extractor
116+
let (_tempdir_no_prefix, tree_no_prefix) = create_tree_with_segments(segment_count, false);
117+
118+
let start = Instant::now();
119+
for _ in 0..100 {
120+
let start_key: &[u8] = b"zzz_0000";
121+
let end_key: &[u8] = b"zzz_9999";
122+
let iter = tree_no_prefix.range(start_key..=end_key, 0, None);
123+
let _ = iter.count();
124+
}
125+
let no_prefix_time = start.elapsed();
126+
let avg_no_prefix = no_prefix_time.as_nanos() / 100;
127+
128+
println!(" Without prefix extractor: {:>8} ns/query", avg_no_prefix);
129+
130+
// Test with prefix extractor
131+
let (_tempdir_with_prefix, tree_with_prefix) =
132+
create_tree_with_segments(segment_count, true);
133+
134+
let start = Instant::now();
135+
for _ in 0..100 {
136+
let start_key: &[u8] = b"zzz_0000";
137+
let end_key: &[u8] = b"zzz_9999";
138+
let iter = tree_with_prefix.range(start_key..=end_key, 0, None);
139+
let _ = iter.count();
140+
}
141+
let with_prefix_time = start.elapsed();
142+
let avg_with_prefix = with_prefix_time.as_nanos() / 100;
143+
144+
println!(
145+
" With prefix extractor: {:>8} ns/query",
146+
avg_with_prefix
147+
);
148+
149+
if avg_with_prefix > avg_no_prefix {
150+
let overhead = avg_with_prefix - avg_no_prefix;
151+
println!(
152+
" Overhead: {} ns ({:.1}%)",
153+
overhead,
154+
(overhead as f64 / avg_no_prefix as f64) * 100.0
155+
);
156+
} else {
157+
let savings = avg_no_prefix - avg_with_prefix;
158+
println!(
159+
" Savings: {} ns ({:.1}%)",
160+
savings,
161+
(savings as f64 / avg_no_prefix as f64) * 100.0
162+
);
163+
}
164+
165+
// Check CPU cost per segment
166+
if segment_count > 0 {
167+
let per_segment_overhead = if avg_with_prefix > avg_no_prefix {
168+
(avg_with_prefix - avg_no_prefix) / segment_count as u128
169+
} else {
170+
0
171+
};
172+
println!(" Per-segment overhead: ~{} ns", per_segment_overhead);
173+
}
174+
}
175+
176+
println!("\n=== Summary ===");
177+
println!("MAX_UPFRONT_CHECKS optimization limits overhead to checking at most 10 segments.");
178+
println!(
179+
"For runs with >10 segments, remaining segments are filtered lazily during iteration.\n"
180+
);
181+
}
182+
183+
fn run_timing_benchmark() {
184+
println!("\n=== RunReader Performance Benchmark ===");
185+
println!("Testing impact of prefix filter checks on large runs\n");
186+
187+
for segment_count in [100, 500, 1000] {
188+
println!("\n--- Testing with {} segments ---", segment_count);
189+
190+
// Test without prefix extractor
191+
let (_tempdir_no_prefix, tree_no_prefix) = create_tree_with_segments(segment_count, false);
192+
193+
let start = Instant::now();
194+
for _ in 0..100 {
195+
let start_key: &[u8] = b"zzz_0000";
196+
let end_key: &[u8] = b"zzz_9999";
197+
let iter = tree_no_prefix.range(start_key..=end_key, 0, None);
198+
let _ = iter.count();
199+
}
200+
let no_prefix_time = start.elapsed();
201+
let avg_no_prefix = no_prefix_time.as_nanos() / 100;
202+
203+
println!(" Without prefix extractor: {:>8} ns/query", avg_no_prefix);
204+
205+
// Test with prefix extractor
206+
let (_tempdir_with_prefix, tree_with_prefix) =
207+
create_tree_with_segments(segment_count, true);
208+
209+
let start = Instant::now();
210+
for _ in 0..100 {
211+
let start_key: &[u8] = b"zzz_0000";
212+
let end_key: &[u8] = b"zzz_9999";
213+
let iter = tree_with_prefix.range(start_key..=end_key, 0, None);
214+
let _ = iter.count();
215+
}
216+
let with_prefix_time = start.elapsed();
217+
let avg_with_prefix = with_prefix_time.as_nanos() / 100;
218+
219+
println!(
220+
" With prefix extractor: {:>8} ns/query",
221+
avg_with_prefix
222+
);
223+
224+
if avg_with_prefix > avg_no_prefix {
225+
let overhead = avg_with_prefix - avg_no_prefix;
226+
println!(
227+
" Overhead: {} ns ({:.1}%)",
228+
overhead,
229+
(overhead as f64 / avg_no_prefix as f64) * 100.0
230+
);
231+
} else {
232+
let savings = avg_no_prefix - avg_with_prefix;
233+
println!(
234+
" Savings: {} ns ({:.1}%)",
235+
savings,
236+
(savings as f64 / avg_no_prefix as f64) * 100.0
237+
);
238+
}
239+
240+
// Check CPU cost per segment
241+
if segment_count > 0 {
242+
let per_segment_overhead = if avg_with_prefix > avg_no_prefix {
243+
(avg_with_prefix - avg_no_prefix) / segment_count as u128
244+
} else {
245+
0
246+
};
247+
println!(" Per-segment overhead: ~{} ns", per_segment_overhead);
248+
}
249+
}
250+
251+
println!("\n=== Summary ===");
252+
println!("MAX_UPFRONT_CHECKS optimization limits overhead to checking at most 10 segments.");
253+
println!(
254+
"For runs with >10 segments, remaining segments are filtered lazily during iteration.\n"
255+
);
256+
}
257+
258+
/// Runs the Criterion range-query benchmarks and then the manual timing report.
///
/// `c` is forwarded to `benchmark_range_query`; `run_timing_benchmark` takes no
/// arguments and prints its results itself.
fn benchmark_all(c: &mut Criterion) {
259+
// Run standard benchmarks
260+
benchmark_range_query(c);
261+
262+
// Run the detailed timing comparison
263+
run_timing_benchmark();
264+
}
265+
266+
// Only `benchmark_range_query` is registered in the `benches` group; the other
// benchmark functions in this file are not listed here.
criterion_group!(benches, benchmark_range_query);
267+
// Expands to the benchmark binary's entry point for the `benches` group.
criterion_main!(benches);

src/abstract.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ pub trait AbstractTree {
8585
/// Will return `Err` if an IO error occurs.
8686
fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
8787

88-
/// Gets the memory usage of all pinned bloom filters in the tree.
89-
fn pinned_bloom_filter_size(&self) -> usize;
88+
/// Gets the memory usage of all pinned filters in the tree.
89+
fn pinned_filter_size(&self) -> usize;
9090

9191
/// Gets the memory usage of all pinned index blocks in the tree.
9292
fn pinned_block_index_size(&self) -> usize;

src/blob_tree/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -395,8 +395,8 @@ impl AbstractTree for BlobTree {
395395
}))
396396
}
397397

398-
fn pinned_bloom_filter_size(&self) -> usize {
399-
self.index.pinned_bloom_filter_size()
398+
fn pinned_filter_size(&self) -> usize {
399+
self.index.pinned_filter_size()
400400
}
401401

402402
fn pinned_block_index_size(&self) -> usize {

src/compaction/worker.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,7 @@ fn merge_segments(
364364
opts.tree_id,
365365
opts.config.cache.clone(),
366366
opts.config.descriptor_table.clone(),
367+
opts.config.prefix_extractor.clone(),
367368
payload.dest_level <= 1, // TODO: look at configuration
368369
payload.dest_level <= 2, // TODO: look at configuration
369370
#[cfg(feature = "metrics")]

src/config.rs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
// This source code is licensed under both the Apache 2.0 and MIT License
33
// (found in the LICENSE-* files in the repository)
44

5-
use crate::{path::absolute_path, BlobTree, Cache, CompressionType, DescriptorTable, Tree};
5+
use crate::{
6+
path::absolute_path, prefix::SharedPrefixExtractor, BlobTree, Cache, CompressionType,
7+
DescriptorTable, Tree,
8+
};
69
use std::{
710
path::{Path, PathBuf},
811
sync::Arc,
@@ -94,6 +97,10 @@ pub struct Config {
9497
/// Descriptor table to use
9598
#[doc(hidden)]
9699
pub descriptor_table: Arc<DescriptorTable>,
100+
101+
/// Prefix extractor for filters
102+
#[doc(hidden)]
103+
pub prefix_extractor: Option<SharedPrefixExtractor>,
97104
}
98105

99106
impl Default for Config {
@@ -115,6 +122,7 @@ impl Default for Config {
115122
compression: CompressionType::None,
116123
blob_compression: CompressionType::None,
117124
bloom_bits_per_key: 10,
125+
prefix_extractor: None,
118126

119127
blob_file_target_size: /* 64 MiB */ 64 * 1_024 * 1_024,
120128
blob_file_separation_threshold: /* 4 KiB */ 4 * 1_024,
@@ -312,6 +320,30 @@ impl Config {
312320
self
313321
}
314322

323+
/// Sets the prefix extractor for filters.
324+
///
325+
/// A prefix extractor allows filters to index prefixes of keys
326+
/// instead of (or in addition to) the full keys. This enables efficient
327+
/// filtering for prefix-based queries.
328+
///
329+
/// # Example
330+
///
331+
/// ```
332+
/// # use lsm_tree::Config;
333+
/// use lsm_tree::prefix::FixedPrefixExtractor;
334+
/// use std::sync::Arc;
335+
///
336+
/// # let path = tempfile::tempdir()?;
337+
/// let config = Config::new(path)
338+
/// .prefix_extractor(Arc::new(FixedPrefixExtractor::new(8)));
339+
/// # Ok::<(), Box<dyn std::error::Error>>(())
340+
/// ```
341+
#[must_use]
342+
pub fn prefix_extractor(mut self, extractor: SharedPrefixExtractor) -> Self {
343+
self.prefix_extractor = Some(extractor);
344+
self
345+
}
346+
315347
/// Opens a tree using the config.
316348
///
317349
/// # Errors

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,9 @@ pub mod mvcc_stream;
182182

183183
mod path;
184184

185+
/// Prefix extraction for filters
186+
pub mod prefix;
187+
185188
#[doc(hidden)]
186189
pub mod range;
187190

0 commit comments

Comments
 (0)