Skip to content

Commit b09d983

Browse files
committed
Add prefix bloom filter support
1 parent 1486943 commit b09d983

File tree

21 files changed

+3499
-69
lines changed

21 files changed

+3499
-69
lines changed

benches/run_reader.rs

Lines changed: 267 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,267 @@
1+
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
2+
use lsm_tree::prefix::FixedPrefixExtractor;
3+
use lsm_tree::{AbstractTree, Config};
4+
use std::sync::Arc;
5+
use std::time::Instant;
6+
use tempfile::TempDir;
7+
8+
fn create_tree_with_segments(
9+
segment_count: usize,
10+
with_prefix_extractor: bool,
11+
) -> (TempDir, lsm_tree::Tree) {
12+
let tempdir = tempfile::tempdir().unwrap();
13+
14+
let mut config = Config::new(&tempdir);
15+
if with_prefix_extractor {
16+
config = config.prefix_extractor(Arc::new(FixedPrefixExtractor::new(8)));
17+
}
18+
19+
let tree = config.open().unwrap();
20+
21+
// Create segments with distinct prefixes
22+
for segment_idx in 0..segment_count {
23+
let prefix = format!("seg{:04}", segment_idx);
24+
25+
// Add 100 keys per segment
26+
for key_idx in 0..100 {
27+
let key = format!("{}_{:04}", prefix, key_idx);
28+
tree.insert(key.as_bytes(), vec![0u8; 100], 0);
29+
}
30+
31+
// Flush to create a segment
32+
tree.flush_active_memtable(0).unwrap();
33+
}
34+
35+
(tempdir, tree)
36+
}
37+
38+
fn benchmark_range_query(c: &mut Criterion) {
39+
let mut group = c.benchmark_group("range_query");
40+
41+
// Test different segment counts
42+
for segment_count in [10, 100, 500, 1000] {
43+
// Benchmark without prefix extractor
44+
group.bench_with_input(
45+
BenchmarkId::new("no_prefix", segment_count),
46+
&segment_count,
47+
|b, &count| {
48+
let (_tempdir, tree) = create_tree_with_segments(count, false);
49+
50+
b.iter(|| {
51+
// Query for a range that doesn't exist
52+
let start: &[u8] = b"zzz_0000";
53+
let end: &[u8] = b"zzz_9999";
54+
let iter = tree.range(start..=end, 0, None);
55+
// Force evaluation by counting
56+
let count = iter.count();
57+
black_box(count);
58+
});
59+
},
60+
);
61+
62+
// Benchmark with prefix extractor
63+
group.bench_with_input(
64+
BenchmarkId::new("with_prefix", segment_count),
65+
&segment_count,
66+
|b, &count| {
67+
let (_tempdir, tree) = create_tree_with_segments(count, true);
68+
69+
b.iter(|| {
70+
// Query for a range that doesn't exist (will check filters)
71+
let start: &[u8] = b"zzz_0000";
72+
let end: &[u8] = b"zzz_9999";
73+
let iter = tree.range(start..=end, 0, None);
74+
// Force evaluation by counting
75+
let count = iter.count();
76+
black_box(count);
77+
});
78+
},
79+
);
80+
81+
// Benchmark with prefix extractor - existing prefix
82+
group.bench_with_input(
83+
BenchmarkId::new("with_prefix_exists", segment_count),
84+
&segment_count,
85+
|b, &count| {
86+
let (_tempdir, tree) = create_tree_with_segments(count, true);
87+
88+
b.iter(|| {
89+
// Query for a range that exists in the middle
90+
let mid = count / 2;
91+
let prefix = format!("seg{:04}", mid);
92+
let start_str = format!("{}_0000", prefix);
93+
let end_str = format!("{}_0099", prefix);
94+
let start: &[u8] = start_str.as_bytes();
95+
let end: &[u8] = end_str.as_bytes();
96+
let iter = tree.range(start..=end, 0, None);
97+
// Force evaluation by counting
98+
let count = iter.count();
99+
black_box(count);
100+
});
101+
},
102+
);
103+
}
104+
105+
group.finish();
106+
}
107+
108+
fn benchmark_timing_comparison(_c: &mut Criterion) {
109+
println!("\n=== RunReader Performance Benchmark ===");
110+
println!("Testing impact of prefix filter checks on large runs\n");
111+
112+
for segment_count in [100, 500, 1000] {
113+
println!("\n--- Testing with {} segments ---", segment_count);
114+
115+
// Test without prefix extractor
116+
let (_tempdir_no_prefix, tree_no_prefix) = create_tree_with_segments(segment_count, false);
117+
118+
let start = Instant::now();
119+
for _ in 0..100 {
120+
let start_key: &[u8] = b"zzz_0000";
121+
let end_key: &[u8] = b"zzz_9999";
122+
let iter = tree_no_prefix.range(start_key..=end_key, 0, None);
123+
let _ = iter.count();
124+
}
125+
let no_prefix_time = start.elapsed();
126+
let avg_no_prefix = no_prefix_time.as_nanos() / 100;
127+
128+
println!(" Without prefix extractor: {:>8} ns/query", avg_no_prefix);
129+
130+
// Test with prefix extractor
131+
let (_tempdir_with_prefix, tree_with_prefix) =
132+
create_tree_with_segments(segment_count, true);
133+
134+
let start = Instant::now();
135+
for _ in 0..100 {
136+
let start_key: &[u8] = b"zzz_0000";
137+
let end_key: &[u8] = b"zzz_9999";
138+
let iter = tree_with_prefix.range(start_key..=end_key, 0, None);
139+
let _ = iter.count();
140+
}
141+
let with_prefix_time = start.elapsed();
142+
let avg_with_prefix = with_prefix_time.as_nanos() / 100;
143+
144+
println!(
145+
" With prefix extractor: {:>8} ns/query",
146+
avg_with_prefix
147+
);
148+
149+
if avg_with_prefix > avg_no_prefix {
150+
let overhead = avg_with_prefix - avg_no_prefix;
151+
println!(
152+
" Overhead: {} ns ({:.1}%)",
153+
overhead,
154+
(overhead as f64 / avg_no_prefix as f64) * 100.0
155+
);
156+
} else {
157+
let savings = avg_no_prefix - avg_with_prefix;
158+
println!(
159+
" Savings: {} ns ({:.1}%)",
160+
savings,
161+
(savings as f64 / avg_no_prefix as f64) * 100.0
162+
);
163+
}
164+
165+
// Check CPU cost per segment
166+
if segment_count > 0 {
167+
let per_segment_overhead = if avg_with_prefix > avg_no_prefix {
168+
(avg_with_prefix - avg_no_prefix) / segment_count as u128
169+
} else {
170+
0
171+
};
172+
println!(" Per-segment overhead: ~{} ns", per_segment_overhead);
173+
}
174+
}
175+
176+
println!("\n=== Summary ===");
177+
println!("MAX_UPFRONT_CHECKS optimization limits overhead to checking at most 10 segments.");
178+
println!(
179+
"For runs with >10 segments, remaining segments are filtered lazily during iteration.\n"
180+
);
181+
}
182+
183+
fn run_timing_benchmark() {
184+
println!("\n=== RunReader Performance Benchmark ===");
185+
println!("Testing impact of prefix filter checks on large runs\n");
186+
187+
for segment_count in [100, 500, 1000] {
188+
println!("\n--- Testing with {} segments ---", segment_count);
189+
190+
// Test without prefix extractor
191+
let (_tempdir_no_prefix, tree_no_prefix) = create_tree_with_segments(segment_count, false);
192+
193+
let start = Instant::now();
194+
for _ in 0..100 {
195+
let start_key: &[u8] = b"zzz_0000";
196+
let end_key: &[u8] = b"zzz_9999";
197+
let iter = tree_no_prefix.range(start_key..=end_key, 0, None);
198+
let _ = iter.count();
199+
}
200+
let no_prefix_time = start.elapsed();
201+
let avg_no_prefix = no_prefix_time.as_nanos() / 100;
202+
203+
println!(" Without prefix extractor: {:>8} ns/query", avg_no_prefix);
204+
205+
// Test with prefix extractor
206+
let (_tempdir_with_prefix, tree_with_prefix) =
207+
create_tree_with_segments(segment_count, true);
208+
209+
let start = Instant::now();
210+
for _ in 0..100 {
211+
let start_key: &[u8] = b"zzz_0000";
212+
let end_key: &[u8] = b"zzz_9999";
213+
let iter = tree_with_prefix.range(start_key..=end_key, 0, None);
214+
let _ = iter.count();
215+
}
216+
let with_prefix_time = start.elapsed();
217+
let avg_with_prefix = with_prefix_time.as_nanos() / 100;
218+
219+
println!(
220+
" With prefix extractor: {:>8} ns/query",
221+
avg_with_prefix
222+
);
223+
224+
if avg_with_prefix > avg_no_prefix {
225+
let overhead = avg_with_prefix - avg_no_prefix;
226+
println!(
227+
" Overhead: {} ns ({:.1}%)",
228+
overhead,
229+
(overhead as f64 / avg_no_prefix as f64) * 100.0
230+
);
231+
} else {
232+
let savings = avg_no_prefix - avg_with_prefix;
233+
println!(
234+
" Savings: {} ns ({:.1}%)",
235+
savings,
236+
(savings as f64 / avg_no_prefix as f64) * 100.0
237+
);
238+
}
239+
240+
// Check CPU cost per segment
241+
if segment_count > 0 {
242+
let per_segment_overhead = if avg_with_prefix > avg_no_prefix {
243+
(avg_with_prefix - avg_no_prefix) / segment_count as u128
244+
} else {
245+
0
246+
};
247+
println!(" Per-segment overhead: ~{} ns", per_segment_overhead);
248+
}
249+
}
250+
251+
println!("\n=== Summary ===");
252+
println!("MAX_UPFRONT_CHECKS optimization limits overhead to checking at most 10 segments.");
253+
println!(
254+
"For runs with >10 segments, remaining segments are filtered lazily during iteration.\n"
255+
);
256+
}
257+
258+
fn benchmark_all(c: &mut Criterion) {
259+
// Run standard benchmarks
260+
benchmark_range_query(c);
261+
262+
// Run the detailed timing comparison
263+
run_timing_benchmark();
264+
}
265+
266+
criterion_group!(benches, benchmark_range_query);
267+
criterion_main!(benches);

examples/bench_run_reader.rs

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
use lsm_tree::prefix::FixedPrefixExtractor;
2+
use lsm_tree::{AbstractTree, Config};
3+
use std::sync::Arc;
4+
use std::time::Instant;
5+
use tempfile::TempDir;
6+
7+
fn create_tree_with_segments(
8+
segment_count: usize,
9+
with_prefix_extractor: bool,
10+
) -> (TempDir, lsm_tree::Tree) {
11+
let tempdir = tempfile::tempdir().unwrap();
12+
13+
let mut config = Config::new(&tempdir);
14+
if with_prefix_extractor {
15+
config = config.prefix_extractor(Arc::new(FixedPrefixExtractor::new(8)));
16+
}
17+
18+
let tree = config.open().unwrap();
19+
20+
// Create segments with distinct prefixes
21+
for segment_idx in 0..segment_count {
22+
let prefix = format!("seg{:04}", segment_idx);
23+
24+
// Add 100 keys per segment
25+
for key_idx in 0..100 {
26+
let key = format!("{}_{:04}", prefix, key_idx);
27+
tree.insert(key.as_bytes(), vec![0u8; 100], 0);
28+
}
29+
30+
// Flush to create a segment
31+
tree.flush_active_memtable(0).unwrap();
32+
}
33+
34+
(tempdir, tree)
35+
}
36+
37+
fn main() {
38+
println!("\n=== RunReader Performance Benchmark ===");
39+
println!("Testing impact of prefix bloom filter checks on large runs\n");
40+
41+
for segment_count in [100, 500, 1000] {
42+
println!("\n--- Testing with {} segments ---", segment_count);
43+
44+
// Test without prefix extractor
45+
let (_tempdir_no_prefix, tree_no_prefix) = create_tree_with_segments(segment_count, false);
46+
47+
let start = Instant::now();
48+
for _ in 0..100 {
49+
let start_key: &[u8] = b"zzz_0000";
50+
let end_key: &[u8] = b"zzz_9999";
51+
let iter = tree_no_prefix.range(start_key..=end_key, 0, None);
52+
let _ = iter.count();
53+
}
54+
let no_prefix_time = start.elapsed();
55+
let avg_no_prefix = no_prefix_time.as_nanos() / 100;
56+
57+
println!(" Without prefix extractor: {:>8} ns/query", avg_no_prefix);
58+
59+
// Test with prefix extractor
60+
let (_tempdir_with_prefix, tree_with_prefix) =
61+
create_tree_with_segments(segment_count, true);
62+
63+
let start = Instant::now();
64+
for _ in 0..100 {
65+
let start_key: &[u8] = b"zzz_0000";
66+
let end_key: &[u8] = b"zzz_9999";
67+
let iter = tree_with_prefix.range(start_key..=end_key, 0, None);
68+
let _ = iter.count();
69+
}
70+
let with_prefix_time = start.elapsed();
71+
let avg_with_prefix = with_prefix_time.as_nanos() / 100;
72+
73+
println!(
74+
" With prefix extractor: {:>8} ns/query",
75+
avg_with_prefix
76+
);
77+
78+
if avg_with_prefix > avg_no_prefix {
79+
let overhead = avg_with_prefix - avg_no_prefix;
80+
println!(
81+
" Overhead: {} ns ({:.1}%)",
82+
overhead,
83+
(overhead as f64 / avg_no_prefix as f64) * 100.0
84+
);
85+
} else {
86+
let savings = avg_no_prefix - avg_with_prefix;
87+
println!(
88+
" Savings: {} ns ({:.1}%)",
89+
savings,
90+
(savings as f64 / avg_no_prefix as f64) * 100.0
91+
);
92+
}
93+
94+
// Check CPU cost per segment
95+
if segment_count > 0 {
96+
let per_segment_overhead = if avg_with_prefix > avg_no_prefix {
97+
(avg_with_prefix - avg_no_prefix) / segment_count as u128
98+
} else {
99+
0
100+
};
101+
println!(" Per-segment overhead: ~{} ns", per_segment_overhead);
102+
}
103+
}
104+
105+
println!("\n=== Summary ===");
106+
println!("MAX_UPFRONT_CHECKS optimization limits overhead to checking at most 10 segments.");
107+
println!(
108+
"For runs with >10 segments, remaining segments are filtered lazily during iteration.\n"
109+
);
110+
}

src/abstract.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ pub trait AbstractTree {
8585
/// Will return `Err` if an IO error occurs.
8686
fn major_compact(&self, target_size: u64, seqno_threshold: SeqNo) -> crate::Result<()>;
8787

88-
/// Gets the memory usage of all pinned bloom filters in the tree.
89-
fn pinned_bloom_filter_size(&self) -> usize;
88+
/// Gets the memory usage of all pinned filters in the tree.
89+
fn pinned_filter_size(&self) -> usize;
9090

9191
/// Gets the memory usage of all pinned index blocks in the tree.
9292
fn pinned_block_index_size(&self) -> usize;

0 commit comments

Comments
 (0)