Skip to content

Commit 18921a8

Browse files
committed
Add prefix bloom filter support
1 parent 1486943 commit 18921a8

File tree

17 files changed

+2881
-48
lines changed

17 files changed

+2881
-48
lines changed

src/compaction/worker.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -364,6 +364,7 @@ fn merge_segments(
364364
opts.tree_id,
365365
opts.config.cache.clone(),
366366
opts.config.descriptor_table.clone(),
367+
opts.config.prefix_extractor.clone(),
367368
payload.dest_level <= 1, // TODO: look at configuration
368369
payload.dest_level <= 2, // TODO: look at configuration
369370
#[cfg(feature = "metrics")]

src/config.rs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,10 @@
22
// This source code is licensed under both the Apache 2.0 and MIT License
33
// (found in the LICENSE-* files in the repository)
44

5-
use crate::{path::absolute_path, BlobTree, Cache, CompressionType, DescriptorTable, Tree};
5+
use crate::{
6+
path::absolute_path, prefix::SharedPrefixExtractor, BlobTree, Cache, CompressionType,
7+
DescriptorTable, Tree,
8+
};
69
use std::{
710
path::{Path, PathBuf},
811
sync::Arc,
@@ -94,6 +97,10 @@ pub struct Config {
9497
/// Descriptor table to use
9598
#[doc(hidden)]
9699
pub descriptor_table: Arc<DescriptorTable>,
100+
101+
/// Prefix extractor for bloom filters
102+
#[doc(hidden)]
103+
pub prefix_extractor: Option<SharedPrefixExtractor>,
97104
}
98105

99106
impl Default for Config {
@@ -115,6 +122,7 @@ impl Default for Config {
115122
compression: CompressionType::None,
116123
blob_compression: CompressionType::None,
117124
bloom_bits_per_key: 10,
125+
prefix_extractor: None,
118126

119127
blob_file_target_size: /* 64 MiB */ 64 * 1_024 * 1_024,
120128
blob_file_separation_threshold: /* 4 KiB */ 4 * 1_024,
@@ -312,6 +320,30 @@ impl Config {
312320
self
313321
}
314322

323+
/// Sets the prefix extractor for bloom filters.
324+
///
325+
/// A prefix extractor allows bloom filters to index prefixes of keys
326+
/// instead of (or in addition to) the full keys. This enables efficient
327+
/// filtering for prefix-based queries.
328+
///
329+
/// # Example
330+
///
331+
/// ```
332+
/// # use lsm_tree::Config;
333+
/// use lsm_tree::prefix::FixedPrefixExtractor;
334+
/// use std::sync::Arc;
335+
///
336+
/// # let path = tempfile::tempdir()?;
337+
/// let config = Config::new(path)
338+
/// .prefix_extractor(Arc::new(FixedPrefixExtractor::new(8)));
339+
/// # Ok::<(), Box<dyn std::error::Error>>(())
340+
/// ```
341+
#[must_use]
342+
pub fn prefix_extractor(mut self, extractor: SharedPrefixExtractor) -> Self {
343+
self.prefix_extractor = Some(extractor);
344+
self
345+
}
346+
315347
/// Opens a tree using the config.
316348
///
317349
/// # Errors

src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,9 @@ pub mod mvcc_stream;
182182

183183
mod path;
184184

185+
/// Prefix extraction for bloom filters
186+
pub mod prefix;
187+
185188
#[doc(hidden)]
186189
pub mod range;
187190

src/metrics.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,14 @@ impl Metrics {
4545
let hits = self.bloom_filter_hits.load(Relaxed) as f64;
4646
hits / queries
4747
}
48+
49+
/// Number of bloom filter queries performed.
50+
pub fn bloom_filter_queries(&self) -> usize {
51+
self.bloom_filter_queries.load(Relaxed)
52+
}
53+
54+
/// Number of bloom filter hits (queries that avoided disk I/O).
55+
pub fn bloom_filter_hits(&self) -> usize {
56+
self.bloom_filter_hits.load(Relaxed)
57+
}
4858
}

src/multi_reader.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,9 @@ mod tests {
8989
let mut readers: VecDeque<_> = VecDeque::new();
9090

9191
for segment in &segments {
92-
readers.push_back(segment.iter());
92+
if let Some(iter) = segment.iter() {
93+
readers.push_back(iter);
94+
}
9395
}
9496

9597
let multi_reader = MultiReader::new(readers);
@@ -115,7 +117,9 @@ mod tests {
115117
let mut readers: VecDeque<_> = VecDeque::new();
116118

117119
for segment in &segments {
118-
readers.push_back(segment.iter());
120+
if let Some(iter) = segment.iter() {
121+
readers.push_back(iter);
122+
}
119123
}
120124

121125
let multi_reader = MultiReader::new(readers);
@@ -141,7 +145,9 @@ mod tests {
141145
let mut readers: VecDeque<_> = VecDeque::new();
142146

143147
for segment in &segments {
144-
readers.push_back(segment.iter());
148+
if let Some(iter) = segment.iter() {
149+
readers.push_back(iter);
150+
}
145151
}
146152

147153
let multi_reader = MultiReader::new(readers);

0 commit comments

Comments
 (0)