From 864fb84300e369e00aae22af2f12da2b6ca5b48b Mon Sep 17 00:00:00 2001 From: zaidoon Date: Sun, 28 Sep 2025 22:30:24 -0400 Subject: [PATCH 1/4] add weak tombstone tracking and reclaimable value metrics --- src/abstract.rs | 6 ++++ src/blob_tree/mod.rs | 8 ++++++ src/compaction/leveled.rs | 4 +++ src/segment/meta.rs | 6 ++++ src/segment/mod.rs | 14 ++++++++++ src/segment/writer/meta.rs | 8 ++++++ src/segment/writer/mod.rs | 47 ++++++++++++++++++++++++++------ src/tree/mod.rs | 24 ++++++++++++++++ tests/segment_weak_tombstones.rs | 38 ++++++++++++++++++++++++++ 9 files changed, 146 insertions(+), 9 deletions(-) create mode 100644 tests/segment_weak_tombstones.rs diff --git a/src/abstract.rs b/src/abstract.rs index 1954e657..c735f454 100644 --- a/src/abstract.rs +++ b/src/abstract.rs @@ -74,6 +74,12 @@ pub trait AbstractTree { /// Returns the approximate number of tombstones in the tree. fn tombstone_count(&self) -> u64; + /// Returns the approximate number of weak tombstones (single deletes) in the tree. + fn weak_tombstone_count(&self) -> u64; + + /// Returns the approximate number of values reclaimable once weak tombstones can be GC'd. + fn weak_tombstone_reclaimable_count(&self) -> u64; + // TODO: clear() with Nuke compaction strategy (write lock) -> drop_range(..) /// Drops segments that are fully contained in a given range. diff --git a/src/blob_tree/mod.rs b/src/blob_tree/mod.rs index 8c0db395..9a417895 100644 --- a/src/blob_tree/mod.rs +++ b/src/blob_tree/mod.rs @@ -432,6 +432,14 @@ impl AbstractTree for BlobTree { self.index.tombstone_count() } + fn weak_tombstone_count(&self) -> u64 { + self.index.weak_tombstone_count() + } + + fn weak_tombstone_reclaimable_count(&self) -> u64 { + self.index.weak_tombstone_reclaimable_count() + } + fn drop_range, R: RangeBounds>(&self, range: R) -> crate::Result<()> { self.index.drop_range(range) } diff --git a/src/compaction/leveled.rs b/src/compaction/leveled.rs index ae50df5a..640a65ed 100644 --- a/src/compaction/leveled.rs +++ b/src/compaction/leveled.rs @@ -239,6 +239,10 @@ impl CompactionStrategy for Strategy { let mut scores = [(/* score */ 0.0, /* overshoot */ 0u64); 7]; { + // TODO(weak-tombstone-rewrite): incorporate `Segment::weak_tombstone_count` and + // `Segment::weak_tombstone_reclaimable` when computing level scores so rewrite + // decisions can prioritize segments that would free the most reclaimable values. + // Score first level // NOTE: We always have at least one level diff --git a/src/segment/meta.rs b/src/segment/meta.rs index 4163c3a2..25851db5 100644 --- a/src/segment/meta.rs +++ b/src/segment/meta.rs @@ -44,6 +44,8 @@ pub struct ParsedMeta { pub file_size: u64, pub item_count: u64, pub tombstone_count: u64, + pub weak_tombstone_count: u64, + pub weak_tombstone_reclaimable: u64, pub data_block_compression: CompressionType, pub index_block_compression: CompressionType, @@ -125,6 +127,8 @@ impl ParsedMeta { let data_block_count = read_u64!(block, b"#data_block_count"); let index_block_count = read_u64!(block, b"#index_block_count"); let file_size = read_u64!(block, b"#size"); // TODO: rename file_size + let weak_tombstone_count = read_u64!(block, b"#weak_tombstone_count"); + let weak_tombstone_reclaimable = read_u64!(block, b"#weak_tombstone_reclaimable"); let created_at = { let bytes = block @@ -196,6 +200,8 @@ impl ParsedMeta { file_size, item_count, tombstone_count, + weak_tombstone_count, + weak_tombstone_reclaimable, data_block_compression, index_block_compression, }) diff --git a/src/segment/mod.rs b/src/segment/mod.rs index 361e9127..aeb02639 100644 --- a/src/segment/mod.rs +++ b/src/segment/mod.rs @@ -505,6 +505,20 @@ impl Segment { self.metadata.tombstone_count } + /// Returns the number of weak (single delete) tombstones in the `Segment`. + #[must_use] + #[doc(hidden)] + pub fn weak_tombstone_count(&self) -> u64 { + self.metadata.weak_tombstone_count + } + + /// Returns the number of value entries reclaimable once weak tombstones can be GC'd. + #[must_use] + #[doc(hidden)] + pub fn weak_tombstone_reclaimable(&self) -> u64 { + self.metadata.weak_tombstone_reclaimable + } + /// Returns the ratio of tombstone markers in the `Segment`. #[must_use] #[doc(hidden)] diff --git a/src/segment/writer/meta.rs b/src/segment/writer/meta.rs index 11ac694b..4e616526 100644 --- a/src/segment/writer/meta.rs +++ b/src/segment/writer/meta.rs @@ -14,6 +14,12 @@ pub struct Metadata { /// Tombstone count pub tombstone_count: usize, + /// Weak tombstone (single delete) count + pub weak_tombstone_count: usize, + + /// Weak tombstone + value pairs that become reclaimable when GC watermark advances + pub weak_tombstone_reclaimable_count: usize, + // TODO: 3.0.0 - https://github.com/fjall-rs/lsm-tree/issues/101 /// Written key count (unique keys) pub key_count: usize, @@ -44,6 +50,8 @@ impl Default for Metadata { item_count: 0, tombstone_count: 0, + weak_tombstone_count: 0, + weak_tombstone_reclaimable_count: 0, key_count: 0, file_pos: BlockOffset(0), uncompressed_size: 0, diff --git a/src/segment/writer/mod.rs b/src/segment/writer/mod.rs index 68fb420b..2fbaff51 100644 --- a/src/segment/writer/mod.rs +++ b/src/segment/writer/mod.rs @@ -7,7 +7,7 @@ use super::{ }; use crate::{ coding::Encode, file::fsync_directory, segment::filter::standard_bloom::Builder, - time::unix_timestamp, CompressionType, InternalValue, SegmentId, UserKey, + time::unix_timestamp, CompressionType, InternalValue, SegmentId, UserKey, ValueType, }; use index::{BlockIndexWriter, FullIndexWriter}; use std::{fs::File, io::BufWriter, path::PathBuf}; @@ -61,6 +61,9 @@ pub struct Writer { /// /// using enhanced double hashing, so we got two u64s pub bloom_hash_buffer: Vec, + + /// Tracks the previously written item to detect weak tombstone/value pairs + previous_item: Option<(UserKey, ValueType)>, } impl Writer { @@ -103,6 +106,8 @@ impl Writer { bloom_policy: BloomConstructionPolicy::default(), bloom_hash_buffer: Vec::new(), + + previous_item: None, }) } @@ -171,33 +176,49 @@ impl Writer { /// sorted as described by the [`UserKey`], otherwise the block layout will /// be non-sense. pub fn write(&mut self, item: InternalValue) -> crate::Result<()> { + let value_type = item.key.value_type; + let seqno = item.key.seqno; + let user_key = item.key.user_key.clone(); + let value_len = item.value.len(); + if item.is_tombstone() { self.meta.tombstone_count += 1; } + if value_type == ValueType::WeakTombstone { + self.meta.weak_tombstone_count += 1; + } + + if value_type == ValueType::Value { + if let Some((prev_key, prev_type)) = &self.previous_item { + if prev_type == &ValueType::WeakTombstone && prev_key.as_ref() == user_key.as_ref() + { + self.meta.weak_tombstone_reclaimable_count += 1; + } + } + } + // NOTE: Check if we visit a new key - if Some(&item.key.user_key) != self.current_key.as_ref() { + if Some(&user_key) != self.current_key.as_ref() { self.meta.key_count += 1; - self.current_key = Some(item.key.user_key.clone()); + self.current_key = Some(user_key.clone()); // IMPORTANT: Do not buffer *every* item's key // because there may be multiple versions // of the same key if self.bloom_policy.is_active() { - self.bloom_hash_buffer - .push(Builder::get_hash(&item.key.user_key)); + self.bloom_hash_buffer.push(Builder::get_hash(&user_key)); } } - let seqno = item.key.seqno; - if self.meta.first_key.is_none() { - self.meta.first_key = Some(item.key.user_key.clone()); + self.meta.first_key = Some(user_key.clone()); } - self.chunk_size += item.key.user_key.len() + item.value.len(); + self.chunk_size += user_key.len() + value_len; self.chunk.push(item); + self.previous_item = Some((user_key, value_type)); if self.chunk_size >= self.data_block_size as usize { self.spill_block()?; @@ -403,6 +424,14 @@ impl Writer { "#user_data_size", &self.meta.uncompressed_size.to_le_bytes(), ), + meta( + "#weak_tombstone_count", + &(self.meta.weak_tombstone_count as u64).to_le_bytes(), + ), + meta( + "#weak_tombstone_reclaimable", + &(self.meta.weak_tombstone_reclaimable_count as u64).to_le_bytes(), + ), meta("v#lsmt", env!("CARGO_PKG_VERSION").as_bytes()), meta("v#table_version", &[3u8]), // TODO: tli_handle_count diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 6b01ad5c..436be3e4 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -120,6 +120,30 @@ impl AbstractTree for Tree { .sum() } + /// Returns the number of weak tombstones (single deletes) in the tree. + #[must_use] + fn weak_tombstone_count(&self) -> u64 { + self.manifest + .read() + .expect("lock is poisoned") + .current_version() + .iter_segments() + .map(Segment::weak_tombstone_count) + .sum() + } + + /// Returns the number of value entries that become reclaimable once weak tombstones can be GC'd. + #[must_use] + fn weak_tombstone_reclaimable_count(&self) -> u64 { + self.manifest + .read() + .expect("lock is poisoned") + .current_version() + .iter_segments() + .map(Segment::weak_tombstone_reclaimable) + .sum() + } + fn ingest( &self, iter: impl Iterator, diff --git a/tests/segment_weak_tombstones.rs b/tests/segment_weak_tombstones.rs new file mode 100644 index 00000000..6948cccf --- /dev/null +++ b/tests/segment_weak_tombstones.rs @@ -0,0 +1,38 @@ +use lsm_tree::{AbstractTree, Config}; + +#[test] +fn weak_tombstone_counts_single_pair() -> lsm_tree::Result<()> { + let folder = tempfile::tempdir()?; + let tree = Config::new(folder.path()).open()?; + + tree.insert(b"a", b"old", 1); + tree.remove_weak(b"a", 2); + tree.flush_active_memtable(0)?; + + assert_eq!(tree.weak_tombstone_count(), 1); + assert_eq!(tree.weak_tombstone_reclaimable_count(), 1); + + Ok(()) +} + +#[test] +fn weak_tombstone_counts_multiple_keys() -> lsm_tree::Result<()> { + let folder = tempfile::tempdir()?; + let tree = Config::new(folder.path()).open()?; + + tree.insert(b"a", b"old", 10); + tree.remove_weak(b"a", 11); + + tree.remove_weak(b"b", 12); + + tree.insert(b"c", b"old", 13); + tree.insert(b"c", b"new", 14); + tree.remove_weak(b"c", 15); + + tree.flush_active_memtable(0)?; + + assert_eq!(tree.weak_tombstone_count(), 3); + assert_eq!(tree.weak_tombstone_reclaimable_count(), 2); + + Ok(()) +} From 3d606aa2ae44db0392336d86b78a664f62dc4b25 Mon Sep 17 00:00:00 2001 From: Marvin <33938500+marvin-j97@users.noreply.github.com> Date: Sat, 4 Oct 2025 22:17:39 +0200 Subject: [PATCH 2/4] Update segment_weak_tombstones.rs --- tests/segment_weak_tombstones.rs | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/segment_weak_tombstones.rs b/tests/segment_weak_tombstones.rs index 6948cccf..130b68e3 100644 --- a/tests/segment_weak_tombstones.rs +++ b/tests/segment_weak_tombstones.rs @@ -36,3 +36,22 @@ fn weak_tombstone_counts_multiple_keys() -> lsm_tree::Result<()> { Ok(()) } + +#[test] +fn weak_tombstone_counts_multiple_weak() -> lsm_tree::Result<()> { + let folder = tempfile::tempdir()?; + let tree = Config::new(folder.path()).open()?; + + tree.insert(b"a", b"old", 10); + tree.remove_weak(b"a", 11); + tree.remove_weak(b"a", 12); + tree.remove_weak(b"a", 13); + tree.remove_weak(b"a", 14); + + tree.flush_active_memtable(0)?; + + assert_eq!(tree.weak_tombstone_count(), 4); + assert_eq!(tree.weak_tombstone_reclaimable_count(), 1); // a:10 is paired with a:11 + + Ok(()) +} From 4ea2daf30aed46e665c2f4f87af103bf5de8f840 Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Sun, 12 Oct 2025 16:29:15 +0200 Subject: [PATCH 3/4] fmt --- src/segment/writer/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/segment/writer/mod.rs b/src/segment/writer/mod.rs index 01d2648e..479808fb 100644 --- a/src/segment/writer/mod.rs +++ b/src/segment/writer/mod.rs @@ -7,8 +7,8 @@ use super::{ }; use crate::{ coding::Encode, file::fsync_directory, segment::filter::standard_bloom::Builder, - time::unix_timestamp, CompressionType, InternalValue, SegmentId, UserKey, ValueType, - vlog::BlobFileId, + time::unix_timestamp, vlog::BlobFileId, CompressionType, InternalValue, SegmentId, UserKey, + ValueType, }; use index::{BlockIndexWriter, FullIndexWriter}; use std::{fs::File, io::BufWriter, path::PathBuf}; From dea3753e4dff9036936689944e643da923825f9e Mon Sep 17 00:00:00 2001 From: marvin-j97 Date: Sun, 12 Oct 2025 16:33:12 +0200 Subject: [PATCH 4/4] fix --- src/tree/mod.rs | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/tree/mod.rs b/src/tree/mod.rs index 8e85fc43..48572030 100644 --- a/src/tree/mod.rs +++ b/src/tree/mod.rs @@ -161,7 +161,7 @@ impl AbstractTree for Tree { ) } - // TODO: doctest + /// Returns the number of tombstones in the tree. fn tombstone_count(&self) -> u64 { self.current_version() .iter_segments() @@ -170,24 +170,16 @@ impl AbstractTree for Tree { } /// Returns the number of weak tombstones (single deletes) in the tree. - #[must_use] fn weak_tombstone_count(&self) -> u64 { - self.manifest - .read() - .expect("lock is poisoned") - .current_version() + self.current_version() .iter_segments() .map(Segment::weak_tombstone_count) .sum() } /// Returns the number of value entries that become reclaimable once weak tombstones can be GC'd. - #[must_use] fn weak_tombstone_reclaimable_count(&self) -> u64 { - self.manifest - .read() - .expect("lock is poisoned") - .current_version() + self.current_version() .iter_segments() .map(Segment::weak_tombstone_reclaimable) .sum()