From d9d55e0e86cf22b04913909c1e9a89fbc44bb2e6 Mon Sep 17 00:00:00 2001 From: Maksym Arutyunyan Date: Tue, 8 Apr 2025 15:02:29 +0200 Subject: [PATCH 1/9] init key-addr-cache --- src/btreemap.rs | 2 + src/btreemap/cache.rs | 466 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 468 insertions(+) create mode 100644 src/btreemap/cache.rs diff --git a/src/btreemap.rs b/src/btreemap.rs index e72567e4..866fc40a 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -49,8 +49,10 @@ //! ---------------------------------------- //! ``` mod allocator; +mod cache; mod iter; mod node; + use crate::btreemap::iter::{IterInternal, KeysIter, ValuesIter}; use crate::{ storable::Bound as StorableBound, diff --git a/src/btreemap/cache.rs b/src/btreemap/cache.rs new file mode 100644 index 00000000..f99626e9 --- /dev/null +++ b/src/btreemap/cache.rs @@ -0,0 +1,466 @@ +use std::collections::BTreeMap; + +#[allow(non_upper_case_globals)] +const KiB: usize = 1024; +#[allow(non_upper_case_globals)] +const MiB: usize = 1024 * KiB; +#[allow(non_upper_case_globals)] +const GiB: usize = 1024 * MiB; + +const DEFAULT_CAPACITY: usize = 0; +const DEFAULT_SIZE_LIMIT: usize = 3 * GiB; + +pub trait ByteSize { + /// Returns the size (in bytes) of the value. + fn byte_size(&self) -> usize { + std::mem::size_of_val(self) + } +} + +/// Incrementing counter used for tracking the order of usage. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Default)] +struct Counter(u64); + +/// Cache with eviction policy that minimizes duplication of keys and values. +#[derive(Debug, Default, Clone)] +pub struct Cache +where + K: Clone + Ord + ByteSize, + V: Clone + ByteSize, +{ + key_to_value: BTreeMap, + capacity: usize, + /// Tracks the cumulative bytes for all entries (including duplicated key storage). + size: usize, + size_limit: usize, + counter: Counter, + lru_order: BTreeMap, + usage: BTreeMap, + stats: CacheStats, +} + +impl Cache +where + K: Clone + Ord + ByteSize, + V: Clone + ByteSize, +{ + /// Computes the total overhead of an entry: + /// - 3 * key_size (key_to_value, lru_order, usage) + /// - value_size (key_to_value) + /// - 2 * size_of(Counter) (for the LRU order and usage tracking) + fn entry_overhead(key_size: usize, value_size: usize) -> usize { + 3 * key_size + value_size + 2 * std::mem::size_of::() + } + + /// Creates a new cache with the given capacity. + pub fn new() -> Self { + Self { + key_to_value: BTreeMap::new(), + capacity: DEFAULT_CAPACITY, + size: 0, + size_limit: DEFAULT_SIZE_LIMIT, + counter: Counter(0), + lru_order: BTreeMap::new(), + usage: BTreeMap::new(), + stats: CacheStats::default(), + } + } + + /// Creates a new cache with the given capacity and size limit. + pub fn with_capacity(self, capacity: usize) -> Self { + let mut this = self.clone(); + this.capacity = capacity; + this + } + + /// Creates a new cache with the given size limit. + pub fn with_size_limit(self, size_limit: usize) -> Self { + let mut this = self.clone(); + this.size_limit = size_limit; + this + } + + /// Clears all entries and resets statistics. + pub fn clear(&mut self) { + self.key_to_value.clear(); + self.size = 0; + self.counter = Counter(0); + self.lru_order.clear(); + self.usage.clear(); + self.stats = CacheStats::default(); + } + + /// Retrieves the value for the given key (if any) and updates its LRU status. + pub fn get(&mut self, key: &K) -> Option { + if self.capacity == 0 || self.size_limit == 0 { + return None; + } + if let Some(value) = self.key_to_value.get(key).cloned() { + self.touch(key.clone()); + self.stats.hits += 1; + return Some(value); + } + self.stats.misses += 1; + None + } + + /// Inserts the given key and value. + /// If adding this entry would exceed the capacity or size limit, evicts LRU entries. + pub fn insert(&mut self, key: K, value: V) { + if self.capacity == 0 || self.size_limit == 0 { + return; + } + let (key_size, value_size) = (key.byte_size(), value.byte_size()); + let overhead = Self::entry_overhead(key_size, value_size); + while self.len() + 1 > self.capacity() || self.size + overhead > self.size_limit() { + self.evict_one(); + } + if self.key_to_value.insert(key.clone(), value).is_none() { + self.size = self.size.saturating_add(key_size + value_size); + } + self.touch(key); + } + + /// Removes the entry associated with the given key. + pub fn remove(&mut self, key: &K) { + if self.capacity == 0 || self.size_limit == 0 { + return; + } + if let Some(value) = self.key_to_value.remove(key) { + let (key_size, value_size) = (key.byte_size(), value.byte_size()); + let overhead = Self::entry_overhead(key_size, value_size); + self.size = self.size.saturating_sub(overhead); + } + if let Some(counter) = self.usage.remove(key) { + self.lru_order.remove(&counter); + } + } + + /// Returns the number of entries in the cache. + #[inline] + pub fn len(&self) -> usize { + self.key_to_value.len() + } + + /// Returns the total size in bytes of all entries (including duplicate key storage). + #[inline] + pub fn size(&self) -> usize { + self.size + } + + /// Returns the configured size limit (in bytes). + #[inline] + pub fn size_limit(&self) -> usize { + self.size_limit + } + + /// Sets a new size limit, evicting entries as necessary. + pub fn set_size_limit(&mut self, size_limit: usize) { + self.size_limit = size_limit; + if size_limit == 0 { + self.clear(); + } else { + while self.size() > size_limit { + self.evict_one(); + } + } + } + + /// Returns the cache capacity (number of entries). + #[inline] + pub fn capacity(&self) -> usize { + self.capacity + } + + /// Sets a new capacity, evicting entries as needed. + pub fn set_capacity(&mut self, capacity: usize) { + self.capacity = capacity; + if self.capacity == 0 { + self.clear(); + } else { + while self.len() > self.capacity { + self.evict_one(); + } + } + } + + /// Evicts a single entry using the LRU policy. + /// Returns the key that was evicted. + fn evict_one(&mut self) -> Option { + // Find the least-recently used entry. + if let Some((&old_counter, old_key)) = self.lru_order.iter().next() { + let old_key = old_key.clone(); + let key_size = old_key.byte_size(); + let overhead = Self::entry_overhead(key_size, 0); + self.size = self.size.saturating_sub(overhead); + if let Some(v) = self.key_to_value.remove(&old_key) { + self.size = self.size.saturating_sub(v.byte_size()); + } + self.lru_order.remove(&old_counter); + self.usage.remove(&old_key); + return Some(old_key); + } + None + } + + /// Updates the LRU order for the given key. + /// If the key is already in the LRU maps, its old counter is replaced. + /// For a new key, the overhead for the key (in usage and lru_order) is added. + fn touch(&mut self, key: K) { + self.counter.0 += 1; + let new_counter = self.counter; + let delta: usize = key.byte_size() + std::mem::size_of::(); + // Update usage: if key was present, remove its old LRU overhead. + if let Some(old_counter) = self.usage.insert(key.clone(), new_counter) { + if self.lru_order.remove(&old_counter).is_some() { + self.size = self.size.saturating_sub(delta); + } + } else { + // New key in usage. + self.size = self.size.saturating_add(delta); + } + // Insert into lru_order. If newly inserted, add the overhead. + if self.lru_order.insert(new_counter, key).is_none() { + self.size = self.size.saturating_add(delta); + } + } + + /// Returns the current cache statistics. + pub fn stats(&self) -> CacheStats { + self.stats + } + + /// Resets the cache statistics. + pub fn reset_stats(&mut self) { + self.stats = CacheStats::default(); + } +} + +/// Runtime statistics for the cache. +#[derive(Default, Debug, Copy, Clone)] +pub struct CacheStats { + hits: u64, + misses: u64, +} + +impl CacheStats { + #[inline] + pub fn hits(&self) -> u64 { + self.hits + } + + #[inline] + pub fn misses(&self) -> u64 { + self.misses + } + + #[inline] + pub fn total(&self) -> u64 { + self.hits + self.misses + } + + #[inline] + pub fn hit_ratio(&self) -> f64 { + self.hits as f64 / (self.hits + self.misses).max(1) as f64 + } +} + +impl Drop for Cache +where + K: Clone + Ord + ByteSize, + V: Clone + ByteSize, +{ + fn drop(&mut self) { + // crate::debug::print(&format!("ABC cache len : {}", self.len())); + // crate::debug::print(&format!("ABC cache size : {}", self.size())); + // crate::debug::print(&format!( + // "ABC cache hit ratio : {:>.1} %", + // self.stats().hit_ratio() * 100.0 + // )); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::types::Address; + use ic_principal::Principal; + use std::mem::size_of; + + impl ByteSize for Principal { + fn byte_size(&self) -> usize { + self.as_slice().len() + } + } + impl ByteSize for Address {} + + type KeyAddressCache = Cache; + + fn user(id: u8) -> Principal { + Principal::from_slice(&[id]) + } + + fn addr(id: u64) -> Address { + Address::from(id) + } + + /// Helper: returns the expected overhead (in bytes) for an entry with key type u32 and value type u64. + /// Calculation: 3 * size_of(u32) + size_of(u64) + fn entry_size() -> usize { + let key_size = Principal::anonymous().byte_size(); + let value_size = Address::from(0).byte_size(); + 3 * key_size + value_size + 2 * size_of::() + } + + #[test] + fn test_insert_and_get() { + let mut cache = KeyAddressCache::new().with_capacity(5); + cache.insert(user(1), addr(100)); + cache.insert(user(2), addr(200)); + + // Test that values can be retrieved. + assert_eq!(cache.get(&user(1)), Some(addr(100))); + assert_eq!(cache.get(&user(2)), Some(addr(200))); + + // Test stats: two successful gets. + let stats = cache.stats(); + assert_eq!(stats.hits(), 2); + assert_eq!(stats.misses(), 0); + } + + #[test] + fn test_miss() { + let mut cache = KeyAddressCache::new().with_capacity(5); + // Attempt to retrieve a key that was never inserted. + assert_eq!(cache.get(&user(1)), None); + + let stats = cache.stats(); + assert_eq!(stats.hits(), 0); + assert_eq!(stats.misses(), 1); + } + + #[test] + fn test_cache_size_tracking() { + // Allow at most two entries. + let mut cache = KeyAddressCache::new() + .with_capacity(5) + .with_size_limit(2 * entry_size()); + + // Insert first entry. + cache.insert(user(1), addr(100)); + assert_eq!(cache.size(), entry_size()); + assert_eq!(cache.get(&user(1)), Some(addr(100))); + + // Insert the same entry again should not change the overall size. + cache.insert(user(1), addr(100)); + assert_eq!(cache.size(), entry_size()); + assert_eq!(cache.get(&user(1)), Some(addr(100))); + + // Insert second entry. + cache.insert(user(2), addr(200)); + assert_eq!(cache.size(), 2 * entry_size()); + assert_eq!(cache.get(&user(1)), Some(addr(100))); + assert_eq!(cache.get(&user(2)), Some(addr(200))); + + // Inserting a third entry should trigger eviction (LRU policy) so that the size remains unchanged. + cache.insert(user(3), addr(300)); + assert_eq!(cache.size(), 2 * entry_size()); + // Expect the least-recently used entry (key 1) to be evicted. + assert_eq!(cache.get(&user(1)), None); + assert_eq!(cache.get(&user(2)), Some(addr(200))); + assert_eq!(cache.get(&user(3)), Some(addr(300))); + + // Remove an entry. + cache.remove(&user(2)); + assert_eq!(cache.size(), entry_size()); + cache.remove(&user(3)); + assert_eq!(cache.size(), 0); + } + + #[test] + fn test_eviction_by_capacity() { + let mut cache = KeyAddressCache::new().with_capacity(3); + cache.insert(user(1), addr(100)); + cache.insert(user(2), addr(200)); + cache.insert(user(3), addr(300)); + + // Inserting a fourth entry should evict the LRU entry. + cache.insert(user(4), addr(400)); + + // Expect key 1 to be evicted. + assert_eq!(cache.get(&user(1)), None); + // The other keys should be available. + assert_eq!(cache.get(&user(2)), Some(addr(200))); + assert_eq!(cache.get(&user(3)), Some(addr(300))); + assert_eq!(cache.get(&user(4)), Some(addr(400))); + } + + #[test] + fn test_eviction_by_size_limit() { + // Set a size limit to allow only two entries. + let mut cache = KeyAddressCache::new() + .with_capacity(10) + .with_size_limit(2 * entry_size()); + + cache.insert(user(1), addr(100)); + cache.insert(user(2), addr(200)); + + // Inserting another entry should trigger eviction due to the size limit. + cache.insert(user(3), addr(300)); + + // Expect that one entry is evicted (key 1, as the LRU). + assert_eq!(cache.get(&user(1)), None); + assert_eq!(cache.get(&user(2)), Some(addr(200))); + assert_eq!(cache.get(&user(3)), Some(addr(300))); + } + + #[test] + fn test_remove() { + let mut cache = KeyAddressCache::new().with_capacity(5); + cache.insert(user(1), addr(100)); + cache.insert(user(2), addr(200)); + + // Remove key 1. + cache.remove(&user(1)); + assert_eq!(cache.get(&user(1)), None); + // Removing a non-existent key should be safe. + cache.remove(&user(3)); + // Key 2 should still be retrievable. + assert_eq!(cache.get(&user(2)), Some(addr(200))); + } + + #[test] + fn test_clear() { + let mut cache = KeyAddressCache::new().with_capacity(5); + cache.insert(user(1), addr(100)); + cache.insert(user(2), addr(200)); + cache.insert(user(3), addr(300)); + + cache.clear(); + assert_eq!(cache.len(), 0); + assert_eq!(cache.get(&user(1)), None); + assert_eq!(cache.get(&user(2)), None); + assert_eq!(cache.get(&user(3)), None); + } + + #[test] + fn test_stats() { + let mut cache = KeyAddressCache::new().with_capacity(3); + // Initially, no hits or misses. + let stats = cache.stats(); + assert_eq!(stats.hits(), 0); + assert_eq!(stats.misses(), 0); + + cache.insert(user(1), addr(100)); + cache.insert(user(2), addr(200)); + + // One hit. + let _ = cache.get(&user(1)); + // One miss. + let _ = cache.get(&user(3)); + + let stats = cache.stats(); + assert_eq!(stats.hits(), 1); + assert_eq!(stats.misses(), 1); + } +} From 9edf0b2fbe24cc2ca666073af8d4ad2fa7a32536 Mon Sep 17 00:00:00 2001 From: Maksym Arutyunyan Date: Tue, 8 Apr 2025 15:27:46 +0200 Subject: [PATCH 2/9] remove size tracking --- src/btreemap.rs | 10 +++ src/btreemap/cache.rs | 178 ++++-------------------------------------- 2 files changed, 25 insertions(+), 163 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index 866fc40a..80e37fd7 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -60,9 +60,11 @@ use crate::{ Memory, Storable, }; use allocator::Allocator; +use cache::Cache; pub use iter::Iter; use node::{DerivedPageSize, Entry, Node, NodeType, PageSize, Version}; use std::borrow::Cow; +use std::cell::RefCell; use std::marker::PhantomData; use std::ops::{Bound, RangeBounds}; @@ -83,6 +85,8 @@ const DEFAULT_PAGE_SIZE: u32 = 1024; // A marker to indicate that the `PageSize` stored in the header is a `PageSize::Value`. const PAGE_SIZE_VALUE_MARKER: u32 = u32::MAX; +type KeyAddressCache = Cache; + /// A "stable" map based on a B-tree. /// /// The implementation is based on the algorithm outlined in "Introduction to Algorithms" @@ -107,6 +111,9 @@ where // A marker to communicate to the Rust compiler that we own these types. _phantom: PhantomData<(K, V)>, + + // A cache for storing recently accessed nodes. + key_address_cache: RefCell>, } #[derive(PartialEq, Debug)] @@ -216,6 +223,7 @@ where version: Version::V2(page_size), length: 0, _phantom: PhantomData, + key_address_cache: RefCell::new(KeyAddressCache::new()), }; btree.save_header(); @@ -243,6 +251,7 @@ where }), length: 0, _phantom: PhantomData, + key_address_cache: RefCell::new(KeyAddressCache::new()), }; btree.save_header(); @@ -292,6 +301,7 @@ where version, length: header.length, _phantom: PhantomData, + key_address_cache: RefCell::new(KeyAddressCache::new()), } } diff --git a/src/btreemap/cache.rs b/src/btreemap/cache.rs index f99626e9..33e0ba0e 100644 --- a/src/btreemap/cache.rs +++ b/src/btreemap/cache.rs @@ -8,14 +8,6 @@ const MiB: usize = 1024 * KiB; const GiB: usize = 1024 * MiB; const DEFAULT_CAPACITY: usize = 0; -const DEFAULT_SIZE_LIMIT: usize = 3 * GiB; - -pub trait ByteSize { - /// Returns the size (in bytes) of the value. - fn byte_size(&self) -> usize { - std::mem::size_of_val(self) - } -} /// Incrementing counter used for tracking the order of usage. #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Default)] @@ -25,14 +17,11 @@ struct Counter(u64); #[derive(Debug, Default, Clone)] pub struct Cache where - K: Clone + Ord + ByteSize, - V: Clone + ByteSize, + K: Clone + Ord, + V: Clone, { key_to_value: BTreeMap, capacity: usize, - /// Tracks the cumulative bytes for all entries (including duplicated key storage). - size: usize, - size_limit: usize, counter: Counter, lru_order: BTreeMap, usage: BTreeMap, @@ -41,24 +30,14 @@ where impl Cache where - K: Clone + Ord + ByteSize, - V: Clone + ByteSize, + K: Clone + Ord, + V: Clone, { - /// Computes the total overhead of an entry: - /// - 3 * key_size (key_to_value, lru_order, usage) - /// - value_size (key_to_value) - /// - 2 * size_of(Counter) (for the LRU order and usage tracking) - fn entry_overhead(key_size: usize, value_size: usize) -> usize { - 3 * key_size + value_size + 2 * std::mem::size_of::() - } - /// Creates a new cache with the given capacity. pub fn new() -> Self { Self { key_to_value: BTreeMap::new(), capacity: DEFAULT_CAPACITY, - size: 0, - size_limit: DEFAULT_SIZE_LIMIT, counter: Counter(0), lru_order: BTreeMap::new(), usage: BTreeMap::new(), @@ -73,17 +52,9 @@ where this } - /// Creates a new cache with the given size limit. - pub fn with_size_limit(self, size_limit: usize) -> Self { - let mut this = self.clone(); - this.size_limit = size_limit; - this - } - /// Clears all entries and resets statistics. pub fn clear(&mut self) { self.key_to_value.clear(); - self.size = 0; self.counter = Counter(0); self.lru_order.clear(); self.usage.clear(); @@ -92,7 +63,7 @@ where /// Retrieves the value for the given key (if any) and updates its LRU status. pub fn get(&mut self, key: &K) -> Option { - if self.capacity == 0 || self.size_limit == 0 { + if self.capacity == 0 { return None; } if let Some(value) = self.key_to_value.get(key).cloned() { @@ -107,30 +78,22 @@ where /// Inserts the given key and value. /// If adding this entry would exceed the capacity or size limit, evicts LRU entries. pub fn insert(&mut self, key: K, value: V) { - if self.capacity == 0 || self.size_limit == 0 { + if self.capacity == 0 { return; } - let (key_size, value_size) = (key.byte_size(), value.byte_size()); - let overhead = Self::entry_overhead(key_size, value_size); - while self.len() + 1 > self.capacity() || self.size + overhead > self.size_limit() { + while self.len() + 1 > self.capacity { self.evict_one(); } - if self.key_to_value.insert(key.clone(), value).is_none() { - self.size = self.size.saturating_add(key_size + value_size); - } + self.key_to_value.insert(key.clone(), value); self.touch(key); } /// Removes the entry associated with the given key. pub fn remove(&mut self, key: &K) { - if self.capacity == 0 || self.size_limit == 0 { + if self.capacity == 0 { return; } - if let Some(value) = self.key_to_value.remove(key) { - let (key_size, value_size) = (key.byte_size(), value.byte_size()); - let overhead = Self::entry_overhead(key_size, value_size); - self.size = self.size.saturating_sub(overhead); - } + self.key_to_value.remove(key); if let Some(counter) = self.usage.remove(key) { self.lru_order.remove(&counter); } @@ -142,30 +105,6 @@ where self.key_to_value.len() } - /// Returns the total size in bytes of all entries (including duplicate key storage). - #[inline] - pub fn size(&self) -> usize { - self.size - } - - /// Returns the configured size limit (in bytes). - #[inline] - pub fn size_limit(&self) -> usize { - self.size_limit - } - - /// Sets a new size limit, evicting entries as necessary. - pub fn set_size_limit(&mut self, size_limit: usize) { - self.size_limit = size_limit; - if size_limit == 0 { - self.clear(); - } else { - while self.size() > size_limit { - self.evict_one(); - } - } - } - /// Returns the cache capacity (number of entries). #[inline] pub fn capacity(&self) -> usize { @@ -190,12 +129,7 @@ where // Find the least-recently used entry. if let Some((&old_counter, old_key)) = self.lru_order.iter().next() { let old_key = old_key.clone(); - let key_size = old_key.byte_size(); - let overhead = Self::entry_overhead(key_size, 0); - self.size = self.size.saturating_sub(overhead); - if let Some(v) = self.key_to_value.remove(&old_key) { - self.size = self.size.saturating_sub(v.byte_size()); - } + self.key_to_value.remove(&old_key); self.lru_order.remove(&old_counter); self.usage.remove(&old_key); return Some(old_key); @@ -209,20 +143,10 @@ where fn touch(&mut self, key: K) { self.counter.0 += 1; let new_counter = self.counter; - let delta: usize = key.byte_size() + std::mem::size_of::(); - // Update usage: if key was present, remove its old LRU overhead. if let Some(old_counter) = self.usage.insert(key.clone(), new_counter) { - if self.lru_order.remove(&old_counter).is_some() { - self.size = self.size.saturating_sub(delta); - } - } else { - // New key in usage. - self.size = self.size.saturating_add(delta); - } - // Insert into lru_order. If newly inserted, add the overhead. - if self.lru_order.insert(new_counter, key).is_none() { - self.size = self.size.saturating_add(delta); + self.lru_order.remove(&old_counter); } + self.lru_order.insert(new_counter, key); } /// Returns the current cache statistics. @@ -267,8 +191,8 @@ impl CacheStats { impl Drop for Cache where - K: Clone + Ord + ByteSize, - V: Clone + ByteSize, + K: Clone + Ord, + V: Clone, { fn drop(&mut self) { // crate::debug::print(&format!("ABC cache len : {}", self.len())); @@ -287,13 +211,6 @@ mod tests { use ic_principal::Principal; use std::mem::size_of; - impl ByteSize for Principal { - fn byte_size(&self) -> usize { - self.as_slice().len() - } - } - impl ByteSize for Address {} - type KeyAddressCache = Cache; fn user(id: u8) -> Principal { @@ -304,14 +221,6 @@ mod tests { Address::from(id) } - /// Helper: returns the expected overhead (in bytes) for an entry with key type u32 and value type u64. - /// Calculation: 3 * size_of(u32) + size_of(u64) - fn entry_size() -> usize { - let key_size = Principal::anonymous().byte_size(); - let value_size = Address::from(0).byte_size(); - 3 * key_size + value_size + 2 * size_of::() - } - #[test] fn test_insert_and_get() { let mut cache = KeyAddressCache::new().with_capacity(5); @@ -339,44 +248,6 @@ mod tests { assert_eq!(stats.misses(), 1); } - #[test] - fn test_cache_size_tracking() { - // Allow at most two entries. - let mut cache = KeyAddressCache::new() - .with_capacity(5) - .with_size_limit(2 * entry_size()); - - // Insert first entry. - cache.insert(user(1), addr(100)); - assert_eq!(cache.size(), entry_size()); - assert_eq!(cache.get(&user(1)), Some(addr(100))); - - // Insert the same entry again should not change the overall size. - cache.insert(user(1), addr(100)); - assert_eq!(cache.size(), entry_size()); - assert_eq!(cache.get(&user(1)), Some(addr(100))); - - // Insert second entry. - cache.insert(user(2), addr(200)); - assert_eq!(cache.size(), 2 * entry_size()); - assert_eq!(cache.get(&user(1)), Some(addr(100))); - assert_eq!(cache.get(&user(2)), Some(addr(200))); - - // Inserting a third entry should trigger eviction (LRU policy) so that the size remains unchanged. - cache.insert(user(3), addr(300)); - assert_eq!(cache.size(), 2 * entry_size()); - // Expect the least-recently used entry (key 1) to be evicted. - assert_eq!(cache.get(&user(1)), None); - assert_eq!(cache.get(&user(2)), Some(addr(200))); - assert_eq!(cache.get(&user(3)), Some(addr(300))); - - // Remove an entry. - cache.remove(&user(2)); - assert_eq!(cache.size(), entry_size()); - cache.remove(&user(3)); - assert_eq!(cache.size(), 0); - } - #[test] fn test_eviction_by_capacity() { let mut cache = KeyAddressCache::new().with_capacity(3); @@ -395,25 +266,6 @@ mod tests { assert_eq!(cache.get(&user(4)), Some(addr(400))); } - #[test] - fn test_eviction_by_size_limit() { - // Set a size limit to allow only two entries. - let mut cache = KeyAddressCache::new() - .with_capacity(10) - .with_size_limit(2 * entry_size()); - - cache.insert(user(1), addr(100)); - cache.insert(user(2), addr(200)); - - // Inserting another entry should trigger eviction due to the size limit. - cache.insert(user(3), addr(300)); - - // Expect that one entry is evicted (key 1, as the LRU). - assert_eq!(cache.get(&user(1)), None); - assert_eq!(cache.get(&user(2)), Some(addr(200))); - assert_eq!(cache.get(&user(3)), Some(addr(300))); - } - #[test] fn test_remove() { let mut cache = KeyAddressCache::new().with_capacity(5); From 1c7910d2556c4af219ce8635b8277a67497872a1 Mon Sep 17 00:00:00 2001 From: Maksym Arutyunyan Date: Tue, 8 Apr 2025 15:45:21 +0200 Subject: [PATCH 3/9] key_address_cache --- src/btreemap.rs | 12 +++-- .../{cache.rs => key_address_cache.rs} | 44 +++++++++---------- 2 files changed, 29 insertions(+), 27 deletions(-) rename src/btreemap/{cache.rs => key_address_cache.rs} (89%) diff --git a/src/btreemap.rs b/src/btreemap.rs index 80e37fd7..e3235ffc 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -49,7 +49,7 @@ //! ---------------------------------------- //! ``` mod allocator; -mod cache; +mod key_address_cache; mod iter; mod node; @@ -60,8 +60,8 @@ use crate::{ Memory, Storable, }; use allocator::Allocator; -use cache::Cache; pub use iter::Iter; +use key_address_cache::KeyAddressCache; use node::{DerivedPageSize, Entry, Node, NodeType, PageSize, Version}; use std::borrow::Cow; use std::cell::RefCell; @@ -85,8 +85,6 @@ const DEFAULT_PAGE_SIZE: u32 = 1024; // A marker to indicate that the `PageSize` stored in the header is a `PageSize::Value`. const PAGE_SIZE_VALUE_MARKER: u32 = u32::MAX; -type KeyAddressCache = Cache; - /// A "stable" map based on a B-tree. /// /// The implementation is based on the algorithm outlined in "Introduction to Algorithms" @@ -1120,6 +1118,12 @@ where /// Saves the node to memory. #[inline] fn save_node(&mut self, node: &mut Node) { + let address = node.address(); + node.keys().iter().for_each(|key| { + self.key_address_cache + .borrow_mut() + .insert(key.clone(), address); + }); node.save(self.allocator_mut()); } diff --git a/src/btreemap/cache.rs b/src/btreemap/key_address_cache.rs similarity index 89% rename from src/btreemap/cache.rs rename to src/btreemap/key_address_cache.rs index 33e0ba0e..84293f3c 100644 --- a/src/btreemap/cache.rs +++ b/src/btreemap/key_address_cache.rs @@ -1,3 +1,4 @@ +use crate::types::Address; use std::collections::BTreeMap; #[allow(non_upper_case_globals)] @@ -15,12 +16,12 @@ struct Counter(u64); /// Cache with eviction policy that minimizes duplication of keys and values. #[derive(Debug, Default, Clone)] -pub struct Cache +pub struct KeyAddressCache where K: Clone + Ord, - V: Clone, { - key_to_value: BTreeMap, + key_to_address: BTreeMap, + address_to_keys: BTreeMap>, capacity: usize, counter: Counter, lru_order: BTreeMap, @@ -28,15 +29,15 @@ where stats: CacheStats, } -impl Cache +impl KeyAddressCache where K: Clone + Ord, - V: Clone, { /// Creates a new cache with the given capacity. pub fn new() -> Self { Self { - key_to_value: BTreeMap::new(), + key_to_address: BTreeMap::new(), + address_to_keys: BTreeMap::new(), capacity: DEFAULT_CAPACITY, counter: Counter(0), lru_order: BTreeMap::new(), @@ -54,37 +55,38 @@ where /// Clears all entries and resets statistics. pub fn clear(&mut self) { - self.key_to_value.clear(); + self.key_to_address.clear(); + self.address_to_keys.clear(); self.counter = Counter(0); self.lru_order.clear(); self.usage.clear(); self.stats = CacheStats::default(); } - /// Retrieves the value for the given key (if any) and updates its LRU status. - pub fn get(&mut self, key: &K) -> Option { + /// Retrieves the address for the given key (if any) and updates its LRU status. + pub fn get(&mut self, key: &K) -> Option
{ if self.capacity == 0 { return None; } - if let Some(value) = self.key_to_value.get(key).cloned() { + if let Some(address) = self.key_to_address.get(key).cloned() { self.touch(key.clone()); self.stats.hits += 1; - return Some(value); + return Some(address); } self.stats.misses += 1; None } - /// Inserts the given key and value. + /// Inserts the given key and address. /// If adding this entry would exceed the capacity or size limit, evicts LRU entries. - pub fn insert(&mut self, key: K, value: V) { + pub fn insert(&mut self, key: K, address: Address) { if self.capacity == 0 { return; } while self.len() + 1 > self.capacity { self.evict_one(); } - self.key_to_value.insert(key.clone(), value); + self.key_to_address.insert(key.clone(), address); self.touch(key); } @@ -93,7 +95,7 @@ where if self.capacity == 0 { return; } - self.key_to_value.remove(key); + self.key_to_address.remove(key); if let Some(counter) = self.usage.remove(key) { self.lru_order.remove(&counter); } @@ -102,7 +104,7 @@ where /// Returns the number of entries in the cache. #[inline] pub fn len(&self) -> usize { - self.key_to_value.len() + self.key_to_address.len() } /// Returns the cache capacity (number of entries). @@ -129,7 +131,7 @@ where // Find the least-recently used entry. if let Some((&old_counter, old_key)) = self.lru_order.iter().next() { let old_key = old_key.clone(); - self.key_to_value.remove(&old_key); + self.key_to_address.remove(&old_key); self.lru_order.remove(&old_counter); self.usage.remove(&old_key); return Some(old_key); @@ -189,10 +191,9 @@ impl CacheStats { } } -impl Drop for Cache +impl Drop for KeyAddressCache where K: Clone + Ord, - V: Clone, { fn drop(&mut self) { // crate::debug::print(&format!("ABC cache len : {}", self.len())); @@ -209,9 +210,6 @@ mod tests { use super::*; use crate::types::Address; use ic_principal::Principal; - use std::mem::size_of; - - type KeyAddressCache = Cache; fn user(id: u8) -> Principal { Principal::from_slice(&[id]) @@ -227,7 +225,7 @@ mod tests { cache.insert(user(1), addr(100)); cache.insert(user(2), addr(200)); - // Test that values can be retrieved. + // Test that addresses can be retrieved. assert_eq!(cache.get(&user(1)), Some(addr(100))); assert_eq!(cache.get(&user(2)), Some(addr(200))); From 0e73d5d29566bb3be916bb9532b410c53b0c26e5 Mon Sep 17 00:00:00 2001 From: Maksym Arutyunyan Date: Tue, 8 Apr 2025 15:52:26 +0200 Subject: [PATCH 4/9] remove_address --- src/btreemap.rs | 3 ++- src/btreemap/key_address_cache.rs | 41 ++++++++++++++++++++++++++++++- src/types.rs | 2 +- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index e3235ffc..551a1b65 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -49,8 +49,8 @@ //! ---------------------------------------- //! ``` mod allocator; -mod key_address_cache; mod iter; +mod key_address_cache; mod node; use crate::btreemap::iter::{IterInternal, KeysIter, ValuesIter}; @@ -1119,6 +1119,7 @@ where #[inline] fn save_node(&mut self, node: &mut Node) { let address = node.address(); + self.key_address_cache.borrow_mut().remove_address(&address); node.keys().iter().for_each(|key| { self.key_address_cache .borrow_mut() diff --git a/src/btreemap/key_address_cache.rs b/src/btreemap/key_address_cache.rs index 84293f3c..81d3ede2 100644 --- a/src/btreemap/key_address_cache.rs +++ b/src/btreemap/key_address_cache.rs @@ -87,6 +87,10 @@ where self.evict_one(); } self.key_to_address.insert(key.clone(), address); + self.address_to_keys + .entry(address) + .or_default() + .push(key.clone()); self.touch(key); } @@ -95,12 +99,34 @@ where if self.capacity == 0 { return; } - self.key_to_address.remove(key); + if let Some(address) = self.key_to_address.remove(key) { + if let Some(keys) = self.address_to_keys.get_mut(&address) { + keys.retain(|k| k != key); + if keys.is_empty() { + self.address_to_keys.remove(&address); + } + } + } if let Some(counter) = self.usage.remove(key) { self.lru_order.remove(&counter); } } + /// Removes all entries associated with the given address. + pub fn remove_address(&mut self, address: &Address) { + if self.capacity == 0 { + return; + } + if let Some(keys) = self.address_to_keys.remove(address) { + for key in keys { + self.key_to_address.remove(&key); + if let Some(counter) = self.usage.remove(&key) { + self.lru_order.remove(&counter); + } + } + } + } + /// Returns the number of entries in the cache. #[inline] pub fn len(&self) -> usize { @@ -279,6 +305,19 @@ mod tests { assert_eq!(cache.get(&user(2)), Some(addr(200))); } + #[test] + fn test_remove_address() { + let mut cache = KeyAddressCache::new().with_capacity(5); + cache.insert(user(1), addr(100)); + cache.insert(user(2), addr(100)); + cache.insert(user(3), addr(200)); + + cache.remove_address(&addr(100)); + assert_eq!(cache.get(&user(1)), None); + assert_eq!(cache.get(&user(2)), None); + assert_eq!(cache.get(&user(3)), Some(addr(200))); + } + #[test] fn test_clear() { let mut cache = KeyAddressCache::new().with_capacity(5); diff --git a/src/types.rs b/src/types.rs index 0c9bbf7f..599d571c 100644 --- a/src/types.rs +++ b/src/types.rs @@ -3,7 +3,7 @@ use core::ops::{Add, AddAssign, Div, Mul, Rem, Sub, SubAssign}; pub const NULL: Address = Address(0); #[repr(C, packed)] -#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Eq)] +#[derive(Copy, Clone, Debug, PartialEq, PartialOrd, Ord, Eq)] pub struct Address(u64); impl From for Address { From a385cb9df85d96134f3b873d14e8477d5dc7da4b Mon Sep 17 00:00:00 2001 From: Maksym Arutyunyan Date: Tue, 8 Apr 2025 16:21:16 +0200 Subject: [PATCH 5/9] add caching --- src/btreemap.rs | 35 +++++++++++++++++++++++++++-------- src/btreemap/node.rs | 1 - 2 files changed, 27 insertions(+), 9 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index 551a1b65..e07b2692 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -543,10 +543,21 @@ where where F: Fn(Node, usize) -> R + Clone, { + let node_addr = self + .key_address_cache + .borrow_mut() + .get(key) + .unwrap_or(node_addr); let node = self.load_node(node_addr); // Look for the key in the current node. match node.search(key) { - Ok(idx) => Some(f(node, idx)), // Key found: apply `f`. + Ok(idx) => { + // Key found: apply `f`. + self.key_address_cache + .borrow_mut() + .insert(key.clone(), node.address()); + Some(f(node, idx)) + } Err(idx) => match node.node_type() { NodeType::Leaf => None, // At a leaf: key not present. NodeType::Internal => self.traverse(node.child(idx), key, f), // Continue search in child. @@ -1091,7 +1102,9 @@ where /// [1, 2, 3, 4, 5, 6, 7] (stored in the `into` node) /// `source` is deallocated. fn merge(&mut self, source: Node, mut into: Node, median: Entry) -> Node { + self.replace_cached_keys_for_address(source.address(), &[]); into.merge(source, median, &mut self.allocator); + self.replace_cached_keys_for_address(into.address(), &into.keys()); into } @@ -1106,6 +1119,7 @@ where /// Deallocates a node. #[inline] fn deallocate_node(&mut self, node: Node) { + self.replace_cached_keys_for_address(node.address(), &[]); node.deallocate(self.allocator_mut()); } @@ -1118,16 +1132,21 @@ where /// Saves the node to memory. #[inline] fn save_node(&mut self, node: &mut Node) { - let address = node.address(); - self.key_address_cache.borrow_mut().remove_address(&address); - node.keys().iter().for_each(|key| { - self.key_address_cache - .borrow_mut() - .insert(key.clone(), address); - }); + self.replace_cached_keys_for_address(node.address(), &node.keys()); node.save(self.allocator_mut()); } + /// Replaces the cached keys for the given address with the provided keys. + fn replace_cached_keys_for_address(&mut self, address: Address, keys: &[K]) { + let mut cache = self.key_address_cache.borrow_mut(); + if cache.capacity() > 0 { + cache.remove_address(&address); + keys.iter().for_each(|key| { + cache.insert(key.clone(), address); + }); + } + } + /// Saves the map to memory. fn save_header(&self) { let header = BTreeHeader { diff --git a/src/btreemap/node.rs b/src/btreemap/node.rs index 50af80e0..a5c3c6f2 100644 --- a/src/btreemap/node.rs +++ b/src/btreemap/node.rs @@ -371,7 +371,6 @@ impl Node { .collect() } - #[cfg(test)] pub fn keys(&self) -> Vec { self.keys_and_encoded_values .iter() From 3ed525cb94e1ef62cb805878298b8b1c916d9677 Mon Sep 17 00:00:00 2001 From: Maksym Arutyunyan Date: Tue, 8 Apr 2025 16:23:09 +0200 Subject: [PATCH 6/9] . --- src/btreemap.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index e07b2692..b30f992f 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -525,7 +525,12 @@ where if self.root_addr == NULL { return None; } - self.traverse(self.root_addr, key, |node, idx| { + let node_addr = self + .key_address_cache + .borrow_mut() + .get(key) + .unwrap_or(self.root_addr); + self.traverse(node_addr, key, |node, idx| { node.into_entry(idx, self.memory()).1 // Extract value. }) .map(Cow::Owned) @@ -543,11 +548,6 @@ where where F: Fn(Node, usize) -> R + Clone, { - let node_addr = self - .key_address_cache - .borrow_mut() - .get(key) - .unwrap_or(node_addr); let node = self.load_node(node_addr); // Look for the key in the current node. match node.search(key) { From d93d9e04e51b4353c061ec2834aea321e9e17b6d Mon Sep 17 00:00:00 2001 From: Maksym Arutyunyan Date: Tue, 8 Apr 2025 16:24:03 +0200 Subject: [PATCH 7/9] . --- src/btreemap.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index b30f992f..cca4d8d1 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -555,7 +555,7 @@ where // Key found: apply `f`. self.key_address_cache .borrow_mut() - .insert(key.clone(), node.address()); + .insert(key.clone(), node_addr); Some(f(node, idx)) } Err(idx) => match node.node_type() { From b1fc15a1a9deab304da65bf3dcdad546f3f2e121 Mon Sep 17 00:00:00 2001 From: Maksym Arutyunyan Date: Tue, 8 Apr 2025 16:26:00 +0200 Subject: [PATCH 8/9] . --- src/btreemap.rs | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index cca4d8d1..ef886be8 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -110,8 +110,8 @@ where // A marker to communicate to the Rust compiler that we own these types. _phantom: PhantomData<(K, V)>, - // A cache for storing recently accessed nodes. - key_address_cache: RefCell>, + // A cache for storing the node addresses of keys. + cache: RefCell>, } #[derive(PartialEq, Debug)] @@ -221,7 +221,7 @@ where version: Version::V2(page_size), length: 0, _phantom: PhantomData, - key_address_cache: RefCell::new(KeyAddressCache::new()), + cache: RefCell::new(KeyAddressCache::new()), }; btree.save_header(); @@ -249,7 +249,7 @@ where }), length: 0, _phantom: PhantomData, - key_address_cache: RefCell::new(KeyAddressCache::new()), + cache: RefCell::new(KeyAddressCache::new()), }; btree.save_header(); @@ -299,7 +299,7 @@ where version, length: header.length, _phantom: PhantomData, - key_address_cache: RefCell::new(KeyAddressCache::new()), + cache: RefCell::new(KeyAddressCache::new()), } } @@ -525,11 +525,7 @@ where if self.root_addr == NULL { return None; } - let node_addr = self - .key_address_cache - .borrow_mut() - .get(key) - .unwrap_or(self.root_addr); + let node_addr = self.cache.borrow_mut().get(key).unwrap_or(self.root_addr); self.traverse(node_addr, key, |node, idx| { node.into_entry(idx, self.memory()).1 // Extract value. }) @@ -553,9 +549,7 @@ where match node.search(key) { Ok(idx) => { // Key found: apply `f`. - self.key_address_cache - .borrow_mut() - .insert(key.clone(), node_addr); + self.cache.borrow_mut().insert(key.clone(), node_addr); Some(f(node, idx)) } Err(idx) => match node.node_type() { @@ -1138,7 +1132,7 @@ where /// Replaces the cached keys for the given address with the provided keys. fn replace_cached_keys_for_address(&mut self, address: Address, keys: &[K]) { - let mut cache = self.key_address_cache.borrow_mut(); + let mut cache = self.cache.borrow_mut(); if cache.capacity() > 0 { cache.remove_address(&address); keys.iter().for_each(|key| { From 9b425aeb9895db6b07f52c45da30664f3dd4ed63 Mon Sep 17 00:00:00 2001 From: Maksym Arutyunyan Date: Tue, 8 Apr 2025 16:28:56 +0200 Subject: [PATCH 9/9] . --- src/btreemap.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/btreemap.rs b/src/btreemap.rs index ef886be8..e1d03774 100644 --- a/src/btreemap.rs +++ b/src/btreemap.rs @@ -1096,9 +1096,9 @@ where /// [1, 2, 3, 4, 5, 6, 7] (stored in the `into` node) /// `source` is deallocated. fn merge(&mut self, source: Node, mut into: Node, median: Entry) -> Node { - self.replace_cached_keys_for_address(source.address(), &[]); + //self.replace_cached_keys_for_address(source.address(), &[]); into.merge(source, median, &mut self.allocator); - self.replace_cached_keys_for_address(into.address(), &into.keys()); + //self.replace_cached_keys_for_address(into.address(), &into.keys()); into }