Skip to content

Added mem-dbg as optional feature #19

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,4 @@ twox-hash = "1.1"
serde = { version = "1.0", features = ["derive"] }
rand = { version = "0.7", features = ["small_rng"] }
packed_simd = { version = "0.3", features = ["into_bits"], optional = true }
mem_dbg = {version="0.2.4", optional=true}
1 change: 1 addition & 0 deletions src/count_min.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use crate::traits::{Intersect, IntersectPlusUnionIsPlus, New, UnionAssign};
serialize = "C: Serialize, <C as New>::Config: Serialize",
deserialize = "C: Deserialize<'de>, <C as New>::Config: Deserialize<'de>"
))]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
pub struct CountMinSketch<K: ?Sized, C: New> {
counters: Vec<Vec<C>>,
offsets: Vec<usize>, // to avoid malloc/free each push
Expand Down
27 changes: 26 additions & 1 deletion src/distinct.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ use self::consts::{BIAS_DATA, RAW_ESTIMATE_DATA, TRESHOLD_DATA};
/// Like [`HyperLogLog`] but implements `Ord` and `Eq` by using the estimate of the cardinality.
///
/// This is a newtype: its only field is the wrapped [`HyperLogLog`].
#[derive(Serialize, Deserialize)]
#[serde(bound = "")]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
pub struct HyperLogLogMagnitude<V>(HyperLogLog<V>);
impl<V: Hash> Ord for HyperLogLogMagnitude<V> {
#[inline(always)]
Expand Down Expand Up @@ -127,6 +128,7 @@ impl<V> IntersectPlusUnionIsPlus for HyperLogLogMagnitude<V> {
/// See [*HyperLogLog: the analysis of a near-optimal cardinality estimation algorithm*](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf) and [*HyperLogLog in Practice: Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm*](https://ai.google/research/pubs/pub40671) for background on HyperLogLog with bias correction.
#[derive(Serialize, Deserialize)]
#[serde(bound = "")]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
pub struct HyperLogLog<V: ?Sized> {
alpha: f64,
zero: usize,
Expand Down Expand Up @@ -168,6 +170,29 @@ where
}
}

/// Returns the current harmonic sum of the `HyperLogLog` data structure.
///
/// The harmonic sum is the sum, over all registers, of two raised to the
/// negated register value, i.e. `2^-m[0] + 2^-m[1] + ... + 2^-m[n-1]`,
/// where `n` is the number of registers.
///
/// This is a plain read of the stored `sum` field; nothing is recomputed.
#[must_use]
pub fn harmonic_sum(&self) -> f64 {
    self.sum
}

/// Returns the current number of zero registers in the `HyperLogLog` data structure.
///
/// This is a plain read of the stored `zero` field; the registers are not
/// rescanned.
#[must_use]
pub fn zero_registers(&self) -> usize {
    self.zero
}

/// Returns the number of registers in the `HyperLogLog` data structure.
///
/// This is the length of the register storage `m`.
#[must_use]
pub fn number_of_registers(&self) -> usize {
    self.m.len()
}

/// Returns the precision of the `HyperLogLog` data structure.
///
/// This is a plain read of the stored `p` field.
#[must_use]
pub fn precision(&self) -> u8 {
    self.p
}

/// "Visit" an element.
#[inline]
pub fn push(&mut self, value: &V) {
Expand Down Expand Up @@ -337,7 +362,7 @@ where
}

fn get_alpha(p: u8) -> f64 {
assert!(4 <= p && p <= 16);
assert!(4 <= p && p <= 18);
match p {
4 => 0.673,
5 => 0.697,
Expand Down
10 changes: 10 additions & 0 deletions src/distinct/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,16 @@ mod test {
ret
})
}

#[test]
fn test_length_compatability() {
    // The three lookup tables must have the same number of rows.
    let raw_rows = RAW_ESTIMATE_DATA.len();
    assert_eq!(TRESHOLD_DATA.len(), raw_rows);
    assert_eq!(raw_rows, BIAS_DATA.len());

    // Each raw-estimate row must be exactly as long as the bias row paired with it.
    RAW_ESTIMATE_DATA
        .iter()
        .zip(BIAS_DATA.iter())
        .for_each(|(estimates, biases)| assert_eq!(estimates.len(), biases.len()));
}
}

#[rustfmt::skip]
Expand Down
3 changes: 3 additions & 0 deletions src/linked_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use serde::{Deserialize, Serialize};
use std::{iter, marker, ops};

/// Index type used to address elements of a [`LinkedList`] (it is the index
/// parameter of the list's `ops::IndexMut` implementation).
///
/// Wraps a `usize` together with a zero-sized `PhantomData<&'a ()>` that ties
/// the index to the lifetime `'a`.
// NOTE(review): presumably the `usize` is a slot position in the list's backing storage — confirm.
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
pub struct LinkedListIndex<'a>(usize, marker::PhantomData<&'a ()>);
impl<'a> LinkedListIndex<'a> {
#[inline(always)]
Expand All @@ -11,6 +12,7 @@ impl<'a> LinkedListIndex<'a> {
}

#[derive(Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
pub struct LinkedList<T> {
vec: Box<[(usize, usize, Option<T>)]>,
head: usize,
Expand Down Expand Up @@ -304,6 +306,7 @@ impl<'a, T> ops::IndexMut<LinkedListIndex<'a>> for LinkedList<T> {
}
}


pub struct LinkedListIter<'a, T: 'a> {
linked_list: &'a LinkedList<T>,
index: Option<LinkedListIndex<'a>>,
Expand Down
2 changes: 2 additions & 0 deletions src/ordered_linked_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::{ops, ptr};
use crate::linked_list::{LinkedList, LinkedListIndex};

/// Newtype wrapper around [`LinkedListIndex`], carrying the same lifetime `'a`.
// NOTE(review): presumably the index type for `OrderedLinkedList` — confirm against its impls.
#[derive(Copy, Clone, PartialEq, Eq, Serialize, Deserialize, Debug)]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
pub struct OrderedLinkedListIndex<'a>(LinkedListIndex<'a>);
impl<'a> OrderedLinkedListIndex<'a> {
#[inline(always)]
Expand All @@ -13,6 +14,7 @@ impl<'a> OrderedLinkedListIndex<'a> {
}

/// Newtype wrapper around [`LinkedList`]; its methods are implemented for
/// element types that are `Ord`.
// NOTE(review): presumably keeps its elements in sorted order — confirm against the impl.
#[derive(Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
pub struct OrderedLinkedList<T>(LinkedList<T>);
impl<T: Ord> OrderedLinkedList<T> {
pub fn new(cap: usize) -> Self {
Expand Down
3 changes: 3 additions & 0 deletions src/sample.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use std::{convert::TryFrom, fmt, iter, ops, vec};

/// Given population and sample sizes, returns true if this element is in the sample. Without replacement.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
pub struct SampleTotal {
total: usize,
samples: usize,
Expand Down Expand Up @@ -39,6 +40,7 @@ impl Drop for SampleTotal {
}

/// Private newtype wrapper around `Vec<T>`, constructed with `new(cap)`.
// NOTE(review): the name suggests the capacity is fixed at construction and never grown — confirm against the impl.
#[derive(Clone)]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
struct FixedCapVec<T>(Vec<T>);
impl<T> FixedCapVec<T> {
fn new(cap: usize) -> Self {
Expand Down Expand Up @@ -122,6 +124,7 @@ where

/// [Reservoir sampling](https://en.wikipedia.org/wiki/Reservoir_sampling). Without replacement, and the returned order is unstable.
#[derive(Clone, Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
pub struct SampleUnstable<T> {
reservoir: FixedCapVec<T>,
i: usize,
Expand Down
3 changes: 3 additions & 0 deletions src/top.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ use crate::{
serialize = "A: Hash + Eq + Serialize, C: Serialize, <C as New>::Config: Serialize",
deserialize = "A: Hash + Eq + Deserialize<'de>, C: Deserialize<'de>, <C as New>::Config: Deserialize<'de>"
))]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
pub struct Top<A, C: New> {
map: HashMap<A, OrderedLinkedListIndex<'static>, RandomXxHashBuilder>,
list: OrderedLinkedList<Node<A, C>>,
Expand Down Expand Up @@ -227,6 +228,7 @@ impl<
}

/// Private pair stored as the element type of `Top`'s ordered list.
///
/// The `Ord` implementation only requires `C: Ord`.
// NOTE(review): presumably field 0 is the tracked item and field 1 its count, with ordering by field 1 only — confirm.
#[derive(Clone, Serialize, Deserialize)]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
struct Node<T, C>(T, C);
impl<T, C: Ord> Ord for Node<T, C> {
#[inline(always)]
Expand Down Expand Up @@ -280,6 +282,7 @@ mod test {

/// Test-local newtype wrapper around [`HyperLogLog`]; an `Ord` implementation
/// for `V: Hash` is provided alongside it.
#[derive(Serialize, Deserialize)]
#[serde(bound = "")]
#[cfg_attr(feature = "mem_dbg", derive(mem_dbg::MemDbg, mem_dbg::MemSize))]
struct HLL<V>(HyperLogLog<V>);
impl<V: Hash> Ord for HLL<V> {
#[inline(always)]
Expand Down
Loading