Skip to content

Commit 788d205

Browse files
[MOD-14096] FFI-readiness for NumericRangeTree's GC (#8430)
1 parent 34f2968 commit 788d205

11 files changed

Lines changed: 146 additions & 22 deletions

File tree

src/redisearch_rs/generational_slab/src/lib.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,17 @@ impl Key {
156156
pub const fn generation(self) -> u32 {
157157
self.generation
158158
}
159+
160+
/// Reconstruct a key from its raw position and generation.
161+
///
162+
/// This is intended for FFI round-trips where a key was previously
163+
/// decomposed via [`Key::position`] and [`Key::generation`].
164+
pub const fn from_raw_parts(position: u32, generation: u32) -> Self {
165+
Self {
166+
position,
167+
generation,
168+
}
169+
}
159170
}
160171

161172
/// Pre-allocated storage for a uniform data type

src/redisearch_rs/numeric_range_tree/src/arena.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,15 @@ impl NodeIndex {
3232
pub const fn key(self) -> Key {
3333
self.0
3434
}
35+
36+
/// Reconstruct a `NodeIndex` from the raw position and generation of
37+
/// the underlying [`Key`].
38+
///
39+
/// Intended for FFI round-trips where the index was previously
40+
/// decomposed via [`Key::position`] and [`Key::generation`].
41+
pub const fn from_raw_parts(position: u32, generation: u32) -> Self {
42+
Self(Key::from_raw_parts(position, generation))
43+
}
3544
}
3645

3746
impl From<Key> for NodeIndex {

src/redisearch_rs/numeric_range_tree/src/index.rs

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
1515
use ffi::IndexFlags_Index_StoreNumeric;
1616
use inverted_index::{
17-
EntriesTrackingIndex, IndexReader, IndexReaderCore, RSIndexResult,
17+
EntriesTrackingIndex, IndexBlock, IndexReader, IndexReaderCore, RSIndexResult,
1818
debug::Summary,
1919
numeric::{Numeric, NumericFloatCompression},
2020
};
@@ -137,19 +137,17 @@ impl NumericIndex {
137137
///
138138
/// The `doc_exist` callback returns `true` if the document still exists.
139139
/// Returns `Ok(Some(delta))` if GC is needed, `Ok(None)` otherwise.
140-
pub fn scan_gc(
140+
pub fn scan_gc<F>(
141141
&self,
142142
doc_exist: impl Fn(ffi::t_docId) -> bool,
143-
) -> std::io::Result<Option<inverted_index::GcScanDelta>> {
144-
match self {
145-
NumericIndex::Uncompressed(idx) => idx.scan_gc(
146-
doc_exist,
147-
None::<fn(&inverted_index::RSIndexResult<'_>, &inverted_index::IndexBlock)>,
148-
),
149-
NumericIndex::Compressed(idx) => idx.scan_gc(
150-
doc_exist,
151-
None::<fn(&inverted_index::RSIndexResult<'_>, &inverted_index::IndexBlock)>,
152-
),
143+
repair_fn: Option<F>,
144+
) -> std::io::Result<Option<inverted_index::GcScanDelta>>
145+
where
146+
F: for<'index> FnMut(&RSIndexResult<'index>, &IndexBlock),
147+
{
148+
match self {
149+
NumericIndex::Uncompressed(idx) => idx.scan_gc(doc_exist, repair_fn),
150+
NumericIndex::Compressed(idx) => idx.scan_gc(doc_exist, repair_fn),
153151
}
154152
}
155153
}

src/redisearch_rs/numeric_range_tree/src/iter.rs

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ impl<'a> ReversePreOrderDfsIterator<'a> {
4040
}
4141

4242
/// Create a new iterator starting from the given node index in the tree.
43-
pub fn from_node(tree: &'a NumericRangeTree, node_idx: NodeIndex) -> Self {
43+
fn from_node(tree: &'a NumericRangeTree, node_idx: NodeIndex) -> Self {
4444
let mut stack = Vec::with_capacity(tree.node(node_idx).max_depth() as usize + 1);
4545
stack.push(node_idx);
4646
Self { tree, stack }
@@ -72,3 +72,44 @@ impl<'a> IntoIterator for &'a NumericRangeTree {
7272
ReversePreOrderDfsIterator::new(self)
7373
}
7474
}
75+
76+
/// Same iteration logic as [`ReversePreOrderDfsIterator`], but it yields indices alongside each node.
77+
#[derive(Debug)]
78+
pub struct IndexedReversePreOrderDfsIterator<'a> {
79+
/// Reference to the tree (used to resolve node indices).
80+
tree: &'a NumericRangeTree,
81+
/// Stack of node indices to visit. Nodes are pushed right-first so left is
82+
/// processed first (LIFO order).
83+
stack: Vec<NodeIndex>,
84+
}
85+
86+
impl<'a> IndexedReversePreOrderDfsIterator<'a> {
87+
/// Create a new iterator starting from the root of the given tree.
88+
pub fn new(tree: &'a NumericRangeTree) -> Self {
89+
Self::from_node(tree, tree.root_index())
90+
}
91+
92+
/// Create a new iterator starting from the given node index in the tree.
93+
fn from_node(tree: &'a NumericRangeTree, node_idx: NodeIndex) -> Self {
94+
let mut stack = Vec::with_capacity(tree.node(node_idx).max_depth() as usize + 1);
95+
stack.push(node_idx);
96+
Self { tree, stack }
97+
}
98+
}
99+
100+
impl<'a> Iterator for IndexedReversePreOrderDfsIterator<'a> {
101+
type Item = (NodeIndex, &'a NumericRangeNode);
102+
103+
fn next(&mut self) -> Option<Self::Item> {
104+
let node_idx = self.stack.pop()?;
105+
let node = self.tree.node(node_idx);
106+
107+
if let NumericRangeNode::Internal(internal) = node {
108+
// Push children onto stack (left first so right is processed first)
109+
self.stack.push(internal.left_index());
110+
self.stack.push(internal.right_index());
111+
}
112+
113+
Some((node_idx, node))
114+
}
115+
}

src/redisearch_rs/numeric_range_tree/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,11 @@ mod unique_id;
7979
pub use arena::NodeIndex;
8080
pub use index::{NumericIndex, NumericIndexReader};
8181
pub use inverted_index::NumericFilter;
82-
pub use iter::ReversePreOrderDfsIterator;
82+
pub use iter::{IndexedReversePreOrderDfsIterator, ReversePreOrderDfsIterator};
8383
pub use node::{InternalNode, LeafNode, NumericRangeNode};
8484
pub use range::NumericRange;
8585
pub use tree::{
86-
AddResult, NodeGcDelta, NumericRangeTree, SingleNodeGcResult, TrimEmptyLeavesResult,
86+
AddResult, CompactIfSparseResult, NodeGcDelta, NumericRangeTree, SingleNodeGcResult,
87+
TrimEmptyLeavesResult,
8788
};
8889
pub use unique_id::TreeUniqueId;

src/redisearch_rs/numeric_range_tree/src/range.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ pub struct NumericRange {
6464
/// hashed into HyperLogLog registers. We hash the raw bytes (bit
6565
/// representation) rather than the numeric value — see
6666
/// [`NumericRange::update_cardinality`] for rationale.
67-
fn update_cardinality(hll: &mut Hll, value: f64) {
67+
pub(crate) fn update_cardinality(hll: &mut Hll, value: f64) {
6868
hll.add(value.to_ne_bytes());
6969
}
7070

src/redisearch_rs/numeric_range_tree/src/tree/gc.rs

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
1616
use std::collections::HashMap;
1717

18-
use inverted_index::{GcApplyInfo, GcScanDelta};
18+
use inverted_index::{GcApplyInfo, GcScanDelta, IndexBlock, RSIndexResult};
1919

2020
use super::{NumericRangeTree, TrimEmptyLeavesResult, apply_signed_delta};
2121
use crate::NumericRangeNode;
@@ -41,6 +41,7 @@ pub struct NodeGcDelta {
4141
///
4242
/// Returned by [`NumericRangeTree::apply_gc_to_node`].
4343
#[derive(Debug, Clone, Copy, Default)]
44+
#[repr(C)]
4445
pub struct SingleNodeGcResult {
4546
/// Information about the outcome of garbage collection on
4647
/// the inverted index stored within this node.
@@ -51,6 +52,7 @@ pub struct SingleNodeGcResult {
5152

5253
/// Returned by [`NumericRangeTree::compact_if_sparse`].
5354
#[derive(Debug, Clone, Copy, Default)]
55+
#[repr(C)]
5456
pub struct CompactIfSparseResult {
5557
/// The change in the tree's inverted index memory usage, in bytes.
5658
/// Positive values indicate growth, negative values indicate shrinkage.
@@ -61,6 +63,51 @@ pub struct CompactIfSparseResult {
6163
pub node_size_delta: i64,
6264
}
6365

66+
impl NumericRangeNode {
67+
/// Scan a single node's inverted index for GC work.
68+
///
69+
/// Scan the inverted index associated with its range for deleted
70+
/// documents, and computes HLL registers for cardinality re-estimation.
71+
///
72+
/// Returns `Some(NodeGcDelta)` if the node had GC work, `None` otherwise
73+
/// (either the node has no range, or no documents were deleted).
74+
pub fn scan_gc(&self, doc_exists: &dyn Fn(ffi::t_docId) -> bool) -> Option<NodeGcDelta> {
75+
let range = self.range()?;
76+
77+
// HLL tracking for cardinality (re)estimation
78+
let mut majority_hll = Hll::new();
79+
let mut last_block_hll = Hll::new();
80+
let mut last_block_ptr: *const IndexBlock = std::ptr::null();
81+
82+
let mut repair_fn = |res: &RSIndexResult<'_>, block: &IndexBlock| {
83+
let bp = block as *const IndexBlock;
84+
if bp != last_block_ptr {
85+
majority_hll.merge(&last_block_hll);
86+
last_block_hll.clear();
87+
last_block_ptr = bp;
88+
}
89+
// SAFETY: We know this is a numeric index result
90+
let value = unsafe { res.as_numeric_unchecked() };
91+
crate::range::update_cardinality(&mut last_block_hll, value);
92+
};
93+
94+
let delta = range
95+
.entries()
96+
.scan_gc(doc_exists, Some(&mut repair_fn))
97+
.ok()
98+
.flatten()?;
99+
100+
// Merge majority into last_block to get "with last block" registers.
101+
last_block_hll.merge(&majority_hll);
102+
103+
Some(NodeGcDelta {
104+
delta,
105+
registers_with_last_block: *last_block_hll.registers(),
106+
registers_without_last_block: *majority_hll.registers(),
107+
})
108+
}
109+
}
110+
64111
impl NumericRangeTree {
65112
/// Apply a GC delta to a single node by index.
66113
///

src/redisearch_rs/numeric_range_tree/src/tree/mod.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ mod insert;
2323
#[cfg(all(feature = "unittest", not(miri)))]
2424
mod invariants;
2525

26-
pub use gc::{NodeGcDelta, SingleNodeGcResult};
26+
pub use gc::{CompactIfSparseResult, NodeGcDelta, SingleNodeGcResult};
2727

2828
use ffi::t_docId;
2929

@@ -286,6 +286,11 @@ impl NumericRangeTree {
286286
crate::ReversePreOrderDfsIterator::new(self)
287287
}
288288

289+
/// Returns an iterator over all nodes in the tree, alongside their indices (depth-first traversal).
290+
pub fn indexed_iter(&self) -> crate::IndexedReversePreOrderDfsIterator<'_> {
291+
crate::IndexedReversePreOrderDfsIterator::new(self)
292+
}
293+
289294
/// Calculate the total memory usage of the tree, in bytes.
290295
pub const fn mem_usage(&self) -> usize {
291296
std::mem::size_of::<Self>() + self.stats.inverted_indexes_size + self.nodes.mem_usage()

src/redisearch_rs/numeric_range_tree/src/unique_id.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,12 @@ impl TreeUniqueId {
3030
}
3131
}
3232

33+
impl std::fmt::Display for TreeUniqueId {
34+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
35+
write!(f, "{}", self.0)
36+
}
37+
}
38+
3339
impl From<TreeUniqueId> for u32 {
3440
fn from(id: TreeUniqueId) -> Self {
3541
id.0

src/redisearch_rs/numeric_range_tree/tests/integration/gc.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,10 @@ fn apply_gc_to_single_leaf(#[values(false, true)] compress_floats: bool) {
8383
let result = tree.apply_gc_to_node(tree.root_index(), delta).unwrap();
8484

8585
assert_eq!(result.index_gc_info.entries_removed, 5);
86-
assert_eq!(tree.num_entries(), entries_before - 5);
86+
assert_eq!(
87+
tree.num_entries(),
88+
entries_before - result.index_gc_info.entries_removed
89+
);
8790
assert!(
8891
result.index_gc_info.bytes_freed > 0,
8992
"GC that removes entries should free bytes"

0 commit comments

Comments
 (0)