Skip to content

Commit 6bb03cf

Browse files
Add find (range query) algorithm for numeric_range_tree
1 parent 8fd4e9f commit 6bb03cf

9 files changed

Lines changed: 622 additions & 0 deletions

File tree

src/redisearch_rs/numeric_range_tree/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ mod unique_id;
7777

7878
pub use arena::NodeIndex;
7979
pub use index::{NumericIndex, NumericIndexReader};
80+
pub use inverted_index::NumericFilter;
8081
pub use iter::PreOrderDfsIterator;
8182
pub use node::{InternalNode, LeafNode, NumericRangeNode};
8283
pub use range::NumericRange;

src/redisearch_rs/numeric_range_tree/src/range.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,16 @@ impl NumericRange {
134134
self.hll.count()
135135
}
136136

137+
/// Returns true if this range is completely contained within [min, max].
138+
pub const fn contained_in(&self, min: f64, max: f64) -> bool {
139+
self.min_val >= min && self.max_val <= max
140+
}
141+
142+
/// Returns true if this range overlaps with [min, max].
143+
pub const fn overlaps(&self, min: f64, max: f64) -> bool {
144+
!(min > self.max_val || max < self.min_val)
145+
}
146+
137147
/// Get the minimum value in this range.
138148
pub const fn min_val(&self) -> f64 {
139149
self.min_val
Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
/*
2+
* Copyright (c) 2006-Present, Redis Ltd.
3+
* All rights reserved.
4+
*
5+
* Licensed under your choice of the Redis Source Available License 2.0
6+
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
7+
* GNU Affero General Public License v3 (AGPLv3).
8+
*/
9+
10+
//! Read path: range queries.
11+
//!
12+
//! This module implements the tree's range query operations. Given a
13+
//! [`NumericFilter`], it traverses the tree to find all matching ranges,
14+
//! using containment checks to prune subtrees and avoid unnecessary descent.
15+
16+
use inverted_index::NumericFilter;
17+
18+
use super::NumericRangeTree;
19+
use crate::arena::{NodeArena, NodeIndex};
20+
use crate::{NumericRange, NumericRangeNode};
21+
22+
impl NumericRangeTree {
23+
/// Find all numeric ranges that match the given filter.
24+
///
25+
/// Returns a vector of references to ranges that overlap with the filter's
26+
/// min/max bounds. The ranges are returned in order based on the filter's
27+
/// ascending/descending preference.
28+
///
29+
/// # Optimization Goal
30+
///
31+
/// We try to minimize the number of ranges returned, as each range will
32+
/// later need to be unioned during query iteration. When a node's range
33+
/// is completely contained within the filter bounds, we return that single
34+
/// range instead of descending to its children—this is why internal nodes
35+
/// retain their ranges up to `max_depth_range`.
36+
///
37+
/// # Offset Handling
38+
///
39+
/// The `filter.offset` and `filter.limit` parameters enable pagination.
40+
/// We track the running total of documents and skip ranges until we've
41+
/// passed the offset. See `recursive_find_ranges` for the special handling
42+
/// of the first overlapping leaf.
43+
pub fn find<'a>(&'a self, filter: &NumericFilter) -> Vec<&'a NumericRange> {
44+
let mut ranges = Vec::with_capacity(8);
45+
let mut total = 0usize;
46+
47+
Self::recursive_find_ranges(&mut ranges, &self.nodes, self.root, filter, &mut total);
48+
49+
#[cfg(all(feature = "unittest", not(miri)))]
50+
Self::check_find_invariants(&ranges, filter, total);
51+
52+
ranges
53+
}
54+
55+
/// Recursively find ranges that match the filter.
56+
///
57+
/// # Containment vs Overlap
58+
///
59+
/// - **Contained**: If the node's range is completely within [min, max],
60+
/// add it and stop descending. The range covers all values we need from
61+
/// this subtree.
62+
/// - **Overlaps**: If the range partially overlaps [min, max], we must
63+
/// descend into children (for internal nodes) or add the range (for leaves)
64+
/// since it contains some—but not all—of the values we need.
65+
/// - **No overlap**: Skip this subtree entirely.
66+
///
67+
/// # Traversal Order
68+
///
69+
/// Children are visited in ascending or descending order based on `filter.ascending`.
70+
/// This ensures ranges are returned in the correct order for sorted iteration.
71+
fn recursive_find_ranges<'a>(
72+
ranges: &mut Vec<&'a NumericRange>,
73+
nodes: &'a NodeArena,
74+
node_idx: NodeIndex,
75+
filter: &NumericFilter,
76+
total: &mut usize,
77+
) {
78+
// Check if we've reached the limit
79+
if filter.limit > 0 && *total >= filter.offset.saturating_add(filter.limit) {
80+
return;
81+
}
82+
83+
let node = &nodes[node_idx];
84+
let min = filter.min;
85+
let max = filter.max;
86+
87+
if let Some(range) = node.range() {
88+
let num_docs = range.num_docs();
89+
if num_docs == 0 {
90+
// We don't care about empty ranges.
91+
return;
92+
}
93+
let contained = range.contained_in(min, max);
94+
let overlaps = range.overlaps(min, max);
95+
96+
// If the range is completely contained in the search bounds, add it
97+
if contained {
98+
*total += num_docs as usize;
99+
if *total > filter.offset {
100+
ranges.push(range);
101+
}
102+
return;
103+
}
104+
105+
// No overlap at all - nothing to do
106+
if !overlaps {
107+
return;
108+
}
109+
}
110+
111+
match node {
112+
NumericRangeNode::Internal(internal) => {
113+
if filter.ascending {
114+
// Ascending: left first, then right
115+
if min <= internal.value {
116+
Self::recursive_find_ranges(
117+
ranges,
118+
nodes,
119+
internal.left_index(),
120+
filter,
121+
total,
122+
);
123+
}
124+
if max >= internal.value {
125+
Self::recursive_find_ranges(
126+
ranges,
127+
nodes,
128+
internal.right_index(),
129+
filter,
130+
total,
131+
);
132+
}
133+
} else {
134+
// Descending: right first, then left
135+
if max >= internal.value {
136+
Self::recursive_find_ranges(
137+
ranges,
138+
nodes,
139+
internal.right_index(),
140+
filter,
141+
total,
142+
);
143+
}
144+
if min <= internal.value {
145+
Self::recursive_find_ranges(
146+
ranges,
147+
nodes,
148+
internal.left_index(),
149+
filter,
150+
total,
151+
);
152+
}
153+
}
154+
}
155+
NumericRangeNode::Leaf(leaf) => {
156+
let num_docs = leaf.range.num_docs();
157+
*total += if *total == 0 && filter.offset == 0 {
158+
// This is the first range of the result set.
159+
// It the range _overlaps_ with the filter range,
160+
// but isn't contained within it, it might contain documents
161+
// that sit outside the filter range.
162+
// For example, if the filter range is [10, 20] and the leaf range is [5, 11],
163+
// the leaf range contains documents that are outside the filter range.
164+
//
165+
// If we sum its `num_docs` to the running total, we may
166+
// end up overcounting the number of documents that match
167+
// the filter range, thus returning less documents than expected.
168+
// We choose the opposite strategy: we potentially return _more_ documents
169+
// than necessary, leaving it to the result set iterator to precisely filter
170+
// out the ones that sit outside the filter range.
171+
1
172+
} else {
173+
num_docs as usize
174+
};
175+
if *total > filter.offset {
176+
ranges.push(&leaf.range);
177+
}
178+
}
179+
}
180+
}
181+
}

src/redisearch_rs/numeric_range_tree/src/tree/invariants.rs

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313
//! after every mutation (`add`, `trim_empty_leaves`) to catch structural
1414
//! violations early.
1515
16+
use inverted_index::{IndexReader, NumericFilter, RSIndexResult};
17+
1618
use super::{AddResult, NumericRangeTree, TreeStats, apply_signed_delta};
19+
use crate::NumericRange;
1720
use crate::NumericRangeNode;
1821
use crate::arena::NodeIndex;
1922

@@ -149,6 +152,92 @@ impl NumericRangeTree {
149152
);
150153
}
151154

155+
/// Verify invariants of the ranges returned by `find`.
156+
///
157+
/// Checks three properties:
158+
/// 1. **Filter overlap** — every returned range overlaps the query filter.
159+
/// 2. **Ordering** — consecutive ranges are strictly ordered (ascending or
160+
/// descending based on the filter's `ascending` flag). This transitively
161+
/// implies pairwise non-overlap.
162+
/// 3. **Limit sufficiency** — when `limit > 0` and the tree contained
163+
/// enough matching documents (`total >= offset + limit`), the returned
164+
/// ranges must collectively hold at least `limit` documents.
165+
pub(crate) fn check_find_invariants(
166+
ranges: &[&NumericRange],
167+
filter: &NumericFilter,
168+
total: usize,
169+
) {
170+
// Every returned range must overlap the filter.
171+
for (i, range) in ranges.iter().enumerate() {
172+
assert!(
173+
range.overlaps(filter.min, filter.max),
174+
"find invariant violated: ranges[{i}] [{}, {}] does not overlap filter [{}, {}]",
175+
range.min_val(),
176+
range.max_val(),
177+
filter.min,
178+
filter.max,
179+
);
180+
}
181+
182+
// Ordering and disjointedness.
183+
// Strict ordering on consecutive pairs (max < next_min, or min > next_max)
184+
// transitively implies pairwise non-overlap for all pairs.
185+
for window in ranges.windows(2) {
186+
if filter.ascending {
187+
assert!(
188+
window[0].max_val() < window[1].min_val(),
189+
"find invariant violated: ascending order broken: [{}, {}] not before [{}, {}]",
190+
window[0].min_val(),
191+
window[0].max_val(),
192+
window[1].min_val(),
193+
window[1].max_val(),
194+
);
195+
} else {
196+
assert!(
197+
window[0].min_val() > window[1].max_val(),
198+
"find invariant violated: descending order broken: [{}, {}] not after [{}, {}]",
199+
window[0].min_val(),
200+
window[0].max_val(),
201+
window[1].min_val(),
202+
window[1].max_val(),
203+
);
204+
}
205+
}
206+
207+
// Limit sufficiency: if limit > 0 and we had enough matching documents
208+
// (total >= offset + limit), the returned ranges must contain at least
209+
// `limit` documents whose values fall within the filter bounds.
210+
if filter.limit > 0 {
211+
let target = filter.offset.saturating_add(filter.limit);
212+
if total >= target {
213+
let mut docs: usize = 0;
214+
for range in ranges {
215+
if range.contained_in(filter.min, filter.max) {
216+
docs += range.num_docs() as usize;
217+
} else {
218+
let mut reader = range.reader();
219+
let mut result = RSIndexResult::numeric(0.0);
220+
while reader.next_record(&mut result).unwrap_or(false) {
221+
// SAFETY: The entries in a NumericRange always contain
222+
// numeric data—they are created via `RSIndexResult::numeric`.
223+
let value = unsafe { result.as_numeric_unchecked() };
224+
if filter.value_in_range(value) {
225+
docs += 1;
226+
}
227+
}
228+
}
229+
}
230+
assert!(
231+
docs >= filter.limit,
232+
"find invariant violated: limit={} but returned ranges contain only \
233+
{docs} in-bounds docs (total={total}, offset={})",
234+
filter.limit,
235+
filter.offset,
236+
);
237+
}
238+
}
239+
}
240+
152241
/// Verify all structural invariants of the tree.
153242
///
154243
/// Panics with a descriptive message if any invariant is violated.

src/redisearch_rs/numeric_range_tree/src/tree/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,9 @@
1414
//!
1515
//! The implementation is split into sub-modules by concern:
1616
//! - [`insert`]: Write path (add, split, balance)
17+
//! - [`find`]: Read path (range queries)
1718
19+
mod find;
1820
mod insert;
1921
#[cfg(all(feature = "unittest", not(miri)))]
2022
mod invariants;

0 commit comments

Comments
 (0)