|
| 1 | +/* |
| 2 | + * Copyright (c) 2006-Present, Redis Ltd. |
| 3 | + * All rights reserved. |
| 4 | + * |
| 5 | + * Licensed under your choice of the Redis Source Available License 2.0 |
| 6 | + * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the |
| 7 | + * GNU Affero General Public License v3 (AGPLv3). |
| 8 | +*/ |
| 9 | + |
| 10 | +//! Read path: range queries. |
| 11 | +//! |
| 12 | +//! This module implements the tree's range query operations. Given a |
| 13 | +//! [`NumericFilter`], it traverses the tree to find all matching ranges, |
| 14 | +//! using containment checks to prune subtrees and avoid unnecessary descent. |
| 15 | +
|
| 16 | +use inverted_index::NumericFilter; |
| 17 | + |
| 18 | +use super::NumericRangeTree; |
| 19 | +use crate::arena::{NodeArena, NodeIndex}; |
| 20 | +use crate::{NumericRange, NumericRangeNode}; |
| 21 | + |
| 22 | +impl NumericRangeTree { |
| 23 | + /// Find all numeric ranges that match the given filter. |
| 24 | + /// |
| 25 | + /// Returns a vector of references to ranges that overlap with the filter's |
| 26 | + /// min/max bounds. The ranges are returned in order based on the filter's |
| 27 | + /// ascending/descending preference. |
| 28 | + /// |
| 29 | + /// # Optimization Goal |
| 30 | + /// |
| 31 | + /// We try to minimize the number of ranges returned, as each range will |
| 32 | + /// later need to be unioned during query iteration. When a node's range |
| 33 | + /// is completely contained within the filter bounds, we return that single |
| 34 | + /// range instead of descending to its children—this is why internal nodes |
| 35 | + /// retain their ranges up to `max_depth_range`. |
| 36 | + /// |
| 37 | + /// # Offset Handling |
| 38 | + /// |
| 39 | + /// The `filter.offset` and `filter.limit` parameters enable pagination. |
| 40 | + /// We track the running total of documents and skip ranges until we've |
| 41 | + /// passed the offset. See `recursive_find_ranges` for the special handling |
| 42 | + /// of the first overlapping leaf. |
| 43 | + pub fn find<'a>(&'a self, filter: &NumericFilter) -> Vec<&'a NumericRange> { |
| 44 | + let mut ranges = Vec::with_capacity(8); |
| 45 | + let mut total = 0usize; |
| 46 | + |
| 47 | + Self::recursive_find_ranges(&mut ranges, &self.nodes, self.root, filter, &mut total); |
| 48 | + |
| 49 | + #[cfg(all(feature = "unittest", not(miri)))] |
| 50 | + Self::check_find_invariants(&ranges, filter, total); |
| 51 | + |
| 52 | + ranges |
| 53 | + } |
| 54 | + |
| 55 | + /// Recursively find ranges that match the filter. |
| 56 | + /// |
| 57 | + /// # Containment vs Overlap |
| 58 | + /// |
| 59 | + /// - **Contained**: If the node's range is completely within [min, max], |
| 60 | + /// add it and stop descending. The range covers all values we need from |
| 61 | + /// this subtree. |
| 62 | + /// - **Overlaps**: If the range partially overlaps [min, max], we must |
| 63 | + /// descend into children (for internal nodes) or add the range (for leaves) |
| 64 | + /// since it contains some—but not all—of the values we need. |
| 65 | + /// - **No overlap**: Skip this subtree entirely. |
| 66 | + /// |
| 67 | + /// # Traversal Order |
| 68 | + /// |
| 69 | + /// Children are visited in ascending or descending order based on `filter.ascending`. |
| 70 | + /// This ensures ranges are returned in the correct order for sorted iteration. |
| 71 | + fn recursive_find_ranges<'a>( |
| 72 | + ranges: &mut Vec<&'a NumericRange>, |
| 73 | + nodes: &'a NodeArena, |
| 74 | + node_idx: NodeIndex, |
| 75 | + filter: &NumericFilter, |
| 76 | + total: &mut usize, |
| 77 | + ) { |
| 78 | + // Check if we've reached the limit |
| 79 | + if filter.limit > 0 && *total >= filter.offset.saturating_add(filter.limit) { |
| 80 | + return; |
| 81 | + } |
| 82 | + |
| 83 | + let node = &nodes[node_idx]; |
| 84 | + let min = filter.min; |
| 85 | + let max = filter.max; |
| 86 | + |
| 87 | + if let Some(range) = node.range() { |
| 88 | + let num_docs = range.num_docs(); |
| 89 | + if num_docs == 0 { |
| 90 | + // We don't care about empty ranges. |
| 91 | + return; |
| 92 | + } |
| 93 | + let contained = range.contained_in(min, max); |
| 94 | + let overlaps = range.overlaps(min, max); |
| 95 | + |
| 96 | + // If the range is completely contained in the search bounds, add it |
| 97 | + if contained { |
| 98 | + *total += num_docs as usize; |
| 99 | + if *total > filter.offset { |
| 100 | + ranges.push(range); |
| 101 | + } |
| 102 | + return; |
| 103 | + } |
| 104 | + |
| 105 | + // No overlap at all - nothing to do |
| 106 | + if !overlaps { |
| 107 | + return; |
| 108 | + } |
| 109 | + } |
| 110 | + |
| 111 | + match node { |
| 112 | + NumericRangeNode::Internal(internal) => { |
| 113 | + if filter.ascending { |
| 114 | + // Ascending: left first, then right |
| 115 | + if min <= internal.value { |
| 116 | + Self::recursive_find_ranges( |
| 117 | + ranges, |
| 118 | + nodes, |
| 119 | + internal.left_index(), |
| 120 | + filter, |
| 121 | + total, |
| 122 | + ); |
| 123 | + } |
| 124 | + if max >= internal.value { |
| 125 | + Self::recursive_find_ranges( |
| 126 | + ranges, |
| 127 | + nodes, |
| 128 | + internal.right_index(), |
| 129 | + filter, |
| 130 | + total, |
| 131 | + ); |
| 132 | + } |
| 133 | + } else { |
| 134 | + // Descending: right first, then left |
| 135 | + if max >= internal.value { |
| 136 | + Self::recursive_find_ranges( |
| 137 | + ranges, |
| 138 | + nodes, |
| 139 | + internal.right_index(), |
| 140 | + filter, |
| 141 | + total, |
| 142 | + ); |
| 143 | + } |
| 144 | + if min <= internal.value { |
| 145 | + Self::recursive_find_ranges( |
| 146 | + ranges, |
| 147 | + nodes, |
| 148 | + internal.left_index(), |
| 149 | + filter, |
| 150 | + total, |
| 151 | + ); |
| 152 | + } |
| 153 | + } |
| 154 | + } |
| 155 | + NumericRangeNode::Leaf(leaf) => { |
| 156 | + let num_docs = leaf.range.num_docs(); |
| 157 | + *total += if *total == 0 && filter.offset == 0 { |
| 158 | + // This is the first range of the result set. |
| 159 | + // It the range _overlaps_ with the filter range, |
| 160 | + // but isn't contained within it, it might contain documents |
| 161 | + // that sit outside the filter range. |
| 162 | + // For example, if the filter range is [10, 20] and the leaf range is [5, 11], |
| 163 | + // the leaf range contains documents that are outside the filter range. |
| 164 | + // |
| 165 | + // If we sum its `num_docs` to the running total, we may |
| 166 | + // end up overcounting the number of documents that match |
| 167 | + // the filter range, thus returning less documents than expected. |
| 168 | + // We choose the opposite strategy: we potentially return _more_ documents |
| 169 | + // than necessary, leaving it to the result set iterator to precisely filter |
| 170 | + // out the ones that sit outside the filter range. |
| 171 | + 1 |
| 172 | + } else { |
| 173 | + num_docs as usize |
| 174 | + }; |
| 175 | + if *total > filter.offset { |
| 176 | + ranges.push(&leaf.range); |
| 177 | + } |
| 178 | + } |
| 179 | + } |
| 180 | + } |
| 181 | +} |
0 commit comments