SET CLUSTER SETTING sql.stats.automatic_collection.enabled = false;
CREATE TABLE t (
i INT PRIMARY KEY,
s STRING,
b BYTES,
INDEX s (s),
INDEX b (b)
);
ALTER TABLE t INJECT STATISTICS '[
{
"columns": [
"i"
],
"created_at": "2021-08-02 17:23:30.293908",
"distinct_count": 65536,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "0"
},
{
"distinct_range": 32766,
"num_eq": 1,
"num_range": 32766,
"upper_bound": "32767"
},
{
"distinct_range": 32767,
"num_eq": 1,
"num_range": 32767,
"upper_bound": "65535"
}
],
"histo_col_type": "INT8",
"name": "repro",
"null_count": 0,
"row_count": 65536
},
{
"columns": [
"s"
],
"created_at": "2021-08-02 17:23:30.293908",
"distinct_count": 65536,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "\\x0000"
},
{
"distinct_range": 32766,
"num_eq": 1,
"num_range": 32766,
"upper_bound": "\\x7FFF"
},
{
"distinct_range": 32767,
"num_eq": 1,
"num_range": 32767,
"upper_bound": "\\xFFFF"
}
],
"histo_col_type": "STRING",
"name": "repro",
"null_count": 0,
"row_count": 65536
},
{
"columns": [
"b"
],
"created_at": "2021-08-02 17:23:30.293908",
"distinct_count": 65536,
"histo_buckets": [
{
"distinct_range": 0,
"num_eq": 1,
"num_range": 0,
"upper_bound": "\\x0000"
},
{
"distinct_range": 32766,
"num_eq": 1,
"num_range": 32766,
"upper_bound": "\\x7FFF"
},
{
"distinct_range": 32767,
"num_eq": 1,
"num_range": 32767,
"upper_bound": "\\xFFFF"
}
],
"histo_col_type": "BYTES",
"name": "repro",
"null_count": 0,
"row_count": 65536
}
]';
EXPLAIN SELECT i FROM t WHERE i >= 0x55B4 AND i < 0x55B5;
EXPLAIN (OPT, VERBOSE) SELECT i FROM t WHERE i >= 0x55B4 AND i < 0x55B5;
EXPLAIN SELECT s FROM t WHERE s >= '\x55B4' AND s < '\x55B5';
EXPLAIN (OPT, VERBOSE) SELECT s FROM t WHERE s >= '\x55B4' AND s < '\x55B5';
EXPLAIN SELECT b FROM t WHERE b >= '\x55B4' AND b < '\x55B5';
EXPLAIN (OPT, VERBOSE) SELECT b FROM t WHERE b >= '\x55B4' AND b < '\x55B5';
info
----------------------------------------------------------
scan t@b
├── columns: b:3
├── constraint: /3/1: [/'\x55b4' - /'\x55b5')
├── stats: [rows=16383, distinct(3)=16383, null(3)=0]
│ histogram(3)= 0 0 16383 0
│ <--- '\x55b4' ------- '\x55b5'
└── cost: 17380.59
(7 rows)
Optimizer row estimates for
BYTEScolumns are dividing the histogram bucket by 2 instead of by the portion of the key space covered by the span. (Not totally sure this is specific toBYTESbut that is where I observed it.)Here's an example:
For the
BYTEScolumn the row estimate is 16383 instead of 1, like I would expect.Environment: