-
Notifications
You must be signed in to change notification settings - Fork 2.3k
Bug Report: write_only vindexes are incorrectly prioritized #13486
Description
Overview of the Issue
During our effort to migrate lookup tables from the legacy type of lookup_hash to lookup, we encountered an issue that resulted in the incorrect vindex being selected when write_only mode was used, resulting in scatter queries. This migration involves the following high level steps:
- Create new lookup table with the new lookup type. Set
write_onlytotrue - Backfill the new lookup table
- Remove the
write_onlyflag on the new table and setwrite_onlytotrueon the old lookup table - Remove the old lookup table
Reproduction Steps
- Deploy the following vschema:
{
"mainkeyspace": {
"sharded": true,
"vindexes": {
"hash": {
"type": "hash"
},
"varchar_hash": {
"type": "unicode_loose_md5"
},
"aliases_alias_lookup": {
"type": "lookup_hash",
"params": {
"table": "aliases_alias_lookup",
"from": "alias",
"to": "user_id",
"ignore_nulls": "true",
"autocommit": "true"
},
"owner": "aliases"
},
"aliases_alias_lookup_alt": {
"type": "lookup",
"params": {
"autocommit": "true",
"from": "alias",
"ignore_nulls": "true",
"table": "aliases_alias_lookup_alt",
"to": "keyspace_id",
"write_only": "true"
},
"owner": "aliases"
}
},
"tables": {
"aliases": {
"columnVindexes": [
{
"column": "user_id",
"name": "hash"
},
{
"column": "alias",
"name": "aliases_alias_lookup"
},
{
"column": "alias",
"name": "aliases_alias_lookup_alt"
}
]
},
"aliases_alias_lookup": {
"columnVindexes": [
{
"column": "alias",
"name": "varchar_hash"
}
]
},
"aliases_alias_lookup_alt": {
"columnVindexes": [
{
"column": "alias",
"name": "varchar_hash"
}
]
}
}
}
}
- Deploy the following schema:
CREATE TABLE aliases (
`id` bigint NOT NULL AUTO_INCREMENT,
`alias` varchar(191) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
`user_id` bigint(20) NOT NULL,
PRIMARY KEY (`id`),
KEY `idx_aliases_user_id` (`user_id`)
) ENGINE=InnoDB AUTO_INCREMENT=440699859 DEFAULT CHARSET=utf8mb3;
CREATE TABLE aliases_alias_lookup (
`id` bigint NOT NULL AUTO_INCREMENT,
`alias` varchar(191) COLLATE utf8mb4_general_ci NOT NULL,
`user_id` bigint(20) NOT NULL,
PRIMARY KEY (`id`),
KEY `idx_alias` (`alias`)
) ENGINE=InnoDB AUTO_INCREMENT=1237907453 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
CREATE TABLE aliases_alias_lookup_alt (
`alias` varchar(191) COLLATE utf8mb4_general_ci NOT NULL,
`keyspace_id` varbinary(16) NOT NULL,
PRIMARY KEY (`alias`,`keyspace_id`)
) ENGINE=InnoDB;
INSERT INTO aliases values ('hello', 1);
- Run
vexplain select alias, user_id from aliases where alias = 'hello';from the vtgate. Output:
{
"OperatorType": "VindexLookup",
"Variant": "Equal",
"Keyspace": {
"Name": "mainkeyspace",
"Sharded": true
},
"Values": [
"VARCHAR(\"hello\")"
],
"Vindex": "aliases_alias_lookup_alt",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "IN",
"Keyspace": {
"Name": "mainkeyspace",
"Sharded": true
},
"FieldQuery": "select alias, keyspace_id from aliases_alias_lookup_alt where 1 != 1",
"Query": "select alias, keyspace_id from aliases_alias_lookup_alt where alias in ::__vals",
"Table": "aliases_alias_lookup_alt",
"Values": [
":alias"
],
"Vindex": "varchar_hash"
},
{
"OperatorType": "Route",
"Variant": "ByDestination",
"Keyspace": {
"Name": "mainkeyspace",
"Sharded": true
},
"FieldQuery": "select alias, user_id from aliases where 1 != 1",
"Query": "select alias, user_id from aliases where alias = 'hello'",
"Table": "aliases"
}
]
} |
Notice that the table aliases_alias_lookup_alt is selected by the planner, which is in write_only mode. This results in the undesirable behavior of a scatter query. The desirable state is for the planner to select aliases_alias_lookup so that the scatter is prevented while aliases_alias_lookup_alt can be backfilled. The next steps showcase how changing the order of the vindexes results in the correct table being used.
- Deploy the vschema file with the order of vindexes changed
{
"mainkeyspace": {
"sharded": true,
"vindexes": {
"hash": {
"type": "hash"
},
"varchar_hash": {
"type": "unicode_loose_md5"
},
"aliases_alias_lookup": {
"type": "lookup_hash",
"params": {
"table": "aliases_alias_lookup",
"from": "alias",
"to": "user_id",
"ignore_nulls": "true",
"autocommit": "true"
},
"owner": "aliases"
},
"aliases_alias_lookup_alt": {
"type": "lookup",
"params": {
"autocommit": "true",
"from": "alias",
"ignore_nulls": "true",
"table": "aliases_alias_lookup_alt",
"to": "keyspace_id",
"write_only": "true"
},
"owner": "aliases"
}
},
"tables": {
"aliases": {
"columnVindexes": [
{
"column": "user_id",
"name": "hash"
},
{
"column": "alias",
"name": "aliases_alias_lookup_alt"
},
{
"column": "alias",
"name": "aliases_alias_lookup"
}
]
},
"aliases_alias_lookup": {
"columnVindexes": [
{
"column": "alias",
"name": "varchar_hash"
}
]
},
"aliases_alias_lookup_alt": {
"columnVindexes": [
{
"column": "alias",
"name": "varchar_hash"
}
]
}
}
}
}
Note that the alt table is listed first under columnVindexes
- Run
vexplain select alias, user_id from aliases where alias = 'hello';from the vtgate again. Output:
{
"OperatorType": "VindexLookup",
"Variant": "Equal",
"Keyspace": {
"Name": "mainkeyspace",
"Sharded": true
},
"Values": [
"VARCHAR(\"hello\")"
],
"Vindex": "aliases_alias_lookup",
"Inputs": [
{
"OperatorType": "Route",
"Variant": "IN",
"Keyspace": {
"Name": "mainkeyspace",
"Sharded": true
},
"FieldQuery": "select alias, user_id from aliases_alias_lookup where 1 != 1",
"Query": "select alias, user_id from aliases_alias_lookup where alias in ::__vals",
"Table": "aliases_alias_lookup",
"Values": [
":alias"
],
"Vindex": "varchar_hash"
},
{
"OperatorType": "Route",
"Variant": "ByDestination",
"Keyspace": {
"Name": "mainkeyspace",
"Sharded": true
},
"FieldQuery": "select alias, user_id from aliases where 1 != 1",
"Query": "select alias, user_id from aliases where alias = 'hello'",
"Table": "aliases"
}
]
}
Now aliases_alias_lookup is correctly used.
It's entirely possible that the decision logic for this case has not yet been implemented due to other Vitess consumers not having the requirement to upgrade their lookup table while keeping the old version alive. But having the write_only version not being prioritized explicitly seems like an important requirement for migrations of this type, as scatter queries can lead to app outages in high QPS scenarios.
Binary Version
Version: 16.0.1-SNAPSHOT (Git revision 0e79c68912ee1ebc0851c7ef4b2da5b190ebf9bb branch 'HEAD') built on Thu Jun 15 21:21:53 UTC 2023 by root@buildkitsandbox using go1.20.1 linux/amd64
### Operating System and Environment details
```sh
cat /etc/os-release
PRETTY_NAME="Debian GNU/Linux 11 (bullseye)"
NAME="Debian GNU/Linux"
VERSION_ID="11"
VERSION="11 (bullseye)"
VERSION_CODENAME=bullseye
ID=debian
HOME_URL="https://www.debian.org/"
SUPPORT_URL="https://www.debian.org/support"
BUG_REPORT_URL="https://bugs.debian.org/"
Kernel version: Linux 5.15.49-linuxkit-pr
Architecture: x86_64
Log Fragments
No response