Skip to content

Bug Report: write_only vindexes are incorrectly prioritized #13486

@aparajon

Description

@aparajon

Overview of the Issue

During our effort to migrate lookup tables from the legacy type of lookup_hash to lookup, we encountered an issue that resulted in the incorrect vindex being selected when write_only mode was used, resulting in scatter queries. This migration involves the following high level steps:

  1. Create new lookup table with the new lookup type. Set write_only to true
  2. Backfill the new lookup table
  3. Remove the write_only flag on the new table and set write_only to true on the old lookup table
  4. Remove the old lookup table

Reproduction Steps

  1. Deploy the following vschema:
{
  "mainkeyspace": {
    "sharded": true,
    "vindexes": {
      "hash": {
        "type": "hash"
      },
      "varchar_hash": {
        "type": "unicode_loose_md5"
      },
      "aliases_alias_lookup": {
        "type": "lookup_hash",
        "params": {
          "table": "aliases_alias_lookup",
          "from": "alias",
          "to": "user_id",
          "ignore_nulls": "true",
          "autocommit": "true"
        },
        "owner": "aliases"
      },
      "aliases_alias_lookup_alt": {
        "type": "lookup",
        "params": {
          "autocommit": "true",
          "from": "alias",
          "ignore_nulls": "true",
          "table": "aliases_alias_lookup_alt",
          "to": "keyspace_id",
          "write_only": "true"
        },
        "owner": "aliases"
      }
    },
    "tables": {
      "aliases": {
        "columnVindexes": [
          {
            "column": "user_id",
            "name": "hash"
          },
          {
            "column": "alias",
            "name": "aliases_alias_lookup"
          },
          {
            "column": "alias",
            "name": "aliases_alias_lookup_alt"
          }
        ]
      },
      "aliases_alias_lookup": {
        "columnVindexes": [
          {
            "column": "alias",
            "name": "varchar_hash"
          }
        ]
      },
      "aliases_alias_lookup_alt": {
        "columnVindexes": [
          {
            "column": "alias",
            "name": "varchar_hash"
          }
        ]
      }
    }
  }
}
  1. Deploy the following schema:
CREATE TABLE aliases (
  `id` bigint NOT NULL AUTO_INCREMENT,
  `alias` varchar(191) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
  `user_id` bigint(20) NOT NULL,
  PRIMARY KEY (`id`),
  KEY `idx_aliases_user_id` (`user_id`)
) ENGINE=InnoDB AUTO_INCREMENT=440699859 DEFAULT CHARSET=utf8mb3;

CREATE TABLE aliases_alias_lookup (
  `id` bigint NOT NULL AUTO_INCREMENT,
  `alias` varchar(191) COLLATE utf8mb4_general_ci NOT NULL,
  `user_id` bigint(20) NOT NULL,
  PRIMARY KEY (`id`),
  KEY `idx_alias` (`alias`)
) ENGINE=InnoDB AUTO_INCREMENT=1237907453 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

CREATE TABLE aliases_alias_lookup_alt (
    `alias` varchar(191) COLLATE utf8mb4_general_ci NOT NULL,
    `keyspace_id` varbinary(16) NOT NULL,
    PRIMARY KEY (`alias`,`keyspace_id`)
) ENGINE=InnoDB;

INSERT INTO aliases values ('hello', 1);
  1. Run vexplain select alias, user_id from aliases where alias = 'hello'; from the vtgate. Output:
{
	"OperatorType": "VindexLookup",
	"Variant": "Equal",
	"Keyspace": {
		"Name": "mainkeyspace",
		"Sharded": true
	},
	"Values": [
		"VARCHAR(\"hello\")"
	],
	"Vindex": "aliases_alias_lookup_alt",
	"Inputs": [
		{
			"OperatorType": "Route",
			"Variant": "IN",
			"Keyspace": {
				"Name": "mainkeyspace",
				"Sharded": true
			},
			"FieldQuery": "select alias, keyspace_id from aliases_alias_lookup_alt where 1 != 1",
			"Query": "select alias, keyspace_id from aliases_alias_lookup_alt where alias in ::__vals",
			"Table": "aliases_alias_lookup_alt",
			"Values": [
				":alias"
			],
			"Vindex": "varchar_hash"
		},
		{
			"OperatorType": "Route",
			"Variant": "ByDestination",
			"Keyspace": {
				"Name": "mainkeyspace",
				"Sharded": true
			},
			"FieldQuery": "select alias, user_id from aliases where 1 != 1",
			"Query": "select alias, user_id from aliases where alias = 'hello'",
			"Table": "aliases"
		}
	]
} |

Notice that the table aliases_alias_lookup_alt is selected by the planner, which is in write_only mode. This results in the undesirable behavior of a scatter query. The desirable state is for the planner to select aliases_alias_lookup so that the scatter is prevented while aliases_alias_lookup_alt can be backfilled. The next steps showcase how changing the order of the vindexes results in the correct table being used.

  1. Deploy the vschema file with the order of vindexes changed
{
    "mainkeyspace": {
      "sharded": true,
      "vindexes": {
        "hash": {
          "type": "hash"
        },
        "varchar_hash": {
          "type": "unicode_loose_md5"
        },
        "aliases_alias_lookup": {
          "type": "lookup_hash",
          "params": {
            "table": "aliases_alias_lookup",
            "from": "alias",
            "to": "user_id",
            "ignore_nulls": "true",
            "autocommit": "true"
          },
          "owner": "aliases"
        },
        "aliases_alias_lookup_alt": {
          "type": "lookup",
          "params": {
            "autocommit": "true",
            "from": "alias",
            "ignore_nulls": "true",
            "table": "aliases_alias_lookup_alt",
            "to": "keyspace_id",
            "write_only": "true"
          },
          "owner": "aliases"
        }
      },
      "tables": {
        "aliases": {
          "columnVindexes": [
            {
              "column": "user_id",
              "name": "hash"
            },
            {
              "column": "alias",
              "name": "aliases_alias_lookup_alt"
            },
            {
              "column": "alias",
              "name": "aliases_alias_lookup"
            }
          ]
        },
        "aliases_alias_lookup": {
          "columnVindexes": [
            {
              "column": "alias",
              "name": "varchar_hash"
            }
          ]
        },
        "aliases_alias_lookup_alt": {
          "columnVindexes": [
            {
              "column": "alias",
              "name": "varchar_hash"
            }
          ]
        }
      }
    }
  }

Note that the alt table is listed first under columnVindexes

  1. Run vexplain select alias, user_id from aliases where alias = 'hello'; from the vtgate again. Output:
{
	"OperatorType": "VindexLookup",
	"Variant": "Equal",
	"Keyspace": {
		"Name": "mainkeyspace",
		"Sharded": true
	},
	"Values": [
		"VARCHAR(\"hello\")"
	],
	"Vindex": "aliases_alias_lookup",
	"Inputs": [
		{
			"OperatorType": "Route",
			"Variant": "IN",
			"Keyspace": {
				"Name": "mainkeyspace",
				"Sharded": true
			},
			"FieldQuery": "select alias, user_id from aliases_alias_lookup where 1 != 1",
			"Query": "select alias, user_id from aliases_alias_lookup where alias in ::__vals",
			"Table": "aliases_alias_lookup",
			"Values": [
				":alias"
			],
			"Vindex": "varchar_hash"
		},
		{
			"OperatorType": "Route",
			"Variant": "ByDestination",
			"Keyspace": {
				"Name": "mainkeyspace",
				"Sharded": true
			},
			"FieldQuery": "select alias, user_id from aliases where 1 != 1",
			"Query": "select alias, user_id from aliases where alias = 'hello'",
			"Table": "aliases"
		}
	]
} 

Now aliases_alias_lookup is correctly used.

It's entirely possible that the decision logic for this case has not yet been implemented due to other Vitess consumers not having the requirement to upgrade their lookup table while keeping the old version alive. But having the write_only version not being prioritized explicitly seems like an important requirement for migrations of this type, as scatter queries can lead to app outages in high QPS scenarios.

Binary Version

Version: 16.0.1-SNAPSHOT (Git revision 0e79c68912ee1ebc0851c7ef4b2da5b190ebf9bb branch 'HEAD') built on Thu Jun 15 21:21:53 UTC 2023 by root@buildkitsandbox using go1.20.1 linux/amd64


### Operating System and Environment details

```sh
cat /etc/os-release
PRETTY_NAME="Debian GNU/Linux 11 (bullseye)"
NAME="Debian GNU/Linux"
VERSION_ID="11"
VERSION="11 (bullseye)"
VERSION_CODENAME=bullseye
ID=debian
HOME_URL="https://www.debian.org/"
SUPPORT_URL="https://www.debian.org/support"
BUG_REPORT_URL="https://bugs.debian.org/"

Kernel version: Linux 5.15.49-linuxkit-pr

Architecture: x86_64

Log Fragments

No response

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions