Skip to content

ES doesn't exhaust options for allocation leaving unassigned shards. #12273

@johntdyer

Description

@johntdyer

Shard 5 will not get assigned after an upgrade from 1.5.0 to 1.6.0.

[root@ls2-es-lb ~]# curl -XGET "http://localhost:9200/_cluster/state/routing_table,routing_nodes/logstash-cdr-2015.05.18" | jq '.'
{
  "allocations": [],
  "routing_nodes": {
    "nodes": {
      "Ts0HJNFvSGy2JVd31VlotQ": [
        {
          "index": "logstash-cdr-2015.05.18",
          "shard": 1,
          "relocating_node": null,
          "node": "Ts0HJNFvSGy2JVd31VlotQ",
          "primary": false,
          "state": "STARTED"
        },
        {
          "index": "logstash-cdr-2015.05.18",
          "shard": 2,
          "relocating_node": null,
          "node": "Ts0HJNFvSGy2JVd31VlotQ",
          "primary": false,
          "state": "STARTED"
        }
      ],
      "6AS8BMZKQkivehCUWANRdQ": [
        {
          "index": "logstash-cdr-2015.05.18",
          "shard": 3,
          "relocating_node": null,
          "node": "6AS8BMZKQkivehCUWANRdQ",
          "primary": true,
          "state": "STARTED"
        },
        {
          "index": "logstash-cdr-2015.05.18",
          "shard": 1,
          "relocating_node": null,
          "node": "6AS8BMZKQkivehCUWANRdQ",
          "primary": true,
          "state": "STARTED"
        }
      ],
      "6fs0j8RWQ2esU7wgvAPcdg": [
        {
          "index": "logstash-cdr-2015.05.18",
          "shard": 4,
          "relocating_node": null,
          "node": "6fs0j8RWQ2esU7wgvAPcdg",
          "primary": false,
          "state": "STARTED"
        },
        {
          "index": "logstash-cdr-2015.05.18",
          "shard": 2,
          "relocating_node": null,
          "node": "6fs0j8RWQ2esU7wgvAPcdg",
          "primary": true,
          "state": "STARTED"
        }
      ],
      "srLX4NZDTIaHq9qBVsxcZw": [
        {
          "index": "logstash-cdr-2015.05.18",
          "shard": 0,
          "relocating_node": null,
          "node": "srLX4NZDTIaHq9qBVsxcZw",
          "primary": true,
          "state": "STARTED"
        },
        {
          "index": "logstash-cdr-2015.05.18",
          "shard": 3,
          "relocating_node": null,
          "node": "srLX4NZDTIaHq9qBVsxcZw",
          "primary": false,
          "state": "STARTED"
        }
      ],
      "DnCwjImuRFOsranelYuOaw": [
        {
          "index": "logstash-cdr-2015.05.18",
          "shard": 5,
          "relocating_node": null,
          "node": "DnCwjImuRFOsranelYuOaw",
          "primary": true,
          "state": "STARTED"
        }
      ],
      "3ZOu2V5xSX-BxL2Osd5l7A": [
        {
          "index": "logstash-cdr-2015.05.18",
          "shard": 4,
          "relocating_node": null,
          "node": "3ZOu2V5xSX-BxL2Osd5l7A",
          "primary": true,
          "state": "STARTED"
        },
        {
          "index": "logstash-cdr-2015.05.18",
          "shard": 0,
          "relocating_node": null,
          "node": "3ZOu2V5xSX-BxL2Osd5l7A",
          "primary": false,
          "state": "STARTED"
        }
      ]
    },
    "unassigned": [
      {
        "index": "logstash-cdr-2015.05.18",
        "shard": 5,
        "relocating_node": null,
        "node": null,
        "primary": false,
        "state": "UNASSIGNED"
      }
    ]
  },
  "routing_table": {
    "indices": {
      "logstash-cdr-2015.05.18": {
        "shards": {
          "2": [
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 2,
              "relocating_node": null,
              "node": "6fs0j8RWQ2esU7wgvAPcdg",
              "primary": true,
              "state": "STARTED"
            },
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 2,
              "relocating_node": null,
              "node": "Ts0HJNFvSGy2JVd31VlotQ",
              "primary": false,
              "state": "STARTED"
            }
          ],
          "5": [
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 5,
              "relocating_node": null,
              "node": "DnCwjImuRFOsranelYuOaw",
              "primary": true,
              "state": "STARTED"
            },
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 5,
              "relocating_node": null,
              "node": null,
              "primary": false,
              "state": "UNASSIGNED"
            }
          ],
          "1": [
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 1,
              "relocating_node": null,
              "node": "6AS8BMZKQkivehCUWANRdQ",
              "primary": true,
              "state": "STARTED"
            },
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 1,
              "relocating_node": null,
              "node": "Ts0HJNFvSGy2JVd31VlotQ",
              "primary": false,
              "state": "STARTED"
            }
          ],
          "3": [
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 3,
              "relocating_node": null,
              "node": "srLX4NZDTIaHq9qBVsxcZw",
              "primary": false,
              "state": "STARTED"
            },
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 3,
              "relocating_node": null,
              "node": "6AS8BMZKQkivehCUWANRdQ",
              "primary": true,
              "state": "STARTED"
            }
          ],
          "0": [
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 0,
              "relocating_node": null,
              "node": "3ZOu2V5xSX-BxL2Osd5l7A",
              "primary": false,
              "state": "STARTED"
            },
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 0,
              "relocating_node": null,
              "node": "srLX4NZDTIaHq9qBVsxcZw",
              "primary": true,
              "state": "STARTED"
            }
          ],
          "4": [
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 4,
              "relocating_node": null,
              "node": "3ZOu2V5xSX-BxL2Osd5l7A",
              "primary": true,
              "state": "STARTED"
            },
            {
              "index": "logstash-cdr-2015.05.18",
              "shard": 4,
              "relocating_node": null,
              "node": "6fs0j8RWQ2esU7wgvAPcdg",
              "primary": false,
              "state": "STARTED"
            }
          ]
        }
      }
    }
  },
  "cluster_name": "tropo-es"
}

I tried to force a re-route with the following script, but it didn't work:

for h in 3ZOu2V5xSX-BxL2Osd5l7A srLX4NZDTIaHq9qBVsxcZw 6fs0j8RWQ2esU7wgvAPcdg 6AS8BMZKQkivehCUWANRdQ DnCwjImuRFOsranelYuOaw Ts0HJNFvSGy2JVd31VlotQ; do
  curl -sw "%{http_code}" -XPOST -d '{ "commands" : [ { "allocate" : { "shard": 5, "index": "logstash-cdr-2015.05.18", "node" : "'"$h"'"  } } ] }'   'http://ls2-es-lb.int.tropo.com:9200/_cluster/reroute?pretty'  | jq '.'
done
# jdyer at JOHNDYE-M-F9G6 in ~/Projects/logstash-input-stomp on git:master o [13:37:32]
$ for h in 3ZOu2V5xSX-BxL2Osd5l7A srLX4NZDTIaHq9qBVsxcZw 6fs0j8RWQ2esU7wgvAPcdg 6AS8BMZKQkivehCUWANRdQ DnCwjImuRFOsranelYuOaw Ts0HJNFvSGy2JVd31VlotQ; do
for>   curl -sw "%{http_code}" -XPOST -d '{ "commands" : [ { "allocate" : { "shard": 5, "index": "logstash-cdr-2015.05.18", "node" : "'"$h"'"  } } ] }'   'http://ls2-es-lb.int.tropo.com:9200/_cluster/reroute?pretty'  | jq '.'
for> done
{
  "error": "ElasticsearchIllegalArgumentException[[allocate] allocation of [logstash-cdr-2015.05.18][5] on node [ls2-es1.int.tropo.com][3ZOu2V5xSX-BxL2Osd5l7A][ls2-es1][inet[/10.1.0.103:9300]]{master=false} is not allowed, reason: [YES(shard is not allocated to same node or host)][YES(node passes include/exclude/require filters)][YES(primary is already active)][YES(below shard recovery limit of [2])][YES(allocation disabling is ignored)][YES(allocation disabling is ignored)][YES(no allocation awareness enabled)][NO(too many shards for this index on node [2], limit: [2])][YES(target node version [1.6.0] is same or newer than source node version [1.6.0])][YES(enough disk for shard on node, free: [468.2gb])][YES(shard not primary or relocation disabled)]]",
  "status": 400
}
400
{
  "error": "ElasticsearchIllegalArgumentException[[allocate] allocation of [logstash-cdr-2015.05.18][5] on node [ls2-es2.int.tropo.com][srLX4NZDTIaHq9qBVsxcZw][ls2-es2][inet[/10.1.0.102:9300]]{master=false} is not allowed, reason: [YES(shard is not allocated to same node or host)][YES(node passes include/exclude/require filters)][YES(primary is already active)][YES(below shard recovery limit of [2])][YES(allocation disabling is ignored)][YES(allocation disabling is ignored)][YES(no allocation awareness enabled)][NO(too many shards for this index on node [2], limit: [2])][YES(target node version [1.6.0] is same or newer than source node version [1.6.0])][YES(enough disk for shard on node, free: [469.7gb])][YES(shard not primary or relocation disabled)]]",
  "status": 400
}
400
{
  "error": "ElasticsearchIllegalArgumentException[[allocate] allocation of [logstash-cdr-2015.05.18][5] on node [ls2-es3.int.tropo.com][6fs0j8RWQ2esU7wgvAPcdg][ls2-es3][inet[/10.1.0.101:9300]]{master=false} is not allowed, reason: [YES(shard is not allocated to same node or host)][YES(node passes include/exclude/require filters)][YES(primary is already active)][YES(below shard recovery limit of [2])][YES(allocation disabling is ignored)][YES(allocation disabling is ignored)][YES(no allocation awareness enabled)][NO(too many shards for this index on node [2], limit: [2])][YES(target node version [1.6.0] is same or newer than source node version [1.6.0])][YES(enough disk for shard on node, free: [472.2gb])][YES(shard not primary or relocation disabled)]]",
  "status": 400
}
400
{
  "error": "ElasticsearchIllegalArgumentException[[allocate] allocation of [logstash-cdr-2015.05.18][5] on node [ls2-es4.int.tropo.com][6AS8BMZKQkivehCUWANRdQ][ls2-es4][inet[/10.1.0.104:9300]]{master=false} is not allowed, reason: [YES(shard is not allocated to same node or host)][YES(node passes include/exclude/require filters)][YES(primary is already active)][YES(below shard recovery limit of [2])][YES(allocation disabling is ignored)][YES(allocation disabling is ignored)][YES(no allocation awareness enabled)][NO(too many shards for this index on node [2], limit: [2])][YES(target node version [1.6.0] is same or newer than source node version [1.6.0])][YES(enough disk for shard on node, free: [481gb])][YES(shard not primary or relocation disabled)]]",
  "status": 400
}
400
{
  "error": "ElasticsearchIllegalArgumentException[[allocate] allocation of [logstash-cdr-2015.05.18][5] on node [ls2-es5.int.tropo.com][DnCwjImuRFOsranelYuOaw][ls2-es5][inet[/10.1.0.55:9300]]{master=false} is not allowed, reason: [NO(shard cannot be allocated on same node [DnCwjImuRFOsranelYuOaw] it already exists on)][YES(node passes include/exclude/require filters)][YES(primary is already active)][YES(below shard recovery limit of [2])][YES(allocation disabling is ignored)][YES(allocation disabling is ignored)][YES(no allocation awareness enabled)][YES(shard count under limit [2] of total shards per node)][YES(target node version [1.6.0] is same or newer than source node version [1.6.0])][YES(enough disk for shard on node, free: [466.9gb])][YES(shard not primary or relocation disabled)]]",
  "status": 400
}
400
{
  "error": "ElasticsearchIllegalArgumentException[[allocate] allocation of [logstash-cdr-2015.05.18][5] on node [ls2-es6.int.tropo.com][Ts0HJNFvSGy2JVd31VlotQ][ls2-es6.int.tropo.com][inet[/10.1.0.106:9300]]{master=false} is not allowed, reason: [YES(shard is not allocated to same node or host)][YES(node passes include/exclude/require filters)][YES(primary is already active)][YES(below shard recovery limit of [2])][YES(allocation disabling is ignored)][YES(allocation disabling is ignored)][YES(no allocation awareness enabled)][NO(too many shards for this index on node [2], limit: [2])][YES(target node version [1.6.0] is same or newer than source node version [1.6.0])][YES(enough disk for shard on node, free: [483.3gb])][YES(shard not primary or relocation disabled)]]",
  "status": 400
}
400

This is the only unassigned shard since the restart, and I am not sure how to get the cluster back to green. Any advice?

Thanks

Metadata

Metadata

Assignees

No one assigned

    Labels

    :Distributed/Allocation — All issues relating to the decision making around placing a shard (both master logic & on the nodes)
    >bug
    Team:Distributed — Meta label for distributed team.

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions