Multi-Node Cluster Hangs on Zentity 1.6.1 Requests #56
Description
Hey there, I'm currently experiencing an issue running Zentity 1.6.1 with Elasticsearch 7.10.1 on a multi-node cluster, but not on a single-node cluster. When sending alternating setup/delete requests (as well as with other requests), the request sometimes hangs, and it looks like the Elasticsearch CoordinatorPublication gets gummed up. I can replicate this both in a local docker-compose setup (attached below) and in Kubernetes with an ECK (Elastic Cloud on Kubernetes) cluster with 3 master and 3 data nodes.
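For reference, here is roughly the loop I use to trigger it (a minimal sketch, assuming zentity's standard `POST _zentity/_setup` endpoint and the default `localhost:9200` port mapping from the compose file below):

```sh
# Alternate between deleting the .zentity-models index and re-creating it
# via the zentity setup endpoint until the publication hangs.
while true; do
  curl -s -X DELETE 'http://localhost:9200/.zentity-models'
  curl -s -X POST 'http://localhost:9200/_zentity/_setup'
done
```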
Here are the logs from the docker-compose setup, where I deleted and then re-created the index, and the coordination hangs for 30+ seconds:
elasticsearch | {"type": "server", "timestamp": "2021-01-22T15:56:16,893Z", "level": "INFO", "component": "o.e.c.m.MetadataDeleteIndexService", "cluster.name": "docker-cluster", "node.name": "primary", "message": "[.zentity-models/kCCUX_6bS3CZeDQzImGi2A] deleting index", "cluster.uuid": "Zi3JrTDvRkmyjizI6z-6QQ", "node.id": "eZpuNPEsRqKPl6bhvojRJQ" }
elasticsearch | {"type": "deprecation", "timestamp": "2021-01-22T15:56:31,234Z", "level": "DEPRECATION", "component": "o.e.d.c.m.MetadataCreateIndexService", "cluster.name": "docker-cluster", "node.name": "primary", "message": "index name [.zentity-models] starts with a dot '.', in the next major version, index names starting with a dot are reserved for hidden indices and system indices", "cluster.uuid": "Zi3JrTDvRkmyjizI6z-6QQ", "node.id": "eZpuNPEsRqKPl6bhvojRJQ" }
elasticsearch | {"type": "server", "timestamp": "2021-01-22T15:56:31,309Z", "level": "INFO", "component": "o.e.c.m.MetadataCreateIndexService", "cluster.name": "docker-cluster", "node.name": "primary", "message": "[.zentity-models] creating index, cause [api], templates [], shards [1]/[1]", "cluster.uuid": "Zi3JrTDvRkmyjizI6z-6QQ", "node.id": "eZpuNPEsRqKPl6bhvojRJQ" }
elasticsearch | {"type": "server", "timestamp": "2021-01-22T15:56:41,313Z", "level": "INFO", "component": "o.e.c.c.C.CoordinatorPublication", "cluster.name": "docker-cluster", "node.name": "primary", "message": "after [10s] publication of cluster state version [928] is still waiting for {es-data-2}{Xjwq8qUrReyh5VUi21l3aQ}{btWNi8GkTJaAjVjbcQxe2g}{172.19.0.2}{172.19.0.2:9300}{dir} [SENT_PUBLISH_REQUEST]", "cluster.uuid": "Zi3JrTDvRkmyjizI6z-6QQ", "node.id": "eZpuNPEsRqKPl6bhvojRJQ" }
elasticsearch | {"type": "server", "timestamp": "2021-01-22T15:57:01,314Z", "level": "WARN", "component": "o.e.c.c.C.CoordinatorPublication", "cluster.name": "docker-cluster", "node.name": "primary", "message": "after [30s] publication of cluster state version [928] is still waiting for {es-data-2}{Xjwq8qUrReyh5VUi21l3aQ}{btWNi8GkTJaAjVjbcQxe2g}{172.19.0.2}{172.19.0.2:9300}{dir} [SENT_PUBLISH_REQUEST]", "cluster.uuid": "Zi3JrTDvRkmyjizI6z-6QQ", "node.id": "eZpuNPEsRqKPl6bhvojRJQ" }
Do you think this originates in the plugin or in a misconfiguration of the clusters?
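In case it helps with triage, these are the standard Elasticsearch diagnostics I capture while a request is hanging (node name taken from the logs above):

```sh
# Cluster-state updates queued behind the stuck publication on the master.
curl -s 'http://localhost:9200/_cluster/pending_tasks?pretty'

# Stack traces on the node the publication is waiting for (es-data-2 above),
# to see which threads are busy or blocked.
curl -s 'http://localhost:9200/_nodes/es-data-2/hot_threads?threads=9999'
```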
Docker Compose file
```yaml
version: '3.7'

x-plugin-volume: &plugin-volume "./target/releases/:/plugins"

x-base-es: &base-es
  image: docker.elastic.co/elasticsearch/elasticsearch-oss:${ES_VERSION:-7.10.2}
  user: "elasticsearch"
  # install all plugins in mounted /plugin directory and start the elasticsearch server
  command:
    - /bin/bash
    - -c
    - elasticsearch-plugin install --batch https://zentity.io/releases/zentity-1.6.1-elasticsearch-7.10.2.zip && elasticsearch
  ulimits:
    nofile:
      soft: 65536
      hard: 65536
    memlock:
      soft: -1
      hard: -1
  environment: &base-env
    cluster.name: docker-cluster
    network.host: 0.0.0.0
    # minimum_master_nodes need to be explicitly set when bound on a public IP
    # set to 1 to allow single node clusters
    # Details: elastic/elasticsearch#17288
    discovery.zen.minimum_master_nodes: "1"
    # Reduce virtual memory requirements, see docker/for-win#5202 (comment)
    bootstrap.memory_lock: "false"
    ES_JAVA_OPTS: "-Xms512m -Xmx512m -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=0.0.0.0:5050"
    http.cors.enabled: "true"
    http.cors.allow-origin: "*"
    cluster.initial_master_nodes: primary
  networks:
    - elastic

x-base-primary-node: &base-primary-node
  <<: *base-es
  environment:
    <<: *base-env
    node.name: primary
    node.master: "true"
    node.data: "false"
    node.ingest: "false"

x-base-data-node: &base-data-node
  <<: *base-es
  environment:
    <<: *base-env
    discovery.zen.ping.unicast.hosts: elasticsearch
    node.master: "false"
    node.data: "true"
    node.ingest: "true"

services:
  elasticsearch:
    <<: *base-primary-node
    hostname: elasticsearch
    container_name: elasticsearch
    volumes:
      - *plugin-volume
      - es-primary:/usr/share/elasticsearch/data
    ports:
      - "${ES_PORT:-9200}:9200" # http
      - "${DEBUGGER_PORT:-5050}:5050" # debugger

  es-data-1:
    <<: *base-data-node
    hostname: es-data-1
    container_name: es-data-1
    volumes:
      - *plugin-volume
      - es-data-1:/usr/share/elasticsearch/data
    ports:
      - "${DEBUGGER_PORT_DATA_1:-5051}:5050" # debugger

  es-data-2:
    <<: *base-data-node
    hostname: es-data-2
    container_name: es-data-2
    volumes:
      - *plugin-volume
      - es-data-2:/usr/share/elasticsearch/data
    ports:
      - "${DEBUGGER_PORT_DATA_2:-5052}:5050" # debugger

  kibana:
    image: docker.elastic.co/kibana/kibana-oss:${KIBANA_VERSION:-7.10.1}
    hostname: kibana
    container_name: kibana
    logging:
      driver: none
    environment:
      - server.host=0.0.0.0
      - server.name=kibana.local
      - elasticsearch.url=http://elasticsearch:9200
    ports:
      - '${KIBANA_PORT:-5601}:5601'
    networks:
      - elastic

volumes:
  es-primary:
    driver: local
  es-data-1:
    driver: local
  es-data-2:
    driver: local

networks:
  elastic:
```
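For completeness, this is how I bring the compose cluster up and wait for all three Elasticsearch nodes to join before sending any zentity requests:

```sh
# Start the three Elasticsearch nodes plus Kibana in the background.
docker-compose up -d

# Block until the cluster is green with all 3 nodes joined (or time out).
curl -s 'http://localhost:9200/_cluster/health?wait_for_status=green&wait_for_nodes=3&timeout=60s&pretty'
```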
Elastic K8s manifest
```yaml
apiVersion: elasticsearch.k8s.elastic.co/v1
kind: Elasticsearch
metadata:
  annotations:
    common.k8s.elastic.co/controller-version: 1.3.1
    elasticsearch.k8s.elastic.co/cluster-uuid: 8xDpRuE4T8ufu_KSJV4hFw
  creationTimestamp: "2021-01-20T17:28:29Z"
  generation: 4
  labels:
    app.kubernetes.io/instance: eck-entity-resolution
    app.kubernetes.io/managed-by: Tiller
    app.kubernetes.io/name: eck-entity-resolution
    app.kubernetes.io/part-of: eck
    app.kubernetes.io/version: 1.1.2
    helm.sh/chart: eck-entity-resolution-0.3.0
  name: eck-entity-resolution
  namespace: entity-resolution
  resourceVersion: "273469952"
  selfLink: /apis/elasticsearch.k8s.elastic.co/v1/namespaces/entity-resolution/elasticsearches/eck-entity-resolution
  uid: cff37de2-c6c3-4ebd-a230-e45f00bdc7e7
spec:
  auth:
    fileRealm:
    - secretName: eck-entity-resolution-users
    roles:
    - secretName: eck-entity-resolution-roles
  http:
    service:
      metadata:
        creationTimestamp: null
      spec: {}
    tls:
      certificate: {}
      selfSignedCertificate:
        disabled: true
  nodeSets:
  - config:
      node.data: false
      node.ingest: false
      node.master: true
    count: 3
    name: primary-node
    podTemplate:
      spec:
        containers:
        - env:
          - name: ES_JAVA_OPTS
            value: -Xms500m -Xmx500m
          name: elasticsearch
          resources:
            limits:
              cpu: 1
              memory: 1Gi
            requests:
              cpu: 0.5
              memory: 1Gi
        initContainers:
        - command:
          - sh
          - -c
          - |
            bin/elasticsearch-plugin install --batch https://github.com/zentity-io/zentity/releases/download/zentity-1.6.1/zentity-1.6.1-elasticsearch-7.10.1.zip
          name: install-plugins
        - command:
          - sh
          - -c
          - sysctl -w vm.max_map_count=262144
          name: sysctl
          securityContext:
            privileged: true
    volumeClaimTemplates:
    - metadata:
        name: elasticsearch-data
      spec:
        accessModes:
        - ReadWriteOnce
        resources:
          requests:
            storage: 2Gi
        storageClassName: standard-expandable
  - config:
      node.data: true
      node.ingest: true
      node.master: false
    count: 3
    name: data-node
    podTemplate:
      containers:
      - env:
        - name: ES_JAVA_OPTS
          value: -Xms4g -Xmx4g -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=0.0.0.0:5005
        name: elasticsearch
        resources:
          limits:
            cpu: 2
            memory: 8Gi
          requests:
            cpu: 0.5
            memory: 8Gi
      spec:
        initContainers:
        - command:
          - sh
          - -c
          - |
            bin/elasticsearch-plugin install --batch https://github.com/zentity-io/zentity/releases/download/zentity-1.6.1/zentity-1.6.1-elasticsearch-7.10.1.zip
          name: install-plugins
        - command:
          - sh
          - -c
          - sysctl -w vm.max_map_count=262144
          name: sysctl
          securityContext:
            privileged: true
    volumeClaimTemplates:
    - metadata:
        name: elasticsearch-data
      spec:
        accessModes:
        - ReadWriteOnce
        resources:
          requests:
            storage: 25Gi
        storageClassName: sdd-fast-expandable
  transport:
    service:
      metadata:
        creationTimestamp: null
      spec: {}
  updateStrategy:
    changeBudget: {}
  version: 7.10.1
status:
  availableNodes: 6
  health: green
  phase: Ready
  version: 7.10.1
```
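To run the same delete/setup loop against the ECK cluster, I port-forward the HTTP service (the service name follows ECK's usual `<cluster-name>-es-http` convention; credentials come from the file realm secret above):

```sh
# Forward the ECK-managed HTTP service to localhost, then reuse the same
# reproduction loop as in the docker-compose setup.
kubectl -n entity-resolution port-forward service/eck-entity-resolution-es-http 9200:9200
```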