Fix circuit breaker leak in MultiTerms aggregation#79362
Fix circuit breaker leak in MultiTerms aggregation#79362iverase merged 5 commits intoelastic:masterfrom
Conversation
|
Pinging @elastic/es-analytics-geo (Team:Analytics) |
DaveCTurner
left a comment
There was a problem hiding this comment.
LGTM in the sense that (1) the tests fail iff the fix is reverted and (2) the script I had to reproduce the problem no longer exhibits a leak.
Details
DELETE /test
# 200 OK
# {
# "acknowledged": true
# }
PUT /test
{
"mappings": {
"properties": {
"field1": {
"type": "keyword"
},
"field2": {
"type": "keyword"
}
}
},
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
}
}
# 200 OK
# {
# "shards_acknowledged": true,
# "acknowledged": true,
# "index": "test"
# }
POST /test/_bulk
{"index":{}}
{"field1":"aaa","field2":"bb"}
{"index":{}}
{"field1":"aaa","field2":"cc"}
{"index":{}}
{"field1":"aaa","field2":"dd"}
{"index":{}}
{"field1":"aaa","field2":"ee"}
{"index":{}}
{"field1":"aaa","field2":"ff"}
{"index":{}}
{"field1":"aaa","field2":"gg"}
{"index":{}}
{"field1":"aaa","field2":"hh"}
{"index":{}}
{"field1":"aaa","field2":"jj"}
{"index":{}}
{"field1":"aaa","field2":"kk"}
{"index":{}}
{"field1":"aaa","field2":"ll"}
# 200 OK
# {
# "took": 87,
# "items": [
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "PR50k3wBIyZAxwBsvzs1",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 0
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "Ph50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 1
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "Px50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 2
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "QB50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 3
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "QR50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 4
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "Qh50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 5
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "Qx50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 6
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "RB50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 7
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "RR50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 8
# }
# },
# {
# "index": {
# "status": 201,
# "_type": "_doc",
# "_primary_term": 1,
# "_id": "Rh50k3wBIyZAxwBsvzs3",
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "_index": "test",
# "result": "created",
# "_version": 1,
# "_seq_no": 9
# }
# }
# ],
# "errors": false
# }
POST /test/_refresh
# 200 OK
# {
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# }
# }
GET /_nodes/stats/breaker?filter_path=nodes.*.breakers.request
# 200 OK
# {
# "nodes": {
# "zKIBvy-7QpKw-Lizpwi0vA": {
# "breakers": {
# "request": {
# "estimated_size": "0b",
# "overhead": 1.0,
# "estimated_size_in_bytes": 0,
# "limit_size": "614.3mb",
# "tripped": 0,
# "limit_size_in_bytes": 644245094
# }
# }
# }
# }
# }
POST /_cache/clear
# 200 OK
# {
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# }
# }
GET /test/_search
{
"size": 0,
"aggs": {
"test": {
"multi_terms": {
"size": 1,
"terms": [
{
"field": "field1"
},
{
"field": "field2"
}
]
}
}
}
}
# 200 OK
# {
# "aggregations": {
# "test": {
# "buckets": [
# {
# "doc_count": 1,
# "key_as_string": "aaa|bb",
# "key": [
# "aaa",
# "bb"
# ]
# }
# ],
# "doc_count_error_upper_bound": 0,
# "sum_other_doc_count": 9
# }
# },
# "took": 26,
# "_shards": {
# "skipped": 0,
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "timed_out": false,
# "hits": {
# "max_score": null,
# "total": {
# "value": 10,
# "relation": "eq"
# },
# "hits": []
# }
# }
GET /_nodes/stats/breaker?filter_path=nodes.*.breakers.request
# 200 OK
# {
# "nodes": {
# "zKIBvy-7QpKw-Lizpwi0vA": {
# "breakers": {
# "request": {
# "estimated_size": "744b",
# "overhead": 1.0,
# "estimated_size_in_bytes": 744,
# "limit_size": "614.3mb",
# "tripped": 0,
# "limit_size_in_bytes": 644245094
# }
# }
# }
# }
# }
POST /_cache/clear
# 200 OK
# {
# "_shards": {
# "successful": 1,
# "total": 1,
# "failed": 0
# }
# }
GET /test/_search
{
"size": 0,
"aggs": {
"test": {
"multi_terms": {
"size": 1,
"terms": [
{
"field": "field1"
},
{
"field": "field2"
}
]
}
}
}
}
# 200 OK
# {
# "aggregations": {
# "test": {
# "buckets": [
# {
# "doc_count": 1,
# "key_as_string": "aaa|bb",
# "key": [
# "aaa",
# "bb"
# ]
# }
# ],
# "doc_count_error_upper_bound": 0,
# "sum_other_doc_count": 9
# }
# },
# "took": 3,
# "_shards": {
# "skipped": 0,
# "successful": 1,
# "total": 1,
# "failed": 0
# },
# "timed_out": false,
# "hits": {
# "max_score": null,
# "total": {
# "value": 10,
# "relation": "eq"
# },
# "hits": []
# }
# }
GET /_nodes/stats/breaker?filter_path=nodes.*.breakers.request
# 200 OK
# {
# "nodes": {
# "zKIBvy-7QpKw-Lizpwi0vA": {
# "breakers": {
# "request": {
# "estimated_size": "1.4kb",
# "overhead": 1.0,
# "estimated_size_in_bytes": 1488,
# "limit_size": "614.3mb",
# "tripped": 0,
# "limit_size_in_bytes": 644245094
# }
# }
# }
# }
# }
Leaving it to Igor for a final look tho, this isn't code which which I'm familiar.
nik9000
left a comment
There was a problem hiding this comment.
👍
I do think it'd be good to randomize whether or not we pre-allocate AggregatorTestCase. I'll bet we'd have caught this earlier if we did that.
|
@nik9000 I added logic in AggregatorTestCase to randomise preallocation. |
| query, | ||
| breakerService, | ||
| builder.bytesToPreallocate(), | ||
| randomBoolean() ? 0 : builder.bytesToPreallocate(), |
| protected void doClose() { | ||
| super.doClose(); | ||
| Releasables.close(this.analyzer, this.bytesRefHash); | ||
| Releasables.close(this.analyzer, this.bytesRefHash, bucketOrds); |
There was a problem hiding this comment.
@benwtrent looks like you had a leak that I didn't catch. I'm quite thankful for leak detecting test cases.
benwtrent
left a comment
There was a problem hiding this comment.
Thank you for fixing CategorizeTextAggregator.java as well!
|
@elasticmachine update branch |
|
@elasticmachine run elasticsearch-ci/part-1 |
The MultiTermsAggregator creates a BytesKeyedBucketOrds that never gets closed and therefore it might leak the memory allocated into the circuit breaker.
💔 Backport failed
You can use sqren/backport to manually backport by running |
The MultiTermsAggregator creates a BytesKeyedBucketOrds that never gets closed and therefore it might leak the memory allocated into the circuit breaker.
* upstream/master: (34 commits) Add extensionName() to security extension (elastic#79329) More robust and consistent allowAll indicesAccessControl (elastic#79415) Fix circuit breaker leak in MultiTerms aggregation (elastic#79362) guard geoline aggregation from parents aggegator that emit empty buckets (elastic#79129) Vector tiles: increase the size of the envelope used to clip geometries (elastic#79030) Revert "[ML] Add queue_capacity setting to start deployment API (elastic#79369)" (elastic#79374) Convert token service license object to LicensedFeature (elastic#79284) [TEST] Fix ShardPathTests for MDP (elastic#79393) Fix fleet search API with no checkpints (elastic#79400) Reduce BWC version for transient settings (elastic#79396) EQL: Rename a test class for eclipse (elastic#79254) Use search_coordination threadpool in field caps (elastic#79378) Use query param instead of a system property for opting in for new cluster health response code (elastic#79351) Add new kNN search endpoint (elastic#79013) Disable BWC tests Convert auditing license object to LicensedFeature (elastic#79280) Update BWC versions after backport of elastic#78551 Enable InstantiatingObjectParser to pass context as a first argument (elastic#79206) Move xcontent filtering tests (elastic#79298) Update links to Fleet/Agent docs (elastic#79303) ...
The MultiTermsAggregator creates a BytesKeyedBucketOrds that never gets closed and therefore it might leak the memory allocated into the circuit breaker.