Description
BSC recently upgrade the goleveldb version indirectly to the latest commit, i.e. from: 20210819022825-2ae1ddf74ef7 to 20220721030215-126854af5e6d
When we tried to restart the node, it took quite a long time, more several minutes, eventually force killed.
After some investigation, we found the node got stuck on leveldb.(*DB).Close(), the database is quite large on BSC. But it was ok with an older version. Dive deeper, we found it was caused by the levelDB upgrade, after downgrade to the previous version, the issue is gone.
Some Information to share:
- The size of the database is ~2.5TB
- some of the call stack dumped during DB close
//== dump 1:
goroutine 18035443 [runnable]:
github.com/syndtr/goleveldb/leveldb/cache.(*lru).Evict(0xc002b104c0?, 0xc06977fa40?)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/lru.go:138 +0x175
github.com/syndtr/goleveldb/leveldb/cache.(*Cache).Close.func1({0xc26ac58000, 0x131ce, 0xc1481f3b60?})
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/cache.go:661 +0x7f
github.com/syndtr/goleveldb/leveldb/cache.(*mHead).enumerateNodesWithCB(0xc144998380, 0xc1ddfb3b98)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/cache.go:327 +0x58
github.com/syndtr/goleveldb/leveldb/cache.(*Cache).Close(0xc0018823f0, 0x1)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/cache.go:652 +0xb0
github.com/syndtr/goleveldb/leveldb.(*tOps).close(0xc002b0e690)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/table.go:494 +0x2a
github.com/syndtr/goleveldb/leveldb.(*session).close(0xc001ed4e10)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/session.go:101 +0x29
github.com/syndtr/goleveldb/leveldb.(*DB).Close(0x17?)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/db.go:1218 +0x2b4
github.com/ethereum/go-ethereum/ethdb/leveldb.(*Database).Close(0xc002252200)
/home/runner/work/bsc/bsc/ethdb/leveldb/leveldb.go:180 +0x205
github.com/ethereum/go-ethereum/core/rawdb.(*freezerdb).Close(0xc0018f45a0)
/home/runner/work/bsc/bsc/core/rawdb/database.go:50 +0xd0
github.com/ethereum/go-ethereum/node.(*closeTrackingDB).Close(0xc0002827c8)
/home/runner/work/bsc/bsc/node/node.go:698 +0x99
github.com/ethereum/go-ethereum/eth.(*Ethereum).Stop(0xc0002a6360)
/home/runner/work/bsc/bsc/eth/backend.go:684 +0x158
github.com/ethereum/go-ethereum/node.(*Node).stopServices(0xc00228c410, {0xc01dfece40, 0x2, 0x3be62318a99cbdfe?})
/home/runner/work/bsc/bsc/node/node.go:321 +0xbd
github.com/ethereum/go-ethereum/node.(*Node).Close(0xc00228c410)
/home/runner/work/bsc/bsc/node/node.go:241 +0x191
created by github.com/ethereum/go-ethereum/cmd/utils.StartNode.func1.1
/home/runner/work/bsc/bsc/cmd/utils/cmd.go:92 +0x96
//== dump 2: 17s
goroutine 18035443 [runnable]:
github.com/syndtr/goleveldb/leveldb/cache.(*Node).callFinalizer(0xc1ca643b20)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/cache.go:722 +0xf9
github.com/syndtr/goleveldb/leveldb/cache.(*Cache).Close.func1({0xc273374000, 0x2190f, 0xc1481f3b60?})
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/cache.go:665 +0xae
github.com/syndtr/goleveldb/leveldb/cache.(*mHead).enumerateNodesWithCB(0xc144998380, 0xc1ddfb3b98)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/cache.go:327 +0x58
github.com/syndtr/goleveldb/leveldb/cache.(*Cache).Close(0xc0018823f0, 0x1)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/cache.go:652 +0xb0
github.com/syndtr/goleveldb/leveldb.(*tOps).close(0xc002b0e690)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/table.go:494 +0x2a
github.com/syndtr/goleveldb/leveldb.(*session).close(0xc001ed4e10)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/session.go:101 +0x29
github.com/syndtr/goleveldb/leveldb.(*DB).Close(0x17?)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/db.go:1218 +0x2b4
github.com/ethereum/go-ethereum/ethdb/leveldb.(*Database).Close(0xc002252200)
/home/runner/work/bsc/bsc/ethdb/leveldb/leveldb.go:180 +0x205
github.com/ethereum/go-ethereum/core/rawdb.(*freezerdb).Close(0xc0018f45a0)
/home/runner/work/bsc/bsc/core/rawdb/database.go:50 +0xd0
github.com/ethereum/go-ethereum/node.(*closeTrackingDB).Close(0xc0002827c8)
/home/runner/work/bsc/bsc/node/node.go:698 +0x99
github.com/ethereum/go-ethereum/eth.(*Ethereum).Stop(0xc0002a6360)
/home/runner/work/bsc/bsc/eth/backend.go:684 +0x158
github.com/ethereum/go-ethereum/node.(*Node).stopServices(0xc00228c410, {0xc01dfece40, 0x2, 0x3be62318a99cbdfe?})
/home/runner/work/bsc/bsc/node/node.go:321 +0xbd
github.com/ethereum/go-ethereum/node.(*Node).Close(0xc00228c410)
/home/runner/work/bsc/bsc/node/node.go:241 +0x191
created by github.com/ethereum/go-ethereum/cmd/utils.StartNode.func1.1
/home/runner/work/bsc/bsc/cmd/utils/cmd.go:92 +0x96
//== dump 3: after 70s
goroutine 18035443 [runnable]:
github.com/syndtr/goleveldb/leveldb/cache.(*Node).callFinalizer(0xc03d4843f0)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/cache.go:722 +0xf9
github.com/syndtr/goleveldb/leveldb/cache.(*Cache).Close.func1({0xc299162000, 0x3b882, 0x39c00?})
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/cache.go:665 +0xae
github.com/syndtr/goleveldb/leveldb/cache.(*mHead).enumerateNodesWithCB(0xc144998380, 0xc1ddfb3b98)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/cache.go:327 +0x58
github.com/syndtr/goleveldb/leveldb/cache.(*Cache).Close(0xc0018823f0, 0x1)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/cache/cache.go:652 +0xb0
github.com/syndtr/goleveldb/leveldb.(*tOps).close(0xc002b0e690)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/table.go:494 +0x2a
github.com/syndtr/goleveldb/leveldb.(*session).close(0xc001ed4e10)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/session.go:101 +0x29
github.com/syndtr/goleveldb/leveldb.(*DB).Close(0x17?)
/home/runner/go/pkg/mod/github.com/syndtr/goleveldb@v1.0.1-0.20220721030215-126854af5e6d/leveldb/db.go:1218 +0x2b4
github.com/ethereum/go-ethereum/ethdb/leveldb.(*Database).Close(0xc002252200)
/home/runner/work/bsc/bsc/ethdb/leveldb/leveldb.go:180 +0x205
github.com/ethereum/go-ethereum/core/rawdb.(*freezerdb).Close(0xc0018f45a0)
/home/runner/work/bsc/bsc/core/rawdb/database.go:50 +0xd0
github.com/ethereum/go-ethereum/node.(*closeTrackingDB).Close(0xc0002827c8)
/home/runner/work/bsc/bsc/node/node.go:698 +0x99
github.com/ethereum/go-ethereum/eth.(*Ethereum).Stop(0xc0002a6360)
/home/runner/work/bsc/bsc/eth/backend.go:684 +0x158
github.com/ethereum/go-ethereum/node.(*Node).stopServices(0xc00228c410, {0xc01dfece40, 0x2, 0x3be62318a99cbdfe?})
/home/runner/work/bsc/bsc/node/node.go:321 +0xbd
github.com/ethereum/go-ethereum/node.(*Node).Close(0xc00228c410)
/home/runner/work/bsc/bsc/node/node.go:241 +0x191
created by github.com/ethereum/go-ethereum/cmd/utils.StartNode.func1.1
/home/runner/work/bsc/bsc/cmd/utils/cmd.go:92 +0x96
Description
BSC recently upgrade the goleveldb version indirectly to the latest commit, i.e. from: 20210819022825-2ae1ddf74ef7 to 20220721030215-126854af5e6d
When we tried to restart the node, it took quite a long time, more several minutes, eventually force killed.
After some investigation, we found the node got stuck on
leveldb.(*DB).Close(), the database is quite large on BSC. But it was ok with an older version. Dive deeper, we found it was caused by the levelDB upgrade, after downgrade to the previous version, the issue is gone.Some Information to share: