Skip to content

Commit f02c274

Browse files
authored
snapcfg, downloader: lazy-parse registry, per-chain remote loading (#19641)
## Summary

First PR in a series to remove the need to read all chains' TOML files at startup. Erigon only uses one chain per execution, but the current code parses all 8 chains' snapshot hashes eagerly at init and fetches all 8 from GitHub/R2 when loading remote preverified hashes.

This PR does not remove the registry, but makes it effectively a registry of 1 element by:

- **Lazy-parsing the preverified registry**: store raw embedded TOML bytes and parse on demand instead of parsing all chains at init time
- **Adding `LoadRemotePreverifiedForChain`**: fetch snapshot hashes for a single chain from GitHub/R2 instead of all chains; use it in `downloadercfg.LoadSnapshotsHashes`
- **Deduplicating `cdnHeaders`**: move the canonical definition + `InsertCloudflareHeaders` to `db/snapcfg/cdn.go`; remove copies from `db/downloader`
- **Pulling `fetchChainToml` from erigon-snapshot into this repo** (`db/snapcfg/util.go`) with TODOs marking the copies to remove upstream
- **Extracting URL builders**: `ChainTomlR2URL` and `ChainTomlGitHubURL` in `db/snapcfg/cdn.go` with unit tests

### Measured improvement (chiado, cold start)

| | main | this branch |
|---|---|---|
| Snapshot hash loading | ~2533ms | ~587ms |
| **Speedup** | — | **~4.3x faster** |

## Test plan

- [x] `make lint` clean
- [x] `make erigon integration` builds
- [x] Ephemeral node with `--chain=chiado` starts and loads snapshot hashes correctly
- [x] `go test ./db/snapcfg/...` passes with new URL builder tests
1 parent d73979a commit f02c274

6 files changed

Lines changed: 228 additions & 69 deletions

File tree

db/downloader/downloader.go

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ import (
2424
"io"
2525
"io/fs"
2626
"iter"
27-
"maps"
2827
"math"
2928
"net"
3029
"net/http"
@@ -64,6 +63,7 @@ import (
6463
"github.com/erigontech/erigon/db/kv"
6564
"github.com/erigontech/erigon/db/kv/dbcfg"
6665
"github.com/erigontech/erigon/db/kv/mdbx"
66+
"github.com/erigontech/erigon/db/snapcfg"
6767
"github.com/erigontech/erigon/db/snaptype"
6868
)
6969

@@ -151,15 +151,6 @@ type requestHandler struct {
151151
rt http.RoundTripper
152152
}
153153

154-
var cloudflareHeaders = http.Header{
155-
"lsjdjwcush6jbnjj3jnjscoscisoc5s": []string{"I%OSJDNFKE783DDHHJD873EFSIVNI7384R78SSJBJBCCJBC32JABBJCBJK45"},
156-
}
157-
158-
func insertCloudflareHeaders(req *http.Request) {
159-
// Note this is clobbering the headers.
160-
maps.Copy(req.Header, cloudflareHeaders)
161-
}
162-
163154
type roundTripperFunc func(req *http.Request) (*http.Response, error)
164155

165156
func (me roundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) {
@@ -168,7 +159,7 @@ func (me roundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error)
168159

169160
// TODO(anacrolix): Upstream any logic that works reliably.
170161
func (r *requestHandler) RoundTrip(req *http.Request) (resp *http.Response, err error) {
171-
insertCloudflareHeaders(req)
162+
snapcfg.InsertCloudflareHeaders(req)
172163

173164
resp, err = r.rt.RoundTrip(req)
174165
if err != nil {

db/downloader/downloadercfg/downloadercfg.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -328,7 +328,7 @@ func LoadSnapshotsHashes(ctx context.Context, dirs datadir.Dirs, chainName strin
328328
snapcfg.SetToml(chainName, haveToml, true)
329329
} else {
330330
// Fetch the snapshot hashes from the web
331-
err := snapcfg.LoadRemotePreverified(ctx)
331+
err := snapcfg.LoadRemotePreverifiedForChain(ctx, chainName)
332332
if err != nil {
333333
log.Root().Crit("Snapshot hashes for supported networks was not loaded. Please check your network connection and/or GitHub status here https://www.githubstatus.com/", "chain", chainName, "err", err)
334334
return fmt.Errorf("failed to fetch remote snapshot hashes for chain %s", chainName)

db/downloader/webseed.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ import (
2929
"github.com/hashicorp/go-retryablehttp"
3030

3131
"github.com/erigontech/erigon/common/log/v3"
32+
"github.com/erigontech/erigon/db/snapcfg"
3233
"github.com/erigontech/erigon/db/snaptype"
3334
)
3435

@@ -227,7 +228,7 @@ func (d *WebSeeds) retrieveManifest(ctx context.Context, webSeedProviderUrl *url
227228
if err != nil {
228229
return nil, err
229230
}
230-
insertCloudflareHeaders(request)
231+
snapcfg.InsertCloudflareHeaders(request)
231232
resp, err := d.client.Do(request)
232233
if err != nil {
233234
return nil, fmt.Errorf("webseed.http: make request: %w, url=%s", err, u.String())
@@ -240,7 +241,7 @@ func (d *WebSeeds) retrieveManifest(ctx context.Context, webSeedProviderUrl *url
240241
return nil, err
241242
}
242243

243-
insertCloudflareHeaders(request)
244+
snapcfg.InsertCloudflareHeaders(request)
244245

245246
resp, err := d.client.Do(request)
246247
if err != nil {

db/snapcfg/cdn.go

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
package snapcfg
2+
3+
import (
4+
"fmt"
5+
"maps"
6+
"net/http"
7+
)
8+
9+
// cloudflareHeaders are required for R2 CDN access.
10+
// TODO: Copied from github.com/erigontech/erigon-snapshot/embed.go (cloudflareHeaders).
11+
// Remove the copy in erigon-snapshot once this is the canonical location.
12+
var cloudflareHeaders = http.Header{
13+
"lsjdjwcush6jbnjj3jnjscoscisoc5s": []string{"I%OSJDNFKE783DDHHJD873EFSIVNI7384R78SSJBJBCCJBC32JABBJCBJK45"},
14+
}
15+
16+
// InsertCloudflareHeaders copies the R2 CDN headers into req.
17+
func InsertCloudflareHeaders(req *http.Request) {
18+
maps.Copy(req.Header, cloudflareHeaders)
19+
}
20+
21+
// ChainTomlR2URL returns the R2 CDN URL for a chain's snapshot TOML.
22+
func ChainTomlR2URL(branch, chain string) string {
23+
return fmt.Sprintf("https://erigon-snapshots.erigon.network/%s/%s.toml", branch, chain)
24+
}
25+
26+
// ChainTomlGitHubURL returns the GitHub raw URL for a chain's snapshot TOML.
27+
func ChainTomlGitHubURL(branch, chain string) string {
28+
return fmt.Sprintf("https://raw.githubusercontent.com/erigontech/erigon-snapshot/%s/%s.toml", branch, chain)
29+
}

db/snapcfg/cdn_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package snapcfg
2+
3+
import "testing"
4+
5+
func TestChainTomlR2URL(t *testing.T) {
6+
tests := []struct {
7+
branch, chain string
8+
want string
9+
}{
10+
{"release/3.4", "mainnet", "https://erigon-snapshots.erigon.network/release/3.4/mainnet.toml"},
11+
{"main", "chiado", "https://erigon-snapshots.erigon.network/main/chiado.toml"},
12+
{"release/3.4", "gnosis", "https://erigon-snapshots.erigon.network/release/3.4/gnosis.toml"},
13+
}
14+
for _, tt := range tests {
15+
if got := ChainTomlR2URL(tt.branch, tt.chain); got != tt.want {
16+
t.Errorf("ChainTomlR2URL(%q, %q) = %q, want %q", tt.branch, tt.chain, got, tt.want)
17+
}
18+
}
19+
}
20+
21+
func TestChainTomlGitHubURL(t *testing.T) {
22+
tests := []struct {
23+
branch, chain string
24+
want string
25+
}{
26+
{"release/3.4", "mainnet", "https://raw.githubusercontent.com/erigontech/erigon-snapshot/release/3.4/mainnet.toml"},
27+
{"main", "chiado", "https://raw.githubusercontent.com/erigontech/erigon-snapshot/main/chiado.toml"},
28+
{"release/3.4", "gnosis", "https://raw.githubusercontent.com/erigontech/erigon-snapshot/release/3.4/gnosis.toml"},
29+
}
30+
for _, tt := range tests {
31+
if got := ChainTomlGitHubURL(tt.branch, tt.chain); got != tt.want {
32+
t.Errorf("ChainTomlGitHubURL(%q, %q) = %q, want %q", tt.branch, tt.chain, got, tt.want)
33+
}
34+
}
35+
}

0 commit comments

Comments
 (0)