Skip to content

Commit 2bc289d

Browse files
authored
Support snapshot reset with symlinks (#18273)
Adds a ton of tests. Fixes #18135, #18191, #16693. Adds a --preverified flag to snapshots reset to choose whether the embedded, remote or local preverified file is used for the reset. I added it to make testing faster, but it is useful in its own right. I tried to implement some extra security to avoid recursively deleting if someone symlinked to the wrong place, but the complexity got out of hand. Along the way I discovered that fs.FS and os.Root are mostly useless for this. I settled on 2 possibilities: checking that symlinks to directories always end with the same name as the source, or having a sequence of os.Roots within which deletion is allowed. The first was a bit too complex, and the second would require a bunch of extra documentation and user education, which I don't think is worth it. Most good users will be setting correct permissions, and using hard/bind mounts to do things reliably.
1 parent 0a2501b commit 2bc289d

16 files changed

Lines changed: 1021 additions & 320 deletions

File tree

cmd/utils/app/reset-datadir.go

Lines changed: 62 additions & 178 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,23 @@ package app
33
import (
44
"errors"
55
"fmt"
6-
"io/fs"
76
"os"
8-
"path/filepath"
9-
"strings"
107

118
g "github.com/anacrolix/generics"
12-
"github.com/anacrolix/torrent/metainfo"
13-
"github.com/urfave/cli/v2"
14-
15-
"github.com/erigontech/erigon/cmd/utils"
9+
"github.com/anacrolix/missinggo/v2/panicif"
1610
"github.com/erigontech/erigon/common/dir"
17-
"github.com/erigontech/erigon/common/log/v3"
18-
"github.com/erigontech/erigon/db/datadir"
11+
"github.com/erigontech/erigon/db/datadir/reset"
1912
"github.com/erigontech/erigon/db/kv"
2013
"github.com/erigontech/erigon/db/kv/dbcfg"
2114
"github.com/erigontech/erigon/db/kv/mdbx"
2215
"github.com/erigontech/erigon/db/rawdb"
23-
"github.com/erigontech/erigon/db/snapcfg"
2416
"github.com/erigontech/erigon/execution/chain"
17+
"github.com/urfave/cli/v2"
18+
19+
"github.com/erigontech/erigon/cmd/utils"
20+
"github.com/erigontech/erigon/common/log/v3"
21+
"github.com/erigontech/erigon/db/datadir"
22+
"github.com/erigontech/erigon/db/snapcfg"
2523
)
2624

2725
var (
@@ -39,6 +37,12 @@ var (
3937
Aliases: []string{"n"},
4038
Category: "Reset",
4139
}
40+
preverifiedFlag = cli.StringFlag{
41+
Name: "preverified",
42+
Category: "Reset",
43+
Usage: "preverified to use (remote, local, embedded)",
44+
Value: "remote",
45+
}
4246
)
4347

4448
// Checks if a value was explicitly set in the given CLI command context or any of its parents. In
@@ -91,15 +95,28 @@ func resetCliAction(cliCtx *cli.Context) (err error) {
9195
return fmt.Errorf("failed to lock data dir %v: %w", dirs.DataDir, err)
9296
}
9397
defer unlock()
94-
err = snapcfg.LoadRemotePreverified(cliCtx.Context)
95-
if err != nil {
96-
// TODO: Check if we should continue? What if we ask for a git revision and
97-
// can't get it? What about a branch? Can we reset to the embedded snapshot hashes?
98-
return fmt.Errorf("loading remote preverified snapshots: %w", err)
98+
99+
switch value := preverifiedFlag.Get(cliCtx); value {
100+
case "local":
101+
panicif.Err(os.Setenv(snapcfg.RemotePreverifiedEnvKey, dirs.PreverifiedPath()))
102+
fallthrough
103+
case "remote":
104+
err = snapcfg.LoadRemotePreverified(cliCtx.Context)
105+
if err != nil {
106+
// TODO: Check if we should continue? What if we ask for a git revision and
107+
// can't get it? What about a branch? Can we reset to the embedded snapshot hashes?
108+
return fmt.Errorf("loading remote preverified snapshots: %w", err)
109+
}
110+
case "embedded":
111+
// Should already be loaded.
112+
default:
113+
err = fmt.Errorf("invalid preverified flag value %q", value)
114+
return
99115
}
116+
100117
cfg, known := snapcfg.KnownCfg(chainName)
101118
if !known {
102-
// Wtf does this even mean?
119+
// Wtf does this even imply?
103120
return fmt.Errorf("config for chain %v is not known", chainName)
104121
}
105122
// Should we check cfg.Local? We could be resetting to the preverified.toml...?
@@ -108,61 +125,41 @@ func resetCliAction(cliCtx *cli.Context) (err error) {
108125
"len", len(cfg.Preverified.Items),
109126
"chain", chainName,
110127
)
111-
removeFunc := func(path string) error {
112-
logger.Debug("Removing snapshot dir file", "path", path)
113-
return dir.RemoveFile(filepath.Join(dirs.Snap, path))
114-
}
128+
115129
if dryRun {
116-
removeFunc = dryRunRemove
117-
}
118-
reset := reset{
119-
removeUnknown: removeLocal,
120-
logger: logger,
130+
log.Warn("Resetting datadir in dry run mode. Files that would be removed will be printed to stdout.")
131+
}
132+
133+
// Here we intended to have a list of os.Root to restrict deletions. Instead, for now you should
134+
// do a dry run, and make sure to use good permissioning.
135+
//datadirOsRoot, err := os.OpenRoot(dirs.DataDir)
136+
//if err != nil {
137+
// return fmt.Errorf("opening datadir: %w", err)
138+
//}
139+
140+
r := reset.Reset{
141+
Dirs: &dirs,
142+
RemoveUnknown: removeLocal,
143+
Logger: logger,
144+
PreverifiedSnapshots: cfg.Preverified.Items,
145+
RemoveFunc: func(osName reset.OsFilePath) error {
146+
if dryRun {
147+
println(osName)
148+
return nil
149+
}
150+
logger.Debug("Removing datadir file", "name", osName)
151+
//return datadirOsRoot.Remove(string(osName))
152+
return dir.RemoveFile(string(osName))
153+
},
121154
}
122-
logger.Info("Resetting snapshots directory", "path", dirs.Snap)
123-
err = reset.walkSnapshots(dirs.Snap, cfg.Preverified.Items, removeFunc)
155+
err = r.Run()
124156
if err != nil {
125-
err = fmt.Errorf("walking snapshots: %w", err)
126157
return
127158
}
128-
logger.Info("Files NOT removed from snapshots directory",
129-
"torrents", reset.stats.retained.torrentFiles,
130-
"data", reset.stats.retained.dataFiles)
131-
logger.Info("Files removed from snapshots directory",
132-
"torrents", reset.stats.removed.torrentFiles,
133-
"data", reset.stats.removed.dataFiles)
134-
// Remove chaindata last, so that the config is available if there's an error.
135-
if removeLocal {
136-
for _, extraDir := range []string{
137-
dbcfg.HeimdallDB,
138-
dbcfg.PolygonBridgeDB,
139-
} {
140-
extraFullPath := filepath.Join(dirs.DataDir, extraDir)
141-
err = dir.RemoveAll(extraFullPath)
142-
if err != nil {
143-
return fmt.Errorf("removing extra dir %q: %w", extraDir, err)
144-
}
145-
}
146-
logger.Info("Removing chaindata dir", "path", dirs.Chaindata)
147-
if !dryRun {
148-
err = dir.RemoveAll(dirs.Chaindata)
149-
}
150-
if err != nil {
151-
err = fmt.Errorf("removing chaindata dir: %w", err)
152-
return
153-
}
154-
}
155-
err = removeFunc(datadir.PreverifiedFileName)
156-
if err == nil {
157-
logger.Info("Removed snapshots lock file", "path", datadir.PreverifiedFileName)
158-
} else {
159-
if !errors.Is(err, fs.ErrNotExist) {
160-
err = fmt.Errorf("removing snapshot lock file: %w", err)
161-
return
162-
}
159+
if !dryRun {
160+
logger.Info("Reset complete. Start Erigon as usual, missing files will be downloaded.")
163161
}
164-
logger.Info("Reset complete. Start Erigon as usual, missing files will be downloaded.")
165-
return nil
162+
return
166163
}
167164

168165
func getChainNameFromChainData(cliCtx *cli.Context, logger log.Logger, chainDataDir string) (_ g.Option[string], err error) {
@@ -203,116 +200,3 @@ func getChainNameFromChainData(cliCtx *cli.Context, logger log.Logger, chainData
203200
}
204201
return g.Some(chainCfg.ChainName), nil
205202
}
206-
207-
func dryRunRemove(path string) error {
208-
return nil
209-
}
210-
211-
type resetStats struct {
212-
torrentFiles int
213-
dataFiles int
214-
unknownFiles int
215-
}
216-
217-
type reset struct {
218-
logger log.Logger
219-
removeUnknown bool
220-
stats struct {
221-
removed resetStats
222-
retained resetStats
223-
}
224-
}
225-
226-
type resetItemInfo struct {
227-
path string
228-
realFilePath func() string
229-
hash g.Option[string]
230-
isTorrent bool
231-
inPreverified bool
232-
}
233-
234-
// Walks the given snapshots directory, removing files that are not in the preverified set.
235-
func (me *reset) walkSnapshots(
236-
// Could almost pass fs.FS here except metainfo.LoadFromFile expects a string filepath.
237-
snapDir string,
238-
preverified snapcfg.PreverifiedItems,
239-
// path is the relative path to the walk root. Called for each file that should be removed.
240-
// Error is passed back to the walk function.
241-
remove func(path string) error,
242-
) error {
243-
return fs.WalkDir(
244-
os.DirFS(snapDir),
245-
".",
246-
func(path string, d fs.DirEntry, err error) error {
247-
if err != nil {
248-
// Our job is to remove anything that shouldn't be here... so if we can't read a dir
249-
// we are in trouble.
250-
return fmt.Errorf("error walking path %v: %w", path, err)
251-
}
252-
if d.IsDir() {
253-
return nil
254-
}
255-
if path == datadir.PreverifiedFileName {
256-
return nil
257-
}
258-
// Shouldn't be necessary with fs package.
259-
slashPath := filepath.ToSlash(path)
260-
itemName, _ := strings.CutSuffix(slashPath, ".part")
261-
itemName, isTorrent := strings.CutSuffix(itemName, ".torrent")
262-
item, ok := preverified.Get(itemName)
263-
doRemove := me.decideRemove(resetItemInfo{
264-
path: path,
265-
realFilePath: func() string { return filepath.Join(snapDir, path) },
266-
hash: func() g.Option[string] { return g.OptionFromTuple(item.Hash, ok) }(),
267-
isTorrent: isTorrent,
268-
inPreverified: ok,
269-
})
270-
stats := &me.stats.retained
271-
if doRemove {
272-
stats = &me.stats.removed
273-
err = remove(path)
274-
if err != nil {
275-
return fmt.Errorf("removing file %v: %w", path, err)
276-
}
277-
}
278-
if isTorrent {
279-
stats.torrentFiles++
280-
} else {
281-
stats.dataFiles++
282-
}
283-
return nil
284-
},
285-
)
286-
}
287-
288-
// Decides whether to remove a file, and logs the reasoning.
289-
func (me *reset) decideRemove(file resetItemInfo) bool {
290-
logger := me.logger
291-
path := file.path
292-
if !file.inPreverified {
293-
logger.Debug("file NOT in preverified list", "path", path)
294-
return me.removeUnknown
295-
}
296-
if file.isTorrent {
297-
mi, err := metainfo.LoadFromFile(file.realFilePath())
298-
if err != nil {
299-
logger.Error("error loading metainfo file", "path", path, "err", err)
300-
return true
301-
}
302-
expectedHash := file.hash.Unwrap()
303-
if mi.HashInfoBytes().String() == expectedHash {
304-
logger.Debug("torrent file matches preverified hash", "path", path)
305-
return false
306-
} else {
307-
logger.Debug("torrent file infohash does NOT match preverified",
308-
"path", path,
309-
"expected", expectedHash,
310-
"actual", mi.HashInfoBytes())
311-
return true
312-
}
313-
} else {
314-
// No checks required. Downloader will clobber it into shape after reset on next run.
315-
logger.Debug("data file is in preverified", "path", path)
316-
return false
317-
}
318-
}

cmd/utils/app/snapshots_cmd.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -281,6 +281,7 @@ var snapshotCommand = cli.Command{
281281
&utils.DataDirFlag,
282282
&dryRunFlag,
283283
&removeLocalFlag,
284+
&preverifiedFlag,
284285
},
285286
},
286287
{

common/dir/rw_dir.go

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,12 @@ func trackRemovedFiles() {
6363
}
6464
}
6565

66+
// user rwx, group rwx, other rx
67+
// x is required to navigate through directories. umask 0o022 is the default and will mask final
68+
// permissions to 0o755 for newly created files (and directories).
69+
const DirPerm = 0o775
70+
6671
func MustExist(path ...string) {
67-
// user rwx, group rwx, other rx
68-
// x is required to navigate through directories. umask 0o022 is the default and will mask final
69-
// permissions to 0o755 for newly created files (and directories).
70-
const perm = 0o775
7172
for _, p := range path {
7273
exist, err := Exist(p)
7374
if err != nil {
@@ -76,7 +77,7 @@ func MustExist(path ...string) {
7677
if exist {
7778
continue
7879
}
79-
if err := os.MkdirAll(p, perm); err != nil {
80+
if err := os.MkdirAll(p, DirPerm); err != nil {
8081
panic(err)
8182
}
8283
}

db/datadir/dirs.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ func New(datadir string) Dirs {
8888
return dirs
8989
}
9090

91+
// The subdirectory in the datadir for snapshots. This isn't encoded anywhere else because it's not
92+
// an MDBX name, but also a bunch of other datadir subdirs aren't encoded in Dirs, and Dirs does
93+
// absolute path stuff I don't want.
94+
const SnapDir = "snapshots"
95+
9196
// Open new Dirs instance without forcing all the directories to exist.
9297
func Open(datadir string) Dirs {
9398
relativeDataDir := datadir
@@ -105,13 +110,13 @@ func Open(datadir string) Dirs {
105110
DataDir: datadir,
106111
Chaindata: filepath.Join(datadir, "chaindata"),
107112
Tmp: filepath.Join(datadir, "temp"),
108-
Snap: filepath.Join(datadir, "snapshots"),
109-
SnapIdx: filepath.Join(datadir, "snapshots", "idx"),
110-
SnapHistory: filepath.Join(datadir, "snapshots", "history"),
111-
SnapDomain: filepath.Join(datadir, "snapshots", "domain"),
112-
SnapAccessors: filepath.Join(datadir, "snapshots", "accessor"),
113-
SnapCaplin: filepath.Join(datadir, "snapshots", "caplin"),
114-
SnapForkable: filepath.Join(datadir, "snapshots", "forkable"),
113+
Snap: filepath.Join(datadir, SnapDir),
114+
SnapIdx: filepath.Join(datadir, SnapDir, "idx"),
115+
SnapHistory: filepath.Join(datadir, SnapDir, "history"),
116+
SnapDomain: filepath.Join(datadir, SnapDir, "domain"),
117+
SnapAccessors: filepath.Join(datadir, SnapDir, "accessor"),
118+
SnapCaplin: filepath.Join(datadir, SnapDir, "caplin"),
119+
SnapForkable: filepath.Join(datadir, SnapDir, "forkable"),
115120
Downloader: filepath.Join(datadir, "downloader"),
116121
TxPool: filepath.Join(datadir, "txpool"),
117122
Nodes: filepath.Join(datadir, "nodes"),

db/datadir/reset/chaindata

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
symlink

0 commit comments

Comments
 (0)