Skip to content

Commit e9ea8cf

Browse files
authored
feat: add --pid-file option to write PID files (#25499)
Add a `--pid-file` option to write a PID file on startup. The PID filename is specified by the argument after `--pid-file`. If the PID file already exists, influxd will exit unless the `--overwrite-pid-file` flag is also used. Example: `influxd --pid-file /var/lib/influxd/influxd.pid`. PID files are automatically removed when the influxd process shuts down. Closes: #25498 (cherry picked from commit c35321b) (cherry picked from commit 48f7600)
1 parent 981f2fc commit e9ea8cf

File tree

4 files changed

+238
-1
lines changed

4 files changed

+238
-1
lines changed

cmd/influxd/launcher/cmd.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,9 @@ type InfluxdOpts struct {
144144
TracingType string
145145
ReportingDisabled bool
146146

147+
PIDFile string
148+
OverwritePIDFile bool
149+
147150
AssetsPath string
148151
BoltPath string
149152
SqLitePath string
@@ -213,6 +216,9 @@ func NewOpts(viper *viper.Viper) *InfluxdOpts {
213216
FluxLogEnabled: false,
214217
ReportingDisabled: false,
215218

219+
PIDFile: "",
220+
OverwritePIDFile: false,
221+
216222
BoltPath: filepath.Join(dir, bolt.DefaultFilename),
217223
SqLitePath: filepath.Join(dir, sqlite.DefaultFilename),
218224
EnginePath: filepath.Join(dir, "engine"),
@@ -325,6 +331,18 @@ func (o *InfluxdOpts) BindCliOpts() []cli.Opt {
325331
Default: o.ReportingDisabled,
326332
Desc: "disable sending telemetry data to https://telemetry.influxdata.com every 8 hours",
327333
},
334+
{
335+
DestP: &o.PIDFile,
336+
Flag: "pid-file",
337+
Default: o.PIDFile,
338+
Desc: "write process ID to a file",
339+
},
340+
{
341+
DestP: &o.OverwritePIDFile,
342+
Flag: "overwrite-pid-file",
343+
Default: o.OverwritePIDFile,
344+
Desc: "overwrite PID file if it already exists instead of exiting",
345+
},
328346
{
329347
DestP: &o.SessionLength,
330348
Flag: "session-length",

cmd/influxd/launcher/launcher.go

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,12 @@ import (
55
"crypto/tls"
66
"errors"
77
"fmt"
8+
"io/fs"
89
"net"
910
nethttp "net/http"
1011
"os"
1112
"path/filepath"
13+
"strconv"
1214
"strings"
1315
"sync"
1416
"time"
@@ -109,6 +111,11 @@ const (
109111
JaegerTracing = "jaeger"
110112
)
111113

114+
// ErrPIDFileExists reports that the requested PID file is already present on disk.
var ErrPIDFileExists = errors.New("PID file exists (possible unclean shutdown or another instance already running)")
118+
112119
type labeledCloser struct {
113120
label string
114121
closer func(context.Context) error
@@ -248,6 +255,10 @@ func (m *Launcher) run(ctx context.Context, opts *InfluxdOpts) (err error) {
248255
}
249256
}
250257

258+
if err := m.writePIDFile(opts.PIDFile, opts.OverwritePIDFile); err != nil {
259+
return fmt.Errorf("error writing PIDFile %q: %w", opts.PIDFile, err)
260+
}
261+
251262
m.reg = prom.NewRegistry(m.log.With(zap.String("service", "prom_registry")))
252263
m.reg.MustRegister(collectors.NewGoCollector())
253264

@@ -970,6 +981,74 @@ func (m *Launcher) initTracing(opts *InfluxdOpts) {
970981
}
971982
}
972983

984+
// writePIDFile will write the process ID to pidFilename and register a cleanup function to delete it during
985+
// shutdown. If pidFilename is empty, then no PID file is written and no cleanup function is registered.
986+
// If pidFilename already exists and overwrite is false, then pidFilename is not overwritten and a
987+
// ErrPIDFileExists error is returned. If pidFilename already exists and overwrite is true, then pidFilename
988+
// will be overwritten but a warning will be logged.
989+
func (m *Launcher) writePIDFile(pidFilename string, overwrite bool) error {
990+
if pidFilename == "" {
991+
return nil
992+
}
993+
994+
// Create directory to PIDfile if needed.
995+
if err := os.MkdirAll(filepath.Dir(pidFilename), 0777); err != nil {
996+
return fmt.Errorf("mkdir: %w", err)
997+
}
998+
999+
// Write PID to file, but don't clobber an existing PID file.
1000+
pidBytes := []byte(strconv.Itoa(os.Getpid()))
1001+
pidMode := fs.FileMode(0666)
1002+
openFlags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC
1003+
pidFile, err := os.OpenFile(pidFilename, openFlags|os.O_EXCL, pidMode)
1004+
if err != nil {
1005+
if !errors.Is(err, fs.ErrExist) {
1006+
return fmt.Errorf("open file: %w", err)
1007+
}
1008+
if !overwrite {
1009+
return ErrPIDFileExists
1010+
} else {
1011+
m.log.Warn("PID file already exists, attempting to overwrite", zap.String("pidFile", pidFilename))
1012+
pidFile, err = os.OpenFile(pidFilename, openFlags, pidMode)
1013+
if err != nil {
1014+
return fmt.Errorf("overwrite file: %w", err)
1015+
}
1016+
}
1017+
}
1018+
_, writeErr := pidFile.Write(pidBytes) // Contract says Write must return an error if count < len(pidBytes).
1019+
closeErr := pidFile.Close() // always close the file
1020+
if writeErr != nil || closeErr != nil {
1021+
var errs []error
1022+
if writeErr != nil {
1023+
errs = append(errs, fmt.Errorf("write file: %w", writeErr))
1024+
}
1025+
if closeErr != nil {
1026+
errs = append(errs, fmt.Errorf("close file: %w", closeErr))
1027+
}
1028+
1029+
// Let's make sure we don't leave a PID file behind on error.
1030+
removeErr := os.Remove(pidFilename)
1031+
if removeErr != nil {
1032+
errs = append(errs, fmt.Errorf("remove file: %w", removeErr))
1033+
}
1034+
1035+
return errors.Join(errs...)
1036+
}
1037+
1038+
// Add a cleanup function.
1039+
m.closers = append(m.closers, labeledCloser{
1040+
label: "pidfile",
1041+
closer: func(context.Context) error {
1042+
if err := os.Remove(pidFilename); err != nil {
1043+
return fmt.Errorf("removing PID file %q: %w", pidFilename, err)
1044+
}
1045+
return nil
1046+
},
1047+
})
1048+
1049+
return nil
1050+
}
1051+
9731052
// openMetaStores opens the embedded DBs used to store metadata about influxd resources, migrating them to
9741053
// the latest schema expected by the server.
9751054
// On success, a unique ID is returned to be used as an identifier for the influxd instance in telemetry.

cmd/influxd/launcher/launcher_helpers.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ type TestLauncher struct {
5555
Bucket *influxdb.Bucket
5656
Auth *influxdb.Authorization
5757

58+
Logger *zap.Logger
59+
5860
httpClient *httpc.Client
5961
apiClient *api.APIClient
6062

@@ -146,7 +148,10 @@ func (tl *TestLauncher) Run(tb zaptest.TestingT, ctx context.Context, setters ..
146148
}
147149

148150
// Set up top-level logger to write into the test-case.
149-
tl.Launcher.log = zaptest.NewLogger(tb, zaptest.Level(opts.LogLevel)).With(zap.String("test_name", tb.Name()))
151+
if tl.Logger == nil {
152+
tl.Logger = zaptest.NewLogger(tb, zaptest.Level(opts.LogLevel)).With(zap.String("test_name", tb.Name()))
153+
}
154+
tl.Launcher.log = tl.Logger
150155
return tl.Launcher.run(ctx, opts)
151156
}
152157

cmd/influxd/launcher/launcher_test.go

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,13 @@ package launcher_test
33
import (
44
"context"
55
"encoding/json"
6+
"fmt"
67
"io"
8+
"io/fs"
79
nethttp "net/http"
10+
"os"
11+
"path/filepath"
12+
"strconv"
813
"testing"
914
"time"
1015

@@ -14,6 +19,10 @@ import (
1419
"github.com/influxdata/influxdb/v2/http"
1520
"github.com/influxdata/influxdb/v2/tenant"
1621
"github.com/stretchr/testify/assert"
22+
"github.com/stretchr/testify/require"
23+
"go.uber.org/zap"
24+
"go.uber.org/zap/zapcore"
25+
"go.uber.org/zap/zaptest/observer"
1726
)
1827

1928
// Default context.
@@ -164,3 +173,129 @@ func TestLauncher_PingHeaders(t *testing.T) {
164173
assert.Equal(t, []string{"OSS"}, resp.Header.Values("X-Influxdb-Build"))
165174
assert.Equal(t, []string{"dev"}, resp.Header.Values("X-Influxdb-Version"))
166175
}
176+
177+
func TestLauncher_PIDFile(t *testing.T) {
178+
pidDir := t.TempDir()
179+
pidFilename := filepath.Join(pidDir, "influxd.pid")
180+
181+
l := launcher.RunAndSetupNewLauncherOrFail(ctx, t, func(o *launcher.InfluxdOpts) {
182+
o.PIDFile = pidFilename
183+
})
184+
defer func() {
185+
l.ShutdownOrFail(t, ctx)
186+
require.NoFileExists(t, pidFilename)
187+
}()
188+
189+
require.FileExists(t, pidFilename)
190+
pidBytes, err := os.ReadFile(pidFilename)
191+
require.NoError(t, err)
192+
require.Equal(t, strconv.Itoa(os.Getpid()), string(pidBytes))
193+
}
194+
195+
func TestLauncher_PIDFile_Locked(t *testing.T) {
196+
pidDir := t.TempDir()
197+
pidFilename := filepath.Join(pidDir, "influxd.pid")
198+
lockContents := []byte("foobar") // something wouldn't appear in normal lock file
199+
200+
// Write PID file to lock out the launcher.
201+
require.NoError(t, os.WriteFile(pidFilename, lockContents, 0666))
202+
require.FileExists(t, pidFilename)
203+
origSt, err := os.Stat(pidFilename)
204+
require.NoError(t, err)
205+
206+
// Make sure we get an error about the PID file from the launcher
207+
l := launcher.NewTestLauncher()
208+
err = l.Run(t, ctx, func(o *launcher.InfluxdOpts) {
209+
o.PIDFile = pidFilename
210+
})
211+
defer func() {
212+
l.ShutdownOrFail(t, ctx)
213+
214+
require.FileExists(t, pidFilename)
215+
contents, err := os.ReadFile(pidFilename)
216+
require.NoError(t, err)
217+
require.Equal(t, lockContents, contents)
218+
curSt, err := os.Stat(pidFilename)
219+
require.NoError(t, err)
220+
221+
// We can't compare origSt and curSt directly because even on mounts
222+
// with "noatime" or "relatime" options, the sys.Atim field can still
223+
// change. We'll just compare the most relevant exposed fields.
224+
require.Equal(t, origSt.ModTime(), curSt.ModTime())
225+
require.Equal(t, origSt.Mode(), curSt.Mode())
226+
}()
227+
228+
require.ErrorIs(t, err, launcher.ErrPIDFileExists)
229+
require.ErrorContains(t, err, fmt.Sprintf("error writing PIDFile %q: PID file exists (possible unclean shutdown or another instance already running)", pidFilename))
230+
}
231+
232+
func TestLauncher_PIDFile_Overwrite(t *testing.T) {
233+
pidDir := t.TempDir()
234+
pidFilename := filepath.Join(pidDir, "influxd.pid")
235+
lockContents := []byte("foobar") // something wouldn't appear in normal lock file
236+
237+
// Write PID file to lock out the launcher (or not in this case).
238+
require.NoError(t, os.WriteFile(pidFilename, lockContents, 0666))
239+
require.FileExists(t, pidFilename)
240+
241+
// Make sure we get an error about the PID file from the launcher.
242+
l := launcher.NewTestLauncher()
243+
loggerCore, ol := observer.New(zap.WarnLevel)
244+
l.Logger = zap.New(loggerCore)
245+
err := l.Run(t, ctx, func(o *launcher.InfluxdOpts) {
246+
o.PIDFile = pidFilename
247+
o.OverwritePIDFile = true
248+
})
249+
defer func() {
250+
l.ShutdownOrFail(t, ctx)
251+
252+
require.NoFileExists(t, pidFilename)
253+
}()
254+
require.NoError(t, err)
255+
256+
expLogs := []observer.LoggedEntry{
257+
{
258+
Entry: zapcore.Entry{Level: zap.WarnLevel, Message: "PID file already exists, attempting to overwrite"},
259+
Context: []zapcore.Field{zap.String("pidFile", pidFilename)},
260+
},
261+
}
262+
require.Equal(t, expLogs, ol.AllUntimed())
263+
require.FileExists(t, pidFilename)
264+
pidBytes, err := os.ReadFile(pidFilename)
265+
require.NoError(t, err)
266+
require.Equal(t, strconv.Itoa(os.Getpid()), string(pidBytes))
267+
}
268+
269+
func TestLauncher_PIDFile_OverwriteFail(t *testing.T) {
270+
if os.Geteuid() == 0 {
271+
t.Skip("test will fail when run as root")
272+
}
273+
274+
pidDir := t.TempDir()
275+
pidFilename := filepath.Join(pidDir, "influxd.pid")
276+
lockContents := []byte("foobar") // something wouldn't appear in normal lock file
277+
278+
// Write PID file to lock out the launcher.
279+
require.NoError(t, os.WriteFile(pidFilename, lockContents, 0666))
280+
require.FileExists(t, pidFilename)
281+
require.NoError(t, os.Chmod(pidFilename, 0000))
282+
283+
// Make sure we get an error about the PID file from the launcher
284+
l := launcher.NewTestLauncher()
285+
err := l.Run(t, ctx, func(o *launcher.InfluxdOpts) {
286+
o.PIDFile = pidFilename
287+
o.OverwritePIDFile = true
288+
})
289+
defer func() {
290+
l.ShutdownOrFail(t, ctx)
291+
292+
require.NoError(t, os.Chmod(pidFilename, 0644))
293+
require.FileExists(t, pidFilename)
294+
pidBytes, err := os.ReadFile(pidFilename)
295+
require.NoError(t, err)
296+
require.Equal(t, lockContents, pidBytes)
297+
}()
298+
299+
require.ErrorContains(t, err, fmt.Sprintf("error writing PIDFile %[1]q: overwrite file: open %[1]s:", pidFilename))
300+
require.ErrorIs(t, err, fs.ErrPermission)
301+
}

0 commit comments

Comments
 (0)