Skip to content

Commit 61847ab

Browse files
Merge pull request #35106 from p-se/wip-pse-cephadm-custom-monitoring-images
mgr/cephadm: allow custom images for grafana, prometheus, alertmanager and node_exporter
2 parents d45c6fa + 7ca3b47 commit 61847ab

3 files changed

Lines changed: 108 additions & 29 deletions

File tree

doc/cephadm/monitoring.rst

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,45 @@ completed, you should see something like this from ``ceph orch ls``::
7676
node-exporter 2/2 6s ago docker.io/prom/node-exporter:latest e5a616e4b9cf present
7777
prometheus 1/1 6s ago docker.io/prom/prometheus:latest e935122ab143 present
7878

79+
Using custom images
80+
~~~~~~~~~~~~~~~~~~~
81+
82+
It is possible to install or upgrade monitoring components based on other
83+
images. To do so, the name of the image to be used needs to be stored in the
84+
configuration first. The following configuration options are available.
85+
86+
- ``container_image_prometheus``
87+
- ``container_image_grafana``
88+
- ``container_image_alertmanager``
89+
- ``container_image_node_exporter``
90+
91+
Custom images can be set with the ``ceph config`` command::
92+
93+
ceph config set mgr mgr/cephadm/<option_name> <value>
94+
95+
For example::
96+
97+
ceph config set mgr mgr/cephadm/container_image_prometheus prom/prometheus:v1.4.1
98+
99+
.. note::
100+
101+
By setting a custom image, the default value will be overridden (but not
102+
overwritten). The default value changes when updates become available.
103+
By setting a custom image, you will not be able to automatically update
104+
the component you have set the custom image for. You will need to
105+
manually update the configuration (image name and tag) to be able to
106+
install updates.
107+
108+
If you choose to use the recommended default images instead, you can reset the
109+
custom image you have set before. After that, the default value will be
110+
used again. Use ``ceph config rm`` to reset the configuration option::
111+
112+
ceph config rm mgr mgr/cephadm/<option_name>
113+
114+
For example::
115+
116+
ceph config rm mgr mgr/cephadm/container_image_prometheus
117+
79118
Disabling monitoring
80119
--------------------
81120

src/cephadm/cephadm

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -484,7 +484,7 @@ def attempt_bind(s, address, port):
484484
def port_in_use(port_num):
485485
# type: (int) -> bool
486486
"""Detect whether a port is in use on the local machine - IPv4 and IPv6"""
487-
logger.info('Verifying port %d ...' % (port_num))
487+
logger.info('Verifying port %d ...' % port_num)
488488
try:
489489
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
490490
attempt_bind(s, '0.0.0.0', port_num)
@@ -2689,7 +2689,7 @@ def extract_uid_gid_monitoring(daemon_type):
26892689
@default_image
26902690
def command_deploy():
26912691
# type: () -> None
2692-
(daemon_type, daemon_id) = args.name.split('.', 1)
2692+
daemon_type, daemon_id = args.name.split('.', 1)
26932693

26942694
l = FileLock(args.fsid)
26952695
l.acquire()
@@ -2711,8 +2711,8 @@ def command_deploy():
27112711
logger.info('%s daemon %s ...' % ('Deploy', args.name))
27122712

27132713
if daemon_type in Ceph.daemons:
2714-
(config, keyring) = get_config_and_keyring()
2715-
(uid, gid) = extract_uid_gid()
2714+
config, keyring = get_config_and_keyring()
2715+
uid, gid = extract_uid_gid()
27162716
make_var_run(args.fsid, uid, gid)
27172717
c = get_container(args.fsid, daemon_type, daemon_id,
27182718
ptrace=args.allow_ptrace)
@@ -2723,8 +2723,6 @@ def command_deploy():
27232723

27242724
elif daemon_type in Monitoring.components:
27252725
# monitoring daemon - prometheus, grafana, alertmanager, node-exporter
2726-
monitoring_args = [] # type: List[str]
2727-
27282726
# Default Checks
27292727
if not args.reconfig and not redeploy:
27302728
daemon_ports = Monitoring.port_map[daemon_type] # type: List[int]
@@ -2744,7 +2742,6 @@ def command_deploy():
27442742
raise Error("{} deployment requires config-json which must "
27452743
"contain arg for {}".format(daemon_type.capitalize(), ', '.join(required_args)))
27462744

2747-
27482745
uid, gid = extract_uid_gid_monitoring(daemon_type)
27492746
c = get_container(args.fsid, daemon_type, daemon_id)
27502747
deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
@@ -2753,17 +2750,17 @@ def command_deploy():
27532750
elif daemon_type == NFSGanesha.daemon_type:
27542751
if not args.reconfig and not redeploy:
27552752
NFSGanesha.port_in_use()
2756-
(config, keyring) = get_config_and_keyring()
2753+
config, keyring = get_config_and_keyring()
27572754
# TODO: extract ganesha uid/gid (997, 994) ?
2758-
(uid, gid) = extract_uid_gid()
2755+
uid, gid = extract_uid_gid()
27592756
c = get_container(args.fsid, daemon_type, daemon_id)
27602757
deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
27612758
config=config, keyring=keyring,
27622759
reconfig=args.reconfig)
27632760

27642761
elif daemon_type == CephIscsi.daemon_type:
2765-
(config, keyring) = get_config_and_keyring()
2766-
(uid, gid) = extract_uid_gid()
2762+
config, keyring = get_config_and_keyring()
2763+
uid, gid = extract_uid_gid()
27672764
c = get_container(args.fsid, daemon_type, daemon_id)
27682765
deploy_daemon(args.fsid, daemon_type, daemon_id, c, uid, gid,
27692766
config=config, keyring=keyring,

src/pybind/mgr/cephadm/module.py

Lines changed: 61 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
11
import json
22
import errno
33
import logging
4-
import time
54
from threading import Event
65
from functools import wraps
76

87
import string
9-
from typing import List, Dict, Optional, Callable, Tuple, TypeVar, Type, \
10-
Any, NamedTuple, Iterator, Set, Sequence, TYPE_CHECKING, cast, Union
8+
from typing import List, Dict, Optional, Callable, Tuple, TypeVar, \
9+
Any, Set, TYPE_CHECKING, cast
1110

1211
import datetime
1312
import six
@@ -186,6 +185,26 @@ class CephadmOrchestrator(orchestrator.Orchestrator, MgrModule):
186185
'desc': 'Container image name, without the tag',
187186
'runtime': True,
188187
},
188+
{
189+
'name': 'container_image_prometheus',
190+
'default': 'prom/prometheus:v2.18.1',
191+
'desc': 'Prometheus container image',
192+
},
193+
{
194+
'name': 'container_image_grafana',
195+
'default': 'ceph/ceph-grafana:latest',
196+
'desc': 'Prometheus container image',
197+
},
198+
{
199+
'name': 'container_image_alertmanager',
200+
'default': 'prom/alertmanager:v0.20.0',
201+
'desc': 'Prometheus container image',
202+
},
203+
{
204+
'name': 'container_image_node_exporter',
205+
'default': 'prom/node-exporter:v0.18.1',
206+
'desc': 'Prometheus container image',
207+
},
189208
{
190209
'name': 'warn_on_stray_hosts',
191210
'type': 'bool',
@@ -251,6 +270,10 @@ def __init__(self, *args, **kwargs):
251270
self.host_check_interval = 0
252271
self.mode = ''
253272
self.container_image_base = ''
273+
self.container_image_prometheus = ''
274+
self.container_image_grafana = ''
275+
self.container_image_alertmanager = ''
276+
self.container_image_node_exporter = ''
254277
self.warn_on_stray_hosts = True
255278
self.warn_on_stray_daemons = True
256279
self.warn_on_failed_host_check = True
@@ -873,14 +896,18 @@ def _executable_path(self, conn, executable):
873896
executable_path))
874897
return executable_path
875898

876-
def _run_cephadm(self, host, entity, command, args,
877-
addr=None,
878-
stdin=None,
899+
def _run_cephadm(self,
900+
host: str,
901+
entity: Optional[str],
902+
command: str,
903+
args: List[str],
904+
addr: Optional[str] = None,
905+
stdin: Optional[str] = None,
879906
no_fsid=False,
880907
error_ok=False,
881-
image=None,
882-
env_vars=None):
883-
# type: (str, Optional[str], str, List[str], Optional[str], Optional[str], bool, bool, Optional[str], Optional[List[str]]) -> Tuple[List[str], List[str], int]
908+
image: Optional[str] = None,
909+
env_vars: Optional[List[str]] = None,
910+
) -> Tuple[List[str], List[str], int]:
884911
"""
885912
Run cephadm on the remote host with the given command + args
886913
@@ -902,7 +929,7 @@ def _run_cephadm(self, host, entity, command, args,
902929

903930
assert image or entity
904931
if not image:
905-
daemon_type = entity.split('.', 1)[0] # type: ignore
932+
daemon_type = entity.split('.', 1)[0] # type: ignore
906933
if daemon_type in CEPH_TYPES or \
907934
daemon_type == 'nfs' or \
908935
daemon_type == 'iscsi':
@@ -912,7 +939,16 @@ def _run_cephadm(self, host, entity, command, args,
912939
'who': utils.name_to_config_section(entity),
913940
'key': 'container_image',
914941
})
915-
image = image.strip() # type: ignore
942+
image = image.strip() # type: ignore
943+
elif daemon_type == 'prometheus':
944+
image = self.container_image_prometheus
945+
elif daemon_type == 'grafana':
946+
image = self.container_image_grafana
947+
elif daemon_type == 'alertmanager':
948+
image = self.container_image_alertmanager
949+
elif daemon_type == 'node-exporter':
950+
image = self.container_image_node_exporter
951+
916952
self.log.debug('%s container image %s' % (entity, image))
917953

918954
final_args = []
@@ -929,8 +965,8 @@ def _run_cephadm(self, host, entity, command, args,
929965
final_args += ['--fsid', self._cluster_fsid]
930966
final_args += args
931967

968+
self.log.debug('args: %s' % (' '.join(final_args)))
932969
if self.mode == 'root':
933-
self.log.debug('args: %s' % (' '.join(final_args)))
934970
if stdin:
935971
self.log.debug('stdin: %s' % stdin)
936972
script = 'injected_argv = ' + json.dumps(final_args) + '\n'
@@ -1551,11 +1587,18 @@ def _get_config_and_keyring(self, daemon_type, daemon_id,
15511587
'keyring': keyring,
15521588
}
15531589

1554-
def _create_daemon(self, daemon_type, daemon_id, host,
1555-
keyring=None,
1556-
extra_args=None, extra_config=None,
1590+
def _create_daemon(self,
1591+
daemon_type: str,
1592+
daemon_id: str,
1593+
host: str,
1594+
keyring: Optional[str] = None,
1595+
extra_args: Optional[List[str]] = None,
1596+
extra_config: Optional[Dict[str, Any]] = None,
15571597
reconfig=False,
1558-
osd_uuid_map=None) -> str:
1598+
osd_uuid_map: Optional[Dict[str, Any]] = None,
1599+
redeploy=False,
1600+
) -> str:
1601+
15591602
if not extra_args:
15601603
extra_args = []
15611604
if not extra_config:
@@ -1564,7 +1607,7 @@ def _create_daemon(self, daemon_type, daemon_id, host,
15641607

15651608
start_time = datetime.datetime.utcnow()
15661609
deps = [] # type: List[str]
1567-
cephadm_config = {} # type: Dict[str, Any]
1610+
cephadm_config = {} # type: Dict[str, Any]
15681611
if daemon_type == 'prometheus':
15691612
cephadm_config, deps = self.prometheus_service.generate_config()
15701613
extra_args.extend(['--config-json', '-'])
@@ -1597,7 +1640,7 @@ def _create_daemon(self, daemon_type, daemon_id, host,
15971640
osd_uuid_map = self.get_osd_uuid_map()
15981641
osd_uuid = osd_uuid_map.get(daemon_id)
15991642
if not osd_uuid:
1600-
raise OrchestratorError('osd.%d not in osdmap' % daemon_id)
1643+
raise OrchestratorError('osd.%s not in osdmap' % daemon_id)
16011644
extra_args.extend(['--osd-fsid', osd_uuid])
16021645

16031646
if reconfig:

0 commit comments

Comments
 (0)