3030import java .util .regex .Matcher ;
3131import java .util .regex .Pattern ;
3232import java .util .stream .Collectors ;
33+ import java .util .stream .Stream ;
3334
/**
 * The {@link OsProbe} class retrieves information about the physical and swap size of the machine
 * memory, as well as the system load average and cpu load.
 *
 * <p>In some exceptional cases, it's possible the underlying native methods used by
 * {@link #getFreePhysicalMemorySize()}, {@link #getTotalPhysicalMemorySize()},
 * {@link #getFreeSwapSpaceSize()}, and {@link #getTotalSwapSpaceSize()} can return a
 * negative value. Because of this, we prevent those methods from returning negative values,
 * returning 0 instead.
 *
 * <p>The OS can report a negative number in a number of cases:
 *
 * <ul>
 *   <li>Non-supported OSes (HP-UX, or AIX)
 *   <li>A failure of macOS to initialize host statistics
 *   <li>An OS that does not support the {@code _SC_PHYS_PAGES} or {@code _SC_PAGE_SIZE} flags for the {@code sysconf()} linux kernel call
 *   <li>An overflow of the product of {@code _SC_PHYS_PAGES} and {@code _SC_PAGE_SIZE}
 *   <li>An error case retrieving these values from a linux kernel
 *   <li>A non-standard libc implementation not implementing the required values
 * </ul>
 *
 * <p>For a more exhaustive explanation, see <a href="https://github.com/elastic/elasticsearch/pull/42725"
 * >https://github.com/elastic/elasticsearch/pull/42725</a>
 */
5359public class OsProbe {
5460
@@ -178,7 +184,7 @@ final double[] getSystemLoadAverage() {
178184 final String procLoadAvg = readProcLoadavg ();
179185 assert procLoadAvg .matches ("(\\ d+\\ .\\ d+\\ s+){3}\\ d+/\\ d+\\ s+\\ d+" );
180186 final String [] fields = procLoadAvg .split ("\\ s+" );
181- return new double []{ Double .parseDouble (fields [0 ]), Double .parseDouble (fields [1 ]), Double .parseDouble (fields [2 ])};
187+ return new double [] { Double .parseDouble (fields [0 ]), Double .parseDouble (fields [1 ]), Double .parseDouble (fields [2 ]) };
182188 } catch (final IOException e ) {
183189 if (logger .isDebugEnabled ()) {
184190 logger .debug ("error reading /proc/loadavg" , e );
@@ -192,7 +198,7 @@ final double[] getSystemLoadAverage() {
192198 }
193199 try {
194200 final double oneMinuteLoadAverage = (double ) getSystemLoadAverage .invoke (osMxBean );
195- return new double []{ oneMinuteLoadAverage >= 0 ? oneMinuteLoadAverage : -1 , -1 , -1 };
201+ return new double [] { oneMinuteLoadAverage >= 0 ? oneMinuteLoadAverage : -1 , -1 , -1 };
196202 } catch (IllegalAccessException | InvocationTargetException e ) {
197203 if (logger .isDebugEnabled ()) {
198204 logger .debug ("error reading one minute load average from operating system" , e );
@@ -318,6 +324,23 @@ String readSysFsCgroupCpuAcctCpuAcctUsage(final String controlGroup) throws IOEx
318324 return readSingleLine (PathUtils .get ("/sys/fs/cgroup/cpuacct" , controlGroup , "cpuacct.usage" ));
319325 }
320326
327+ private long [] getCgroupV2CpuLimit (String controlGroup ) throws IOException {
328+ String entry = readCgroupV2CpuLimit (controlGroup );
329+ String [] parts = entry .split ("\\ s+" );
330+ assert parts .length == 2 : "Expected 2 fields in [cpu.max]" ;
331+
332+ long [] values = new long [2 ];
333+
334+ values [0 ] = "max" .equals (parts [0 ]) ? -1L : Long .parseLong (parts [0 ]);
335+ values [1 ] = Long .parseLong (parts [1 ]);
336+ return values ;
337+ }
338+
339+ @ SuppressForbidden (reason = "access /sys/fs/cgroup/cpu.max" )
340+ String readCgroupV2CpuLimit (String controlGroup ) throws IOException {
341+ return readSingleLine (PathUtils .get ("/sys/fs/cgroup/" , controlGroup , "cpu.max" ));
342+ }
343+
321344 /**
322345 * The total period of time in microseconds for how frequently the Elasticsearch control group's access to CPU resources will be
323346 * reallocated.
@@ -454,6 +477,35 @@ String readSysFsCgroupMemoryLimitInBytes(final String controlGroup) throws IOExc
454477 return readSingleLine (PathUtils .get ("/sys/fs/cgroup/memory" , controlGroup , "memory.limit_in_bytes" ));
455478 }
456479
480+ /**
481+ * The maximum amount of user memory (including file cache).
482+ * If there is no limit then some Linux versions return the maximum value that can be stored in an
483+ * unsigned 64 bit number, and this will overflow a long, hence the result type is <code>String</code>.
484+ * (The alternative would have been <code>BigInteger</code> but then it would not be possible to index
485+ * the OS stats document into Elasticsearch without losing information, as <code>BigInteger</code> is
486+ * not a supported Elasticsearch type.)
487+ *
488+ * @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem
489+ * @return the maximum amount of user memory (including file cache)
490+ * @throws IOException if an I/O exception occurs reading {@code memory.limit_in_bytes} for the control group
491+ */
492+ private String getCgroupV2MemoryLimitInBytes (final String controlGroup ) throws IOException {
493+ return readSysFsCgroupV2MemoryLimitInBytes (controlGroup );
494+ }
495+
496+ /**
497+ * Returns the line from {@code memory.max} for the control group to which the Elasticsearch process belongs for the
498+ * {@code memory} subsystem. This line represents the maximum amount of user memory (including file cache).
499+ *
500+ * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
501+ * @return the line from {@code memory.max}
502+ * @throws IOException if an I/O exception occurs reading {@code memory.max} for the control group
503+ */
504+ @ SuppressForbidden (reason = "access /sys/fs/cgroup/memory.max" )
505+ String readSysFsCgroupV2MemoryLimitInBytes (final String controlGroup ) throws IOException {
506+ return readSingleLine (PathUtils .get ("/sys/fs/cgroup/" , controlGroup , "memory.max" ));
507+ }
508+
457509 /**
458510 * The total current memory usage by processes in the cgroup (in bytes).
459511 * If there is no limit then some Linux versions return the maximum value that can be stored in an
@@ -483,27 +535,87 @@ String readSysFsCgroupMemoryUsageInBytes(final String controlGroup) throws IOExc
483535 return readSingleLine (PathUtils .get ("/sys/fs/cgroup/memory" , controlGroup , "memory.usage_in_bytes" ));
484536 }
485537
538+ /**
539+ * The total current memory usage by processes in the cgroup (in bytes).
540+ * If there is no limit then some Linux versions return the maximum value that can be stored in an
541+ * unsigned 64 bit number, and this will overflow a long, hence the result type is <code>String</code>.
542+ * (The alternative would have been <code>BigInteger</code> but then it would not be possible to index
543+ * the OS stats document into Elasticsearch without losing information, as <code>BigInteger</code> is
544+ * not a supported Elasticsearch type.)
545+ *
546+ * @param controlGroup the control group for the Elasticsearch process for the {@code memory} subsystem
547+ * @return the total current memory usage by processes in the cgroup (in bytes)
548+ * @throws IOException if an I/O exception occurs reading {@code memory.current} for the control group
549+ */
550+ private String getCgroupV2MemoryUsageInBytes (final String controlGroup ) throws IOException {
551+ return readSysFsCgroupV2MemoryUsageInBytes (controlGroup );
552+ }
553+
554+ /**
555+ * Returns the line from {@code memory.current} for the control group to which the Elasticsearch process belongs for the
556+ * {@code memory} subsystem. This line represents the total current memory usage by processes in the cgroup (in bytes).
557+ *
558+ * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
559+ * @return the line from {@code memory.current}
560+ * @throws IOException if an I/O exception occurs reading {@code memory.current} for the control group
561+ */
562+ @ SuppressForbidden (reason = "access /sys/fs/cgroup/memory.current" )
563+ String readSysFsCgroupV2MemoryUsageInBytes (final String controlGroup ) throws IOException {
564+ return readSingleLine (PathUtils .get ("/sys/fs/cgroup/" , controlGroup , "memory.current" ));
565+ }
566+
486567 /**
487568 * Checks if cgroup stats are available by checking for the existence of {@code /proc/self/cgroup}, {@code /sys/fs/cgroup/cpu},
488569 * {@code /sys/fs/cgroup/cpuacct} and {@code /sys/fs/cgroup/memory}.
489570 *
490571 * @return {@code true} if the stats are available, otherwise {@code false}
491572 */
492573 @ SuppressForbidden (reason = "access /proc/self/cgroup, /sys/fs/cgroup/cpu, /sys/fs/cgroup/cpuacct and /sys/fs/cgroup/memory" )
493- boolean areCgroupStatsAvailable () {
574+ boolean areCgroupStatsAvailable () throws IOException {
494575 if (Files .exists (PathUtils .get ("/proc/self/cgroup" )) == false ) {
495576 return false ;
496577 }
497- if (Files .exists (PathUtils .get ("/sys/fs/cgroup/cpu" )) == false ) {
498- return false ;
499- }
500- if (Files .exists (PathUtils .get ("/sys/fs/cgroup/cpuacct" )) == false ) {
501- return false ;
578+
579+ List <String > lines = readProcSelfCgroup ();
580+
581+ // cgroup v2
582+ if (lines .size () == 1 && lines .get (0 ).startsWith ("0::" )) {
583+ return Stream .of ("/sys/fs/cgroup/cpu.stat" , "/sys/fs/cgroup/memory.stat" ).allMatch (path -> Files .exists (PathUtils .get (path )));
502584 }
503- if (Files .exists (PathUtils .get ("/sys/fs/cgroup/memory" )) == false ) {
504- return false ;
585+
586+ return Stream .of ("/sys/fs/cgroup/cpu" , "/sys/fs/cgroup/cpuacct" , "/sys/fs/cgroup/memory" )
587+ .allMatch (path -> Files .exists (PathUtils .get (path )));
588+ }
589+
590+ /**
591+ * The CPU statistics for all tasks in the Elasticsearch control group.
592+ *
593+ * @param controlGroup the control group to which the Elasticsearch process belongs for the {@code memory} subsystem
594+ * @return the CPU statistics
595+ * @throws IOException if an I/O exception occurs reading {@code cpu.stat} for the control group
596+ */
597+ private Map <String , Long > getCgroupV2CpuStats (String controlGroup ) throws IOException {
598+ final List <String > lines = readCgroupV2CpuStats (controlGroup );
599+ final Map <String , Long > stats = new HashMap <>();
600+
601+ for (String line : lines ) {
602+ String [] parts = line .split ("\\ s+" );
603+ assert parts .length == 2 : "Corrupt cpu.stat line: [" + line + "]" ;
604+ stats .put (parts [0 ], Long .parseLong (parts [1 ]));
505605 }
506- return true ;
606+
607+ final List <String > expectedKeys = List .of ("nr_periods" , "nr_throttled" , "system_usec" , "throttled_usec" , "usage_usec" , "user_usec" );
608+ expectedKeys .forEach (key -> {
609+ assert stats .containsKey (key ) : key ;
610+ assert stats .get (key ) != -1 : stats .get (key );
611+ });
612+
613+ return stats ;
614+ }
615+
616+ @ SuppressForbidden (reason = "access /sys/fs/cgroup/cpu.stat" )
617+ List <String > readCgroupV2CpuStats (final String controlGroup ) throws IOException {
618+ return Files .readAllLines (PathUtils .get ("/sys/fs/cgroup" , controlGroup , "cpu.stat" ));
507619 }
508620
509621 /**
@@ -515,45 +627,79 @@ private OsStats.Cgroup getCgroup() {
515627 try {
516628 if (areCgroupStatsAvailable () == false ) {
517629 return null ;
518- } else {
519- final Map <String , String > controllerMap = getControlGroups ();
520- assert controllerMap .isEmpty () == false ;
630+ }
631+
632+ final Map <String , String > controllerMap = getControlGroups ();
633+ assert controllerMap .isEmpty () == false ;
521634
522- final String cpuAcctControlGroup = controllerMap .get ("cpuacct" );
635+ final String cpuAcctControlGroup ;
636+ final long cgroupCpuAcctUsageNanos ;
637+ final long cgroupCpuAcctCpuCfsPeriodMicros ;
638+ final long cgroupCpuAcctCpuCfsQuotaMicros ;
639+ final String cpuControlGroup ;
640+ final OsStats .Cgroup .CpuStat cpuStat ;
641+ final String memoryControlGroup ;
642+ final String cgroupMemoryLimitInBytes ;
643+ final String cgroupMemoryUsageInBytes ;
644+
645+ if (controllerMap .size () == 1 && controllerMap .containsKey ("" )) {
646+ // There's a single hierarchy for all controllers
647+ cpuControlGroup = cpuAcctControlGroup = memoryControlGroup = controllerMap .get ("" );
648+
649+ // `cpuacct` was merged with `cpu` in v2
650+ final Map <String , Long > cpuStatsMap = getCgroupV2CpuStats (cpuControlGroup );
651+
652+ cgroupCpuAcctUsageNanos = cpuStatsMap .get ("usage_usec" );
653+
654+ long [] cpuLimits = getCgroupV2CpuLimit (cpuControlGroup );
655+ cgroupCpuAcctCpuCfsQuotaMicros = cpuLimits [0 ];
656+ cgroupCpuAcctCpuCfsPeriodMicros = cpuLimits [1 ];
657+
658+ cpuStat = new OsStats .Cgroup .CpuStat (
659+ cpuStatsMap .get ("nr_periods" ),
660+ cpuStatsMap .get ("nr_throttled" ),
661+ cpuStatsMap .get ("throttled_usec" )
662+ );
663+
664+ cgroupMemoryLimitInBytes = getCgroupV2MemoryLimitInBytes (memoryControlGroup );
665+ cgroupMemoryUsageInBytes = getCgroupV2MemoryUsageInBytes (memoryControlGroup );
666+ } else {
667+ cpuAcctControlGroup = controllerMap .get ("cpuacct" );
523668 if (cpuAcctControlGroup == null ) {
524669 logger .debug ("no [cpuacct] data found in cgroup stats" );
525670 return null ;
526671 }
527- final long cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos (cpuAcctControlGroup );
672+ cgroupCpuAcctUsageNanos = getCgroupCpuAcctUsageNanos (cpuAcctControlGroup );
528673
529- final String cpuControlGroup = controllerMap .get ("cpu" );
674+ cpuControlGroup = controllerMap .get ("cpu" );
530675 if (cpuControlGroup == null ) {
531676 logger .debug ("no [cpu] data found in cgroup stats" );
532677 return null ;
533678 }
534- final long cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros (cpuControlGroup );
535- final long cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros (cpuControlGroup );
536- final OsStats . Cgroup . CpuStat cpuStat = getCgroupCpuAcctCpuStat (cpuControlGroup );
679+ cgroupCpuAcctCpuCfsPeriodMicros = getCgroupCpuAcctCpuCfsPeriodMicros (cpuControlGroup );
680+ cgroupCpuAcctCpuCfsQuotaMicros = getCgroupCpuAcctCpuCfsQuotaMicros (cpuControlGroup );
681+ cpuStat = getCgroupCpuAcctCpuStat (cpuControlGroup );
537682
538- final String memoryControlGroup = controllerMap .get ("memory" );
683+ memoryControlGroup = controllerMap .get ("memory" );
539684 if (memoryControlGroup == null ) {
540685 logger .debug ("no [memory] data found in cgroup stats" );
541686 return null ;
542687 }
543- final String cgroupMemoryLimitInBytes = getCgroupMemoryLimitInBytes (memoryControlGroup );
544- final String cgroupMemoryUsageInBytes = getCgroupMemoryUsageInBytes (memoryControlGroup );
545-
546- return new OsStats .Cgroup (
547- cpuAcctControlGroup ,
548- cgroupCpuAcctUsageNanos ,
549- cpuControlGroup ,
550- cgroupCpuAcctCpuCfsPeriodMicros ,
551- cgroupCpuAcctCpuCfsQuotaMicros ,
552- cpuStat ,
553- memoryControlGroup ,
554- cgroupMemoryLimitInBytes ,
555- cgroupMemoryUsageInBytes );
688+ cgroupMemoryLimitInBytes = getCgroupMemoryLimitInBytes (memoryControlGroup );
689+ cgroupMemoryUsageInBytes = getCgroupMemoryUsageInBytes (memoryControlGroup );
556690 }
691+
692+ return new OsStats .Cgroup (
693+ cpuAcctControlGroup ,
694+ cgroupCpuAcctUsageNanos ,
695+ cpuControlGroup ,
696+ cgroupCpuAcctCpuCfsPeriodMicros ,
697+ cgroupCpuAcctCpuCfsQuotaMicros ,
698+ cpuStat ,
699+ memoryControlGroup ,
700+ cgroupMemoryLimitInBytes ,
701+ cgroupMemoryUsageInBytes
702+ );
557703 } catch (final IOException e ) {
558704 logger .debug ("error reading control group stats" , e );
559705 return null ;
@@ -576,13 +722,14 @@ public static OsProbe getInstance() {
576722
577723 OsInfo osInfo (long refreshInterval , int allocatedProcessors ) throws IOException {
578724 return new OsInfo (
579- refreshInterval ,
580- Runtime .getRuntime ().availableProcessors (),
581- allocatedProcessors ,
582- Constants .OS_NAME ,
583- getPrettyName (),
584- Constants .OS_ARCH ,
585- Constants .OS_VERSION );
725+ refreshInterval ,
726+ Runtime .getRuntime ().availableProcessors (),
727+ allocatedProcessors ,
728+ Constants .OS_NAME ,
729+ getPrettyName (),
730+ Constants .OS_ARCH ,
731+ Constants .OS_VERSION
732+ );
586733 }
587734
588735 private String getPrettyName () throws IOException {
@@ -594,11 +741,13 @@ private String getPrettyName() throws IOException {
594741 * wrapped in single- or double-quotes.
595742 */
596743 final List <String > etcOsReleaseLines = readOsRelease ();
597- final List <String > prettyNameLines =
598- etcOsReleaseLines .stream ().filter (line -> line .startsWith ("PRETTY_NAME" )).collect (Collectors .toList ());
744+ final List <String > prettyNameLines = etcOsReleaseLines .stream ()
745+ .filter (line -> line .startsWith ("PRETTY_NAME" ))
746+ .collect (Collectors .toList ());
599747 assert prettyNameLines .size () <= 1 : prettyNameLines ;
600- final Optional <String > maybePrettyNameLine =
601- prettyNameLines .size () == 1 ? Optional .of (prettyNameLines .get (0 )) : Optional .empty ();
748+ final Optional <String > maybePrettyNameLine = prettyNameLines .size () == 1
749+ ? Optional .of (prettyNameLines .get (0 ))
750+ : Optional .empty ();
602751 if (maybePrettyNameLine .isPresent ()) {
603752 // we trim since some OS contain trailing space, for example, Oracle Linux Server 6.9 has a trailing space after the quote
604753 final String trimmedPrettyNameLine = maybePrettyNameLine .get ().trim ();
@@ -695,11 +844,15 @@ boolean isDebian8() throws IOException {
695844 return Constants .LINUX && getPrettyName ().equals ("Debian GNU/Linux 8 (jessie)" );
696845 }
697846
847+ OsStats .Cgroup getCgroup (boolean isLinux ) {
848+ return isLinux ? getCgroup () : null ;
849+ }
850+
698851 public OsStats osStats () {
699852 final OsStats .Cpu cpu = new OsStats .Cpu (getSystemCpuPercent (), getSystemLoadAverage ());
700853 final OsStats .Mem mem = new OsStats .Mem (getTotalPhysicalMemorySize (), getFreePhysicalMemorySize ());
701854 final OsStats .Swap swap = new OsStats .Swap (getTotalSwapSpaceSize (), getFreeSwapSpaceSize ());
702- final OsStats .Cgroup cgroup = Constants .LINUX ? getCgroup () : null ;
855+ final OsStats .Cgroup cgroup = getCgroup ( Constants .LINUX ) ;
703856 return new OsStats (System .currentTimeMillis (), cpu , mem , swap , cgroup );
704857 }
705858
0 commit comments