Skip to content

Commit 5cc7c44

Browse files
markmc and hmellor authored
[Metrics] Add Prometheus counters for Model FLOPs Utilization (MFU) (#30950)
Export the existing Model FLOPs Utilization (MFU) metrics via Prometheus. The `--enable-mfu-metrics` flag is required for these metrics to be exposed.

Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Signed-off-by: Mark McLoughlin <markmc@redhat.com>
1 parent b95bb69 commit 5cc7c44

5 files changed

Lines changed: 109 additions & 1 deletion

File tree

docs/mkdocs/hooks/generate_metrics.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
"path": "vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py",
2323
"output": "nixl_connector.inc.md",
2424
},
25+
{"path": "vllm/v1/metrics/perf.py", "output": "perf.inc.md"},
2526
]
2627

2728

docs/usage/metrics.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,12 @@ The following metrics are exposed:
4545

4646
--8<-- "docs/generated/metrics/nixl_connector.inc.md"
4747

48+
## Model Flops Utilization (MFU) Performance Metrics
49+
50+
These metrics are available via `--enable-mfu-metrics`:
51+
52+
--8<-- "docs/generated/metrics/perf.inc.md"
53+
4854
## Deprecation Policy
4955

5056
Note: when metrics are deprecated in version `X.Y`, they are hidden in version `X.Y+1`

vllm/v1/metrics/loggers.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
from vllm.logger import init_logger
2020
from vllm.plugins import STAT_LOGGER_PLUGINS_GROUP, load_plugins_by_group
2121
from vllm.v1.engine import FinishReason
22-
from vllm.v1.metrics.perf import PerfMetricsLogging
22+
from vllm.v1.metrics.perf import PerfMetricsLogging, PerfMetricsProm
2323
from vllm.v1.metrics.prometheus import unregister_vllm_metrics
2424
from vllm.v1.metrics.stats import (
2525
CachingMetrics,
@@ -392,6 +392,7 @@ class PrometheusStatLogger(AggregateStatLoggerBase):
392392
_histogram_cls = Histogram
393393
_spec_decoding_cls = SpecDecodingProm
394394
_kv_connector_cls = KVConnectorPrometheus
395+
_perf_metrics_cls = PerfMetricsProm
395396

396397
def __init__(
397398
self, vllm_config: VllmConfig, engine_indexes: list[int] | None = None
@@ -424,6 +425,9 @@ def __init__(
424425
self.kv_connector_prom = self._kv_connector_cls(
425426
vllm_config, labelnames, per_engine_labelvalues
426427
)
428+
self.perf_metrics_prom = self._perf_metrics_cls(
429+
vllm_config, labelnames, per_engine_labelvalues
430+
)
427431

428432
#
429433
# Scheduler state
@@ -1065,6 +1069,9 @@ def record(
10651069
scheduler_stats.kv_connector_stats, engine_idx
10661070
)
10671071

1072+
if scheduler_stats.perf_stats is not None:
1073+
self.perf_metrics_prom.observe(scheduler_stats.perf_stats, engine_idx)
1074+
10681075
if (
10691076
self.kv_cache_metrics_enabled
10701077
and scheduler_stats.kv_cache_eviction_events

vllm/v1/metrics/perf.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from dataclasses import asdict, dataclass
1414
from typing import Any, Protocol
1515

16+
import prometheus_client
1617
import torch
1718
from pydantic import BaseModel, Field, ValidationError, model_validator
1819
from typing_extensions import Self
@@ -1233,6 +1234,87 @@ def log(self, log_fn=logger.info, log_prefix: str = "") -> None:
12331234
self.reset()
12341235

12351236

1237+
#### Prometheus Integration ####
1238+
1239+
1240+
class PerfMetricsProm:
    """Publish estimated per-GPU FLOPs and memory traffic as Prometheus counters.

    The counters only accumulate totals; rates are derived at query time.

    Average TFLOPS (tera floating-point operations per second) over the
    last minute can be obtained with the PromQL query:

        rate(vllm:estimated_flops_per_gpu_total[1m]) / 1e12

    Average memory bandwidth in GB/s over the last minute:

        (rate(vllm:estimated_read_bytes_per_gpu_total[1m]) +
         rate(vllm:estimated_write_bytes_per_gpu_total[1m])) / 1e9
    """

    # Counter implementation used by __init__; a subclass can override this
    # attribute to plug in a different, call-compatible counter class.
    _counter_cls = prometheus_client.Counter

    def __init__(
        self,
        vllm_config: VllmConfig,
        labelnames: list[str],
        per_engine_labelvalues: dict[int, list[object]],
    ):
        """Register the three counters and bind them to every engine.

        Args:
            vllm_config: Accepted for signature parity with the caller; it is
                not read by this constructor.
            labelnames: Label names every counter is declared with.
            per_engine_labelvalues: Engine index -> label values for that
                engine, in the same order as ``labelnames``.
        """

        def per_engine_counter(name: str, documentation: str):
            # Declare the parent counter once, then fan it out per engine.
            parent = self._counter_cls(
                name=name,
                documentation=documentation,
                labelnames=labelnames,
            )
            return make_per_engine(parent, per_engine_labelvalues)

        self.counter_flops = per_engine_counter(
            "vllm:estimated_flops_per_gpu_total",
            (
                "Estimated number of floating point operations per GPU "
                "(for Model Flops Utilization calculations)."
            ),
        )
        self.counter_read_bytes = per_engine_counter(
            "vllm:estimated_read_bytes_per_gpu_total",
            (
                "Estimated number of bytes read from memory per GPU "
                "(for Model Flops Utilization calculations)."
            ),
        )
        self.counter_write_bytes = per_engine_counter(
            "vllm:estimated_write_bytes_per_gpu_total",
            (
                "Estimated number of bytes written to memory per GPU "
                "(for Model Flops Utilization calculations)."
            ),
        )

    def observe(self, perf_stats: PerfStats, engine_idx: int = 0):
        """Add the values carried by ``perf_stats`` to one engine's counters.

        Args:
            perf_stats: Source of ``num_flops_per_gpu``,
                ``num_read_bytes_per_gpu`` and ``num_write_bytes_per_gpu``.
            engine_idx: Key selecting which engine's counters to increment.
        """
        flops = perf_stats.num_flops_per_gpu
        read_bytes = perf_stats.num_read_bytes_per_gpu
        write_bytes = perf_stats.num_write_bytes_per_gpu

        # Nothing to record when every value is zero/empty.
        if not flops and not read_bytes and not write_bytes:
            return

        self.counter_flops[engine_idx].inc(flops)
        self.counter_read_bytes[engine_idx].inc(read_bytes)
        self.counter_write_bytes[engine_idx].inc(write_bytes)
1306+
1307+
1308+
def make_per_engine(
    counter: prometheus_client.Counter, per_engine_labelvalues: dict[int, list[object]]
):
    """Bind a labelled counter to each engine.

    Args:
        counter: Parent counter; its ``labels()`` method is called once per
            entry of ``per_engine_labelvalues``.
        per_engine_labelvalues: Engine index -> positional label values for
            that engine.

    Returns:
        A dict with the same keys as ``per_engine_labelvalues``, each mapped
        to the result of ``counter.labels(*labelvalues)`` for that key.
    """
    bound = {}
    for engine_idx, values in per_engine_labelvalues.items():
        bound[engine_idx] = counter.labels(*values)
    return bound
1316+
1317+
12361318
## util functions
12371319

12381320

vllm/v1/metrics/ray_wrappers.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import KVConnectorPrometheus
66
from vllm.v1.metrics.loggers import PrometheusStatLogger
7+
from vllm.v1.metrics.perf import PerfMetricsProm
78
from vllm.v1.spec_decode.metrics import SpecDecodingProm
89

910
try:
@@ -179,6 +180,16 @@ class RayKVConnectorPrometheus(KVConnectorPrometheus):
179180
_histogram_cls = RayHistogramWrapper
180181

181182

183+
class RayPerfMetricsProm(PerfMetricsProm):
    """MFU metrics variant used when metrics are logged through Ray.

    Provides the same MFU metrics as PerfMetricsProm, but overrides
    ``_counter_cls`` so the counters are created with RayCounterWrapper
    (Ray's util.metrics library) rather than the class PerfMetricsProm
    uses by default.
    """

    _counter_cls = RayCounterWrapper
191+
192+
182193
class RayPrometheusStatLogger(PrometheusStatLogger):
183194
"""RayPrometheusStatLogger uses Ray metrics instead."""
184195

@@ -187,6 +198,7 @@ class RayPrometheusStatLogger(PrometheusStatLogger):
187198
_histogram_cls = RayHistogramWrapper
188199
_spec_decoding_cls = RaySpecDecodingProm
189200
_kv_connector_cls = RayKVConnectorPrometheus
201+
_perf_metrics_cls = RayPerfMetricsProm
190202

191203
@staticmethod
192204
def _unregister_vllm_metrics():

0 commit comments

Comments
 (0)