Skip to content

Commit fdaa8a8

Browse files
committed
[cli] Fix startup ImportError, remove heavy deps, clean up build config
- Make the `openai` import lazy in bench/engine_bench (`request_sender.py`, `config.py`): any `lmcache` command (including `--help`) no longer crashes with `ImportError` when `openai` is not installed; the error is deferred to `lmcache bench` engine invocation with a clear install hint.
- Remove `transformers` from `requirements/cli.txt` (it is already a lazy optional import with a try/except fallback in `prompt.py`).
- Revert `lmcache/cli/request.py` to dev behavior (`ttft_s = -1.0` when no token is observed).
- `pyproject_cli.toml`: drop `ninja` (not needed for a pure-Python wheel); relax `setuptools` to `>=68.0.0`.

Signed-off-by: deng451e <838677410@qq.com>
1 parent e1bde6f commit fdaa8a8

5 files changed

Lines changed: 20 additions & 12 deletions

File tree

lmcache/cli/commands/bench/engine_bench/config.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,6 @@
99
import urllib.error
1010
import urllib.request
1111

12-
# Third Party
13-
from openai import OpenAI
14-
1512
# First Party
1613
from lmcache.logging import init_logger
1714

@@ -76,6 +73,15 @@ def auto_detect_model(engine_url: str) -> str:
7673
api_key = os.getenv("OPENAI_API_KEY", "sk-dummy")
7774
logger.debug("Auto-detecting model from %s/models", base_url)
7875

76+
try:
77+
# Third Party
78+
from openai import OpenAI # noqa: PLC0415
79+
except ImportError as e:
80+
raise ImportError(
81+
"The 'openai' package is required for 'lmcache bench'. "
82+
"Install it with: pip install openai"
83+
) from e
84+
7985
try:
8086
client = OpenAI(base_url=base_url, api_key=api_key)
8187
models = client.models.list()

lmcache/cli/commands/bench/engine_bench/request_sender.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,6 @@
77
import os
88
import time
99

10-
# Third Party
11-
from openai import AsyncOpenAI
12-
1310
# First Party
1411
from lmcache.cli.commands.bench.engine_bench.stats import RequestResult
1512
from lmcache.logging import init_logger
@@ -86,6 +83,15 @@ def __init__(
8683
else:
8784
logger.debug("API key source: OPENAI_API_KEY env var")
8885

86+
try:
87+
# Third Party
88+
from openai import AsyncOpenAI # noqa: PLC0415
89+
except ImportError as e:
90+
raise ImportError(
91+
"The 'openai' package is required for 'lmcache bench'. "
92+
"Install it with: pip install openai"
93+
) from e
94+
8995
self._client = AsyncOpenAI(
9096
base_url=base_url,
9197
api_key=api_key,

lmcache/cli/request.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,7 @@ def _stream(
202202
# Match V2RequestSender: server count if present, else max_tokens cap.
203203
num_generated = num_completion if num_completion > 0 else max_tokens
204204
if first_token_t is None:
205-
# No non-empty content token observed (e.g. model emitted only EOS).
206-
# Use total round-trip as a conservative TTFT approximation.
207-
ttft_s = t1 - t0
205+
ttft_s = -1.0
208206
decode_time = 0.0
209207
else:
210208
ttft_s = first_token_t - t0

pyproject_cli.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,8 @@
88
# Thus, we will still lock a torch version here because we can choose to release wheels
99
# in sync with vllm and update our torch version accordingly
1010
requires = [
11-
"ninja",
1211
"packaging>=24.2",
13-
"setuptools>=77.0.3,<81.0.0",
12+
"setuptools>=68.0.0",
1413
"setuptools_scm>=8",
1514
"wheel",
1615
]

requirements/cli.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +0,0 @@
1-
transformers>=4.51.1

0 commit comments

Comments
 (0)