
Commit f4448ca

Shaoting-Feng and claude committed
[ROCm] Make bare-host ROCm install self-sufficient
Moves GPU-vendor-specific runtime deps out of common.txt into requirements/cuda_core.txt and requirements/rocm_core.txt. setup.py reads common.txt plus whichever core file matches BUILD_WITH_HIP, so `pip install -e .` Just Works on both CUDA and ROCm hosts.

- Drop cupy-cuda12x and nixl from common.txt (both are CUDA-only on PyPI; the nixl meta-package unconditionally pulls nixl-cu12, which installs nixl_ep/ and breaks the ROCm runtime).
- cuda.txt now includes -r cuda_core.txt, so the Dockerfile's `pip install -r cuda.txt` still pulls the same set.
- Remove the [tool.setuptools.dynamic] dependencies block from pyproject.toml; install_requires is driven by setup.py now.
- Add a second "Without vLLM docker base image" subsection to the ROCm install docs, mirroring the CUDA from-source flow line-for-line (uv venv -> -r build.txt -> torch from ROCm wheel index -> build). The existing rocm/vllm-dev flow stays as-is.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Signed-off-by: Shaoting Feng <shaotingf@uchicago.edu>
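In practice the split means the same editable install works on both vendors; a minimal sketch of the two paths (dependency selection only; the full ROCm build also sets CXX=hipcc and friends, see the docs diff below):

    # CUDA host: BUILD_WITH_HIP unset, so setup.py appends
    # requirements/cuda_core.txt (cupy-cuda12x, nixl) to common.txt
    pip install -e .

    # ROCm host: BUILD_WITH_HIP=1 selects requirements/rocm_core.txt
    # (cupy-rocm-7-0) instead, so no CUDA-only wheels are pulled in
    BUILD_WITH_HIP=1 pip install -e .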
1 parent 69787b8 commit f4448ca

7 files changed: 74 additions & 11 deletions


docs/source/getting_started/installation.rst (40 additions & 4 deletions)
@@ -203,10 +203,13 @@ You can get the nightly build of latest code of LMcache and vLLM as follows:
 
 
 LMCache on ROCm
------------------
+---------------
+
+With vLLM docker base image
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 Get started through using vLLM docker image as base image
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The `AMD Infinity hub <https://hub.docker.com/r/rocm/vllm-dev>`__ for vLLM offers a prebuilt, optimized docker image designed for validating inference performance on the AMD Instinct™ MI300X accelerator.
 The image is based on the latest vLLM v1. Please check `LLM inference performance validation on AMD Instinct MI300X <https://rocm.docs.amd.com/en/latest/how-to/rocm-for-ai/inference/benchmark-docker/vllm.html?model=pyt_vllm_llama-3.1-8b>`__ for instructions on how to use this prebuilt docker image.
@@ -235,7 +238,7 @@ As of the date of writing, the steps are validated on the following environment:
    bash
 
 Install Latest LMCache from Source for ROCm
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 To install from source, clone the repository and install in editable mode.
 
@@ -255,4 +258,37 @@ Example on MI300X (gfx942):
    TORCH_DONT_CHECK_COMPILER_ABI=1 \
    CXX=hipcc \
    BUILD_WITH_HIP=1 \
-   python3 -m pip install --no-build-isolation -e .
+   python3 -m pip install --no-build-isolation -e .
+
+
+On a bare ROCm host
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Install Latest LMCache from Source for ROCm
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To install from source on a bare ROCm host (no ``rocm/vllm-dev`` base image),
+torch must be installed from the ROCm wheel index before building LMCache.
+This mirrors the CUDA from-source flow above, with the ROCm wheel index and
+HIP build flags in place of their CUDA equivalents.
+
+.. code-block:: bash
+
+   git clone https://github.com/LMCache/LMCache.git
+   cd LMCache
+
+   uv venv --python 3.12
+   source .venv/bin/activate
+
+   # Need to install these packages manually to avoid build isolation
+   uv pip install -r requirements/build.txt
+
+   # Install torch from the ROCm wheel index
+   uv pip install torch torchvision --index-url https://download.pytorch.org/whl/rocm7.0
+
+   # Build LMCache. BUILD_WITH_HIP=1 makes setup.py pick cupy-rocm-7-0 automatically.
+   PYTORCH_ROCM_ARCH="gfx942" \
+   TORCH_DONT_CHECK_COMPILER_ABI=1 \
+   CXX=hipcc \
+   BUILD_WITH_HIP=1 \
+   uv pip install -e . --no-build-isolation
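Not part of the diff, but a quick sanity check after the bare-host build might look like this (illustrative; `torch.version.hip` is set only by ROCm builds of torch):

    # Confirm the ROCm torch wheel and the editable LMCache install
    python -c "import torch; print(torch.version.hip)"
    python -c "import lmcache; print(lmcache.__file__)"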

pyproject.toml (0 additions & 3 deletions)
@@ -57,9 +57,6 @@ version_file = "lmcache/_version.py"
 # do not include +gREV local version, required for Test PyPI upload
 local_scheme = "no-local-version"
 
-[tool.setuptools.dynamic]
-dependencies = { file = ["requirements/common.txt"] }
-
 [tool.setuptools.packages.find]
 where = [""]
 include = ["lmcache", "lmcache*"]

requirements/common.txt (0 additions & 4 deletions)
@@ -7,8 +7,6 @@ cufile-python
 fastapi
 httpx
 msgspec
-# if nixl decides to support >=3.13 in the future, we can remove this constraint
-nixl; python_version < "3.13"
 # nixl uses numba which requires numpy<=2.2.6
 numpy<=2.2.6
 numba
@@ -42,5 +40,3 @@ torch
 transformers >= 4.51.1
 uvicorn
 httptools
-# Right now we are using cuda 12.x to align with serving engines
-cupy-cuda12x

requirements/cuda.txt (2 additions & 0 deletions)
@@ -1,5 +1,7 @@
 # Common project dependencies
 -r common.txt
+# Vendor-specific runtime deps (cupy, nixl) baked into install_requires
+-r cuda_core.txt
 
 # Dependencies for NVIDIA GPUs
 ray >= 2.9
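The -r chain can be checked without installing anything, confirming that the Docker path still resolves the vendor deps; for example (illustrative; `--dry-run` needs pip >= 22.2):

    # cuda.txt -> common.txt + cuda_core.txt + ray and the other CUDA extras
    pip install --dry-run -r requirements/cuda.txt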

requirements/cuda_core.txt (9 additions & 0 deletions)
@@ -0,0 +1,9 @@
+# Vendor-specific runtime deps baked into install_requires by setup.py
+# when building for CUDA (i.e. BUILD_WITH_HIP is unset).
+# Kept separate from cuda.txt so `pip install -e .` stays lightweight
+# (no ray/xformers/torchvision) while Docker's `pip install -r cuda.txt`
+# still pulls these through the -r chain.
+
+cupy-cuda12x
+# nixl on PyPI is a meta-package that pulls nixl-cu12 (CUDA-only).
+nixl; python_version < "3.13"
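The trailing environment marker is evaluated by pip at resolve time: on Python 3.12 the nixl requirement installs normally, while on 3.13+ pip skips it and reports that the markers don't match the environment. The same behavior can be reproduced standalone (illustrative):

    pip install 'nixl; python_version < "3.13"'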

requirements/rocm_core.txt (9 additions & 0 deletions)
@@ -0,0 +1,9 @@
+# Vendor-specific runtime deps baked into install_requires by setup.py
+# when building for ROCm (BUILD_WITH_HIP=1).
+# cupy-rocm-7-0 is the AMD analogue of cupy-cuda12x.
+# torch/torchvision are NOT listed here because ROCm wheels live on a
+# non-PyPI index (https://download.pytorch.org/whl/rocm7.0) that pip
+# cannot be told about via install_requires; users install them manually
+# per the "LMCache on ROCm / Without vLLM docker base image" docs.
+
+cupy-rocm-7-0

setup.py (14 additions & 0 deletions)
@@ -21,6 +21,15 @@
 ENABLE_CXX11_ABI = os.environ.get("ENABLE_CXX11_ABI", "1") == "1"
 
 
+def _read_requirements(path: Path) -> list[str]:
+    reqs: list[str] = []
+    for raw in path.read_text().splitlines():
+        line = raw.strip()
+        if line and not line.startswith("#"):
+            reqs.append(line)
+    return reqs
+
+
 def hipify_wrapper() -> None:
     # Third Party
     from torch.utils.hipify.hipify_python import hipify
@@ -299,11 +308,16 @@ def source_dist_extension() -> tuple[list, dict]:
 
 ext_modules, cmdclass = get_extension()
 
+install_requires = _read_requirements(ROOT_DIR / "requirements" / "common.txt")
+core_file = "rocm_core.txt" if BUILD_WITH_HIP else "cuda_core.txt"
+install_requires += _read_requirements(ROOT_DIR / "requirements" / core_file)
+
 setup(
     packages=find_packages(
         exclude=("csrc",)
     ),  # Ensure csrc is excluded if it only contains sources
     ext_modules=ext_modules,
     cmdclass=cmdclass,
     include_package_data=True,
+    install_requires=install_requires,
 )
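For intuition, the new `_read_requirements` helper keeps exactly the non-blank, non-comment lines of a requirements file; a rough shell equivalent (illustrative only, ignoring the per-line strip):

    # Roughly what setup.py reads from the ROCm core file
    grep -vE '^[[:space:]]*(#|$)' requirements/rocm_core.txt
    # -> cupy-rocm-7-0

On a CUDA host the same filter over cuda_core.txt yields cupy-cuda12x plus the nixl marker line.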
