Skip to content

Commit 2a90190

Browse files
glimchb and Devin committed
feat(gds): add gds_path_sharding config for multi-path strategy
Add a top-level `gds_path_sharding` config field (default: "by_gpu") that controls how GPUs are assigned to storage paths when multiple comma-separated paths are provided in `gds_path`. This replaces the previously hardcoded by_gpu logic with an explicit, extensible setting. Currently only "by_gpu" is supported (selects path via `device_id % num_paths`); unsupported values raise AssertionError. Generated with [Devin](https://cli.devin.ai/docs) Co-Authored-By: Devin <noreply@cognition.ai> Signed-off-by: Boris Glimcher <Boris.Glimcher@emc.com>
1 parent 996da47 commit 2a90190

5 files changed

Lines changed: 62 additions & 9 deletions

File tree

docs/source/api_reference/configurations.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,10 @@ Settings for different storage backends and paths.
341341
- Description
342342
* - gds_path
343343
- LMCACHE_GDS_PATH
344-
- Path for GDS backend. Supports comma-separated paths for multi-device I/O (e.g. ``/mnt/nvme0/cache,/mnt/nvme1/cache``). Each GPU selects a path via ``device_id % num_paths``.
344+
- Path for GDS backend. Supports comma-separated paths for multi-device I/O (e.g. ``/mnt/nvme0/cache,/mnt/nvme1/cache``). See ``gds_path_sharding`` for how paths are assigned to GPUs.
345+
* - gds_path_sharding
346+
- LMCACHE_GDS_PATH_SHARDING
347+
- Strategy for selecting a path when multiple paths are provided. Currently only ``"by_gpu"`` is supported, which selects paths based on GPU device ID (default: "by_gpu").
345348
* - cufile_buffer_size
346349
- LMCACHE_CUFILE_BUFFER_SIZE
347350
- Buffer size for cuFile/hipFile operations

docs/source/kv_cache/storage_backends/gds.rst

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,11 @@ Multi-Path (Multi-Device) Support
5252
---------------------------------
5353

5454
When a system has multiple NVMe drives, you can distribute GDS I/O across them
55-
by specifying a comma-separated list of paths in ``gds_path``. Each GPU worker
56-
automatically selects one path based on its device index (``device_id % num_paths``),
57-
so traffic is spread evenly across the drives without any manual pinning.
55+
by specifying a comma-separated list of paths in ``gds_path``. The
56+
``gds_path_sharding`` option controls how each GPU worker selects its path.
57+
Currently only ``"by_gpu"`` is supported (the default), which selects a path
58+
based on the device index (``device_id % num_paths``), so traffic is spread
59+
evenly across the drives without any manual pinning.
5860

5961
**Why this helps:** a single PCIe Gen 4 x4 NVMe tops out at ~7 GB/s. With four
6062
drives the aggregate bandwidth can reach ~28 GB/s, matching what multi-GPU
@@ -65,12 +67,14 @@ systems need for KV cache eviction and prefetch.
6567
.. code-block:: bash
6668
6769
export LMCACHE_GDS_PATH="/mnt/nvme0/cache,/mnt/nvme1/cache,/mnt/nvme2/cache,/mnt/nvme3/cache"
70+
export LMCACHE_GDS_PATH_SHARDING="by_gpu"
6871
6972
**YAML config:**
7073

7174
.. code-block:: yaml
7275
7376
gds_path: "/mnt/nvme0/cache,/mnt/nvme1/cache,/mnt/nvme2/cache,/mnt/nvme3/cache"
77+
gds_path_sharding: "by_gpu"
7478
7579
With the above configuration on a 4-GPU node:
7680

lmcache/v1/config.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,11 @@
231231
},
232232
# Storage paths
233233
"gds_path": {"type": Optional[str], "default": None, "env_converter": str},
234+
"gds_path_sharding": {
235+
"type": str,
236+
"default": "by_gpu",
237+
"env_converter": str,
238+
},
234239
"cufile_buffer_size": {
235240
"type": Optional[int],
236241
"default": None,

lmcache/v1/storage_backend/gds_backend.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,11 +202,15 @@ def __init__(
202202
assert config.gds_path is not None, "Need to specify gds_path for GdsBackend"
203203

204204
# Multi-path support: parse comma-separated paths and select one
205-
# based on GPU device ID (by_gpu sharding, like NIXL PR #2418).
205+
# based on the configured sharding strategy.
206206
self.gds_paths = [p.strip() for p in config.gds_path.split(",") if p.strip()]
207207
assert len(self.gds_paths) > 0, "gds_path cannot be empty"
208208

209-
# TODO: next patch we can add additional sharding strategies
209+
self.gds_path_sharding = config.gds_path_sharding
210+
assert self.gds_path_sharding == "by_gpu", (
211+
f"Unsupported gds_path_sharding '{self.gds_path_sharding}'. "
212+
"Only 'by_gpu' is supported currently."
213+
)
210214
self.gds_path = self.gds_paths[device_id % len(self.gds_paths)]
211215
self.fstype = get_fstype(self.gds_path)
212216

tests/v1/storage_backend/test_gds_backend.py

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,11 @@
3030
from tests.v1.utils import create_test_memory_obj, has_cufile, has_hipfile
3131

3232

33-
def create_test_config(gds_path: str):
33+
def create_test_config(gds_path: str, gds_path_sharding: str = "by_gpu"):
3434
config = LMCacheEngineConfig.from_defaults(
3535
chunk_size=256,
3636
gds_path=gds_path,
37+
gds_path_sharding=gds_path_sharding,
3738
lmcache_instance_id="test_instance",
3839
cufile_buffer_size=256,
3940
extra_config={"use_direct_io": True},
@@ -492,7 +493,12 @@ class TestGdsMultiPath:
492493
"""
493494

494495
@staticmethod
495-
def _make_backend(gds_path: str, dst_device: str, async_loop):
496+
def _make_backend(
497+
gds_path: str,
498+
dst_device: str,
499+
async_loop,
500+
gds_path_sharding: str = "by_gpu",
501+
):
496502
"""Create a GdsBackend with mocked allocator and fstype.
497503
498504
Mocks are used so the tests run without cuFile / real NVMe.
@@ -505,7 +511,7 @@ def __init__(self):
505511
def close(self):
506512
pass
507513

508-
config = create_test_config(gds_path)
514+
config = create_test_config(gds_path, gds_path_sharding=gds_path_sharding)
509515
metadata = create_test_metadata()
510516
with (
511517
mock.patch(
@@ -755,3 +761,34 @@ def test_try_to_read_metadata_finds_across_all_paths(self, async_loop):
755761
finally:
756762
for p in paths:
757763
shutil.rmtree(p, ignore_errors=True)
764+
765+
def test_gds_path_sharding_default(self, temp_gds_path, async_loop):
766+
"""Default gds_path_sharding is 'by_gpu'."""
767+
backend = self._make_backend(temp_gds_path, "cuda:0", async_loop)
768+
try:
769+
assert backend.gds_path_sharding == "by_gpu"
770+
finally:
771+
backend.close()
772+
773+
def test_gds_path_sharding_explicit_by_gpu(self, temp_gds_path, async_loop):
774+
"""Explicitly setting gds_path_sharding='by_gpu' works."""
775+
backend = self._make_backend(
776+
temp_gds_path,
777+
"cuda:0",
778+
async_loop,
779+
gds_path_sharding="by_gpu",
780+
)
781+
try:
782+
assert backend.gds_path_sharding == "by_gpu"
783+
finally:
784+
backend.close()
785+
786+
def test_gds_path_sharding_unsupported_raises(self, temp_gds_path, async_loop):
787+
"""Unsupported gds_path_sharding value raises AssertionError."""
788+
with pytest.raises(AssertionError, match="Unsupported gds_path_sharding"):
789+
self._make_backend(
790+
temp_gds_path,
791+
"cuda:0",
792+
async_loop,
793+
gds_path_sharding="round_robin",
794+
)

0 commit comments

Comments
 (0)