LMCache
diff --git a/‎csrc/storage_backends/fs/connector.cpp‎
Lines changed: 37 additions & 21 deletions b/‎csrc/storage_backends/fs/connector.cpp‎
Lines changed: 37 additions & 21 deletions
diff --git a/‎csrc/storage_backends/fs/connector.h‎
Lines changed: 11 additions & 8 deletions b/‎csrc/storage_backends/fs/connector.h‎
Lines changed: 11 additions & 8 deletions
diff --git a/‎docs/design/v1/distributed/l2_adapters/l2_per_user_quota.md‎
Lines changed: 39 additions & 49 deletions b/‎docs/design/v1/distributed/l2_adapters/l2_per_user_quota.md‎
Lines changed: 39 additions & 49 deletions
@@ -27,43 +27,59 @@ std::string FSConnector::replace_all(const std::string& str,
 
 std::string FSConnector::key_to_filename(const std::string& key) {
   // Input key format (from _object_key_to_string):
-  //   model_name@kv_rank_hex@chunk_hash_hex
-  //   e.g. "meta-llama/Llama-3@0000002a@deadbeef..."
+  //   Unsalted: <model_name>@<kv_rank_hex>@<chunk_hash_hex>
+  //   Salted  : <model_name>@<kv_rank_hex>@<chunk_hash_hex>@<cache_salt>
   //
-  // Output filename (matching _object_key_to_filename):
-  //   model_name_safe@0xkv_rank_hex@chunk_hash_hex.data
-  //   e.g. "meta-llama-SEP-Llama-3@0x0000002a@deadbeef....data"
-
-  // Split from the right on '@' to get exactly 3 parts
-  size_t last_sep = key.rfind(KEY_SEP);
-  if (last_sep == std::string::npos) {
-    return key + FILE_EXT;
+  // Output filename (matching fs_l2_adapter.py._object_key_to_filename):
+  //   Unsalted: <model_name_safe>@0x<kv_rank_hex>@<chunk_hash_hex>.data
+  //   Salted  :
+  //   <model_name_safe>@0x<kv_rank_hex>@<chunk_hash_hex>@<cache_salt>.data
+  //
+  // The unsalted 3-field shape is bit-identical to the pre-cache_salt
+  // format, so existing cache directories remain valid.
+  //
+  // NOTE: both model_name and cache_salt are forbidden from containing
+  // '@' (invariant enforced on the Python side), so splitting on '@'
+  // is unambiguous — no marker, no rsplit.
+
+  // Split on '@' — must yield 3 (unsalted) or 4 (salted) fields.
+  std::vector<std::string> parts;
+  size_t start = 0;
+  for (size_t pos = 0; pos <= key.size(); ++pos) {
+    if (pos == key.size() || key[pos] == KEY_SEP) {
+      parts.emplace_back(key.substr(start, pos - start));
+      start = pos + 1;
+    }
   }
-  size_t second_sep = key.rfind(KEY_SEP, last_sep - 1);
-  if (second_sep == std::string::npos) {
-    return key + FILE_EXT;
+  if (parts.size() != 3 && parts.size() != 4) {
+    throw std::runtime_error(
+        "FSConnector: malformed key (expected 3 or 4 '@'-separated fields): " +
+        key);
   }
 
-  std::string model_name = key.substr(0, second_sep);
-  std::string kv_rank_hex =
-      key.substr(second_sep + 1, last_sep - second_sep - 1);
-  std::string chunk_hash = key.substr(last_sep + 1);
+  const std::string& model_name = parts[0];
+  const std::string& kv_rank_hex = parts[1];
+  const std::string& chunk_hash = parts[2];
+  const std::string cache_salt = parts.size() == 4 ? parts[3] : std::string();
 
   // Replace '/' with '-SEP-' for filesystem safety
   std::string safe_model = replace_all(model_name, "/", PATH_SLASH_REPLACEMENT);
 
-  // Rebuild with 0x prefix to match Python's {kv_rank:#010x}
-  // Input kv_rank_hex is 8 hex chars (e.g. "0000002a")
-  // Output needs to be "0x0000002a"
+  // Emit filename. Salt is appended at the tail so the unsalted shape
+  // matches what older builds wrote to disk.
   std::string result;
   result.reserve(safe_model.size() + kv_rank_hex.size() + chunk_hash.size() +
-                 32);
+                 cache_salt.size() + 32);
   result += safe_model;
   result += KEY_SEP;
   result += "0x";
   result += kv_rank_hex;
   result += KEY_SEP;
   result += chunk_hash;
+  if (!cache_salt.empty()) {
+    result += KEY_SEP;
+    result += cache_salt;
+  }
   result += FILE_EXT;
   return result;
 }
 
@@ -50,16 +50,19 @@ class FSConnector : public ConnectorBase<WorkerFSConn> {
 
  private:
   // Build the filesystem-safe filename from a serialized key string.
-  // The key string has the format: model_name@kv_rank_hex@chunk_hash_hex
-  // Output filename: model_name_safe@0xkv_rank_hex@chunk_hash_hex.data
   //
-  // NOTE: the key string from NativeConnectorL2Adapter uses
-  //   _object_key_to_string: "{model}@{kv_rank:08x}@{hash.hex()}"
-  // while fs_l2_adapter.py uses _object_key_to_filename:
-  //   "{safe_model}@{kv_rank:#010x}@{hash.hex()}.data"
+  // Input key (from NativeConnectorL2Adapter._object_key_to_string):
+  //   Unsalted: "{model}@{kv_rank:08x}@{hash.hex()}"
+  //   Salted  : "{model}@{kv_rank:08x}@{hash.hex()}@{cache_salt}"
   //
-  // The difference is the 0x prefix in kv_rank. We handle
-  // this by re-encoding here to match the Python FS layout.
+  // Output filename (matching fs_l2_adapter.py._object_key_to_filename):
+  //   Unsalted: "{safe_model}@{kv_rank:#010x}@{hash.hex()}.data"
+  //   Salted  : "{safe_model}@{kv_rank:#010x}@{hash.hex()}@{cache_salt}.data"
+  //
+  // Differences from the input: '/' in model becomes '-SEP-', kv_rank
+  // gains a '0x' prefix, and '.data' is appended. Both model_name and
+  // cache_salt are forbidden from containing '@' (enforced on the
+  // Python side), so the parse is unambiguous.
   static std::string key_to_filename(const std::string& key);
 
   static std::string replace_all(const std::string& str,
 
@@ -24,8 +24,8 @@ vLLM API Server
   │  sends cache_salt directly on IPCCacheEngineKey
   ▼
 LMCache MP Server
-  │  reads key.cache_salt
-  │  ipc_key_to_object_keys(..., cache_salt=key.cache_salt)
+  │  ipc_key_to_object_keys(key, chunk_hashes)
+  │    reads key.cache_salt internally and propagates
   ▼
 ObjectKey(chunk_hash, model_name, kv_rank, cache_salt="alice")
   │                                         ▲
@@ -152,7 +152,8 @@ Worker path (STORE/RETRIEVE):                              │
   → IPCCacheEngineKey(cache_salt="alice", ...) ──STORE──►  MP Server
                                                            │
                                               key.cache_salt = "alice"
-                                              ipc_key_to_object_keys(..., cache_salt="alice")
+                                              ipc_key_to_object_keys(key, hashes)
+                                                — reads key.cache_salt
                                               → ObjectKey(cache_salt="alice", ...)
 ```
 
@@ -228,7 +229,7 @@ GET    /api/quota                    List all quotas and per-user usage
 
 **`_default` sentinel:** Empty strings cannot be URL path parameters. Use
 `_default` as the `cache_salt` in the URL to refer to the `cache_salt=""`
-namespace (legacy/anonymous traffic). For example,
+namespace (anonymous / un-isolated traffic). For example,
 `PUT /api/quota/_default` sets the quota for `cache_salt=""`.
 
 **`PUT /api/quota/{cache_salt}`** — Set or update a user's quota.
@@ -287,22 +288,20 @@ See the **Configuration** section for the full JSON example.
 **File:** `lmcache/v1/distributed/api.py`
 
 Add `cache_salt: str = ""` to `ObjectKey` (as shown in section 1).
-Add `cache_salt: str = ""` parameter to `ipc_key_to_object_keys()` and
-pass it through to each constructed `ObjectKey`.
+`ipc_key_to_object_keys()` reads `ipc_key.cache_salt` directly and
+propagates it to each constructed `ObjectKey` — no separate parameter,
+so callers cannot accidentally drop the salt.
 
-### 2. Server — Pass `cache_salt` through to ObjectKeys
+### 2. Server — `cache_salt` is carried by `IPCCacheEngineKey`
 
 **File:** `lmcache/v1/multiprocess/server.py`
 
 Since both the scheduler and worker adapters set `cache_salt` on
-`IPCCacheEngineKey`, the server simply reads `key.cache_salt` directly in
-all code paths. No session-based fallback is needed.
-
-In `MPCacheEngine.store()`, `MPCacheEngine.retrieve()`, and
-`MPCacheEngine.lookup()`:
+`IPCCacheEngineKey`, the server simply calls `ipc_key_to_object_keys(...)`
+and the salt flows through automatically:
 
 ```python
-obj_keys = ipc_key_to_object_keys(key, chunk_hashes, cache_salt=key.cache_salt)
+obj_keys = ipc_key_to_object_keys(key, chunk_hashes)
 ```
 
 **`session.py` is unchanged** — no `cache_salt` field needed on `Session`.
@@ -717,45 +716,36 @@ curl -X DELETE http://localhost:8000/api/quota/alice
 curl http://localhost:8000/api/quota
 ```
 
-## Backward Compatibility
+## Behavioral Notes
 
 - **No cache_salt from API:** When the API caller doesn't set `cache_salt`,
   `request.cache_salt` is `None`, which maps to `cache_salt=""`.
-  `IPCCacheEngineKey.cache_salt` defaults to `""`. All keys share the same
-  (empty-user) namespace — exactly like today's behavior.
+  `IPCCacheEngineKey.cache_salt` defaults to `""`. All such keys share the
+  same (anonymous) namespace.
 - **`eviction_policy: "LRU"`:** Per-user quota logic is not active. The
   watermark is applied against aggregate capacity as before. Existing
   behavior is fully unchanged.
-- **ObjectKey equality change:** Adding `cache_salt` to ObjectKey identity IS a
-  behavioral change, but since `cache_salt` defaults to `""`, all existing keys
-  (with no cache_salt) remain equal to each other. Only when cache_salt is
-  actively set do keys diverge. Existing tests that construct
-  `ObjectKey(hash, model, rank)` continue to work — the 3-arg form uses
-  `cache_salt=""` by default.
-- **Serialization — what if an adapter doesn't update?** Each adapter uses
-  ObjectKey differently as a storage key:
-
-  | Adapter | How ObjectKey is used as storage key | Impact |
-  |---------|-------------------------------------|--------|
-  | `MockL2Adapter` | Python dict key (`dict[ObjectKey, ...]`) | **No change needed.** `__hash__` includes `cache_salt` automatically. With `cache_salt=""` (LRU mode), hashes are unchanged from today. |
-  | `NixlStoreL2Adapter` | Python dict key (`dict[ObjectKey, ...]`) | **No change needed.** Same as mock. |
-  | `NativeConnectorL2Adapter` | Explicit string serialization via `_object_key_to_string()`: `"{model}@{kv_rank}@{hash}"` | **Must update** to include `cache_salt` for UserLRU. Without the update, different users' keys serialize to the same string → storage collision. |
-
-  **With regular `LRU` policy (no cache_salt set):** All keys have `cache_salt=""`.
-  Even if `_object_key_to_string()` is not updated, there are no collisions
-  because all keys share the same empty cache_salt. **Adapters work unchanged.**
-
-  **With `UserLRU` policy (cache_salt set):** Adapters with explicit string
-  serialization (currently only `NativeConnectorL2Adapter`) must include
-  `cache_salt` in the serialized form, e.g.:
+- **ObjectKey equality:** `cache_salt` is part of identity (eq/hash). Two
+  `ObjectKey`s with different salts are distinct, preventing cross-user
+  collisions. With the default `cache_salt=""` all un-salted traffic
+  hashes identically (unchanged from the pre-cache_salt adapter).
+- **Serialization format — trailing salt:** ``cache_salt`` is appended as
+  a 4th field when non-empty; unsalted keys use the 3-field shape, which
+  is bit-identical to the pre-cache_salt format:
+
   ```python
   def _object_key_to_string(key: ObjectKey) -> str:
+      base = f"{key.model_name}@{key.kv_rank:08x}@{key.chunk_hash.hex()}"
       if key.cache_salt:
-          return f"{key.cache_salt}@{key.model_name}@{key.kv_rank:08x}@{key.chunk_hash.hex()}"
-      return f"{key.model_name}@{key.kv_rank:08x}@{key.chunk_hash.hex()}"
+          return f"{base}@{key.cache_salt}"
+      return base
   ```
-  The empty-cache_salt branch preserves the existing format for backward
-  compatibility with data already stored in Redis/FS.
+
+  Because un-salted wire keys and filenames are unchanged, existing
+  cache directories and remote stores need **no migration**. Parsers
+  split on ``@`` and dispatch by field count (3 vs 4); both
+  ``model_name`` and ``cache_salt`` are forbidden from containing
+  ``@`` so the parse is unambiguous.
 
 - **Listener interface:** `L2AdapterListener` method signatures are unchanged.
   `cache_salt` flows through `ObjectKey.cache_salt`, not through callback
@@ -804,13 +794,13 @@ it through. Update serialization. No behavioral change with `cache_salt=""`.
 
 | File | Change |
 |------|--------|
-| `lmcache/v1/distributed/api.py` | `cache_salt: str = ""` on `ObjectKey`; `cache_salt` param on `ipc_key_to_object_keys()` |
-| `lmcache/v1/multiprocess/custom_types.py` | `cache_salt: str = ""` on `IPCCacheEngineKey` (appended at end); update `no_worker_id_version()`, `from_token_ids()` |
-| `lmcache/v1/multiprocess/server.py` | Pass `key.cache_salt` to `ipc_key_to_object_keys()` in all handlers |
-| `lmcache/v1/multiprocess/blend_server_v2.py` | Same for all 4 call sites |
-| `lmcache/v1/distributed/l2_adapters/native_connector_l2_adapter.py` | Update `_object_key_to_string()` |
-| `lmcache/v1/distributed/l2_adapters/fs_l2_adapter.py` | Update `_object_key_to_filename()` / `_filename_to_object_key()` |
-| `csrc/storage_backends/fs/connector.cpp` | Update `key_to_filename()` parser |
+| `lmcache/v1/distributed/api.py` | `cache_salt: str = ""` on `ObjectKey` with `__post_init__` validation (`@`, `/`, `\`, NUL, length cap on `cache_salt`; `@` rejected on `model_name`); `ipc_key_to_object_keys()` reads `ipc_key.cache_salt` directly (no separate param) |
+| `lmcache/v1/multiprocess/custom_types.py` | `cache_salt: str = ""` on `IPCCacheEngineKey` with `__post_init__` validation; update `no_worker_id_version()`, `from_token_ids()` |
+| `lmcache/v1/multiprocess/server.py` / `blend_server_v2.py` | No code changes — existing `ipc_key_to_object_keys(key, chunk_hashes)` calls now carry salt automatically |
+| `lmcache/integration/vllm/vllm_multi_process_adapter.py` | Scheduler + worker `_create_key()` now forward `cache_salt` to `IPCCacheEngineKey` |
+| `lmcache/v1/distributed/l2_adapters/native_connector_l2_adapter.py` | `_object_key_to_string()` appends trailing `@<cache_salt>` when salted; un-salted output is unchanged |
+| `lmcache/v1/distributed/l2_adapters/fs_l2_adapter.py` | `_object_key_to_filename()` / `_filename_to_object_key()` accept 3-field (unsalted) or 4-field (salted) shapes |
+| `csrc/storage_backends/fs/connector.cpp` | `key_to_filename()` splits on `@` and dispatches by field count |
 
 ### PR3 — LMCache: Adapter interface refactor (LMCache repo)