Skip to content

Commit 67aca0e

Browse files
Merge branch 'dev' into mp-optimize-save
2 parents 9772504 + 0c4f03d commit 67aca0e

47 files changed

Lines changed: 5035 additions & 179 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

csrc/storage_backends/fs/connector.cpp

Lines changed: 37 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -27,43 +27,59 @@ std::string FSConnector::replace_all(const std::string& str,
2727

2828
std::string FSConnector::key_to_filename(const std::string& key) {
2929
// Input key format (from _object_key_to_string):
30-
// model_name@kv_rank_hex@chunk_hash_hex
31-
// e.g. "meta-llama/Llama-3@0000002a@deadbeef..."
30+
// Unsalted: <model_name>@<kv_rank_hex>@<chunk_hash_hex>
31+
// Salted : <model_name>@<kv_rank_hex>@<chunk_hash_hex>@<cache_salt>
3232
//
33-
// Output filename (matching _object_key_to_filename):
34-
// model_name_safe@0xkv_rank_hex@chunk_hash_hex.data
35-
// e.g. "meta-llama-SEP-Llama-3@0x0000002a@deadbeef....data"
36-
37-
// Split from the right on '@' to get exactly 3 parts
38-
size_t last_sep = key.rfind(KEY_SEP);
39-
if (last_sep == std::string::npos) {
40-
return key + FILE_EXT;
33+
// Output filename (matching fs_l2_adapter.py._object_key_to_filename):
34+
// Unsalted: <model_name_safe>@0x<kv_rank_hex>@<chunk_hash_hex>.data
35+
// Salted :
36+
// <model_name_safe>@0x<kv_rank_hex>@<chunk_hash_hex>@<cache_salt>.data
37+
//
38+
// The unsalted 3-field shape is bit-identical to the pre-cache_salt
39+
// format, so existing cache directories remain valid.
40+
//
41+
// NOTE: both model_name and cache_salt are forbidden from containing
42+
// '@' (invariant enforced on the Python side), so splitting on '@'
43+
// is unambiguous — no sentinel marker or right-to-left split (rsplit) is needed.
44+
45+
// Split on '@' — must yield 3 (unsalted) or 4 (salted) fields.
46+
std::vector<std::string> parts;
47+
size_t start = 0;
48+
for (size_t pos = 0; pos <= key.size(); ++pos) {
49+
if (pos == key.size() || key[pos] == KEY_SEP) {
50+
parts.emplace_back(key.substr(start, pos - start));
51+
start = pos + 1;
52+
}
4153
}
42-
size_t second_sep = key.rfind(KEY_SEP, last_sep - 1);
43-
if (second_sep == std::string::npos) {
44-
return key + FILE_EXT;
54+
if (parts.size() != 3 && parts.size() != 4) {
55+
throw std::runtime_error(
56+
"FSConnector: malformed key (expected 3 or 4 '@'-separated fields): " +
57+
key);
4558
}
4659

47-
std::string model_name = key.substr(0, second_sep);
48-
std::string kv_rank_hex =
49-
key.substr(second_sep + 1, last_sep - second_sep - 1);
50-
std::string chunk_hash = key.substr(last_sep + 1);
60+
const std::string& model_name = parts[0];
61+
const std::string& kv_rank_hex = parts[1];
62+
const std::string& chunk_hash = parts[2];
63+
const std::string cache_salt = parts.size() == 4 ? parts[3] : std::string();
5164

5265
// Replace '/' with '-SEP-' for filesystem safety
5366
std::string safe_model = replace_all(model_name, "/", PATH_SLASH_REPLACEMENT);
5467

55-
// Rebuild with 0x prefix to match Python's {kv_rank:#010x}
56-
// Input kv_rank_hex is 8 hex chars (e.g. "0000002a")
57-
// Output needs to be "0x0000002a"
68+
// Emit filename. Salt is appended at the tail so the unsalted shape
69+
// matches what older builds wrote to disk.
5870
std::string result;
5971
result.reserve(safe_model.size() + kv_rank_hex.size() + chunk_hash.size() +
60-
32);
72+
cache_salt.size() + 32);
6173
result += safe_model;
6274
result += KEY_SEP;
6375
result += "0x";
6476
result += kv_rank_hex;
6577
result += KEY_SEP;
6678
result += chunk_hash;
79+
if (!cache_salt.empty()) {
80+
result += KEY_SEP;
81+
result += cache_salt;
82+
}
6783
result += FILE_EXT;
6884
return result;
6985
}

csrc/storage_backends/fs/connector.h

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,16 +50,19 @@ class FSConnector : public ConnectorBase<WorkerFSConn> {
5050

5151
private:
5252
// Build the filesystem-safe filename from a serialized key string.
53-
// The key string has the format: model_name@kv_rank_hex@chunk_hash_hex
54-
// Output filename: model_name_safe@0xkv_rank_hex@chunk_hash_hex.data
5553
//
56-
// NOTE: the key string from NativeConnectorL2Adapter uses
57-
// _object_key_to_string: "{model}@{kv_rank:08x}@{hash.hex()}"
58-
// while fs_l2_adapter.py uses _object_key_to_filename:
59-
// "{safe_model}@{kv_rank:#010x}@{hash.hex()}.data"
54+
// Input key (from NativeConnectorL2Adapter._object_key_to_string):
55+
// Unsalted: "{model}@{kv_rank:08x}@{hash.hex()}"
56+
// Salted : "{model}@{kv_rank:08x}@{hash.hex()}@{cache_salt}"
6057
//
61-
// The difference is the 0x prefix in kv_rank. We handle
62-
// this by re-encoding here to match the Python FS layout.
58+
// Output filename (matching fs_l2_adapter.py._object_key_to_filename):
59+
// Unsalted: "{safe_model}@{kv_rank:#010x}@{hash.hex()}.data"
60+
// Salted : "{safe_model}@{kv_rank:#010x}@{hash.hex()}@{cache_salt}.data"
61+
//
62+
// Differences from the input: '/' in model becomes '-SEP-', kv_rank
63+
// gains a '0x' prefix, and '.data' is appended. Both model_name and
64+
// cache_salt are forbidden from containing '@' (enforced on the
65+
// Python side), so the parse is unambiguous.
6366
static std::string key_to_filename(const std::string& key);
6467

6568
static std::string replace_all(const std::string& str,

docs/design/v1/distributed/l2_adapters/l2_per_user_quota.md

Lines changed: 39 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ vLLM API Server
2424
│ sends cache_salt directly on IPCCacheEngineKey
2525
2626
LMCache MP Server
27-
reads key.cache_salt
28-
ipc_key_to_object_keys(..., cache_salt=key.cache_salt)
27+
ipc_key_to_object_keys(key, chunk_hashes)
28+
reads key.cache_salt internally and propagates
2929
3030
ObjectKey(chunk_hash, model_name, kv_rank, cache_salt="alice")
3131
│ ▲
@@ -152,7 +152,8 @@ Worker path (STORE/RETRIEVE): │
152152
→ IPCCacheEngineKey(cache_salt="alice", ...) ──STORE──► MP Server
153153
154154
key.cache_salt = "alice"
155-
ipc_key_to_object_keys(..., cache_salt="alice")
155+
ipc_key_to_object_keys(key, hashes)
156+
— reads key.cache_salt
156157
→ ObjectKey(cache_salt="alice", ...)
157158
```
158159

@@ -228,7 +229,7 @@ GET /api/quota List all quotas and per-user usage
228229

229230
**`_default` sentinel:** Empty strings cannot be URL path parameters. Use
230231
`_default` as the `cache_salt` in the URL to refer to the `cache_salt=""`
231-
namespace (legacy/anonymous traffic). For example,
232+
namespace (anonymous / un-isolated traffic). For example,
232233
`PUT /api/quota/_default` sets the quota for `cache_salt=""`.
233234

234235
**`PUT /api/quota/{cache_salt}`** — Set or update a user's quota.
@@ -287,22 +288,20 @@ See the **Configuration** section for the full JSON example.
287288
**File:** `lmcache/v1/distributed/api.py`
288289

289290
Add `cache_salt: str = ""` to `ObjectKey` (as shown in section 1).
290-
Add `cache_salt: str = ""` parameter to `ipc_key_to_object_keys()` and
291-
pass it through to each constructed `ObjectKey`.
291+
`ipc_key_to_object_keys()` reads `ipc_key.cache_salt` directly and
292+
propagates it to each constructed `ObjectKey` — no separate parameter,
293+
so callers cannot accidentally drop the salt.
292294

293-
### 2. Server — Pass `cache_salt` through to ObjectKeys
295+
### 2. Server — `cache_salt` is carried by `IPCCacheEngineKey`
294296

295297
**File:** `lmcache/v1/multiprocess/server.py`
296298

297299
Since both the scheduler and worker adapters set `cache_salt` on
298-
`IPCCacheEngineKey`, the server simply reads `key.cache_salt` directly in
299-
all code paths. No session-based fallback is needed.
300-
301-
In `MPCacheEngine.store()`, `MPCacheEngine.retrieve()`, and
302-
`MPCacheEngine.lookup()`:
300+
`IPCCacheEngineKey`, the server simply calls `ipc_key_to_object_keys(...)`
301+
and the salt flows through automatically:
303302

304303
```python
305-
obj_keys = ipc_key_to_object_keys(key, chunk_hashes, cache_salt=key.cache_salt)
304+
obj_keys = ipc_key_to_object_keys(key, chunk_hashes)
306305
```
307306

308307
**`session.py` is unchanged** — no `cache_salt` field needed on `Session`.
@@ -717,45 +716,36 @@ curl -X DELETE http://localhost:8000/api/quota/alice
717716
curl http://localhost:8000/api/quota
718717
```
719718

720-
## Backward Compatibility
719+
## Behavioral Notes
721720

722721
- **No cache_salt from API:** When the API caller doesn't set `cache_salt`,
723722
`request.cache_salt` is `None`, which maps to `cache_salt=""`.
724-
`IPCCacheEngineKey.cache_salt` defaults to `""`. All keys share the same
725-
(empty-user) namespace — exactly like today's behavior.
723+
`IPCCacheEngineKey.cache_salt` defaults to `""`. All such keys share the
724+
same (anonymous) namespace.
726725
- **`eviction_policy: "LRU"`:** Per-user quota logic is not active. The
727726
watermark is applied against aggregate capacity as before. Existing
728727
behavior is fully unchanged.
729-
- **ObjectKey equality change:** Adding `cache_salt` to ObjectKey identity IS a
730-
behavioral change, but since `cache_salt` defaults to `""`, all existing keys
731-
(with no cache_salt) remain equal to each other. Only when cache_salt is
732-
actively set do keys diverge. Existing tests that construct
733-
`ObjectKey(hash, model, rank)` continue to work — the 3-arg form uses
734-
`cache_salt=""` by default.
735-
- **Serialization — what if an adapter doesn't update?** Each adapter uses
736-
ObjectKey differently as a storage key:
737-
738-
| Adapter | How ObjectKey is used as storage key | Impact |
739-
|---------|-------------------------------------|--------|
740-
| `MockL2Adapter` | Python dict key (`dict[ObjectKey, ...]`) | **No change needed.** `__hash__` includes `cache_salt` automatically. With `cache_salt=""` (LRU mode), hashes are unchanged from today. |
741-
| `NixlStoreL2Adapter` | Python dict key (`dict[ObjectKey, ...]`) | **No change needed.** Same as mock. |
742-
| `NativeConnectorL2Adapter` | Explicit string serialization via `_object_key_to_string()`: `"{model}@{kv_rank}@{hash}"` | **Must update** to include `cache_salt` for UserLRU. Without the update, different users' keys serialize to the same string → storage collision. |
743-
744-
**With regular `LRU` policy (no cache_salt set):** All keys have `cache_salt=""`.
745-
Even if `_object_key_to_string()` is not updated, there are no collisions
746-
because all keys share the same empty cache_salt. **Adapters work unchanged.**
747-
748-
**With `UserLRU` policy (cache_salt set):** Adapters with explicit string
749-
serialization (currently only `NativeConnectorL2Adapter`) must include
750-
`cache_salt` in the serialized form, e.g.:
728+
- **ObjectKey equality:** `cache_salt` is part of identity (eq/hash). Two
729+
`ObjectKey`s with different salts are distinct, preventing cross-user
730+
collisions. With the default `cache_salt=""` all un-salted traffic
731+
hashes identically (unchanged from the pre-cache_salt adapter).
732+
- **Serialization format — trailing salt:** `cache_salt` is appended as
733+
a 4th field when non-empty; unsalted keys use the 3-field shape, which
734+
is bit-identical to the pre-cache_salt format:
735+
751736
```python
752737
def _object_key_to_string(key: ObjectKey) -> str:
738+
base = f"{key.model_name}@{key.kv_rank:08x}@{key.chunk_hash.hex()}"
753739
if key.cache_salt:
754-
return f"{key.cache_salt}@{key.model_name}@{key.kv_rank:08x}@{key.chunk_hash.hex()}"
755-
return f"{key.model_name}@{key.kv_rank:08x}@{key.chunk_hash.hex()}"
740+
return f"{base}@{key.cache_salt}"
741+
return base
756742
```
757-
The empty-cache_salt branch preserves the existing format for backward
758-
compatibility with data already stored in Redis/FS.
743+
744+
Because un-salted wire keys and filenames are unchanged, existing
745+
cache directories and remote stores need **no migration**. Parsers
746+
split on `@` and dispatch by field count (3 vs 4); both
747+
`model_name` and `cache_salt` are forbidden from containing
748+
`@`, so the parse is unambiguous.
759749

760750
- **Listener interface:** `L2AdapterListener` method signatures are unchanged.
761751
`cache_salt` flows through `ObjectKey.cache_salt`, not through callback
@@ -804,13 +794,13 @@ it through. Update serialization. No behavioral change with `cache_salt=""`.
804794

805795
| File | Change |
806796
|------|--------|
807-
| `lmcache/v1/distributed/api.py` | `cache_salt: str = ""` on `ObjectKey`; `cache_salt` param on `ipc_key_to_object_keys()` |
808-
| `lmcache/v1/multiprocess/custom_types.py` | `cache_salt: str = ""` on `IPCCacheEngineKey` (appended at end); update `no_worker_id_version()`, `from_token_ids()` |
809-
| `lmcache/v1/multiprocess/server.py` | Pass `key.cache_salt` to `ipc_key_to_object_keys()` in all handlers |
810-
| `lmcache/v1/multiprocess/blend_server_v2.py` | Same for all 4 call sites |
811-
| `lmcache/v1/distributed/l2_adapters/native_connector_l2_adapter.py` | Update `_object_key_to_string()` |
812-
| `lmcache/v1/distributed/l2_adapters/fs_l2_adapter.py` | Update `_object_key_to_filename()` / `_filename_to_object_key()` |
813-
| `csrc/storage_backends/fs/connector.cpp` | Update `key_to_filename()` parser |
797+
| `lmcache/v1/distributed/api.py` | `cache_salt: str = ""` on `ObjectKey` with `__post_init__` validation (rejects `@`, `/`, `\`, and NUL in `cache_salt` and enforces a length cap; rejects `@` in `model_name`); `ipc_key_to_object_keys()` reads `ipc_key.cache_salt` directly (no separate param) |
798+
| `lmcache/v1/multiprocess/custom_types.py` | `cache_salt: str = ""` on `IPCCacheEngineKey` with `__post_init__` validation; update `no_worker_id_version()`, `from_token_ids()` |
799+
| `lmcache/v1/multiprocess/server.py` / `blend_server_v2.py` | No code changes — existing `ipc_key_to_object_keys(key, chunk_hashes)` calls now carry salt automatically |
800+
| `lmcache/integration/vllm/vllm_multi_process_adapter.py` | Scheduler + worker `_create_key()` now forward `cache_salt` to `IPCCacheEngineKey` |
801+
| `lmcache/v1/distributed/l2_adapters/native_connector_l2_adapter.py` | `_object_key_to_string()` appends trailing `@<cache_salt>` when salted; un-salted output is unchanged |
802+
| `lmcache/v1/distributed/l2_adapters/fs_l2_adapter.py` | `_object_key_to_filename()` / `_filename_to_object_key()` accept 3-field (unsalted) or 4-field (salted) shapes |
803+
| `csrc/storage_backends/fs/connector.cpp` | `key_to_filename()` splits on `@` and dispatches by field count |
814804

815805
### PR3 — LMCache: Adapter interface refactor (LMCache repo)
816806

0 commit comments

Comments
 (0)