Skip to content

Commit c59f18b

Browse files
committed
[Refactor] Rename per_cache_salt → by_cache_salt for clarity
The `per_*` prefix chained with a multi-word identifier produces long, awkward names (`per_cache_salt_bytes`, `_per_cache_salt_size_bytes`). Switch to `by_cache_salt` as the grouping-dimension qualifier: - AdapterUsage.per_cache_salt_bytes → AdapterUsage.bytes_by_cache_salt - L2AdapterInterface._per_cache_salt_size_bytes → _bytes_by_cache_salt - _EMPTY_PER_CACHE_SALT → _EMPTY_BY_CACHE_SALT No behavior change. Tests and the design doc are updated in lockstep. Signed-off-by: royyhuang <roy.y.huang@gmail.com>
1 parent 31a27e5 commit c59f18b

5 files changed

Lines changed: 55 additions & 55 deletions

File tree

docs/design/v1/distributed/l2_adapters/l2_per_user_quota.md

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ L1 Manager → StoreController → L2 Adapter
3737
│ │
3838
│ _notify_keys_stored(keys, sizes)
3939
│ → base class updates _total_bytes_used
40-
│ and _per_cache_salt_size_bytes
40+
│ and _bytes_by_cache_salt
4141
4242
Listeners: L2EvictionPolicy bridge → UserLRUEvictionPolicy
4343
┌──────────────────────┐
@@ -49,7 +49,7 @@ L2EvictionController (every 1s):
4949
for each adapter state:
5050
usage = adapter.get_usage() → AdapterUsage dataclass
5151
if policy.is_user_level:
52-
for cache_salt, bytes in usage.per_cache_salt_bytes:
52+
for cache_salt, bytes in usage.bytes_by_cache_salt:
5353
if bytes > watermark * quota(cache_salt):
5454
policy.get_eviction_actions(ratio, cache_salt=cache_salt)
5555
else:
@@ -399,7 +399,7 @@ class AdapterUsage:
399399
total_capacity_bytes: int
400400
"""Adapter's maximum capacity. 0 means unknown/unlimited."""
401401

402-
per_cache_salt_bytes: dict[str, int]
402+
bytes_by_cache_salt: dict[str, int]
403403
"""Bytes used per cache_salt. Only entries with positive usage."""
404404

405405
@property
@@ -418,7 +418,7 @@ class L2AdapterInterface(ABC):
418418
self._listeners: list[L2AdapterListener] = []
419419
self._max_capacity_bytes = max_capacity_bytes
420420
self._total_bytes_used: int = 0
421-
self._per_cache_salt_size_bytes: dict[str, int] = {}
421+
self._bytes_by_cache_salt: dict[str, int] = {}
422422
self._usage_lock = threading.Lock()
423423

424424
@property
@@ -437,8 +437,8 @@ class L2AdapterInterface(ABC):
437437
with self._usage_lock:
438438
for key, size in zip(keys, sizes, strict=True):
439439
self._total_bytes_used += size
440-
self._per_cache_salt_size_bytes[key.cache_salt] = (
441-
self._per_cache_salt_size_bytes.get(key.cache_salt, 0) + size
440+
self._bytes_by_cache_salt[key.cache_salt] = (
441+
self._bytes_by_cache_salt.get(key.cache_salt, 0) + size
442442
)
443443
for listener in self._listeners:
444444
listener.on_l2_keys_stored(keys)
@@ -449,8 +449,8 @@ class L2AdapterInterface(ABC):
449449
with self._usage_lock:
450450
for key, size in zip(keys, sizes, strict=True):
451451
self._total_bytes_used -= size
452-
self._per_cache_salt_size_bytes[key.cache_salt] = (
453-
self._per_cache_salt_size_bytes.get(key.cache_salt, 0) - size
452+
self._bytes_by_cache_salt[key.cache_salt] = (
453+
self._bytes_by_cache_salt.get(key.cache_salt, 0) - size
454454
)
455455
for listener in self._listeners:
456456
listener.on_l2_keys_deleted(keys)
@@ -460,8 +460,8 @@ class L2AdapterInterface(ABC):
460460
return AdapterUsage(
461461
total_bytes_used=self._total_bytes_used,
462462
total_capacity_bytes=self._max_capacity_bytes,
463-
per_cache_salt_bytes={
464-
k: v for k, v in self._per_cache_salt_size_bytes.items()
463+
bytes_by_cache_salt={
464+
k: v for k, v in self._bytes_by_cache_salt.items()
465465
if v > 0
466466
},
467467
)
@@ -653,7 +653,7 @@ class L2EvictionController(StorageControllerInterface):
653653

654654
if policy.is_user_level and self._quota_manager:
655655
# Per-user watermark check
656-
for cache_salt, user_bytes in usage.per_cache_salt_bytes.items():
656+
for cache_salt, user_bytes in usage.bytes_by_cache_salt.items():
657657
limit = self._quota_manager.get_limit_bytes(cache_salt)
658658
if user_bytes <= watermark * limit:
659659
continue
@@ -673,7 +673,7 @@ class L2EvictionController(StorageControllerInterface):
673673
```
674674

675675
The controller uses `policy.is_user_level` (not `isinstance`) to branch:
676-
- **`is_user_level=True`**: reads `usage.per_cache_salt_bytes`, checks each user
676+
- **`is_user_level=True`**: reads `usage.bytes_by_cache_salt`, checks each user
677677
against `watermark * quota`. Unregistered users (quota=0) get ratio=1.0.
678678
- **`is_user_level=False`**: reads `usage.usage_fraction` for global check.
679679

@@ -841,7 +841,7 @@ The feature PR. Depends on PR1a + PR1b + PR2 + PR3 + PR4.
841841
| `lmcache/v1/distributed/eviction_policy/__init__.py` | Export `UserLRUEvictionPolicy` |
842842
| `lmcache/v1/distributed/config.py` | Add `"UserLRU"` to literal |
843843
| `lmcache/v1/distributed/l2_adapters/config.py` | Add `"UserLRU"` to allowed values |
844-
| `lmcache/v1/distributed/storage_controllers/eviction_controller.py` | `QuotaManager`; per-user branch using `is_user_level` + `per_cache_salt_bytes` |
844+
| `lmcache/v1/distributed/storage_controllers/eviction_controller.py` | `QuotaManager`; per-user branch using `is_user_level` + `bytes_by_cache_salt` |
845845
| `lmcache/v1/distributed/storage_manager.py` | Create `QuotaManager`; wire to controller + HTTP |
846846
| `lmcache/v1/multiprocess/http_server.py` | Quota CRUD endpoints |
847847
| `tests/v1/distributed/test_user_lru_eviction_policy.py` (new) | Unit tests |

lmcache/v1/distributed/l2_adapters/base.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -28,21 +28,21 @@
2828
L2TaskId = int
2929

3030

31-
_EMPTY_PER_CACHE_SALT: Mapping[str, int] = MappingProxyType({})
31+
_EMPTY_BY_CACHE_SALT: Mapping[str, int] = MappingProxyType({})
3232

3333

3434
@dataclass(frozen=True)
3535
class AdapterUsage:
3636
"""Unified usage report for an L2 adapter.
3737
3838
Replaces the old ``tuple[float, float]`` return shape with a structured
39-
record that exposes both aggregate and per-cache_salt byte counts.
39+
record that exposes both aggregate and per cache_salt byte counts.
4040
Buckets are keyed by ``ObjectKey.cache_salt`` directly — the salt may
4141
represent a user, a vLLM deployment, or any other isolation
4242
granularity the caller chooses; the adapter stays agnostic.
4343
4444
Instances are returned as immutable snapshots:
45-
``per_cache_salt_bytes`` is a read-only ``Mapping``
45+
``bytes_by_cache_salt`` is a read-only ``Mapping``
4646
(``MappingProxyType``) so callers cannot mutate the snapshot after
4747
the fact. Each ``get_usage()`` call returns a fresh snapshot so a
4848
held reference will never reflect later state.
@@ -55,8 +55,8 @@ class AdapterUsage:
5555
"""Adapter's maximum capacity. ``0`` means unknown / unlimited; the
5656
adapter does not support aggregate (global) usage-based eviction."""
5757

58-
per_cache_salt_bytes: Mapping[str, int] = field(
59-
default_factory=lambda: _EMPTY_PER_CACHE_SALT
58+
bytes_by_cache_salt: Mapping[str, int] = field(
59+
default_factory=lambda: _EMPTY_BY_CACHE_SALT
6060
)
6161
"""Bytes used per ``cache_salt``. Only entries with positive usage
6262
appear; an empty mapping means no traffic has been tracked yet.
@@ -131,11 +131,11 @@ def __init__(self, max_capacity_bytes: int = 0) -> None:
131131

132132
# Centralized byte accounting. Subclasses pass ``sizes`` to
133133
# ``_notify_keys_stored`` / ``_notify_keys_deleted`` and the base
134-
# class maintains both aggregate and per-cache_salt totals so
134+
# class maintains both aggregate and per cache_salt totals so
135135
# every adapter exposes the same shape via ``get_usage()``.
136136
self._max_capacity_bytes: int = max_capacity_bytes
137137
self._total_bytes_used: int = 0
138-
self._per_cache_salt_size_bytes: dict[str, int] = {}
138+
self._bytes_by_cache_salt: dict[str, int] = {}
139139
self._usage_lock = threading.Lock()
140140

141141
#####################
@@ -358,7 +358,7 @@ def _notify_keys_stored(self, keys: list[ObjectKey], sizes: list[int]) -> None:
358358
outside the lock so a slow listener cannot stall further notifies.
359359
"""
360360
# Aggregate per-salt deltas before touching
361-
# ``_per_cache_salt_size_bytes`` — one dict read/write per unique
361+
# ``_bytes_by_cache_salt`` — one dict read/write per unique
362362
# salt instead of one per key. This matters when the registry is
363363
# large (10k+ salts) and keys/sizes are bulky.
364364
delta: dict[str, int] = {}
@@ -370,8 +370,8 @@ def _notify_keys_stored(self, keys: list[ObjectKey], sizes: list[int]) -> None:
370370
with self._usage_lock:
371371
self._total_bytes_used += total_delta
372372
for salt, d in delta.items():
373-
self._per_cache_salt_size_bytes[salt] = (
374-
self._per_cache_salt_size_bytes.get(salt, 0) + d
373+
self._bytes_by_cache_salt[salt] = (
374+
self._bytes_by_cache_salt.get(salt, 0) + d
375375
)
376376
for listener in self._listeners:
377377
listener.on_l2_keys_stored(keys)
@@ -388,7 +388,7 @@ def _notify_keys_deleted(self, keys: list[ObjectKey], sizes: list[int]) -> None:
388388
the same value the adapter passed to ``_notify_keys_stored``).
389389
390390
Per-cache_salt buckets that drop to zero are removed so the
391-
``per_cache_salt_bytes`` snapshot in ``AdapterUsage`` stays compact.
391+
``bytes_by_cache_salt`` snapshot in ``AdapterUsage`` stays compact.
392392
393393
Counters are clamped at zero — a delete that would drive
394394
``_total_bytes_used`` negative indicates an accounting bug in the
@@ -420,11 +420,11 @@ def _notify_keys_deleted(self, keys: list[ObjectKey], sizes: list[int]) -> None:
420420
)
421421
self._total_bytes_used = 0
422422
for salt, d in delta.items():
423-
new_total = self._per_cache_salt_size_bytes.get(salt, 0) - d
423+
new_total = self._bytes_by_cache_salt.get(salt, 0) - d
424424
if new_total <= 0:
425-
self._per_cache_salt_size_bytes.pop(salt, None)
425+
self._bytes_by_cache_salt.pop(salt, None)
426426
else:
427-
self._per_cache_salt_size_bytes[salt] = new_total
427+
self._bytes_by_cache_salt[salt] = new_total
428428
for listener in self._listeners:
429429
listener.on_l2_keys_deleted(keys)
430430

@@ -484,15 +484,15 @@ def get_usage(self) -> AdapterUsage:
484484
"""
485485
with self._usage_lock:
486486
per_salt_snapshot = {
487-
k: v for k, v in self._per_cache_salt_size_bytes.items() if v > 0
487+
k: v for k, v in self._bytes_by_cache_salt.items() if v > 0
488488
}
489489
return AdapterUsage(
490490
total_bytes_used=self._total_bytes_used,
491491
total_capacity_bytes=self._max_capacity_bytes,
492492
# Wrap in a read-only view so callers can't mutate the
493493
# snapshot. The underlying dict is a fresh copy so the
494494
# view is fully detached from the adapter's live state.
495-
per_cache_salt_bytes=MappingProxyType(per_salt_snapshot),
495+
bytes_by_cache_salt=MappingProxyType(per_salt_snapshot),
496496
)
497497

498498
#####################

lmcache/v1/distributed/storage_controllers/eviction_controller.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def stop(self):
183183
self._thread.join()
184184

185185
def report_status(self) -> dict:
186-
# NOTE: ``usage.per_cache_salt_bytes`` is intentionally NOT
186+
# NOTE: ``usage.bytes_by_cache_salt`` is intentionally NOT
187187
# surfaced here. A deployment can have 10k+ salts, so embedding
188188
# the full bucket map in the status response would blow up the
189189
# payload. A separate paginated / queried endpoint is the right
@@ -199,7 +199,7 @@ def report_status(self) -> dict:
199199
"current_usage": usage.usage_fraction,
200200
"total_bytes_used": usage.total_bytes_used,
201201
"total_capacity_bytes": usage.total_capacity_bytes,
202-
"num_cache_salt_buckets": len(usage.per_cache_salt_bytes),
202+
"num_cache_salt_buckets": len(usage.bytes_by_cache_salt),
203203
}
204204
)
205205
return {

tests/v1/distributed/test_l2_adapter_base.py

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"""
33
Unit tests for the L2 adapter base class accounting + AdapterUsage.
44
5-
The base class owns ``_total_bytes_used`` / ``_per_cache_salt_size_bytes`` and
5+
The base class owns ``_total_bytes_used`` / ``_bytes_by_cache_salt`` and
66
exposes them through ``get_usage()``. Adapters drive accounting by passing
77
``sizes`` to ``_notify_keys_stored`` / ``_notify_keys_deleted``.
88
"""
@@ -89,9 +89,9 @@ def test_usage_fraction_negative_capacity_returns_minus_one(self):
8989
u = AdapterUsage(total_bytes_used=10, total_capacity_bytes=-1)
9090
assert u.usage_fraction == -1.0
9191

92-
def test_per_cache_salt_bytes_default_empty(self):
92+
def test_bytes_by_cache_salt_default_empty(self):
9393
u = AdapterUsage(total_bytes_used=0, total_capacity_bytes=100)
94-
assert u.per_cache_salt_bytes == {}
94+
assert u.bytes_by_cache_salt == {}
9595

9696
def test_frozen(self):
9797
# Standard
@@ -136,7 +136,7 @@ def test_store_increments_aggregate_and_per_user(self):
136136
a._notify_keys_stored([k_alice, k_bob, k_alice2], [100, 200, 50])
137137
u = a.get_usage()
138138
assert u.total_bytes_used == 350
139-
assert u.per_cache_salt_bytes == {"alice": 150, "bob": 200}
139+
assert u.bytes_by_cache_salt == {"alice": 150, "bob": 200}
140140

141141
def test_delete_decrements_aggregate_and_per_user(self):
142142
a = _StubAdapter(max_capacity_bytes=10_000)
@@ -146,7 +146,7 @@ def test_delete_decrements_aggregate_and_per_user(self):
146146
a._notify_keys_deleted([k_alice], [100])
147147
u = a.get_usage()
148148
assert u.total_bytes_used == 200
149-
assert u.per_cache_salt_bytes == {"bob": 200}
149+
assert u.bytes_by_cache_salt == {"bob": 200}
150150

151151
def test_per_user_bucket_dropped_when_zero(self):
152152
"""Per-user bookkeeping should not retain stale ``0`` entries — it
@@ -158,29 +158,29 @@ def test_per_user_bucket_dropped_when_zero(self):
158158
a._notify_keys_deleted([k], [100])
159159
u = a.get_usage()
160160
assert u.total_bytes_used == 0
161-
assert "alice" not in u.per_cache_salt_bytes
162-
assert u.per_cache_salt_bytes == {}
161+
assert "alice" not in u.bytes_by_cache_salt
162+
assert u.bytes_by_cache_salt == {}
163163

164164
def test_empty_salt_is_a_real_bucket(self):
165165
"""Un-salted traffic accumulates under the empty-string key.
166-
``per_cache_salt_bytes`` must reflect that so legacy/unisolated traffic
166+
``bytes_by_cache_salt`` must reflect that so legacy/unisolated traffic
167167
is observable too."""
168168
a = _StubAdapter(max_capacity_bytes=10_000)
169169
k = _make_key(1, salt="")
170170
a._notify_keys_stored([k], [100])
171171
u = a.get_usage()
172-
assert u.per_cache_salt_bytes == {"": 100}
172+
assert u.bytes_by_cache_salt == {"": 100}
173173

174174
def test_get_usage_filters_zero_buckets(self):
175175
"""Snapshot only includes positive-byte buckets."""
176176
a = _StubAdapter(max_capacity_bytes=10_000)
177177
# Manually inject a ``0`` entry as if accounting drift left one
178178
# behind.
179-
a._per_cache_salt_size_bytes["ghost"] = 0
180-
a._per_cache_salt_size_bytes["alice"] = 100
179+
a._bytes_by_cache_salt["ghost"] = 0
180+
a._bytes_by_cache_salt["alice"] = 100
181181
a._total_bytes_used = 100
182182
u = a.get_usage()
183-
assert u.per_cache_salt_bytes == {"alice": 100}
183+
assert u.bytes_by_cache_salt == {"alice": 100}
184184

185185
def test_size_list_length_mismatch_raises(self):
186186
"""``zip(strict=True)`` catches caller bugs where keys/sizes drift
@@ -204,17 +204,17 @@ def test_underflow_clamps_to_zero(self):
204204
assert u.usage_fraction == 0.0 # not the -1 sentinel
205205

206206
def test_get_usage_returns_immutable_per_user_view(self):
207-
"""``per_cache_salt_bytes`` is a read-only ``Mapping`` so a caller
207+
"""``bytes_by_cache_salt`` is a read-only ``Mapping`` so a caller
208208
cannot mutate the snapshot or the adapter's live state."""
209209
a = _StubAdapter(max_capacity_bytes=10_000)
210210
k = _make_key(1, salt="alice")
211211
a._notify_keys_stored([k], [100])
212212
u = a.get_usage()
213213
# MappingProxyType raises TypeError on mutation attempts.
214214
with pytest.raises(TypeError):
215-
u.per_cache_salt_bytes["bob"] = 999 # type: ignore[index]
215+
u.bytes_by_cache_salt["bob"] = 999 # type: ignore[index]
216216
# Original snapshot still intact.
217-
assert dict(u.per_cache_salt_bytes) == {"alice": 100}
217+
assert dict(u.bytes_by_cache_salt) == {"alice": 100}
218218

219219
def test_get_usage_snapshot_is_detached(self):
220220
"""A held ``AdapterUsage`` reference does not see later
@@ -226,11 +226,11 @@ def test_get_usage_snapshot_is_detached(self):
226226
a._notify_keys_stored([_make_key(2, salt="bob")], [200])
227227
# u1 is the snapshot at the earlier moment.
228228
assert u1.total_bytes_used == 100
229-
assert dict(u1.per_cache_salt_bytes) == {"alice": 100}
229+
assert dict(u1.bytes_by_cache_salt) == {"alice": 100}
230230
# Live state is now 300 / {alice, bob}.
231231
u2 = a.get_usage()
232232
assert u2.total_bytes_used == 300
233-
assert dict(u2.per_cache_salt_bytes) == {"alice": 100, "bob": 200}
233+
assert dict(u2.bytes_by_cache_salt) == {"alice": 100, "bob": 200}
234234

235235
def test_zero_size_notify_is_listener_only(self):
236236
"""Calling ``_notify_keys_stored`` with size=0 fires the listener
@@ -245,7 +245,7 @@ def test_zero_size_notify_is_listener_only(self):
245245
a._notify_keys_stored([k], [0])
246246
u = a.get_usage()
247247
assert u.total_bytes_used == 100
248-
assert dict(u.per_cache_salt_bytes) == {"alice": 100}
248+
assert dict(u.bytes_by_cache_salt) == {"alice": 100}
249249
# Listener fired twice (LRU policy can move_to_end on second).
250250
assert lst.stored == [[k], [k]]
251251

tests/v1/distributed/test_mock_l2_adapter.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,7 @@ def test_get_usage_empty_adapter_is_zero(self, adapter):
708708
usage = adapter.get_usage()
709709
assert usage.total_bytes_used == 0
710710
assert usage.usage_fraction == 0.0
711-
assert usage.per_cache_salt_bytes == {}
711+
assert usage.bytes_by_cache_salt == {}
712712

713713
def test_get_usage_increases_after_store(self, adapter):
714714
"""get_usage() should report positive bytes after a store."""
@@ -734,7 +734,7 @@ def test_get_usage_decreases_after_delete(self, adapter):
734734
assert usage.total_bytes_used == 0
735735
assert usage.usage_fraction == 0.0
736736

737-
def test_per_cache_salt_bytes_populated_from_cache_salt(self, adapter):
737+
def test_bytes_by_cache_salt_populated_from_cache_salt(self, adapter):
738738
"""End-to-end: storing keys with different ``cache_salt`` values
739739
should drive the per-user byte buckets in ``AdapterUsage`` —
740740
proves the salt actually flows through the real adapter into the
@@ -761,22 +761,22 @@ def test_per_cache_salt_bytes_populated_from_cache_salt(self, adapter):
761761
_store_and_wait(adapter, k, obj)
762762

763763
usage = adapter.get_usage()
764-
assert usage.per_cache_salt_bytes == {"alice": 1024, "bob": 512}
764+
assert usage.bytes_by_cache_salt == {"alice": 1024, "bob": 512}
765765
assert usage.total_bytes_used == 1536
766766

767767
# Deleting one of alice's keys should shrink alice's bucket but
768768
# leave bob's untouched.
769769
adapter.delete([alice_keys[0]])
770770
usage = adapter.get_usage()
771-
assert usage.per_cache_salt_bytes == {"alice": 512, "bob": 512}
771+
assert usage.bytes_by_cache_salt == {"alice": 512, "bob": 512}
772772
assert usage.total_bytes_used == 1024
773773

774774
# Deleting alice's last key should drop the bucket entirely so
775775
# the snapshot stays compact.
776776
adapter.delete([alice_keys[1]])
777777
usage = adapter.get_usage()
778-
assert "alice" not in usage.per_cache_salt_bytes
779-
assert usage.per_cache_salt_bytes == {"bob": 512}
778+
assert "alice" not in usage.bytes_by_cache_salt
779+
assert usage.bytes_by_cache_salt == {"bob": 512}
780780

781781
def test_listener_notified_on_store(self, adapter):
782782
"""Listener.on_l2_keys_stored should be called after a store completes."""

0 commit comments

Comments
 (0)