Skip to content

Commit d1e4179

Browse files
committed
fix(mp): correct store bookkeeping for cached requests in lmcache_mp_connector_0180
1 parent 755362a commit d1e4179

1 file changed

Lines changed: 21 additions & 5 deletions

File tree

lmcache/integration/vllm/lmcache_mp_connector_0180.py

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -292,13 +292,26 @@ def GetStoreMetadata(
292292
# Store the blocks that have block hashes
293293
# NOTE: the invariant here is that `num_stored_blocks` should
294294
# always be a multiple of `blocks_in_chunk`
295-
# TODO: This should be checked everytime we update the num_stored_blocks
295+
# TODO: This should be checked every time we update
296+
# the num_stored_blocks
297+
#
298+
# Why computed_blocks includes num_lmcache_hit_blocks:
299+
#
300+
# Include lmcache-hit blocks so that the upper bound
301+
# matches num_stored_blocks (which already covers
302+
# them). Hit blocks are NOT re-stored.
303+
computed_blocks = (
304+
tracker.num_scheduled_tokens // vllm_block_size
305+
+ tracker.num_lmcache_hit_blocks
306+
)
296307
min_available_blocks = min(
297308
len(tracker.block_hashes),
298309
len(tracker.allocated_block_ids),
299-
tracker.num_scheduled_tokens // vllm_block_size,
310+
computed_blocks,
311+
)
312+
num_staging_blocks = (
313+
min_available_blocks - tracker.num_stored_blocks
300314
)
301-
num_staging_blocks = min_available_blocks - tracker.num_stored_blocks
302315
num_chunks = num_staging_blocks // blocks_in_chunk
303316

304317
if num_chunks >= 1:
@@ -996,8 +1009,11 @@ def _process_cached_requests(
9961009
if request_id not in cached_reqs.resumed_req_ids:
9971010
request_tracker.append_block_ids(new_block_ids)
9981011

999-
# Update new scheduled tokens
1000-
num_new_tokens = cached_reqs.num_computed_tokens[idx]
1012+
# Use the incremental num_scheduled_tokens to
1013+
# stay consistent with _process_new_requests.
1014+
num_new_tokens = (
1015+
scheduler_output.num_scheduled_tokens[request_id]
1016+
)
10011017
request_tracker.increase_num_scheduled_tokens(num_new_tokens)
10021018

10031019
r_meta = LMCacheMPRequestMetadata.GetStoreMetadata(

0 commit comments

Comments
 (0)