Skip to content

Commit be1c955

Browse files
Clarit-AI and claude committed
fix(pr13): address CodeRabbit review findings
- Remove accidental .remember/tmp/save-session.pid from tracking
- Add .remember/tmp/ to .gitignore
- Fix undefined variables in bench_multiturn.py barrier mode
- Add missing self.lora_path in WorkloadGenerator.__init__
- Zero padded tail in nemotron_h piecewise graph mode

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent c813876 commit be1c955

4 files changed

Lines changed: 11 additions & 4 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -275,5 +275,8 @@ sgl-kernel/csrc/**/*_musa/
275275
# VM migration context (local only, not for commit)
276276
docs/migration-prep/
277277

278+
# Runtime state files
279+
.remember/tmp/
280+
278281
# Beads / Dolt files (added by bd init)
279282
*.db

.remember/tmp/save-session.pid

Lines changed: 0 additions & 1 deletion
This file was deleted.

benchmark/hicache/bench_multiturn.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -260,6 +260,7 @@ def __init__(self, args):
260260
}
261261
self.num_clients = args.num_clients
262262

263+
self.lora_path = args.lora_path
263264
self.num_rounds = args.num_rounds
264265
self.max_parallel = args.max_parallel
265266
self.output_length = args.output_length
@@ -309,6 +310,8 @@ async def request_loop():
309310

310311
def response_handler(self):
311312
next_round_reqs = []
313+
current_barrier_round = 0
314+
barrier_round_completed = 0
312315
while True:
313316
try:
314317
client_id, response = self.response_queue.get(
@@ -352,7 +355,7 @@ def response_handler(self):
352355
gen_payload(
353356
self.client_records[client_id]["history"],
354357
self.output_length,
355-
args.lora_path,
358+
self.lora_path,
356359
),
357360
)
358361
if self.enable_round_barrier:
@@ -368,10 +371,10 @@ def response_handler(self):
368371
# current barrier round have completed
369372
if (
370373
self.enable_round_barrier
371-
and current_barrier_round < self.max_rounds
374+
and current_barrier_round < self.num_rounds
372375
):
373376
barrier_round_completed += 1
374-
expected = self.clients_per_round[current_barrier_round]
377+
expected = self.num_clients
375378
if barrier_round_completed == expected:
376379
print(
377380
f"\n Barrier: round {current_barrier_round} complete "

python/sglang/srt/models/nemotron_h.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -898,4 +898,6 @@ def nemotron_mamba2_with_output(
898898

899899
# Copy result back; output may be larger (padded) so only fill actual tokens
900900
output[:num_actual_tokens].view(ret.shape).copy_(ret)
901+
if output.shape[0] > num_actual_tokens:
902+
output[num_actual_tokens:].zero_()
901903
return

0 commit comments

Comments
 (0)