We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 966ae87, commit cfead25 (Copy full SHA for cfead25)
1 file changed
python/sglang/srt/disaggregation/mooncake/conn.py
@@ -726,7 +726,9 @@ def _send_mamba_state_slice(
726
# Each prefill sends all its dims to the appropriate offset in decode
727
src_dim_start = 0
728
num_dims_to_send = src_dim
729
- dst_dim_start = local_tp_rank_in_group * src_dim
+ writers_per_decode = self.attn_tp_size // dst_attn_tp_size
730
+ local_writer_idx = local_tp_rank_in_group % writers_per_decode
731
+ dst_dim_start = local_writer_idx * src_dim
732
else:
733
# 1 prefill rank sends to multiple decode ranks
734
# Prefill sends a slice of its dims to each decode rank
0 commit comments