
Commit 5f7aee7

ch-wan and claude authored
refactor(moe): de-duplicate triton MoE runner path into shared helpers (#23019)
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent fd7db0e · commit 5f7aee7

322 files changed: 567 additions & 682 deletions

Some content is hidden: large commits hide part of the diff by default, so only a subset of the 322 changed files is shown below.
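All of the changes shown below follow the same mechanical migration: the Triton fused-MoE helpers move out of sglang.srt.layers.moe.fused_moe_triton into the shared sglang.srt.layers.moe.moe_runner.triton_utils package, and each caller updates its imports accordingly. A minimal before/after sketch of the pattern, taken directly from the diffs that follow:

# Before this commit (old module path, now removed):
# from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe

# After this commit (shared helper location):
from sglang.srt.layers.moe.moe_runner.triton_utils.fused_moe import fused_moe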

3rdparty/amd/tuning/benchmark_moe_rocm.py

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@
 from tqdm import tqdm
 from transformers import AutoConfig
 
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
+from sglang.srt.layers.moe.moe_runner.triton_utils.fused_moe import (
     fused_moe,
     get_config_file_name,
 )

benchmark/kernels/fused_moe_triton/benchmark_sglang_fused_moe_triton.py

Lines changed: 3 additions & 3 deletions
@@ -12,13 +12,13 @@
     init_distributed_environment,
     initialize_model_parallel,
 )
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
-    fused_moe as fused_moe_sglang,
-)
 from sglang.srt.layers.moe.fused_moe_triton.triton_kernels_moe import (
     triton_kernel_moe_forward,
 )
 from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner.triton_utils.fused_moe import (
+    fused_moe as fused_moe_sglang,
+)
 from sglang.srt.layers.moe.topk import (
     TopK,
     TopKConfig,

benchmark/kernels/fused_moe_triton/benchmark_torch_compile_fused_moe.py

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@
 from transformers import AutoConfig
 
 from sglang.benchmark.bench_utils import run_bench
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
+from sglang.srt.layers.moe.moe_runner.triton_utils.fused_moe import (
     fused_moe as fused_moe_triton,
 )
 from sglang.srt.model_executor.cuda_graph_runner import set_torch_compile_config

benchmark/kernels/fused_moe_triton/benchmark_vllm_vs_sglang_fused_moe_triton.py

Lines changed: 1 addition & 1 deletion
@@ -12,7 +12,7 @@
     init_distributed_environment,
     initialize_model_parallel,
 )
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
+from sglang.srt.layers.moe.moe_runner.triton_utils.fused_moe import (
     fused_moe as fused_moe_sglang,
 )
 

benchmark/kernels/fused_moe_triton/common_utils.py

Lines changed: 2 additions & 2 deletions
@@ -3,8 +3,8 @@
 
 import torch
 
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe import get_config_dtype_str
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe_triton_config import (
+from sglang.srt.layers.moe.moe_runner.triton_utils.fused_moe import get_config_dtype_str
+from sglang.srt.layers.moe.moe_runner.triton_utils.fused_moe_triton_config import (
     get_config_file_name,
 )
 from sglang.srt.utils import is_hip

benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton.py

Lines changed: 3 additions & 3 deletions
@@ -20,13 +20,13 @@
 from ray.experimental.tqdm_ray import tqdm
 
 from sglang.srt.layers.moe.fused_moe_triton import override_config
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_moe
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe_triton_config import (
+from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner.triton_utils.fused_moe import fused_moe
+from sglang.srt.layers.moe.moe_runner.triton_utils.fused_moe_triton_config import (
     get_config_dtype_str,
     get_default_config,
     get_moe_configs,
 )
-from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
 from sglang.srt.layers.moe.topk import TopKConfig, select_experts
 from sglang.srt.server_args import (
     ServerArgs,
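Note that tuning_fused_moe_triton.py keeps importing the override_config context manager from sglang.srt.layers.moe.fused_moe_triton (its implementation moves in this commit; see the __init__.py diff below). A hypothetical sketch of how a tuning loop would use it; the config keys and the benchmark_one_batch helper are illustrative assumptions, not code from this commit:

from sglang.srt.layers.moe.fused_moe_triton import override_config

# Candidate Triton kernel config; key names are illustrative assumptions.
candidate = {"BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 32, "GROUP_SIZE_M": 8}

with override_config(candidate):
    # While the context is active, get_config() returns `candidate`, so the
    # fused MoE path uses it instead of looking up tuned defaults.
    latency_ms = benchmark_one_batch()  # hypothetical benchmark helper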

benchmark/kernels/fused_moe_triton/tuning_fused_moe_triton_sep.py

Lines changed: 3 additions & 3 deletions
@@ -22,15 +22,15 @@
 )
 from ray.experimental.tqdm_ray import tqdm
 
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe import (
+from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
+from sglang.srt.layers.moe.moe_runner.triton_utils.fused_moe import (
     get_config_dtype_str,
     invoke_fused_moe_kernel,
     moe_align_block_size,
 )
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe_triton_config import (
+from sglang.srt.layers.moe.moe_runner.triton_utils.fused_moe_triton_config import (
     get_config_file_name,
 )
-from sglang.srt.layers.moe.moe_runner import MoeRunnerConfig
 from sglang.srt.layers.moe.topk import TopKConfig, select_experts
 from sglang.srt.server_args import (
     ServerArgs,

python/sglang/srt/layers/moe/fused_moe_triton/__init__.py

Lines changed: 6 additions & 25 deletions
@@ -1,35 +1,16 @@
-from contextlib import contextmanager
-from typing import Any, Dict, Optional
-
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe import fused_experts
-from sglang.srt.layers.moe.fused_moe_triton.fused_moe_triton_config import (
-    get_config_file_name,
-    try_get_optimal_moe_config,
-)
 from sglang.srt.layers.moe.fused_moe_triton.layer import (
     FusedMoE,
     FusedMoeWeightScaleSupported,
 )
-from sglang.srt.layers.moe.fused_moe_triton.moe_align_block_size import (
+from sglang.srt.layers.moe.moe_runner.triton_utils import (
+    fused_experts,
+    get_config,
+    get_config_file_name,
     moe_align_block_size,
+    override_config,
+    try_get_optimal_moe_config,
 )
 
-_config: Optional[Dict[str, Any]] = None
-
-
-@contextmanager
-def override_config(config):
-    global _config
-    old_config = _config
-    _config = config
-    yield
-    _config = old_config
-
-
-def get_config() -> Optional[Dict[str, Any]]:
-    return _config
-
-
 __all__ = [
     "FusedMoE",
     "FusedMoeWeightScaleSupported",
