@@ -31,6 +31,13 @@ class RouterArgs:
3131 host : str = "127.0.0.1"
3232 port : int = 30000
3333
34+ # PD-specific configuration
35+ pd_disaggregated : bool = False # Enable PD disaggregated mode
36+ prefill_urls : List [tuple ] = dataclasses .field (
37+ default_factory = list
38+ ) # List of (url, bootstrap_port)
39+ decode_urls : List [str ] = dataclasses .field (default_factory = list )
40+
3441 # Routing policy
3542 policy : str = "cache_aware"
3643 worker_startup_timeout_secs : int = 300
@@ -40,7 +47,7 @@ class RouterArgs:
4047 balance_rel_threshold : float = 1.0001
4148 eviction_interval : int = 60
4249 max_tree_size : int = 2 ** 24
43- max_payload_size : int = 4 * 1024 * 1024 # 4MB
50+ max_payload_size : int = 256 * 1024 * 1024 # 256MB default for large batches
4451 verbose : bool = False
4552 log_dir : Optional [str ] = None
4653 # Service discovery configuration
@@ -95,8 +102,29 @@ def add_cli_args(
95102 f"--{ prefix } policy" ,
96103 type = str ,
97104 default = RouterArgs .policy ,
98- choices = ["random" , "round_robin" , "cache_aware" ],
99- help = "Load balancing policy to use" ,
105+ choices = ["random" , "round_robin" , "cache_aware" , "power_of_two" ],
106+ help = "Load balancing policy to use. Note: power_of_two is only available in PD disaggregated mode" ,
107+ )
108+
109+ # PD-specific arguments
110+ parser .add_argument (
111+ f"--{ prefix } pd-disaggregated" ,
112+ action = "store_true" ,
113+ help = "Enable PD (Prefill-Decode) disaggregated mode" ,
114+ )
115+ parser .add_argument (
116+ f"--{ prefix } prefill" ,
117+ nargs = 2 ,
118+ action = "append" ,
119+ metavar = ("URL" , "BOOTSTRAP_PORT" ),
120+ help = "Prefill server URL and bootstrap port. Can be specified multiple times. BOOTSTRAP_PORT can be 'none' for no bootstrap port." ,
121+ )
122+ parser .add_argument (
123+ f"--{ prefix } decode" ,
124+ nargs = 1 ,
125+ action = "append" ,
126+ metavar = ("URL" ,),
127+ help = "Decode server URL. Can be specified multiple times." ,
100128 )
101129 parser .add_argument (
102130 f"--{ prefix } worker-startup-timeout-secs" ,
@@ -205,11 +233,19 @@ def from_cli_args(
205233 use_router_prefix: If True, look for arguments with 'router-' prefix
206234 """
207235 prefix = "router_" if use_router_prefix else ""
208- worker_urls = args .worker_urls if args .worker_urls is not None else []
236+ worker_urls = getattr (args , "worker_urls" , [])
237+
238+ # Parse PD URLs
239+ prefill_urls = cls ._parse_prefill_urls (getattr (args , f"{ prefix } prefill" , None ))
240+ decode_urls = cls ._parse_decode_urls (getattr (args , f"{ prefix } decode" , None ))
241+
209242 return cls (
210243 worker_urls = worker_urls ,
211244 host = args .host ,
212245 port = args .port ,
246+ pd_disaggregated = getattr (args , f"{ prefix } pd_disaggregated" , False ),
247+ prefill_urls = prefill_urls ,
248+ decode_urls = decode_urls ,
213249 policy = getattr (args , f"{ prefix } policy" ),
214250 worker_startup_timeout_secs = getattr (
215251 args , f"{ prefix } worker_startup_timeout_secs"
@@ -247,13 +283,54 @@ def _parse_selector(selector_list):
247283 selector [key ] = value
248284 return selector
249285
286+ @staticmethod
287+ def _parse_prefill_urls (prefill_list ):
288+ """Parse prefill URLs from --prefill arguments.
289+
290+ Format: --prefill URL BOOTSTRAP_PORT
291+ Example: --prefill http://prefill1:8080 9000 --prefill http://prefill2:8080 none
292+ """
293+ if not prefill_list :
294+ return []
295+
296+ prefill_urls = []
297+ for url , bootstrap_port_str in prefill_list :
298+ # Handle 'none' as None
299+ if bootstrap_port_str .lower () == "none" :
300+ bootstrap_port = None
301+ else :
302+ try :
303+ bootstrap_port = int (bootstrap_port_str )
304+ except ValueError :
305+ raise ValueError (
306+ f"Invalid bootstrap port: { bootstrap_port_str } . Must be a number or 'none'"
307+ )
308+
309+ prefill_urls .append ((url , bootstrap_port ))
310+
311+ return prefill_urls
312+
313+ @staticmethod
314+ def _parse_decode_urls (decode_list ):
315+ """Parse decode URLs from --decode arguments.
316+
317+ Format: --decode URL
318+ Example: --decode http://decode1:8081 --decode http://decode2:8081
319+ """
320+ if not decode_list :
321+ return []
322+
323+ # decode_list is a list of single-element lists due to nargs=1
324+ return [url [0 ] for url in decode_list ]
325+
250326
def policy_from_str(policy_str: str) -> PolicyType:
    """Look up the PolicyType member for a policy name.

    Recognized names are "random", "round_robin", "cache_aware" and
    "power_of_two". Any other name raises KeyError from the dictionary
    lookup.
    """
    known_policies = dict(
        random=PolicyType.Random,
        round_robin=PolicyType.RoundRobin,
        cache_aware=PolicyType.CacheAware,
        power_of_two=PolicyType.PowerOfTwo,
    )
    return known_policies[policy_str]
259336
@@ -277,8 +354,19 @@ def launch_router(args: argparse.Namespace) -> Optional[Router]:
277354 else :
278355 router_args = args
279356
357+ # Validate configuration based on mode
358+ if router_args .pd_disaggregated :
359+ # Validate PD configuration
360+ if not router_args .prefill_urls :
361+ raise ValueError ("PD disaggregated mode requires --prefill" )
362+ if not router_args .decode_urls :
363+ raise ValueError ("PD disaggregated mode requires --decode" )
364+
365+ # Create router with unified constructor
280366 router = Router (
281- worker_urls = router_args .worker_urls ,
367+ worker_urls = (
368+ router_args .worker_urls if not router_args .pd_disaggregated else []
369+ ),
282370 host = router_args .host ,
283371 port = router_args .port ,
284372 policy = policy_from_str (router_args .policy ),
@@ -298,6 +386,13 @@ def launch_router(args: argparse.Namespace) -> Optional[Router]:
298386 service_discovery_namespace = router_args .service_discovery_namespace ,
299387 prometheus_port = router_args .prometheus_port ,
300388 prometheus_host = router_args .prometheus_host ,
389+ pd_disaggregated = router_args .pd_disaggregated ,
390+ prefill_urls = (
391+ router_args .prefill_urls if router_args .pd_disaggregated else None
392+ ),
393+ decode_urls = (
394+ router_args .decode_urls if router_args .pd_disaggregated else None
395+ ),
301396 )
302397
303398 router .start ()
@@ -326,8 +421,14 @@ def parse_router_args(args: List[str]) -> RouterArgs:
326421multi-node setups or when you want to start workers and router separately.
327422
328423Examples:
424+ # Regular mode
329425 python -m sglang_router.launch_router --worker-urls http://worker1:8000 http://worker2:8000
330- python -m sglang_router.launch_router --worker-urls http://worker1:8000 http://worker2:8000 --cache-threshold 0.7 --balance-abs-threshold 64 --balance-rel-threshold 1.2
426+
427+ # PD disaggregated mode
428+ python -m sglang_router.launch_router --pd-disaggregated \\
429+ --prefill http://prefill1:8000 9000 --prefill http://prefill2:8000 none \\
430+ --decode http://decode1:8001 --decode http://decode2:8001 \\
431+ --policy cache_aware
331432
332433 """ ,
333434 formatter_class = CustomHelpFormatter ,
0 commit comments