Skip to content

Commit 2224cb1

Browse files
committed
sglang: bump to 0.5.11 and prune 0.5.9 fallbacks in _compat
All 9 testable launch scripts in examples/backends/sglang/launch/ pass on 0.5.11 with no handler/init code changes. (disagg_router needs 4 GPUs and multimodal_disagg needs 3 GPUs; both skipped on the test box's 2x L40S.)

The new support window is N=0.5.11, N-1=0.5.10, so the 0.5.9-targeted branches in _compat.py come out as part of this bump per the N/N-1 policy in components/src/dynamo/sglang/CLAUDE.md.

Removed:
* `NetworkAddress` polyfill class + `try/except` around its import (`sglang.srt.utils.network` is canonical from 0.5.10 onward).
* `mm_encode()` wrapper. Both 0.5.10 and 0.5.11 take `_encode(mm_items, modality)` and return the same 3-tuple, so the call sites in `encode_worker_handler.py` now invoke `await self.encoder._encode(...)` directly.
* `enable_disjoint_streaming_output` `stream_output` fallback. The field has been `incremental_streaming_output` since 0.5.10 or earlier (verified: `ServerArgs.__dataclass_fields__` in 0.5.11 has `incremental_streaming_output` and not `stream_output`). The wrapper itself stays — diffusion `SimpleNamespace` stubs need the attribute-absent no-op path.

Kept (not version-bound):
* `ensure_sglang_top_level_exports()`, `filter_supported_async_generate_kwargs`, `get_scheduler_info` (the latter still probes fork/experimental attribute paths).

CLAUDE.md example version bumped to reflect 0.5.10 as the new N-1.
1 parent 26af597 commit 2224cb1

4 files changed

Lines changed: 15 additions & 147 deletions

File tree

components/src/dynamo/sglang/CLAUDE.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ support the current version plus 1 version back (N and N-1). The pattern:
2020
enough surface area to cover what Dynamo actually calls.
2121
4. Each fallback branch in `_compat.py` MUST have a comment noting which SGLang
2222
version it supports and when it can be removed, e.g.:
23-
`# Fallback for sglang <= 0.5.9. Remove when min supported version is 0.6.0+`
23+
`# Fallback for sglang <= 0.5.10. Remove when min supported version is 0.5.12+`
2424
5. When a new SGLang version is released and the old N-1 falls outside the support
2525
window, delete the corresponding fallback branches and polyfills from `_compat.py`.
2626
If `_compat.py` becomes trivial re-exports, inline the imports and delete the file.
@@ -341,7 +341,7 @@ Checklist for adding a new worker (e.g., a new modality or serving mode):
341341

342342
```
343343
sglang/
344-
_compat.py # SGLang version compat shim (network imports + NetworkAddress polyfill)
344+
_compat.py # SGLang version compat shim (signature probing for async_generate kwargs)
345345
__main__.py # Entry point
346346
main.py # Worker dispatch
347347
args.py # Config parsing (ServerArgs vs SimpleNamespace)

components/src/dynamo/sglang/_compat.py

Lines changed: 9 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,16 @@
1616
"""
1717

1818
import inspect
19-
import ipaddress
2019
import logging
21-
import socket
2220
from functools import lru_cache
2321
from typing import Any
2422

23+
from sglang.srt.utils.network import ( # noqa: F401 — re-exported for callers
24+
NetworkAddress,
25+
get_local_ip_auto,
26+
get_zmq_socket,
27+
)
28+
2529
logger = logging.getLogger(__name__)
2630

2731

@@ -105,117 +109,6 @@ def filter_supported_async_generate_kwargs(
105109
return {key: value for key, value in kwargs.items() if key in supported_kwarg_names}
106110

107111

108-
# ---------------------------------------------------------------------------
109-
# Network utilities: NetworkAddress, get_local_ip_auto, get_zmq_socket
110-
#
111-
# 0.5.10+: sglang.srt.utils.network (canonical)
112-
# 0.5.9: sglang.srt.utils (get_local_ip_auto, get_zmq_socket only;
113-
# NetworkAddress did not exist)
114-
# ---------------------------------------------------------------------------
115-
try:
116-
from sglang.srt.utils.network import ( # noqa: F401
117-
NetworkAddress,
118-
get_local_ip_auto,
119-
get_zmq_socket,
120-
)
121-
except ImportError:
122-
# Fallback for sglang 0.5.9. Remove when min supported version is 0.5.10+
123-
from sglang.srt.utils import ( # type: ignore[no-redef] # noqa: F401
124-
get_local_ip_auto,
125-
get_zmq_socket,
126-
)
127-
128-
logger.info(
129-
"sglang.srt.utils.network not found (sglang 0.5.9); "
130-
"using compatibility shim for NetworkAddress"
131-
)
132-
133-
class NetworkAddress: # type: ignore[no-redef]
134-
"""Minimal polyfill for sglang.srt.utils.network.NetworkAddress."""
135-
136-
def __init__(self, host: str, port: int) -> None:
137-
self.host = host
138-
self.port = port
139-
140-
@property
141-
def is_ipv6(self) -> bool:
142-
try:
143-
ipaddress.IPv6Address(self.host)
144-
return True
145-
except ValueError:
146-
return False
147-
148-
@classmethod
149-
def parse(cls, addr: str) -> "NetworkAddress":
150-
"""Parse 'host:port', '[IPv6]:port', or bare host."""
151-
addr = addr.strip()
152-
if addr.startswith("["):
153-
end = addr.find("]")
154-
host = addr[1:end] if end != -1 else addr.strip("[]")
155-
rest = addr[end + 1 :] if end != -1 else ""
156-
if rest.startswith(":") and rest[1:].isdigit():
157-
return cls(host, int(rest[1:]))
158-
return cls(host, 0)
159-
if addr.count(":") == 1:
160-
host_part, port_part = addr.rsplit(":", 1)
161-
if port_part.isdigit():
162-
return cls(host_part, int(port_part))
163-
return cls(addr, 0)
164-
165-
def resolved(self) -> "NetworkAddress":
166-
"""DNS-resolve the host, preserving port."""
167-
try:
168-
infos = socket.getaddrinfo(
169-
self.host, None, family=socket.AF_UNSPEC, type=socket.SOCK_STREAM
170-
)
171-
resolved_ip = infos[0][4][0]
172-
return NetworkAddress(resolved_ip, self.port)
173-
except socket.gaierror:
174-
return self
175-
176-
def to_host_port_str(self) -> str:
177-
"""Return '[IPv6]:port' or 'host:port'."""
178-
if self.is_ipv6:
179-
return f"[{self.host}]:{self.port}"
180-
return f"{self.host}:{self.port}"
181-
182-
def to_tcp(self) -> str:
183-
"""Return 'tcp://[IPv6]:port' or 'tcp://host:port'."""
184-
if self.is_ipv6:
185-
return f"tcp://[{self.host}]:{self.port}"
186-
return f"tcp://{self.host}:{self.port}"
187-
188-
189-
# ---------------------------------------------------------------------------
190-
# MMEncoder._encode() adapter
191-
#
192-
# 0.5.10+: _encode(mm_items, modality) -> (grid_dim, embedding, aux_data)
193-
# 0.5.9: _encode(mm_items) -> (grid_dim, embedding)
194-
#
195-
# Imports are deferred to avoid pulling sgl_kernel (CUDA-only) at module
196-
# level, which breaks test collection on arm64 CPU-only CI nodes.
197-
# ---------------------------------------------------------------------------
198-
199-
200-
async def mm_encode(encoder: Any, mm_items: Any, modality: Any) -> tuple:
201-
"""Version-safe wrapper around MMEncoder._encode().
202-
203-
Always returns (grid_dim, embedding, aux_data). On sglang 0.5.9
204-
_encode takes no modality arg and returns a 2-tuple; on 0.5.10+ it
205-
takes modality and returns a 3-tuple. We try the new signature first
206-
and fall back to the old one.
207-
"""
208-
try:
209-
result = await encoder._encode(mm_items, modality)
210-
except TypeError:
211-
# sglang 0.5.9: _encode(mm_items) -> (grid_dim, embedding)
212-
result = await encoder._encode(mm_items)
213-
214-
if len(result) == 2:
215-
return (*result, None)
216-
return result
217-
218-
219112
def get_scheduler_info(engine: Any) -> dict:
220113
"""Return the scheduler-info dict for rank-0 of an ``sgl.Engine``.
221114
@@ -249,36 +142,13 @@ def get_scheduler_info(engine: Any) -> dict:
249142

250143

251144
def enable_disjoint_streaming_output(server_args: Any) -> None:
252-
"""
253-
Enable SGLang's disjoint streaming output across ServerArgs field renames.
145+
"""Enable SGLang's disjoint streaming output.
254146
255-
Covers sglang <= 0.5.x (`stream_output`) and newer releases
256-
(`incremental_streaming_output`).
147+
Diffusion workers pass a ``SimpleNamespace`` stub that does not carry the
148+
field, so this is a no-op when the attribute is absent.
257149
"""
258-
fields = getattr(type(server_args), "__dataclass_fields__", None)
259-
if isinstance(fields, dict):
260-
if "incremental_streaming_output" in fields:
261-
server_args.incremental_streaming_output = True
262-
return
263-
if "stream_output" in fields:
264-
server_args.stream_output = True
265-
return
266-
raise AttributeError(
267-
"SGLang ServerArgs has neither 'incremental_streaming_output' nor "
268-
"'stream_output'"
269-
)
270-
271150
if hasattr(server_args, "incremental_streaming_output"):
272151
server_args.incremental_streaming_output = True
273-
return
274-
if hasattr(server_args, "stream_output"):
275-
server_args.stream_output = True
276-
return
277-
278-
logger.debug(
279-
"Skipping streaming output compatibility for non-ServerArgs object: %s",
280-
type(server_args).__name__,
281-
)
282152

283153

284154
__all__ = [
@@ -289,5 +159,4 @@ def enable_disjoint_streaming_output(server_args: Any) -> None:
289159
"get_local_ip_auto",
290160
"get_scheduler_info",
291161
"get_zmq_socket",
292-
"mm_encode",
293162
]

components/src/dynamo/sglang/request_handlers/multimodal/encode_worker_handler.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@
2626
)
2727
from dynamo.common.multimodal import EMBEDDING_SENDER_FACTORIES
2828
from dynamo.common.utils import nvtx_utils as _nvtx
29-
from dynamo.sglang._compat import mm_encode
3029
from dynamo.sglang.args import Config
3130
from dynamo.sglang.protocol import (
3231
MultiModalGroup,
@@ -218,8 +217,8 @@ async def _encode_with_cache(
218217
# SGLang's _encode outputs are already on CPU; use CPU as target for consistency
219218
target_device = torch.device("cpu")
220219
if uncached_urls:
221-
grid_dim, new_embeddings, _aux = await mm_encode(
222-
self.encoder, uncached_urls, Modality.IMAGE
220+
grid_dim, new_embeddings, _aux = await self.encoder._encode(
221+
uncached_urls, Modality.IMAGE
223222
)
224223
# Verify SGLang output is on CPU as expected
225224
if new_embeddings.device != target_device:
@@ -344,7 +343,7 @@ async def generate(
344343
image_grid_dim,
345344
precomputed_embeddings,
346345
_aux,
347-
) = await mm_encode(self.encoder, image_urls, Modality.IMAGE)
346+
) = await self.encoder._encode(image_urls, Modality.IMAGE)
348347

349348
image_grid_thw_list = (
350349
image_grid_dim.tolist()

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ vllm = [
6464

6565
sglang = [
6666
"uvloop",
67-
"sglang[diffusion]==0.5.10.post1",
67+
"sglang[diffusion]==0.5.11",
6868
"nixl[cu12]>=1.0.0,<1.1.0",
6969
"cupy-cuda12x>=13.0.0",
7070
]

0 commit comments

Comments
 (0)