Skip to content

Commit ac19772

Browse files
Merge branch 'main' into feat/sglang-guided-decoding-support
2 parents a18b513 + 5841ded commit ac19772

14 files changed

Lines changed: 386 additions & 144 deletions

File tree

.github/labeler.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,12 @@ documentation:
9494
- '**/*.md'
9595
- '**/*.rst'
9696

97+
xpu:
98+
- changed-files:
99+
- any-glob-to-any-file:
100+
- '**/xpu/**'
101+
- '**/*xpu*'
102+
97103
multimodal:
98104
- changed-files:
99105
- any-glob-to-any-file:

.github/workflows/generate-allure-report.yml

Lines changed: 5 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -94,15 +94,6 @@ jobs:
9494
run: |
9595
git fetch origin gh-pages:gh-pages 2>/dev/null || echo "No gh-pages branch yet"
9696
97-
# Restore history for Allure 3 report (primary, at allure/${SUBDIR}/)
98-
# Allure 3 uses history.jsonl (not history/*.json like Allure 2)
99-
if git show "gh-pages:allure/${SUBDIR}/history.jsonl" 2>/dev/null; then
100-
git show "gh-pages:allure/${SUBDIR}/history.jsonl" > allure-v3-history.jsonl
101-
echo "Restored Allure 3 history.jsonl from gh-pages/allure/${SUBDIR}"
102-
else
103-
echo "No previous Allure 3 history found"
104-
fi
105-
10697
# Restore history for Allure 2 report (at allure/v2/${SUBDIR}/)
10798
if git show "gh-pages:allure/v2/${SUBDIR}/history" 2>/dev/null; then
10899
mkdir -p allure-results/history
@@ -119,20 +110,6 @@ jobs:
119110
run: |
120111
./allure2-cli/bin/allure generate allure-results -o allure-report --clean
121112
122-
- name: Generate Allure 3 Report
123-
if: steps.check-results.outputs.has_results == 'true'
124-
run: |
125-
mkdir -p allure-v3-workspace/allure-results
126-
# Copy results but exclude Allure 2 history dir to avoid format confusion
127-
rsync -a --exclude='history' allure-results/ allure-v3-workspace/allure-results/
128-
# Restore Allure 3 history.jsonl into the workspace
129-
if [ -f allure-v3-history.jsonl ]; then
130-
cp allure-v3-history.jsonl allure-v3-workspace/history.jsonl
131-
fi
132-
# historyPath requires a config file
133-
echo 'export default { historyPath: "./history.jsonl" };' > allure-v3-workspace/allurerc.mjs
134-
npx allure generate --config="${GITHUB_WORKSPACE}/allure-v3-workspace/allurerc.mjs" --cwd allure-v3-workspace -o allure-report-v3
135-
136113
- name: Generate unified dashboard
137114
if: steps.check-results.outputs.has_results == 'true'
138115
env:
@@ -149,18 +126,18 @@ jobs:
149126
for dir in pr post-merge nightly release; do
150127
if [ "$dir" = "$SUBDIR" ]; then continue; fi
151128
if git show "gh-pages:dashboard-results/${dir}" 2>/dev/null; then
152-
mkdir -p "unified-workspace/allure-results/${dir}-imported"
153129
git archive gh-pages "dashboard-results/${dir}" | tar -x -C /tmp/
154-
cp -r "/tmp/dashboard-results/${dir}/"* "unified-workspace/allure-results/${dir}-imported/" 2>/dev/null || true
130+
cp -r "/tmp/dashboard-results/${dir}/"* "unified-workspace/allure-results/" 2>/dev/null || true
155131
fi
156132
done
157133
158-
# Restore unified dashboard history for trend charts
134+
# Restore unified dashboard history for trend charts (or seed empty file)
159135
if git show "gh-pages:allure/history.jsonl" 2>/dev/null; then
160136
git show "gh-pages:allure/history.jsonl" > unified-workspace/history.jsonl
161137
echo "Restored unified dashboard history from gh-pages"
162138
else
163139
echo "No previous unified dashboard history found"
140+
touch unified-workspace/history.jsonl
164141
fi
165142
166143
# Generate unified dashboard using allurerc.mjs
@@ -189,23 +166,6 @@ jobs:
189166
git checkout --orphan gh-pages
190167
fi
191168
192-
# One-time cleanup: remove old path layout from before allure/ prefix migration
193-
for old_dir in allure-all pre-merge pre-merge-v2 post-merge post-merge-v2 nightly nightly-v2 release release-v2; do
194-
if [ -d "$old_dir" ]; then
195-
echo "Removing legacy directory: $old_dir"
196-
rm -rf "$old_dir"
197-
fi
198-
done
199-
200-
# Deploy Allure 3 report (primary)
201-
mkdir -p "allure/${SUBDIR}"
202-
rm -rf "allure/${SUBDIR}/"*
203-
cp -r "${GITHUB_WORKSPACE}/allure-report-v3/"* "allure/${SUBDIR}/"
204-
# Persist Allure 3 history.jsonl for trend charts
205-
if [ -f "${GITHUB_WORKSPACE}/allure-v3-workspace/history.jsonl" ]; then
206-
cp "${GITHUB_WORKSPACE}/allure-v3-workspace/history.jsonl" "allure/${SUBDIR}/history.jsonl"
207-
fi
208-
209169
# Deploy Allure 2 report
210170
mkdir -p "allure/v2/${SUBDIR}"
211171
rm -rf "allure/v2/${SUBDIR}/"*
@@ -216,7 +176,7 @@ jobs:
216176
rm -rf "dashboard-results/${SUBDIR}/"*
217177
rsync -a --exclude='history' "${GITHUB_WORKSPACE}/allure-results/" "dashboard-results/${SUBDIR}/"
218178
219-
# Deploy unified dashboard (overwrites report files, preserves subdirs)
179+
# Deploy unified dashboard (per-workflow tabs generated by allurerc.mjs plugins)
220180
mkdir -p allure
221181
cp -r "${GITHUB_WORKSPACE}/allure-all-report/"* allure/
222182
# Persist history.jsonl for trend charts
@@ -248,6 +208,5 @@ jobs:
248208
run: |
249209
echo "## Allure Reports" >> $GITHUB_STEP_SUMMARY
250210
echo "" >> $GITHUB_STEP_SUMMARY
251-
echo "- [Allure Report](https://ai-dynamo.github.io/dynamo/allure/${SUBDIR}/)" >> $GITHUB_STEP_SUMMARY
211+
echo "- [Allure Dashboard](https://ai-dynamo.github.io/dynamo/allure/)" >> $GITHUB_STEP_SUMMARY
252212
echo "- [Allure 2 Report](https://ai-dynamo.github.io/dynamo/allure/v2/${SUBDIR}/)" >> $GITHUB_STEP_SUMMARY
253-
echo "- [Unified Dashboard](https://ai-dynamo.github.io/dynamo/allure/)" >> $GITHUB_STEP_SUMMARY

components/src/dynamo/common/protocols/image_protocol.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ class NvCreateImageRequest(BaseModel):
6565
moderation: Optional[str] = None
6666
"""Content moderation level: auto or low."""
6767

68+
input_reference: Optional[str] = None
69+
"""Optional image reference that guides generation (for I2I)."""
70+
6871
nvext: Optional[ImageNvExt] = None
6972
"""NVIDIA extensions."""
7073

components/src/dynamo/sglang/protocol.py

Lines changed: 9 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from sglang.srt.entrypoints.openai.protocol import ChatCompletionRequest
88

99
from dynamo.common.multimodal import TransferRequest
10+
from dynamo.common.protocols.image_protocol import ImageNvExt
1011

1112
TokenIdType = int
1213

@@ -143,18 +144,13 @@ class DisaggSglangMultimodalRequest(BaseModel):
143144
# ============================================================================
144145

145146

146-
class NvExt(BaseModel):
147-
"""NVIDIA extensions for image generation"""
148-
149-
negative_prompt: Optional[str] = None
150-
num_inference_steps: Optional[int] = 50
151-
guidance_scale: float = 7.5
152-
seed: Optional[int] = None
153-
annotations: Optional[list[str]] = None
154-
155-
156147
class CreateImageRequest(BaseModel):
157-
"""OpenAI /v1/images/generations compatible request"""
148+
"""OpenAI /v1/images/generations and /v1/images/edits compatible request.
149+
150+
Generation params (seed, guidance_scale, num_inference_steps, negative_prompt)
151+
are specified under ``nvext``. SGLang-specific defaults (guidance_scale=7.5,
152+
num_inference_steps=50) are applied in the handler, not the model.
153+
"""
158154

159155
prompt: str
160156
model: str # e.g. "stabilityai/stable-diffusion-3.5-medium"
@@ -163,9 +159,9 @@ class CreateImageRequest(BaseModel):
163159
quality: Optional[str] = "standard" # standard, hd
164160
response_format: Optional[str] = "url" # url or b64_json
165161
user: Optional[str] = None
162+
input_reference: Optional[str] = None # For I2I/TI2I - image path/url
166163

167-
# NVIDIA extensions nested under nvext
168-
nvext: Optional[NvExt] = None
164+
nvext: Optional[ImageNvExt] = None
169165

170166

171167
class ImageData(BaseModel):

components/src/dynamo/sglang/request_handlers/image_diffusion/image_diffusion_handler.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,19 @@
1414
from PIL import Image
1515

1616
from dynamo._core import Context
17+
from dynamo.common.protocols.image_protocol import ImageNvExt
1718
from dynamo.common.storage import upload_to_fs
1819
from dynamo.common.utils.otel_tracing import build_trace_headers
1920
from dynamo.sglang.args import Config
20-
from dynamo.sglang.protocol import CreateImageRequest, ImageData, ImagesResponse, NvExt
21+
from dynamo.sglang.protocol import CreateImageRequest, ImageData, ImagesResponse
2122
from dynamo.sglang.publisher import DynamoSglangPublisher
2223
from dynamo.sglang.request_handlers.handler_base import BaseGenerativeHandler
2324

2425
logger = logging.getLogger(__name__)
2526

2627
MAX_NUM_INFERENCE_STEPS = 50
28+
DEFAULT_NUM_INFERENCE_STEPS = 50
29+
DEFAULT_GUIDANCE_SCALE = 7.5
2730

2831

2932
class ImageDiffusionWorkerHandler(BaseGenerativeHandler):
@@ -92,11 +95,17 @@ async def generate(
9295
try:
9396
req = CreateImageRequest(**request)
9497

95-
# get extra parameters
96-
nvext = req.nvext or NvExt()
97-
nvext.num_inference_steps = min(
98-
nvext.num_inference_steps or 50, MAX_NUM_INFERENCE_STEPS
99-
)
98+
nvext = req.nvext or ImageNvExt()
99+
100+
# Apply SGLang-specific defaults for unset values
101+
raw_steps = nvext.num_inference_steps or DEFAULT_NUM_INFERENCE_STEPS
102+
if raw_steps > MAX_NUM_INFERENCE_STEPS:
103+
logger.warning(
104+
f"num_inference_steps={raw_steps} exceeds max "
105+
f"{MAX_NUM_INFERENCE_STEPS}, clamping"
106+
)
107+
num_inference_steps = min(raw_steps, MAX_NUM_INFERENCE_STEPS)
108+
guidance_scale = nvext.guidance_scale or DEFAULT_GUIDANCE_SCALE
100109

101110
width, height = self._parse_size(req.size)
102111

@@ -105,9 +114,10 @@ async def generate(
105114
negative_prompt=nvext.negative_prompt,
106115
width=width,
107116
height=height,
108-
num_inference_steps=nvext.num_inference_steps,
109-
guidance_scale=nvext.guidance_scale,
117+
num_inference_steps=num_inference_steps,
118+
guidance_scale=guidance_scale,
110119
seed=nvext.seed,
120+
input_reference=req.input_reference,
111121
)
112122

113123
context_id = context.id()
@@ -145,6 +155,7 @@ async def _generate_images(
145155
guidance_scale: float,
146156
seed: Optional[int],
147157
negative_prompt: Optional[str] = None,
158+
input_reference: Optional[str] = None,
148159
) -> list[bytes]:
149160
"""Generate images using SGLang DiffGenerator"""
150161
args = {
@@ -155,8 +166,15 @@ async def _generate_images(
155166
"num_inference_steps": num_inference_steps,
156167
"save_output": False, # We handle saving ourselves
157168
"guidance_scale": guidance_scale,
158-
"seed": seed if seed else random.randint(0, 1000000),
169+
"seed": seed if seed is not None else random.randint(0, 1000000),
159170
}
171+
172+
# Add image_path for I2I/TI2I if provided
173+
if input_reference is not None:
174+
if not input_reference.strip():
175+
raise ValueError("input_reference must be a non-empty string")
176+
args["image_path"] = input_reference
177+
160178
result = await asyncio.to_thread(
161179
self.generator.generate,
162180
sampling_params_kwargs=args,
@@ -175,7 +193,7 @@ async def _generate_images(
175193
for img in images:
176194
if isinstance(img, bytes):
177195
image_bytes_list.append(img)
178-
elif Image is not None and isinstance(img, Image.Image):
196+
elif isinstance(img, Image.Image):
179197
# Convert PIL Image to bytes
180198
buf = io.BytesIO()
181199
img.save(buf, format="PNG")

components/src/dynamo/sglang/tests/test_sglang_image_diffusion_handler.py

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import base64
77
import io
88
from types import SimpleNamespace
9-
from unittest.mock import MagicMock, Mock, patch
9+
from unittest.mock import AsyncMock, MagicMock, Mock, patch
1010

1111
import pytest
1212
from PIL import Image
@@ -347,7 +347,7 @@ async def test_generate_with_nvext(self, handler, mock_context):
347347
"""Test that nvext parameters are passed to the generator."""
348348
test_image = Image.new("RGB", (256, 256), color="yellow")
349349

350-
handler._generate_images = Mock(return_value=[test_image.tobytes()])
350+
handler._generate_images = AsyncMock(return_value=[test_image.tobytes()])
351351

352352
request = {
353353
"prompt": "A yellow square",
@@ -382,4 +382,59 @@ async def test_generate_with_nvext(self, handler, mock_context):
382382
guidance_scale=7.5,
383383
seed=42,
384384
negative_prompt="negative",
385+
input_reference=None,
385386
)
387+
388+
@pytest.mark.asyncio
389+
async def test_generate_i2i_passes_image_path(
390+
self, handler, mock_context, tmp_path
391+
):
392+
"""Test that input_reference is passed as image_path to the generator."""
393+
test_image = Image.new("RGB", (256, 256), color="green")
394+
395+
handler.generator.generate = Mock(
396+
return_value=SimpleNamespace(frames=[test_image])
397+
)
398+
399+
input_ref = str(tmp_path / "test_input.png")
400+
request = {
401+
"prompt": "Transform this image",
402+
"model": "test-model",
403+
"size": "256x256",
404+
"response_format": "b64_json",
405+
"input_reference": input_ref,
406+
}
407+
408+
results = []
409+
async for result in handler.generate(request, mock_context):
410+
results.append(result)
411+
412+
# Verify image_path was passed to the generator
413+
call_args = handler.generator.generate.call_args
414+
sampling_params = call_args[1]["sampling_params_kwargs"]
415+
assert sampling_params["image_path"] == input_ref
416+
417+
@pytest.mark.asyncio
418+
async def test_generate_t2i_no_image_path(self, handler, mock_context):
419+
"""Test that image_path is NOT passed when input_reference is absent."""
420+
test_image = Image.new("RGB", (256, 256), color="red")
421+
422+
handler.generator.generate = Mock(
423+
return_value=SimpleNamespace(frames=[test_image])
424+
)
425+
426+
request = {
427+
"prompt": "A red square",
428+
"model": "test-model",
429+
"size": "256x256",
430+
"response_format": "b64_json",
431+
}
432+
433+
results = []
434+
async for result in handler.generate(request, mock_context):
435+
results.append(result)
436+
437+
# Verify image_path was NOT passed
438+
call_args = handler.generator.generate.call_args
439+
sampling_params = call_args[1]["sampling_params_kwargs"]
440+
assert "image_path" not in sampling_params

deploy/operator/internal/controller/dynamographdeployment_controller.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,13 @@ func (r *DynamoGraphDeploymentReconciler) reconcileResources(ctx context.Context
326326
return ReconcileResult{}, fmt.Errorf("failed to reconcile EPP resources: %w", err)
327327
}
328328

329+
// Reconcile the wait-for-leader ConfigMap for multinode mp deployments
330+
err = r.reconcileWaitLeaderConfigMap(ctx, dynamoDeployment)
331+
if err != nil {
332+
logger.Error(err, "Failed to reconcile wait-leader ConfigMap")
333+
return ReconcileResult{}, fmt.Errorf("failed to reconcile wait-leader ConfigMap: %w", err)
334+
}
335+
329336
// Determine if any service is multinode
330337
hasMultinode := dynamoDeployment.HasAnyMultinodeService()
331338

@@ -1582,6 +1589,21 @@ func (r *DynamoGraphDeploymentReconciler) reconcileEPPResources(ctx context.Cont
15821589
return nil
15831590
}
15841591

1592+
// reconcileWaitLeaderConfigMap ensures the wait-for-leader Python script
1593+
// ConfigMap exists for multinode DGDs. The ConfigMap is only mounted by
1594+
// vLLM mp worker pods (via UpdatePodSpec); for other backends it is inert.
1595+
func (r *DynamoGraphDeploymentReconciler) reconcileWaitLeaderConfigMap(ctx context.Context, dgd *nvidiacomv1alpha1.DynamoGraphDeployment) error {
1596+
if !dgd.HasAnyMultinodeService() {
1597+
return nil
1598+
}
1599+
1600+
cm := dynamo.GenerateWaitLeaderConfigMap(dgd.Name, dgd.Namespace)
1601+
_, _, err := commoncontroller.SyncResource(ctx, r, dgd, func(ctx context.Context) (*corev1.ConfigMap, bool, error) {
1602+
return cm, false, nil
1603+
})
1604+
return err
1605+
}
1606+
15851607
func (r *DynamoGraphDeploymentReconciler) FinalizeResource(ctx context.Context, dynamoDeployment *nvidiacomv1alpha1.DynamoGraphDeployment) error {
15861608
// for now doing nothing
15871609
return nil

deploy/operator/internal/discovery/resource.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func GetK8sDiscoveryRole(dgdName string, namespace string) *rbacv1.Role {
5555
Rules: []rbacv1.PolicyRule{
5656
{
5757
APIGroups: []string{apiGroupCore},
58-
Resources: []string{"endpoints"},
58+
Resources: []string{"endpoints", "pods"},
5959
Verbs: []string{"get", "list", "watch"},
6060
},
6161
{

0 commit comments

Comments
 (0)