@@ -69,7 +69,8 @@ RUN apt-get update -y && \
     tmux \
     vim \
     autoconf \
-    libtool
+    libtool \
+    net-tools
 
 # These headers are missing with the hpcx installer, required
 # by UCX to find RDMA devices
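The added net-tools package restores the classic ifconfig/netstat/route utilities, convenient when debugging container networking while bringing up RDMA/UCX transports. A quick smoke test once the image is built (the image tag here is illustrative, not from the Dockerfile):

$ docker run --rm <dynamo-image> bash -c "ifconfig -a && netstat -rn"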
@@ -120,12 +121,21 @@ WORKDIR /workspace
 # Copy nixl source, and use commit hash as cache hint
 COPY --from=nixl_base /opt/nixl /opt/nixl
 COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
-RUN cd /opt/nixl && \
-    mkdir build && \
-    meson setup build/ --buildtype=release --prefix=/usr/local/nixl && \
-    cd build/ && \
-    ninja && \
-    ninja install
+RUN if [ "$ARCH" = "arm64" ]; then \
+        cd /opt/nixl && \
+        mkdir build && \
+        meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
+        cd build/ && \
+        ninja && \
+        ninja install; \
+    else \
+        cd /opt/nixl && \
+        mkdir build && \
+        meson setup build/ --buildtype=release --prefix=/usr/local/nixl && \
+        cd build/ && \
+        ninja && \
+        ninja install; \
+    fi
 
 ### NATS & ETCD SETUP ###
 # nats
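The arm64 branch exists because on SBSA (aarch64 server) systems the CUDA toolkit lays out its target tree under /usr/local/cuda/targets/sbsa-linux, so NIXL's GDS backend must be pointed at it explicitly via -Dgds_path; on x86_64 meson finds GDS under the default CUDA prefix. The conditional assumes $ARCH is populated earlier in the Dockerfile. A minimal sketch of one way to wire it from BuildKit's predefined TARGETARCH build arg (values amd64/arm64) — hypothetical wiring, the real Dockerfile may define ARCH differently:

ARG TARGETARCH
# assumption: ARCH mirrors BuildKit's TARGETARCH ("amd64" or "arm64")
ARG ARCH=${TARGETARCH}
RUN echo "Configuring NIXL for ${ARCH}"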
@@ -152,65 +162,37 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"
 
 # Install NIXL Python module
-RUN cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl
+# TODO: Move gds_path selection based on arch into NIXL build
+RUN if [ "$ARCH" = "arm64" ]; then \
+        cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl \
+            --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
+    else \
+        cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl; \
+    fi
 
 # Install the wheel
 # TODO: Move NIXL wheel install to the wheel_builder stage
 RUN uv pip install /workspace/wheels/nixl/*.whl
 
-# Install patched vllm - keep this early in Dockerfile to avoid
+# Install vllm - keep this early in Dockerfile to avoid
 # rebuilds from unrelated source code changes
-ARG VLLM_REF="0.8.4"
-ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
-ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
-ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post4"
-ARG VLLM_MAX_JOBS=4
+ARG VLLM_REF="059d4cd"
+ENV CUDA_HOME=/usr/local/cuda
 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
     --mount=type=cache,target=/root/.cache/uv \
-    mkdir /tmp/vllm && \
-    uv pip install pip wheel && \
-    # NOTE: vLLM build from source on ARM can take several hours, see VLLM_MAX_JOBS details.
-    if [ "$ARCH" = "arm64" ]; then \
-        # PyTorch 2.7 supports CUDA 12.8 and aarch64 installs
-        # NIXL has a torch dependency, so need to force-reinstall to install the correct version
-        uv pip install torch==2.7.0 torchvision torchaudio --force-reinstall --index-url https://download.pytorch.org/whl/cu128 && \
-        # Download vLLM source with version matching patch
-        git clone --branch v${VLLM_REF} --depth 1 https://github.com/vllm-project/vllm.git /tmp/vllm/vllm-${VLLM_REF} && \
-        cd /tmp/vllm/vllm-${VLLM_REF}/ && \
-        # Patch vLLM source with dynamo additions
-        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
-        # WAR: Set package version check to 'vllm' instead of 'ai_dynamo_vllm' to avoid
-        # platform detection issues on ARM install.
-        # TODO: Rename package from vllm to ai_dynamo_vllm like x86 path below to remove this WAR.
-        sed -i 's/version("ai_dynamo_vllm")/version("vllm")/g' vllm/platforms/__init__.py && \
-        # Remove pytorch from vllm install dependencies
-        python use_existing_torch.py && \
-        # Build/install vllm from source
-        uv pip install -r requirements/build.txt && \
-        # MAX_JOBS set to avoid running OOM on vllm-flash-attn build, this can
-        # significantly impact the overall build time. Each job can take up
-        # to ~16GB RAM each, so tune according to available system memory.
-        MAX_JOBS=${VLLM_MAX_JOBS} uv pip install -vv . --no-build-isolation ; \
-    # Handle x86_64: Download wheel, unpack, setup for later steps
-    else \
-        python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
-        # Patch vLLM pre-built download with dynamo additions
-        cd /tmp/vllm && \
-        wheel unpack *.whl && \
-        cd vllm-${VLLM_REF}/ && \
-        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
-        # Rename the package from vllm to ai_dynamo_vllm
-        mv vllm-${VLLM_REF}.dist-info ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info && \
-        sed -i "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
-        sed -i "s/^Version: ${VLLM_REF}/Version: ${VLLM_PATCHED_PACKAGE_VERSION}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
-        # Update wheel tag from linux_${ARCH_ALT} to manylinux1_${ARCH_ALT} in WHEEL file
-        sed -i "s/Tag: cp38-abi3-linux_${ARCH_ALT}/Tag: cp38-abi3-manylinux1_${ARCH_ALT}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/WHEEL && \
-        # Also update the tag in RECORD file to match
-        sed -i "s/-cp38-abi3-linux_${ARCH_ALT}.whl/-cp38-abi3-manylinux1_${ARCH_ALT}.whl/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/RECORD && \
-        mkdir -p /workspace/dist && \
-        wheel pack . --dest-dir /workspace/dist && \
-        uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl ; \
-    fi
+    uv pip install pip cuda-python && \
+    mkdir /opt/vllm && \
+    cd /opt/vllm && \
+    git clone https://github.com/vllm-project/vllm.git && \
+    cd vllm && \
+    git checkout $VLLM_REF && \
+    VLLM_USE_PRECOMPILED=1 uv pip install -e . && \
+    cd tools/ep_kernels && \
+    bash install_python_libraries.sh && \
+    cd ep_kernels_workspace && \
+    git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
+    cd DeepGEMM && \
+    python setup.py install
 
 # Common dependencies
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
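The vLLM install changes shape entirely here: instead of downloading the released 0.8.4 wheel, patching it, and repackaging it as ai_dynamo_vllm, the build now checks out a pinned upstream commit and installs it editable. VLLM_USE_PRECOMPILED=1 is vLLM's switch for reusing prebuilt kernel binaries instead of compiling the CUDA extensions locally, which is what lets the multi-hour arm64 source-build path disappear. The core steps are reproducible outside the image for verification:

$ git clone https://github.com/vllm-project/vllm.git && cd vllm
$ git checkout 059d4cd
$ VLLM_USE_PRECOMPILED=1 uv pip install -e .

Two consequences worth noting: the clone no longer passes --depth 1 (a short-hash checkout needs history), and DeepGEMM is still compiled from source at the end via setup.py, so the layer is not fully precompiled.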
@@ -326,8 +308,6 @@ RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.comman
 
 RUN mkdir -p /home/$USERNAME/.cache/
 
-ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so
-
 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
 
 ##################################
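Dropping VLLM_KV_CAPI_PATH follows from the move off the patched ai_dynamo_vllm package: the variable was presumably only consumed by the Dynamo patch, not by stock vLLM. To confirm it is gone from a built image (tag illustrative):

$ docker run --rm <dynamo-image> bash -c 'printenv VLLM_KV_CAPI_PATH || echo unset'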
@@ -445,12 +425,7 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
     sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
     echo "cat ~/.launch_screen" >> ~/.bashrc
 
-# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
-ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
-
-ARG ARCH_ALT
-ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins
-ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu:/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins:/usr/local/ucx/lib:$LD_LIBRARY_PATH
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
 
 ########################################
 ########## Development Image ###########
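The three NIXL-related variables collapse into a single LD_LIBRARY_PATH entry under /opt/nvidia/nvda_nixl. Note the hard-coded x86_64-linux-gnu triplet: unlike the removed ${ARCH_ALT} form, this path will not resolve on arm64 builds. A quick way to inspect what the image actually ends up with (tag illustrative):

$ docker run --rm <dynamo-image> bash -c 'echo $LD_LIBRARY_PATH && ls /opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/'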
@@ -486,7 +461,6 @@ COPY --from=ci_minimum /opt/dynamo/bindings /opt/dynamo/bindings
 # Copy nats and etcd from base image
 COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
 COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/
-ENV PATH=/usr/local/bin/etcd/:$PATH
 
 # Copy UCX from base image as plugin for NIXL
 # Copy NIXL source from base image (required for NIXL plugins)
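With the PATH line removed, the etcd binaries copied into /usr/local/bin/etcd/ are no longer found by bare name; anything in the image that launches etcd now needs the full path (assuming the binary keeps its default name inside that directory):

$ docker run --rm <dynamo-image> /usr/local/bin/etcd/etcd --version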
@@ -505,32 +479,23 @@ RUN uv venv $VIRTUAL_ENV --python 3.12 && \
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
     uv pip install --requirement /tmp/requirements.txt
 
-# Install test dependencies
-#TODO: Remove this once we have a functional ci_minimum image built on top of the runtime image
-RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
-    uv pip install --requirement /tmp/requirements.txt
-
-#TODO: Remove this once we have a functional ci_minimum image built on top of the runtime image
-COPY . /workspace
-RUN uv pip install /workspace/benchmarks
-
 # Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
 # Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
 #Copy NIXL and Dynamo wheels into wheelhouse
 COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
 COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
-RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
+RUN uv pip install ai-dynamo --find-links wheelhouse && \
     uv pip install nixl --find-links wheelhouse && \
-    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/
-
-# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
-ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
+    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
+    rm -r wheelhouse
 
 # Copy launch banner
 RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
     sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
     echo "cat ~/.launch_screen" >> ~/.bashrc
 
+# Copy examples
+COPY ./examples examples/
 
-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh" ]
+ENTRYPOINT [ "/usr/bin/bash" ]
 CMD []
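Switching the ENTRYPOINT from nvidia_entrypoint.sh to bash (with CMD left empty) changes runtime behavior: the NVIDIA entrypoint's banner and environment checks no longer run, and a bare docker run now drops straight into a shell (tag illustrative):

$ docker run --rm -it --gpus all <dynamo-image>
# lands at a bash prompt instead of executing the NVIDIA entrypoint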