
Commit 9bfa8da

refactor: replace vllm with vllm_v1 container version

1 parent a9e0891

6 files changed: 54 additions & 595 deletions

File tree

container/Dockerfile.vllm

container/Dockerfile.vllm: 47 additions & 82 deletions
@@ -69,7 +69,8 @@ RUN apt-get update -y && \
     tmux \
     vim \
     autoconf \
-    libtool
+    libtool \
+    net-tools

 # These headers are missing with the hpcx installer, required
 # by UCX to find RDMA devices
@@ -120,12 +121,21 @@ WORKDIR /workspace
 # Copy nixl source, and use commit hash as cache hint
 COPY --from=nixl_base /opt/nixl /opt/nixl
 COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
-RUN cd /opt/nixl && \
-    mkdir build && \
-    meson setup build/ --buildtype=release --prefix=/usr/local/nixl && \
-    cd build/ && \
-    ninja && \
-    ninja install
+RUN if [ "$ARCH" = "arm64" ]; then \
+        cd /opt/nixl && \
+        mkdir build && \
+        meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
+        cd build/ && \
+        ninja && \
+        ninja install; \
+    else \
+        cd /opt/nixl && \
+        mkdir build && \
+        meson setup build/ --buildtype=release --prefix=/usr/local/nixl && \
+        cd build/ && \
+        ninja && \
+        ninja install; \
+    fi

 ### NATS & ETCD SETUP ###
 # nats
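
Note: the two branches of the new RUN step differ only in the -Dgds_path option passed to meson setup. A minimal sketch (not part of this commit) of collapsing the arch check into a single build sequence; GDS_OPT is a hypothetical variable name:

    # Hypothetical refactor sketch: pick the GDS path option once, build once.
    RUN GDS_OPT=""; \
        if [ "$ARCH" = "arm64" ]; then GDS_OPT="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; fi; \
        cd /opt/nixl && \
        mkdir build && \
        meson setup build/ --buildtype=release --prefix=/usr/local/nixl $GDS_OPT && \
        cd build/ && ninja && ninja install
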
@@ -152,65 +162,37 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

 # Install NIXL Python module
-RUN cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl
+# TODO: Move gds_path selection based on arch into NIXL build
+RUN if [ "$ARCH" = "arm64" ]; then \
+        cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl \
+            --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
+    else \
+        cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl; \
+    fi

 # Install the wheel
 # TODO: Move NIXL wheel install to the wheel_builder stage
 RUN uv pip install /workspace/wheels/nixl/*.whl

-# Install patched vllm - keep this early in Dockerfile to avoid
+# Install vllm - keep this early in Dockerfile to avoid
 # rebuilds from unrelated source code changes
-ARG VLLM_REF="0.8.4"
-ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
-ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
-ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post4"
-ARG VLLM_MAX_JOBS=4
+ARG VLLM_REF="059d4cd"
+ENV CUDA_HOME=/usr/local/cuda
 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
     --mount=type=cache,target=/root/.cache/uv \
-    mkdir /tmp/vllm && \
-    uv pip install pip wheel && \
-    # NOTE: vLLM build from source on ARM can take several hours, see VLLM_MAX_JOBS details.
-    if [ "$ARCH" = "arm64" ]; then \
-        # PyTorch 2.7 supports CUDA 12.8 and aarch64 installs
-        # NIXL has a torch dependency, so need to force-reinstall to install the correct version
-        uv pip install torch==2.7.0 torchvision torchaudio --force-reinstall --index-url https://download.pytorch.org/whl/cu128 && \
-        # Download vLLM source with version matching patch
-        git clone --branch v${VLLM_REF} --depth 1 https://github.com/vllm-project/vllm.git /tmp/vllm/vllm-${VLLM_REF} && \
-        cd /tmp/vllm/vllm-${VLLM_REF}/ && \
-        # Patch vLLM source with dynamo additions
-        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
-        # WAR: Set package version check to 'vllm' instead of 'ai_dynamo_vllm' to avoid
-        # platform detection issues on ARM install.
-        # TODO: Rename package from vllm to ai_dynamo_vllm like x86 path below to remove this WAR.
-        sed -i 's/version("ai_dynamo_vllm")/version("vllm")/g' vllm/platforms/__init__.py && \
-        # Remove pytorch from vllm install dependencies
-        python use_existing_torch.py && \
-        # Build/install vllm from source
-        uv pip install -r requirements/build.txt && \
-        # MAX_JOBS set to avoid running OOM on vllm-flash-attn build, this can
-        # significantly impact the overall build time. Each job can take up
-        # to ~16GB RAM each, so tune according to available system memory.
-        MAX_JOBS=${VLLM_MAX_JOBS} uv pip install -vv . --no-build-isolation ; \
-    # Handle x86_64: Download wheel, unpack, setup for later steps
-    else \
-        python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
-        # Patch vLLM pre-built download with dynamo additions
-        cd /tmp/vllm && \
-        wheel unpack *.whl && \
-        cd vllm-${VLLM_REF}/ && \
-        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
-        # Rename the package from vllm to ai_dynamo_vllm
-        mv vllm-${VLLM_REF}.dist-info ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info && \
-        sed -i "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
-        sed -i "s/^Version: ${VLLM_REF}/Version: ${VLLM_PATCHED_PACKAGE_VERSION}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
-        # Update wheel tag from linux_${ARCH_ALT} to manylinux1_${ARCH_ALT} in WHEEL file
-        sed -i "s/Tag: cp38-abi3-linux_${ARCH_ALT}/Tag: cp38-abi3-manylinux1_${ARCH_ALT}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/WHEEL && \
-        # Also update the tag in RECORD file to match
-        sed -i "s/-cp38-abi3-linux_${ARCH_ALT}.whl/-cp38-abi3-manylinux1_${ARCH_ALT}.whl/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/RECORD && \
-        mkdir -p /workspace/dist && \
-        wheel pack . --dest-dir /workspace/dist && \
-        uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl ; \
-    fi
+    uv pip install pip cuda-python && \
+    mkdir /opt/vllm && \
+    cd /opt/vllm && \
+    git clone https://github.com/vllm-project/vllm.git && \
+    cd vllm && \
+    git checkout $VLLM_REF && \
+    VLLM_USE_PRECOMPILED=1 uv pip install -e . && \
+    cd tools/ep_kernels && \
+    bash install_python_libraries.sh && \
+    cd ep_kernels_workspace && \
+    git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
+    cd DeepGEMM && \
+    python setup.py install

 # Common dependencies
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
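
Note: VLLM_USE_PRECOMPILED=1 tells vLLM's editable install to reuse precompiled kernel binaries instead of building them, which is why the new path can drop the old MAX_JOBS tuning. A hypothetical smoke test (not part of this commit) for the resulting layer, assuming DeepGEMM installs under the module name deep_gemm:

    # Confirm both installs import cleanly inside the built image.
    python -c "import vllm; print(vllm.__version__)"
    python -c "import deep_gemm; print('DeepGEMM OK')"
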
@@ -326,8 +308,6 @@ RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.comman

 RUN mkdir -p /home/$USERNAME/.cache/

-ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so
-
 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

 ##################################
@@ -445,12 +425,7 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
     sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
     echo "cat ~/.launch_screen" >> ~/.bashrc

-# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
-ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
-
-ARG ARCH_ALT
-ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins
-ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu:/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins:/usr/local/ucx/lib:$LD_LIBRARY_PATH
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/

 ########################################
 ########## Development Image ###########
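
Note: the per-arch NIXL_PLUGIN_DIR and UCX entries are replaced by a single hardcoded x86_64-linux-gnu path, so the arm64 case no longer gets NIXL on LD_LIBRARY_PATH here. A hypothetical runtime check (not part of this commit), assuming the NIXL wheel imports as nixl:

    # Verify the packaged libraries exist and the Python binding loads.
    ls /opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
    python -c "import nixl"
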
@@ -486,7 +461,6 @@ COPY --from=ci_minimum /opt/dynamo/bindings /opt/dynamo/bindings
 # Copy nats and etcd from base image
 COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
 COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/
-ENV PATH=/usr/local/bin/etcd/:$PATH

 # Copy UCX from base image as plugin for NIXL
 # Copy NIXL source from base image (required for NIXL plugins)
@@ -505,32 +479,23 @@ RUN uv venv $VIRTUAL_ENV --python 3.12 && \
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
     uv pip install --requirement /tmp/requirements.txt

-# Install test dependencies
-#TODO: Remove this once we have a functional ci_minimum image built on top of the runtime image
-RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
-    uv pip install --requirement /tmp/requirements.txt
-
-#TODO: Remove this once we have a functional ci_minimum image built on top of the runtime image
-COPY . /workspace
-RUN uv pip install /workspace/benchmarks
-
 # Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
 # Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
 #Copy NIXL and Dynamo wheels into wheelhouse
 COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
 COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
-RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
+RUN uv pip install ai-dynamo --find-links wheelhouse && \
     uv pip install nixl --find-links wheelhouse && \
-    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/
-
-# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
-ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
+    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
+    rm -r wheelhouse

 # Copy launch banner
 RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
     sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
     echo "cat ~/.launch_screen" >> ~/.bashrc

+# Copy examples
+COPY ./examples examples/

-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
+ENTRYPOINT [ "/usr/bin/bash" ]
 CMD []
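
Note: with the entrypoint switched from the NVIDIA wrapper script to a plain bash shell, running the image now lands in an interactive prompt rather than the wrapper's startup path. A hypothetical invocation (image tag is illustrative):

    # Start an interactive shell in the built container.
    docker run --rm -it --gpus all dynamo-vllm:latest
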
