
Commit 9bfa8da

refactor: replace vllm with vllm_v1 container version

1 parent a9e0891

6 files changed: 54 additions & 595 deletions

File tree

container/Dockerfile.vllm

container/Dockerfile.vllm: 47 additions & 82 deletions
@@ -69,7 +69,8 @@ RUN apt-get update -y && \
     tmux \
     vim \
     autoconf \
-    libtool
+    libtool \
+    net-tools

 # These headers are missing with the hpcx installer, required
 # by UCX to find RDMA devices
@@ -120,12 +121,21 @@ WORKDIR /workspace
 # Copy nixl source, and use commit hash as cache hint
 COPY --from=nixl_base /opt/nixl /opt/nixl
 COPY --from=nixl_base /opt/nixl/commit.txt /opt/nixl/commit.txt
-RUN cd /opt/nixl && \
-    mkdir build && \
-    meson setup build/ --buildtype=release --prefix=/usr/local/nixl && \
-    cd build/ && \
-    ninja && \
-    ninja install
+RUN if [ "$ARCH" = "arm64" ]; then \
+        cd /opt/nixl && \
+        mkdir build && \
+        meson setup build/ --buildtype=release --prefix=/usr/local/nixl -Dgds_path=/usr/local/cuda/targets/sbsa-linux && \
+        cd build/ && \
+        ninja && \
+        ninja install; \
+    else \
+        cd /opt/nixl && \
+        mkdir build && \
+        meson setup build/ --buildtype=release --prefix=/usr/local/nixl && \
+        cd build/ && \
+        ninja && \
+        ninja install; \
+    fi

 ### NATS & ETCD SETUP ###
 # nats
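
Note: the two branches of the new RUN step differ only in the -Dgds_path option passed to meson setup. A minimal sketch (not part of this commit) of collapsing the arch check into a single build sequence; GDS_OPT is a hypothetical variable name:

    # Hypothetical refactor sketch: pick the GDS path option once, build once.
    RUN GDS_OPT=""; \
        if [ "$ARCH" = "arm64" ]; then GDS_OPT="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; fi; \
        cd /opt/nixl && \
        mkdir build && \
        meson setup build/ --buildtype=release --prefix=/usr/local/nixl $GDS_OPT && \
        cd build/ && ninja && ninja install
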
@@ -152,65 +162,37 @@ ENV VIRTUAL_ENV=/opt/dynamo/venv
 ENV PATH="${VIRTUAL_ENV}/bin:${PATH}"

 # Install NIXL Python module
-RUN cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl
+# TODO: Move gds_path selection based on arch into NIXL build
+RUN if [ "$ARCH" = "arm64" ]; then \
+        cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl \
+            --config-settings=setup-args="-Dgds_path=/usr/local/cuda/targets/sbsa-linux"; \
+    else \
+        cd /opt/nixl && uv build . --out-dir /workspace/wheels/nixl; \
+    fi

 # Install the wheel
 # TODO: Move NIXL wheel install to the wheel_builder stage
 RUN uv pip install /workspace/wheels/nixl/*.whl

-# Install patched vllm - keep this early in Dockerfile to avoid
+# Install vllm - keep this early in Dockerfile to avoid
 # rebuilds from unrelated source code changes
-ARG VLLM_REF="0.8.4"
-ARG VLLM_PATCH="vllm_v${VLLM_REF}-dynamo-kv-disagg-patch.patch"
-ARG VLLM_PATCHED_PACKAGE_NAME="ai_dynamo_vllm"
-ARG VLLM_PATCHED_PACKAGE_VERSION="0.8.4.post4"
-ARG VLLM_MAX_JOBS=4
+ARG VLLM_REF="059d4cd"
+ENV CUDA_HOME=/usr/local/cuda
 RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
     --mount=type=cache,target=/root/.cache/uv \
-    mkdir /tmp/vllm && \
-    uv pip install pip wheel && \
-    # NOTE: vLLM build from source on ARM can take several hours, see VLLM_MAX_JOBS details.
-    if [ "$ARCH" = "arm64" ]; then \
-        # PyTorch 2.7 supports CUDA 12.8 and aarch64 installs
-        # NIXL has a torch dependency, so need to force-reinstall to install the correct version
-        uv pip install torch==2.7.0 torchvision torchaudio --force-reinstall --index-url https://download.pytorch.org/whl/cu128 && \
-        # Download vLLM source with version matching patch
-        git clone --branch v${VLLM_REF} --depth 1 https://github.com/vllm-project/vllm.git /tmp/vllm/vllm-${VLLM_REF} && \
-        cd /tmp/vllm/vllm-${VLLM_REF}/ && \
-        # Patch vLLM source with dynamo additions
-        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
-        # WAR: Set package version check to 'vllm' instead of 'ai_dynamo_vllm' to avoid
-        # platform detection issues on ARM install.
-        # TODO: Rename package from vllm to ai_dynamo_vllm like x86 path below to remove this WAR.
-        sed -i 's/version("ai_dynamo_vllm")/version("vllm")/g' vllm/platforms/__init__.py && \
-        # Remove pytorch from vllm install dependencies
-        python use_existing_torch.py && \
-        # Build/install vllm from source
-        uv pip install -r requirements/build.txt && \
-        # MAX_JOBS set to avoid running OOM on vllm-flash-attn build, this can
-        # significantly impact the overall build time. Each job can take up
-        # to ~16GB RAM each, so tune according to available system memory.
-        MAX_JOBS=${VLLM_MAX_JOBS} uv pip install -vv . --no-build-isolation ; \
-    # Handle x86_64: Download wheel, unpack, setup for later steps
-    else \
-        python -m pip download --only-binary=:all: --no-deps --dest /tmp/vllm vllm==v${VLLM_REF} && \
-        # Patch vLLM pre-built download with dynamo additions
-        cd /tmp/vllm && \
-        wheel unpack *.whl && \
-        cd vllm-${VLLM_REF}/ && \
-        patch -p1 < /tmp/deps/vllm/${VLLM_PATCH} && \
-        # Rename the package from vllm to ai_dynamo_vllm
-        mv vllm-${VLLM_REF}.dist-info ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info && \
-        sed -i "s/^Name: vllm/Name: ${VLLM_PATCHED_PACKAGE_NAME}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
-        sed -i "s/^Version: ${VLLM_REF}/Version: ${VLLM_PATCHED_PACKAGE_VERSION}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/METADATA && \
-        # Update wheel tag from linux_${ARCH_ALT} to manylinux1_${ARCH_ALT} in WHEEL file
-        sed -i "s/Tag: cp38-abi3-linux_${ARCH_ALT}/Tag: cp38-abi3-manylinux1_${ARCH_ALT}/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/WHEEL && \
-        # Also update the tag in RECORD file to match
-        sed -i "s/-cp38-abi3-linux_${ARCH_ALT}.whl/-cp38-abi3-manylinux1_${ARCH_ALT}.whl/g" ${VLLM_PATCHED_PACKAGE_NAME}-${VLLM_PATCHED_PACKAGE_VERSION}.dist-info/RECORD && \
-        mkdir -p /workspace/dist && \
-        wheel pack . --dest-dir /workspace/dist && \
-        uv pip install /workspace/dist/${VLLM_PATCHED_PACKAGE_NAME}-*.whl ; \
-    fi
+    uv pip install pip cuda-python && \
+    mkdir /opt/vllm && \
+    cd /opt/vllm && \
+    git clone https://github.com/vllm-project/vllm.git && \
+    cd vllm && \
+    git checkout $VLLM_REF && \
+    VLLM_USE_PRECOMPILED=1 uv pip install -e . && \
+    cd tools/ep_kernels && \
+    bash install_python_libraries.sh && \
+    cd ep_kernels_workspace && \
+    git clone --recursive https://github.com/deepseek-ai/DeepGEMM.git && \
+    cd DeepGEMM && \
+    python setup.py install

 # Common dependencies
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
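
Note: VLLM_USE_PRECOMPILED=1 tells vLLM's editable install to reuse precompiled kernel binaries instead of building them, which is why the new path can drop the old MAX_JOBS tuning. A hypothetical smoke test (not part of this commit) for the resulting layer, assuming DeepGEMM installs under the module name deep_gemm:

    # Confirm both installs import cleanly inside the built image.
    python -c "import vllm; print(vllm.__version__)"
    python -c "import deep_gemm; print('DeepGEMM OK')"
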
@@ -326,8 +308,6 @@ RUN SNIPPET="export PROMPT_COMMAND='history -a' && export HISTFILE=$HOME/.comman

 RUN mkdir -p /home/$USERNAME/.cache/

-ENV VLLM_KV_CAPI_PATH=$HOME/dynamo/.build/target/debug/libdynamo_llm_capi.so
-
 ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]

 ##################################
@@ -445,12 +425,7 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
     sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
     echo "cat ~/.launch_screen" >> ~/.bashrc

-# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
-ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
-
-ARG ARCH_ALT
-ENV NIXL_PLUGIN_DIR=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins
-ENV LD_LIBRARY_PATH=/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu:/usr/local/nixl/lib/${ARCH_ALT}-linux-gnu/plugins:/usr/local/ucx/lib:$LD_LIBRARY_PATH
+ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/

 ########################################
 ########## Development Image ###########
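
Note: the per-arch NIXL_PLUGIN_DIR and UCX entries are replaced by a single hardcoded x86_64-linux-gnu path, so the arm64 case no longer gets NIXL on LD_LIBRARY_PATH here. A hypothetical runtime check (not part of this commit), assuming the NIXL wheel imports as nixl:

    # Verify the packaged libraries exist and the Python binding loads.
    ls /opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
    python -c "import nixl"
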
@@ -486,7 +461,6 @@ COPY --from=ci_minimum /opt/dynamo/bindings /opt/dynamo/bindings
 # Copy nats and etcd from base image
 COPY --from=base /usr/bin/nats-server /usr/bin/nats-server
 COPY --from=base /usr/local/bin/etcd/ /usr/local/bin/etcd/
-ENV PATH=/usr/local/bin/etcd/:$PATH

 # Copy UCX from base image as plugin for NIXL
 # Copy NIXL source from base image (required for NIXL plugins)
@@ -505,32 +479,23 @@ RUN uv venv $VIRTUAL_ENV --python 3.12 && \
 RUN --mount=type=bind,source=./container/deps/requirements.txt,target=/tmp/requirements.txt \
     uv pip install --requirement /tmp/requirements.txt

-# Install test dependencies
-#TODO: Remove this once we have a functional ci_minimum image built on top of the runtime image
-RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
-    uv pip install --requirement /tmp/requirements.txt
-
-#TODO: Remove this once we have a functional ci_minimum image built on top of the runtime image
-COPY . /workspace
-RUN uv pip install /workspace/benchmarks
-
 # Install the wheels and symlink executables to /usr/local/bin so dynamo components can use them
 # Dynamo components currently do not have the VIRTUAL_ENV in their PATH, so we need to symlink the executables
 #Copy NIXL and Dynamo wheels into wheelhouse
 COPY --from=base /workspace/wheels/nixl/*.whl wheelhouse/
 COPY --from=wheel_builder /workspace/dist/*.whl wheelhouse/
-RUN uv pip install ai-dynamo[vllm] --find-links wheelhouse && \
+RUN uv pip install ai-dynamo --find-links wheelhouse && \
     uv pip install nixl --find-links wheelhouse && \
-    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/
-
-# Tell vllm to use the Dynamo LLM C API for KV Cache Routing
-ENV VLLM_KV_CAPI_PATH="/opt/dynamo/bindings/lib/libdynamo_llm_capi.so"
+    ln -sf $VIRTUAL_ENV/bin/* /usr/local/bin/ && \
+    rm -r wheelhouse

 # Copy launch banner
 RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/launch_message.txt \
     sed '/^#\s/d' /workspace/launch_message.txt > ~/.launch_screen && \
     echo "cat ~/.launch_screen" >> ~/.bashrc

+# Copy examples
+COPY ./examples examples/

-ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
+ENTRYPOINT [ "/usr/bin/bash" ]
 CMD []
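
Note: with the entrypoint switched from the NVIDIA wrapper script to a plain bash shell, running the image now lands in an interactive prompt rather than the wrapper's startup path. A hypothetical invocation (image tag is illustrative):

    # Start an interactive shell in the built container.
    docker run --rm -it --gpus all dynamo-vllm:latest
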
