(EngineCore_DP0 pid=397122) Process EngineCore_DP0:
(EngineCore_DP0 pid=397122) Traceback (most recent call last):
(EngineCore_DP0 pid=397122) File "/usr/lib/python3.12/multiprocessing/process.py", line 314, in _bootstrap
(EngineCore_DP0 pid=397122) self.run()
(EngineCore_DP0 pid=397122) File "/usr/lib/python3.12/multiprocessing/process.py", line 108, in run
(EngineCore_DP0 pid=397122) self._target(*self._args, **self._kwargs)
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/engine/core.py", line 722, in run_engine_core
(EngineCore_DP0 pid=397122) raise e
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/engine/core.py", line 705, in run_engine_core
(EngineCore_DP0 pid=397122) engine_core = DPEngineCoreProc(*args, **kwargs)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/engine/core.py", line 975, in __init__
(EngineCore_DP0 pid=397122) super().__init__(vllm_config, local_client, handshake_address,
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/engine/core.py", line 505, in __init__
(EngineCore_DP0 pid=397122) super().__init__(vllm_config, executor_class, log_stats,
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/engine/core.py", line 82, in __init__
(EngineCore_DP0 pid=397122) self.model_executor = executor_class(vllm_config)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/executor/executor_base.py", line 54, in __init__
(EngineCore_DP0 pid=397122) self._init_executor()
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/executor/uniproc_executor.py", line 48, in _init_executor
(EngineCore_DP0 pid=397122) self.collective_rpc("init_device")
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/executor/uniproc_executor.py", line 58, in collective_rpc
(EngineCore_DP0 pid=397122) answer = run_method(self.driver_worker, method, args, kwargs)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/utils/__init__.py", line 3060, in run_method
(EngineCore_DP0 pid=397122) return func(*args, **kwargs)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/worker/worker_base.py", line 611, in init_device
(EngineCore_DP0 pid=397122) self.worker.init_device() # type: ignore
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/worker/gpu_worker.py", line 193, in init_device
(EngineCore_DP0 pid=397122) init_worker_distributed_environment(self.vllm_config, self.rank,
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/worker/gpu_worker.py", line 692, in init_worker_distributed_environment
(EngineCore_DP0 pid=397122) ensure_model_parallel_initialized(
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/parallel_state.py", line 1185, in ensure_model_parallel_initialized
(EngineCore_DP0 pid=397122) initialize_model_parallel(tensor_model_parallel_size,
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/parallel_state.py", line 1150, in initialize_model_parallel
(EngineCore_DP0 pid=397122) _DP = init_model_parallel_group(group_ranks,
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/parallel_state.py", line 883, in init_model_parallel_group
(EngineCore_DP0 pid=397122) return GroupCoordinator(
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/parallel_state.py", line 262, in __init__
(EngineCore_DP0 pid=397122) self.device_communicator = device_comm_cls(
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/device_communicators/cuda_communicator.py", line 61, in __init__
(EngineCore_DP0 pid=397122) self.symm_mem_comm = SymmMemCommunicator(
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/distributed/device_communicators/symm_mem.py", line 88, in __init__
(EngineCore_DP0 pid=397122) handle = torch_symm_mem.rendezvous(self.buffer, self.group.group_name)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/.venv/lib/python3.12/site-packages/torch/distributed/_symmetric_memory/__init__.py", line 1609, in rendezvous
(EngineCore_DP0 pid=397122) return _SymmetricMemory.rendezvous(tensor, group_name)
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) RuntimeError: CUDASymmetricMemoryAllocator::rendezvous: detected allocations from overlapping devices from different ranks.
(EngineCore_DP0 pid=397122) Exception ignored in: <function ExecutorBase.__del__ at 0x742b826a68e0>
(EngineCore_DP0 pid=397122) Traceback (most recent call last):
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/executor/executor_base.py", line 237, in __del__
(EngineCore_DP0 pid=397122) self.shutdown()
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/executor/uniproc_executor.py", line 76, in shutdown
(EngineCore_DP0 pid=397122) worker.shutdown()
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/worker/worker_base.py", line 528, in shutdown
(EngineCore_DP0 pid=397122) self.worker.shutdown()
(EngineCore_DP0 pid=397122) File "/data/vllm-community-homes/vllm-user-6/vllm/vllm/v1/worker/gpu_worker.py", line 675, in shutdown
(EngineCore_DP0 pid=397122) self.model_runner.ensure_kv_transfer_shutdown()
(EngineCore_DP0 pid=397122) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
(EngineCore_DP0 pid=397122) AttributeError: 'NoneType' object has no attribute 'ensure_kv_transfer_shutdown'
Your current environment
The output of `python collect_env.py`
🐛 Describe the bug
vllm serve --model="deepseek-ai/DeepSeek-V2-lite" --max-num-seqs 512 --data-parallel-size 2 --enable-expert-parallel --gpu-memory-utilization 0.9 --port 9256
Cause
Before submitting a new issue...