-
Notifications
You must be signed in to change notification settings - Fork 95
Error when vllm-v0.16.0 uses --kv-offloading-size #267
Copy link
Copy link
Open
Description
Launch script:
vllm serve Qwen/Qwen2-1.5B \
--no-enable-prefix-caching \
--port 30000 \
--kv-offloading-size 4
A6000 error:
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] EngineCore failed to start.
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] Traceback (most recent call last):
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/vllm/vllm/v1/engine/core.py", line 996, in run_engine_core
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] engine_core = EngineCoreProc(*args, engine_index=dp_rank, **kwargs)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/vllm/vllm/v1/engine/core.py", line 740, in __init__
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] super().__init__(
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/kvcached/integration/vllm/patches.py", line 193, in _patched_engine_init
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] return original_init(self, vllm_config, *args, **kwargs)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/vllm/vllm/v1/engine/core.py", line 113, in __init__
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] num_gpu_blocks, num_cpu_blocks, kv_cache_config = self._initialize_kv_caches(
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/vllm/vllm/v1/engine/core.py", line 275, in _initialize_kv_caches
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] self.model_executor.initialize_from_config(kv_cache_configs)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/vllm/vllm/v1/executor/abstract.py", line 117, in initialize_from_config
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] self.collective_rpc("initialize_from_config", args=(kv_cache_configs,))
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/vllm/vllm/v1/executor/uniproc_executor.py", line 75, in collective_rpc
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] result = run_method(self.driver_worker, method, args, kwargs)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/vllm/vllm/v1/serial_utils.py", line 459, in run_method
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] return func(*args, **kwargs)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/vllm/vllm/v1/worker/worker_base.py", line 316, in initialize_from_config
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] self.worker.initialize_from_config(kv_cache_config) # type: ignore
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/vllm/vllm/v1/worker/gpu_worker.py", line 421, in initialize_from_config
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] ensure_kv_transfer_initialized(self.vllm_config, kv_cache_config)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/vllm/vllm/distributed/kv_transfer/kv_transfer_state.py", line 67, in ensure_kv_transfer_initialized
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] _KV_CONNECTOR_AGENT = KVConnectorFactory.create_connector(
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] File "/mnt/permanent/kvcached/vllm/vllm/distributed/kv_transfer/kv_connector/factory.py", line 59, in create_connector
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] raise ValueError(
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] ValueError: Connector OffloadingConnector does not support HMA but HMA is enabled. Please set `--disable-hybrid-kv-cache-manager`.
The vLLM OffloadingConnector doesn't implement SupportsHMA, so `--disable-hybrid-kv-cache-manager` needs to be set.
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
No labels