Skip to content

Error when vllm-v0.16.0 uses --kv-offloading-size #267

@cui36

Description

@cui36

Launch script:

vllm serve Qwen/Qwen2-1.5B \
--no-enable-prefix-caching \
--port 30000 \
--kv-offloading-size 4

A6000 error:

(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] EngineCore failed to start.
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] Traceback (most recent call last):
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/vllm/vllm/v1/engine/core.py", line 996, in run_engine_core
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     engine_core = EngineCoreProc(*args, engine_index=dp_rank, **kwargs)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/vllm/vllm/v1/engine/core.py", line 740, in __init__
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     super().__init__(
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/kvcached/integration/vllm/patches.py", line 193, in _patched_engine_init
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     return original_init(self, vllm_config, *args, **kwargs)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/vllm/vllm/v1/engine/core.py", line 113, in __init__
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     num_gpu_blocks, num_cpu_blocks, kv_cache_config = self._initialize_kv_caches(
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/vllm/vllm/v1/engine/core.py", line 275, in _initialize_kv_caches
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     self.model_executor.initialize_from_config(kv_cache_configs)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/vllm/vllm/v1/executor/abstract.py", line 117, in initialize_from_config
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     self.collective_rpc("initialize_from_config", args=(kv_cache_configs,))
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/vllm/vllm/v1/executor/uniproc_executor.py", line 75, in collective_rpc
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     result = run_method(self.driver_worker, method, args, kwargs)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/vllm/vllm/v1/serial_utils.py", line 459, in run_method
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     return func(*args, **kwargs)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/vllm/vllm/v1/worker/worker_base.py", line 316, in initialize_from_config
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     self.worker.initialize_from_config(kv_cache_config)  # type: ignore
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/vllm/vllm/v1/worker/gpu_worker.py", line 421, in initialize_from_config
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     ensure_kv_transfer_initialized(self.vllm_config, kv_cache_config)
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/vllm/vllm/distributed/kv_transfer/kv_transfer_state.py", line 67, in ensure_kv_transfer_initialized
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     _KV_CONNECTOR_AGENT = KVConnectorFactory.create_connector(
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]   File "/mnt/permanent/kvcached/vllm/vllm/distributed/kv_transfer/kv_connector/factory.py", line 59, in create_connector
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006]     raise ValueError(
(EngineCore_DP0 pid=320180) ERROR 03-05 22:50:37 [core.py:1006] ValueError: Connector OffloadingConnector does not support HMA but HMA is enabled. Please set `--disable-hybrid-kv-cache-manager`.

vLLM's OffloadingConnector does not implement SupportsHMA, so `--disable-hybrid-kv-cache-manager` needs to be set when using `--kv-offloading-size`.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions