diff --git a/vllm/config/ec_transfer.py b/vllm/config/ec_transfer.py index c7f56557f..a3a927d51 100644 --- a/vllm/config/ec_transfer.py +++ b/vllm/config/ec_transfer.py @@ -7,8 +7,8 @@ from typing import Any, Literal, get_args from vllm.config.utils import config -ECProducer = Literal["ec_producer"] -ECConsumer = Literal["ec_consumer"] +ECProducer = Literal["ec_producer", "ec_both"] +ECConsumer = Literal["ec_consumer", "ec_both"] ECRole = Literal[ECProducer, ECConsumer] @@ -33,7 +33,7 @@ class ECTransferConfig: ec_role: ECRole | None = None """Whether this vLLM instance produces, consumes EC cache, or both. Choices - are 'ec_producer', 'ec_consumer'.""" + are 'ec_producer', 'ec_consumer', 'ec_both'.""" ec_rank: int | None = None """The rank of this vLLM instance in the EC cache transfer. Typical value: diff --git a/vllm/distributed/ec_transfer/ec_connector/base.py b/vllm/distributed/ec_transfer/ec_connector/base.py index 2c212c29c..7f1407d0c 100644 --- a/vllm/distributed/ec_transfer/ec_connector/base.py +++ b/vllm/distributed/ec_transfer/ec_connector/base.py @@ -63,6 +63,7 @@ class ECConnectorBase(ABC): self._role = role if vllm_config.ec_transfer_config is not None: self._is_producer = vllm_config.ec_transfer_config.is_ec_producer + self._is_consumer = vllm_config.ec_transfer_config.is_ec_consumer else: raise ValueError("ec_transfer_config must be set for ECConnectorBase") @@ -74,6 +75,10 @@ class ECConnectorBase(ABC): def is_producer(self) -> bool: return self._is_producer + @property + def is_consumer(self) -> bool: + return self._is_consumer + # ============================== # Worker-side methods # ============================== diff --git a/vllm/v1/worker/ec_connector_model_runner_mixin.py b/vllm/v1/worker/ec_connector_model_runner_mixin.py index 1a347a0b9..4d785c4ef 100644 --- a/vllm/v1/worker/ec_connector_model_runner_mixin.py +++ b/vllm/v1/worker/ec_connector_model_runner_mixin.py @@ -72,7 +72,8 @@ class ECConnectorModelRunnerMixin: assert scheduler_output.ec_connector_metadata is not None ec_connector.bind_connector_metadata(scheduler_output.ec_connector_metadata) - if not ec_connector.is_producer: + # Load caches for consumer or both roles + if ec_connector.is_consumer: ec_connector.start_load_caches(encoder_cache, **kwargs) try: