[Misc] Introduce ec_both role EC (encoder cache) connector (#34182)
Signed-off-by: Qi Wang <qiwa@nvidia.com>
This commit is contained in:
@@ -7,8 +7,8 @@ from typing import Any, Literal, get_args
|
||||
|
||||
from vllm.config.utils import config
|
||||
|
||||
ECProducer = Literal["ec_producer"]
|
||||
ECConsumer = Literal["ec_consumer"]
|
||||
ECProducer = Literal["ec_producer", "ec_both"]
|
||||
ECConsumer = Literal["ec_consumer", "ec_both"]
|
||||
ECRole = Literal[ECProducer, ECConsumer]
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ class ECTransferConfig:
|
||||
|
||||
ec_role: ECRole | None = None
|
||||
"""Whether this vLLM instance produces, consumes EC cache, or both. Choices
|
||||
are 'ec_producer', 'ec_consumer'."""
|
||||
are 'ec_producer', 'ec_consumer', 'ec_both'."""
|
||||
|
||||
ec_rank: int | None = None
|
||||
"""The rank of this vLLM instance in the EC cache transfer. Typical value:
|
||||
|
||||
@@ -63,6 +63,7 @@ class ECConnectorBase(ABC):
|
||||
self._role = role
|
||||
if vllm_config.ec_transfer_config is not None:
|
||||
self._is_producer = vllm_config.ec_transfer_config.is_ec_producer
|
||||
self._is_consumer = vllm_config.ec_transfer_config.is_ec_consumer
|
||||
else:
|
||||
raise ValueError("ec_transfer_config must be set for ECConnectorBase")
|
||||
|
||||
@@ -74,6 +75,10 @@ class ECConnectorBase(ABC):
|
||||
def is_producer(self) -> bool:
|
||||
return self._is_producer
|
||||
|
||||
@property
|
||||
def is_consumer(self) -> bool:
|
||||
return self._is_consumer
|
||||
|
||||
# ==============================
|
||||
# Worker-side methods
|
||||
# ==============================
|
||||
|
||||
@@ -72,7 +72,8 @@ class ECConnectorModelRunnerMixin:
|
||||
assert scheduler_output.ec_connector_metadata is not None
|
||||
ec_connector.bind_connector_metadata(scheduler_output.ec_connector_metadata)
|
||||
|
||||
if not ec_connector.is_producer:
|
||||
# Load caches for consumer or both roles
|
||||
if ec_connector.is_consumer:
|
||||
ec_connector.start_load_caches(encoder_cache, **kwargs)
|
||||
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user