[Misc] Introduce ec_both role for the EC (encoder cache) connector (#34182)
Signed-off-by: Qi Wang <qiwa@nvidia.com>
This commit is contained in:
@@ -7,8 +7,8 @@ from typing import Any, Literal, get_args
|
|||||||
|
|
||||||
from vllm.config.utils import config
|
from vllm.config.utils import config
|
||||||
|
|
||||||
ECProducer = Literal["ec_producer"]
|
ECProducer = Literal["ec_producer", "ec_both"]
|
||||||
ECConsumer = Literal["ec_consumer"]
|
ECConsumer = Literal["ec_consumer", "ec_both"]
|
||||||
ECRole = Literal[ECProducer, ECConsumer]
|
ECRole = Literal[ECProducer, ECConsumer]
|
||||||
|
|
||||||
|
|
||||||
@@ -33,7 +33,7 @@ class ECTransferConfig:
|
|||||||
|
|
||||||
ec_role: ECRole | None = None
|
ec_role: ECRole | None = None
|
||||||
"""Whether this vLLM instance produces, consumes EC cache, or both. Choices
|
"""Whether this vLLM instance produces, consumes EC cache, or both. Choices
|
||||||
are 'ec_producer', 'ec_consumer'."""
|
are 'ec_producer', 'ec_consumer', 'ec_both'."""
|
||||||
|
|
||||||
ec_rank: int | None = None
|
ec_rank: int | None = None
|
||||||
"""The rank of this vLLM instance in the EC cache transfer. Typical value:
|
"""The rank of this vLLM instance in the EC cache transfer. Typical value:
|
||||||
|
|||||||
@@ -63,6 +63,7 @@ class ECConnectorBase(ABC):
|
|||||||
self._role = role
|
self._role = role
|
||||||
if vllm_config.ec_transfer_config is not None:
|
if vllm_config.ec_transfer_config is not None:
|
||||||
self._is_producer = vllm_config.ec_transfer_config.is_ec_producer
|
self._is_producer = vllm_config.ec_transfer_config.is_ec_producer
|
||||||
|
self._is_consumer = vllm_config.ec_transfer_config.is_ec_consumer
|
||||||
else:
|
else:
|
||||||
raise ValueError("ec_transfer_config must be set for ECConnectorBase")
|
raise ValueError("ec_transfer_config must be set for ECConnectorBase")
|
||||||
|
|
||||||
@@ -74,6 +75,10 @@ class ECConnectorBase(ABC):
|
|||||||
def is_producer(self) -> bool:
|
def is_producer(self) -> bool:
|
||||||
return self._is_producer
|
return self._is_producer
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_consumer(self) -> bool:
|
||||||
|
return self._is_consumer
|
||||||
|
|
||||||
# ==============================
|
# ==============================
|
||||||
# Worker-side methods
|
# Worker-side methods
|
||||||
# ==============================
|
# ==============================
|
||||||
|
|||||||
@@ -72,7 +72,8 @@ class ECConnectorModelRunnerMixin:
|
|||||||
assert scheduler_output.ec_connector_metadata is not None
|
assert scheduler_output.ec_connector_metadata is not None
|
||||||
ec_connector.bind_connector_metadata(scheduler_output.ec_connector_metadata)
|
ec_connector.bind_connector_metadata(scheduler_output.ec_connector_metadata)
|
||||||
|
|
||||||
if not ec_connector.is_producer:
|
# Load caches for consumer or both roles
|
||||||
|
if ec_connector.is_consumer:
|
||||||
ec_connector.start_load_caches(encoder_cache, **kwargs)
|
ec_connector.start_load_caches(encoder_cache, **kwargs)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
Reference in New Issue
Block a user