[core][distributed] add ep group and all2all interface (#18077)

Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
youkaichao
2025-05-14 10:46:49 +08:00
committed by GitHub
parent 754b699cbe
commit 6266c57bae
8 changed files with 234 additions and 41 deletions

View File

@@ -19,7 +19,8 @@ from vllm.config import (CompilationLevel, VllmConfig,
from vllm.distributed.kv_transfer import (get_kv_transfer_group,
has_kv_transfer_group)
from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorBase_V1
-from vllm.distributed.parallel_state import get_pp_group, graph_capture
+from vllm.distributed.parallel_state import (
+    get_pp_group, graph_capture, prepare_communication_buffer_for_model)
from vllm.forward_context import get_forward_context, set_forward_context
from vllm.logger import init_logger
from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
@@ -1457,6 +1458,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
logger.info("Model loading took %.4f GiB and %.6f seconds",
self.model_memory_usage / GiB_bytes,
time_after_load - time_before_load)
+prepare_communication_buffer_for_model(self.model)

def _get_prompt_logprobs_dict(
self,