[core][distributed] add ep group and all2all interface (#18077)
Signed-off-by: youkaichao <youkaichao@gmail.com>
This commit is contained in:
@@ -19,7 +19,8 @@ from vllm.config import (CompilationLevel, VllmConfig,
 from vllm.distributed.kv_transfer import (get_kv_transfer_group,
                                           has_kv_transfer_group)
 from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorBase_V1
-from vllm.distributed.parallel_state import get_pp_group, graph_capture
+from vllm.distributed.parallel_state import (
+    get_pp_group, graph_capture, prepare_communication_buffer_for_model)
 from vllm.forward_context import get_forward_context, set_forward_context
 from vllm.logger import init_logger
 from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding
|
||||
@@ -1457,6 +1458,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
         logger.info("Model loading took %.4f GiB and %.6f seconds",
                     self.model_memory_usage / GiB_bytes,
                     time_after_load - time_before_load)
+        prepare_communication_buffer_for_model(self.model)
 
     def _get_prompt_logprobs_dict(
         self,
Reference in New Issue
Block a user