Update batch invariant to use attention config (#30704)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
@@ -931,10 +931,11 @@ def init_worker_distributed_environment(
|
||||
backend: str = "nccl",
|
||||
) -> None:
|
||||
"""Initialize the distributed environment."""
|
||||
+ attention_config = vllm_config.attention_config
|
||||
parallel_config = vllm_config.parallel_config
|
||||
from vllm.model_executor.layers.batch_invariant import init_batch_invariance
|
||||
|
||||
- init_batch_invariance()
|
||||
+ init_batch_invariance(attention_config.backend)
|
||||
set_custom_all_reduce(not parallel_config.disable_custom_all_reduce)
|
||||
|
||||
init_method = distributed_init_method or "env://"
|
||||
|
||||
Reference in New Issue
Block a user