[Bugfix] Fix Basic Models Test (#34818)

Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
2026-02-19 17:49:07 -05:00
parent 4fb8beefaa
commit 662205d34e
14 changed files with 175 additions and 221 deletions
--- a/vllm/v1/executor/multiproc_executor.py
+++ b/vllm/v1/executor/multiproc_executor.py
@@ -41,6 +41,7 @@ from vllm.distributed.parallel_state import (
 )
 from vllm.envs import enable_envs_cache
 from vllm.logger import init_logger
+from vllm.platforms import current_platform
 from vllm.tracing import instrument, maybe_init_worker_tracer
 from vllm.utils.network_utils import (
    get_distributed_init_method,
@@ -579,6 +580,9 @@ class WorkerProc:
        self._init_message_queues(input_shm_handle, vllm_config)
        self.worker.load_model()

+        # Set block size based on the attention backends
+        current_platform.update_block_size_for_backend(vllm_config)
+
        # Enable environment variable cache (e.g. assume no more
        # environment variable overrides after this point)
        enable_envs_cache()