diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 5fe594db6..6ddb2c422 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -1270,7 +1270,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin): if sync_self: assert intermediate_tensors is not None for k, v in intermediate_tensors.items(): - is_scattered = "residual" and is_residual_scattered + is_scattered = k == "residual" and is_residual_scattered copy_len = num_tokens // tp if is_scattered else \ num_tokens self.intermediate_tensors[k][:copy_len].copy_(