From 144030c84ebf03b4f911ad8bab6432729418729c Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Wed, 25 Mar 2026 20:52:12 -0700
Subject: [PATCH] Relocate Encoder CUDA graph manager (#38116)

Signed-off-by: Woosuk Kwon
Signed-off-by: Nick Hill
Co-authored-by: Nick Hill
---
 tests/v1/cudagraph/test_encoder_cudagraph.py          | 4 ++--
 vllm/v1/worker/{gpu/mm => }/encoder_cudagraph.py      | 0
 vllm/v1/worker/{gpu/mm => }/encoder_cudagraph_defs.py | 0
 vllm/v1/worker/gpu_model_runner.py                    | 6 ++----
 4 files changed, 4 insertions(+), 6 deletions(-)
 rename vllm/v1/worker/{gpu/mm => }/encoder_cudagraph.py (100%)
 rename vllm/v1/worker/{gpu/mm => }/encoder_cudagraph_defs.py (100%)

diff --git a/tests/v1/cudagraph/test_encoder_cudagraph.py b/tests/v1/cudagraph/test_encoder_cudagraph.py
index 94db43a5c..322fcb3ca 100644
--- a/tests/v1/cudagraph/test_encoder_cudagraph.py
+++ b/tests/v1/cudagraph/test_encoder_cudagraph.py
@@ -14,8 +14,6 @@ from typing import Any
 
 import pytest
 import torch
-
-from vllm.platforms import current_platform
 from vllm.v1.worker.gpu.mm.encoder_cudagraph import (
     EncoderCudaGraphManager,
 )
@@ -25,6 +23,8 @@ from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
     EncoderCudaGraphReplayBuffers,
 )
 
+from vllm.platforms import current_platform
+
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
diff --git a/vllm/v1/worker/gpu/mm/encoder_cudagraph.py b/vllm/v1/worker/encoder_cudagraph.py
similarity index 100%
rename from vllm/v1/worker/gpu/mm/encoder_cudagraph.py
rename to vllm/v1/worker/encoder_cudagraph.py
diff --git a/vllm/v1/worker/gpu/mm/encoder_cudagraph_defs.py b/vllm/v1/worker/encoder_cudagraph_defs.py
similarity index 100%
rename from vllm/v1/worker/gpu/mm/encoder_cudagraph_defs.py
rename to vllm/v1/worker/encoder_cudagraph_defs.py
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index be7734487..d44bf74c3 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -208,7 +208,7 @@ from .utils import (
 if TYPE_CHECKING:
     from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
     from vllm.v1.spec_decode.ngram_proposer import NgramProposer
-    from vllm.v1.worker.gpu.mm.encoder_cudagraph import EncoderCudaGraphManager
+    from vllm.v1.worker.encoder_cudagraph import EncoderCudaGraphManager
 
 logger = init_logger(__name__)
 
@@ -5972,9 +5972,7 @@ class GPUModelRunner(
                 SupportsEncoderCudaGraph,
                 supports_encoder_cudagraph,
             )
-            from vllm.v1.worker.gpu.mm.encoder_cudagraph import (
-                EncoderCudaGraphManager,
-            )
+            from vllm.v1.worker.encoder_cudagraph import EncoderCudaGraphManager
 
             raw_model = self.get_model()
             if supports_encoder_cudagraph(raw_model):