diff --git a/tests/v1/cudagraph/test_encoder_cudagraph.py b/tests/v1/cudagraph/test_encoder_cudagraph.py index 94db43a5c..322fcb3ca 100644 --- a/tests/v1/cudagraph/test_encoder_cudagraph.py +++ b/tests/v1/cudagraph/test_encoder_cudagraph.py @@ -14,8 +14,6 @@ from typing import Any import pytest import torch - -from vllm.platforms import current_platform from vllm.v1.worker.gpu.mm.encoder_cudagraph import ( EncoderCudaGraphManager, ) @@ -25,6 +23,8 @@ from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import ( EncoderCudaGraphReplayBuffers, ) +from vllm.platforms import current_platform + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- diff --git a/vllm/v1/worker/gpu/mm/encoder_cudagraph.py b/vllm/v1/worker/encoder_cudagraph.py similarity index 100% rename from vllm/v1/worker/gpu/mm/encoder_cudagraph.py rename to vllm/v1/worker/encoder_cudagraph.py diff --git a/vllm/v1/worker/gpu/mm/encoder_cudagraph_defs.py b/vllm/v1/worker/encoder_cudagraph_defs.py similarity index 100% rename from vllm/v1/worker/gpu/mm/encoder_cudagraph_defs.py rename to vllm/v1/worker/encoder_cudagraph_defs.py diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index be7734487..d44bf74c3 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -208,7 +208,7 @@ from .utils import ( if TYPE_CHECKING: from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput from vllm.v1.spec_decode.ngram_proposer import NgramProposer - from vllm.v1.worker.gpu.mm.encoder_cudagraph import EncoderCudaGraphManager + from vllm.v1.worker.encoder_cudagraph import EncoderCudaGraphManager logger = init_logger(__name__) @@ -5972,9 +5972,7 @@ class GPUModelRunner( SupportsEncoderCudaGraph, supports_encoder_cudagraph, ) - from vllm.v1.worker.gpu.mm.encoder_cudagraph import ( - EncoderCudaGraphManager, - ) + from vllm.v1.worker.encoder_cudagraph import EncoderCudaGraphManager raw_model = self.get_model() if supports_encoder_cudagraph(raw_model):