[Bug] Fix Import paths for encoder_cudagraph modules (#38997)

Signed-off-by: greg pereira <grpereir@redhat.com>
Signed-off-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>
Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com>

This commit is contained in:
@@ -14,17 +14,17 @@ from typing import Any

 import pytest
 import torch

-from vllm.v1.worker.gpu.mm.encoder_cudagraph import (
+from vllm.platforms import current_platform
+from vllm.v1.worker.encoder_cudagraph import (
     EncoderCudaGraphManager,
 )
-from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
+from vllm.v1.worker.encoder_cudagraph_defs import (
     EncoderCudaGraphCaptureInputs,
     EncoderCudaGraphConfig,
     EncoderCudaGraphReplayBuffers,
 )
-
-from vllm.platforms import current_platform

 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------

@@ -46,7 +46,7 @@ if TYPE_CHECKING:
     from vllm.multimodal.inputs import MultiModalFeatureSpec
     from vllm.multimodal.registry import _ProcessorFactories
     from vllm.sequence import IntermediateTensors
-    from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
+    from vllm.v1.worker.encoder_cudagraph_defs import (
         EncoderCudaGraphCaptureInputs,
         EncoderCudaGraphConfig,
         EncoderCudaGraphReplayBuffers,

@@ -1733,7 +1733,7 @@ class Qwen3VLForConditionalGeneration(
     # -- SupportsEncoderCudaGraph protocol methods --

     def get_encoder_cudagraph_config(self):
-        from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
+        from vllm.v1.worker.encoder_cudagraph_defs import (
             EncoderCudaGraphConfig,
         )

@@ -1818,7 +1818,7 @@ class Qwen3VLForConditionalGeneration(
         device: torch.device,
         dtype: torch.dtype,
     ):
-        from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
+        from vllm.v1.worker.encoder_cudagraph_defs import (
             EncoderCudaGraphCaptureInputs,
         )

@@ -1872,7 +1872,7 @@ class Qwen3VLForConditionalGeneration(
         mm_kwargs: dict[str, Any],
         max_batch_size: int,
     ):
-        from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
+        from vllm.v1.worker.encoder_cudagraph_defs import (
             EncoderCudaGraphReplayBuffers,
         )

@@ -16,7 +16,7 @@ from vllm.distributed import (
 from vllm.logger import init_logger
 from vllm.model_executor.models.interfaces import SupportsEncoderCudaGraph
 from vllm.model_executor.models.vision import get_load_balance_assignment
-from vllm.v1.worker.gpu.mm.encoder_cudagraph_defs import (
+from vllm.v1.worker.encoder_cudagraph_defs import (
     EncoderCudaGraphConfig,
 )

@@ -211,7 +211,7 @@ from .utils import (
 if TYPE_CHECKING:
     from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
     from vllm.v1.spec_decode.ngram_proposer import NgramProposer
-    from vllm.v1.worker.gpu.mm.encoder_cudagraph import EncoderCudaGraphManager
+    from vllm.v1.worker.encoder_cudagraph import EncoderCudaGraphManager

 logger = init_logger(__name__)

@@ -5988,7 +5988,7 @@ class GPUModelRunner(
         SupportsEncoderCudaGraph,
         supports_encoder_cudagraph,
     )
-    from vllm.v1.worker.gpu.mm.encoder_cudagraph import (
+    from vllm.v1.worker.encoder_cudagraph import (
         EncoderCudaGraphManager,
     )

Reference in New Issue
Block a user