[Bugfix][Misc] Use TritonPlaceholderModule to defensively import triton (#15099)

Signed-off-by: Mengqing Cao <cmq0113@163.com>
Mengqing Cao
2025-04-25 13:51:02 +08:00
committed by GitHub
parent 5aa6efb9a5
commit 2f54045508
5 changed files with 53 additions and 6 deletions


@@ -17,8 +17,14 @@ from torch.utils.benchmark import Measurement as TMeasurement
 from utils import ArgPool, Bench, CudaGraphBenchParams
 from weight_shapes import WEIGHT_SHAPES
 
-from vllm.lora.ops.triton_ops import LoRAKernelMeta, lora_expand, lora_shrink
-from vllm.lora.ops.triton_ops.utils import _LORA_A_PTR_DICT, _LORA_B_PTR_DICT
+from vllm.triton_utils import HAS_TRITON
+
+if HAS_TRITON:
+    from vllm.lora.ops.triton_ops import (LoRAKernelMeta, lora_expand,
+                                          lora_shrink)
+    from vllm.lora.ops.triton_ops.utils import (_LORA_A_PTR_DICT,
+                                                _LORA_B_PTR_DICT)
+
 from vllm.utils import FlexibleArgumentParser
 
 DEFAULT_MODELS = list(WEIGHT_SHAPES.keys())
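
For context, the TritonPlaceholderModule named in the commit title refers to registering a stand-in module so that a module-level `import triton` does not fail on platforms without a triton wheel, while `HAS_TRITON` gates code that would actually run kernels. Below is a minimal sketch of that defensive-import pattern, not vLLM's exact implementation; the placeholder's attributes (`jit`, `language`) are illustrative assumptions.

import sys
import types
from importlib.util import find_spec

# True only when a real triton installation is importable.
HAS_TRITON = find_spec("triton") is not None


class TritonPlaceholderModule(types.ModuleType):
    """Stand-in for `triton` so `import triton` succeeds even when no
    triton wheel is installed (sketch; the attributes here are
    assumptions, not vLLM's exact surface)."""

    def __init__(self):
        super().__init__("triton")
        # A no-op `jit` lets `@triton.jit`-decorated kernels be defined
        # without error; calling them is still guarded by HAS_TRITON.
        self.jit = self._no_op_decorator
        self.language = types.ModuleType("triton.language")

    @staticmethod
    def _no_op_decorator(func=None, **kwargs):
        # Supports both @triton.jit and @triton.jit(...) usage.
        if func is None:
            return lambda f: f
        return func


if not HAS_TRITON and "triton" not in sys.modules:
    sys.modules["triton"] = TritonPlaceholderModule()
    sys.modules["triton.language"] = sys.modules["triton"].language

With a placeholder registered at import time, call sites such as the benchmark above only need the `if HAS_TRITON:` guard around symbols whose import would otherwise pull in real triton kernels.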