[Bugfix] Allow vllm to still work if triton is not installed. (#6786)

Signed-off-by: Thomas Parnell <tpa@zurich.ibm.com>
Author: Thomas Parnell
Date:   2024-07-29 23:51:27 +02:00
Committer: GitHub
Parent: 7f8d612d24
Commit: 9a7e2d0534
13 changed files with 65 additions and 37 deletions

@@ -6,8 +6,7 @@ from torch.nn.parameter import Parameter
 from vllm import _custom_ops as ops
 from vllm.logger import init_logger
-from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEMethodBase,
-                                                  fused_moe)
+from vllm.model_executor.layers.fused_moe import FusedMoE, FusedMoEMethodBase
 from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase,
                                                UnquantizedLinearMethod)
 from vllm.model_executor.layers.quantization.base_config import (
@@ -404,6 +403,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
               num_expert_group: Optional[int] = None,
               topk_group: Optional[int] = None) -> torch.Tensor:
+        from vllm.model_executor.layers.fused_moe import fused_moe
         return fused_moe(x,
                          layer.w13_weight,
                          layer.w2_weight,
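
The change is the standard deferred-import pattern: fused_moe is a triton-backed kernel, so importing it at module scope makes importing vllm fail outright on machines without triton installed. Moving the import into the method body postpones resolution until the triton-dependent path actually runs. A minimal, self-contained sketch of the pattern (module and function names below are illustrative, not vllm's actual API):

# lazy_triton.py -- illustrative sketch of the deferred-import pattern
# applied in this commit; names here are hypothetical, not vllm's.

def triton_path(x):
    # Importing inside the function keeps "import lazy_triton" working on
    # systems without triton; the ImportError surfaces only if this
    # triton-backed path is actually executed.
    import triton  # optional dependency, resolved lazily
    return f"triton {triton.__version__}: {x}"

def cpu_path(x):
    # Code that does not need triton keeps working regardless.
    return x

if __name__ == "__main__":
    print(cpu_path(42))  # succeeds even when triton is not installed

The trade-off is a per-call module lookup (cheap after the first call, since Python caches imports in sys.modules) in exchange for keeping the package importable without the optional dependency.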