[Bugfix] Enable Triton MoE for FP8 per-tensor dynamic (#33300)
Signed-off-by: mgoin <mgoin64@gmail.com>
(cherry picked from commit bfb9bdaf3f)
This commit is contained in:
@@ -927,6 +927,7 @@ class BatchedTritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
SUPPORTED_W_A_FP8 = [
(kFp8Static128BlockSym, kFp8Dynamic128Sym),
(kFp8StaticChannelSym, kFp8DynamicTokenSym),
(kFp8StaticTensorSym, kFp8DynamicTokenSym),
(kFp8StaticTensorSym, kFp8StaticTensorSym),
(kFp8StaticTensorSym, kFp8DynamicTensorSym),
]
@@ -45,6 +45,7 @@ from vllm.model_executor.layers.quantization.utils.ocp_mx_utils import OCP_MX_Sc
from vllm.model_executor.layers.quantization.utils.quant_utils import (
QuantKey,
kFp8Dynamic128Sym,
kFp8DynamicTensorSym,
kFp8DynamicTokenSym,
kFp8Static128BlockSym,
kFp8StaticChannelSym,
@@ -1942,6 +1943,7 @@ class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute):
(kFp8StaticChannelSym, kFp8DynamicTokenSym),
(kFp8StaticTensorSym, kFp8DynamicTokenSym),
(kFp8StaticTensorSym, kFp8StaticTensorSym),
(kFp8StaticTensorSym, kFp8DynamicTensorSym),
]
return (weight_key, activation_key) in SUPPORTED_W_A
Reference in New Issue
Block a user