[Bugfix] Disable RoutingMethodType.[Renormalize,RenormalizeNaive] for TRTLLM per-tensor FP8 MoE (#33620)
Signed-off-by: mgoin <mgoin64@gmail.com>
This commit is contained in:
@@ -72,8 +72,10 @@ def _supports_routing_method(
|
|||||||
# NOTE(dbari): as above, potentially allow others here.
|
# NOTE(dbari): as above, potentially allow others here.
|
||||||
return routing_method in [
|
return routing_method in [
|
||||||
RoutingMethodType.Llama4,
|
RoutingMethodType.Llama4,
|
||||||
RoutingMethodType.Renormalize,
|
# NOTE(mgoin): Disabled to investigate accuracy issues.
|
||||||
RoutingMethodType.RenormalizeNaive,
|
# See https://github.com/vllm-project/vllm/issues/33532
|
||||||
|
# RoutingMethodType.Renormalize,
|
||||||
|
# RoutingMethodType.RenormalizeNaive,
|
||||||
]
|
]
|
||||||
else:
|
else:
|
||||||
raise ValueError("Unsupported quantization scheme.")
|
raise ValueError("Unsupported quantization scheme.")
|
||||||
|
|||||||
Reference in New Issue
Block a user