kernels/moe test pruning (#27053)
Signed-off-by: Fardin Hoque <kfhfar@amazon.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
This commit is contained in:
@@ -295,6 +295,8 @@ def test_modular_kernel_combinations_singlegpu(
|
||||
world_size: int,
|
||||
pytestconfig,
|
||||
):
|
||||
"""Note: float8_e4m3fn is not supported on CUDA architecture < 89,
|
||||
and those tests will be skipped on unsupported hardware."""
|
||||
config = Config(
|
||||
Ms=Ms,
|
||||
K=k,
|
||||
@@ -309,6 +311,12 @@ def test_modular_kernel_combinations_singlegpu(
|
||||
world_size=world_size,
|
||||
)
|
||||
|
||||
if (
|
||||
quant_config is not None and quant_config.quant_dtype == torch.float8_e4m3fn
|
||||
) and not current_platform.has_device_capability(89):
|
||||
pytest.skip(
|
||||
"Triton limitation: fp8e4nv data type is not supported on CUDA arch < 89"
|
||||
)
|
||||
verbosity = pytestconfig.getoption("verbose")
|
||||
run(config, verbosity > 0)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user