kernels/moe test pruning (#27053)

Signed-off-by: Fardin Hoque <kfhfar@amazon.com>
Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com>
This commit is contained in:
Fardin Hoque
2025-10-29 21:10:34 -07:00
committed by GitHub
parent 17d055f527
commit b8c48c5d72
13 changed files with 34 additions and 56 deletions

View File

@@ -295,6 +295,8 @@ def test_modular_kernel_combinations_singlegpu(
world_size: int,
pytestconfig,
):
"""Note: float8_e4m3fn is not supported on CUDA architecture < 89,
and those tests will be skipped on unsupported hardware."""
config = Config(
Ms=Ms,
K=k,
@@ -309,6 +311,12 @@ def test_modular_kernel_combinations_singlegpu(
world_size=world_size,
)
if (
quant_config is not None and quant_config.quant_dtype == torch.float8_e4m3fn
) and not current_platform.has_device_capability(89):
pytest.skip(
"Triton limitation: fp8e4nv data type is not supported on CUDA arch < 89"
)
verbosity = pytestconfig.getoption("verbose")
run(config, verbosity > 0)