[Bugfix][Build/CI] Fix sparse CUTLASS compilation on CUDA [12.0, 12.2) (#11311)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
Tyler Michael Smith
2024-12-18 21:43:30 -05:00
committed by GitHub
parent fdea8ec167
commit 5a9da2e6e9
12 changed files with 89 additions and 20 deletions

View File

@@ -8,6 +8,8 @@ import pytest
import torch
from vllm import _custom_ops as ops
from vllm.model_executor.layers.quantization.utils.w8a8_utils import (
sparse_cutlass_supported)
from vllm.platforms import current_platform
CUDA_DEVICES = [
@@ -102,10 +104,11 @@ def baseline_scaled_mm(a: torch.Tensor,
return output
-@pytest.mark.skipif(not current_platform.has_device_capability(90),
+@pytest.mark.skipif(not sparse_cutlass_supported(),
reason="Sparse FP8 is not yet supported on this GPU type.")
# Test working with a subset of A and B for sparse matmul
def test_cutlass_sparse_subset():
big_m = 1024
m, n, k = 512, 512, 512